VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@94698

Last change on this file since 94698 was 94698, checked in by vboxsync, 3 years ago:

VMM/IEM: Build fix. bugref:9898
/* $Id: IEMAllAImplC.cpp 94698 2022-04-23 00:12:02Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <iprt/errcore.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>
#include <iprt/uint256.h>

RT_C_DECLS_BEGIN
#include <softfloat.h>
RT_C_DECLS_END


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif
/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
#ifdef IEM_WITH_ASSEMBLY
# undef IEM_WITHOUT_ASSEMBLY
#endif

/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from an OF calculation result.
 *
 * These are typically used by concatenating with a bit count.  The problem is
 * that 8-bit values need shifting in the other direction than the others.
 */
#define X86_EFL_GET_OF_8(a_uValue)  (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)

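/* Illustrative sketch, not part of the build: quick sanity checks of the
   helpers above for 8-bit results (SF lives in EFLAGS bit 7, ZF in bit 6,
   OF in bit 11).  The function name is made up for the example. */
#if 0
static void iemEflCalcExample(void)
{
    Assert(X86_EFL_CALC_SF(UINT8_C(0x80), 8) == X86_EFL_SF); /* top bit set -> SF */
    Assert(X86_EFL_CALC_ZF(UINT8_C(0x80)) == 0);
    Assert(X86_EFL_CALC_SF(UINT8_C(0x00), 8) == 0);
    Assert(X86_EFL_CALC_ZF(UINT8_C(0x00)) == X86_EFL_ZF);    /* zero -> ZF */
    Assert(X86_EFL_GET_OF_8(UINT8_C(0x80)) == X86_EFL_OF);   /* bit 7 -> bit 11 */
}
#endif
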
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF calc).
 * @param   a_uSrc          The source value (for AF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_uSrcOf        The a_uSrc value to use for overflow calculation.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        \
        /* Overflow during ADDition happens when both inputs have the same sign \
           bit value and the result has a different sign bit value. \
           \
           Since subtraction can be rewritten as addition (2 - 1 == 2 + -1), it \
           follows that for SUBtraction the sign bit must differ between the two \
           inputs and the result's sign bit must differ from the first input's. \
           Note! Must xor with the sign bit to convert, not do (0 - a_uSrc). \
           \
           See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(  (  ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
                                                      & RT_BIT_64(a_cBitsWidth - 1)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
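
/* Worked example of the overflow rule above (illustrative only): for the
   8-bit ADD 0x7f + 0x01 both inputs have sign bit 0 while the result 0x80
   has sign bit 1, so OF must be set.  Tracing the expression: ~(0x7f ^ 0x01)
   has bit 7 set, (0x80 ^ 0x7f) has bit 7 set too, so the AND yields 0x80 and
   X86_EFL_GET_OF_8 shifts that into the OF position (bit 11).  For
   0x7f + 0x80 the inputs differ in sign, ~(0x7f ^ 0x80) clears bit 7, and
   OF correctly stays zero. */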

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations.  AF on the other hand
 * is undefined.  We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
/**
 * Parity calculation table.
 *
 * This is also used by iemAllAImpl.asm.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                b & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};
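
/* Reader's note (illustrative): PF is architecturally computed from the low
   8 bits of a result only, which is why the flag macros above always index
   this table with "(a_uResult) & 0xff" regardless of operand width.  E.g. a
   32-bit result of 0x12345600 yields g_afParity[0x00] == X86_EFL_PF. */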

/* for clang: */
extern const RTFLOAT80U g_ar80Zero[];
extern const RTFLOAT80U g_ar80One[];
extern const RTFLOAT80U g_r80Indefinite;
extern const RTFLOAT80U g_ar80Infinity[];
extern const RTFLOAT128U g_r128Ln2;
extern const RTUINT128U g_u128Ln2Mantissa;
extern const RTUINT128U g_u128Ln2MantissaIntel;
extern const RTFLOAT128U g_ar128F2xm1HornerConsts[];

/** Zero values (indexed by fSign). */
RTFLOAT80U const g_ar80Zero[] = { RTFLOAT80U_INIT_ZERO(0), RTFLOAT80U_INIT_ZERO(1) };

/** One values (indexed by fSign). */
RTFLOAT80U const g_ar80One[] =
{ RTFLOAT80U_INIT(0, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS), RTFLOAT80U_INIT(1, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS) };

/** Indefinite (negative). */
RTFLOAT80U const g_r80Indefinite = RTFLOAT80U_INIT_INDEFINITE(1);

/** Infinities (indexed by fSign). */
RTFLOAT80U const g_ar80Infinity[] = { RTFLOAT80U_INIT_INF(0), RTFLOAT80U_INIT_INF(1) };

#if 0
/** 128-bit floating point constant: 2.0 */
const RTFLOAT128U g_r128Two = RTFLOAT128U_INIT_C(0, 0, 0, RTFLOAT128U_EXP_BIAS + 1);
#endif


/* The next section is generated by tools/IEMGenFpuConstants: */

/** The ln2 constant as 128-bit floating point value.
 * base-10: 6.93147180559945309417232121458176575e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f30@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100110e-1 */
//const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf35793c7673007e6, 0x3ffe);
const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf357900000000000, 0x3ffe);
/** High precision ln2 value.
 * base-10: 6.931471805599453094172321214581765680747e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f2f6af0@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100101111011010101111e-1 */
const RTUINT128U g_u128Ln2Mantissa = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc9e3b39803f2f6af);
/** High precision ln2 value, compatible with f2xm1 results on intel 10980XE.
 * base-10: 6.931471805599453094151379470289064954613e-1
 * base-16: b.17217f7d1cf79abc0000000000000000@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100000000000000000000000000000000000000000000000000000000000000e-1 */
const RTUINT128U g_u128Ln2MantissaIntel = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc000000000000000);

/** Horner constants for f2xm1 */
const RTFLOAT128U g_ar128F2xm1HornerConsts[] =
{
    /* a0
     * base-10: 1.00000000000000000000000000000000000e0
     * base-16: 1.0000000000000000000000000000@0
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e0 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3fff),
    /* a1
     * base-10: 5.00000000000000000000000000000000000e-1
     * base-16: 8.0000000000000000000000000000@-1
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-1 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3ffe),
    /* a2
     * base-10: 1.66666666666666666666666666666666658e-1
     * base-16: 2.aaaaaaaaaaaaaaaaaaaaaaaaaaaa@-1
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-3 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffc),
    /* a3
     * base-10: 4.16666666666666666666666666666666646e-2
     * base-16: a.aaaaaaaaaaaaaaaaaaaaaaaaaaa8@-2
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-5 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffa),
    /* a4
     * base-10: 8.33333333333333333333333333333333323e-3
     * base-16: 2.2222222222222222222222222222@-2
     * base-2 : 1.0001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001e-7 */
    RTFLOAT128U_INIT_C(0, 0x111111111111, 0x1111111111111111, 0x3ff8),
    /* a5
     * base-10: 1.38888888888888888888888888888888874e-3
     * base-16: 5.b05b05b05b05b05b05b05b05b058@-3
     * base-2 : 1.0110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110e-10 */
    RTFLOAT128U_INIT_C(0, 0x6c16c16c16c1, 0x6c16c16c16c16c16, 0x3ff5),
    /* a6
     * base-10: 1.98412698412698412698412698412698412e-4
     * base-16: d.00d00d00d00d00d00d00d00d00d0@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-13 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3ff2),
    /* a7
     * base-10: 2.48015873015873015873015873015873015e-5
     * base-16: 1.a01a01a01a01a01a01a01a01a01a@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-16 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3fef),
    /* a8
     * base-10: 2.75573192239858906525573192239858902e-6
     * base-16: 2.e3bc74aad8e671f5583911ca002e@-5
     * base-2 : 1.0111000111011110001110100101010101101100011100110011100011111010101011000001110010001000111001010000000000010111e-19 */
    RTFLOAT128U_INIT_C(0, 0x71de3a556c73, 0x38faac1c88e50017, 0x3fec),
    /* a9
     * base-10: 2.75573192239858906525573192239858865e-7
     * base-16: 4.9f93edde27d71cbbc05b4fa999e0@-6
     * base-2 : 1.0010011111100100111110110111011110001001111101011100011100101110111100000001011011010011111010100110011001111000e-22 */
    RTFLOAT128U_INIT_C(0, 0x27e4fb7789f5, 0xc72ef016d3ea6678, 0x3fe9),
    /* a10
     * base-10: 2.50521083854417187750521083854417184e-8
     * base-16: 6.b99159fd5138e3f9d1f92e0df71c@-7
     * base-2 : 1.1010111001100100010101100111111101010100010011100011100011111110011101000111111001001011100000110111110111000111e-26 */
    RTFLOAT128U_INIT_C(0, 0xae64567f544e, 0x38fe747e4b837dc7, 0x3fe5),
    /* a11
     * base-10: 2.08767569878680989792100903212014296e-9
     * base-16: 8.f76c77fc6c4bdaa26d4c3d67f420@-8
     * base-2 : 1.0001111011101101100011101111111110001101100010010111101101010100010011011010100110000111101011001111111010000100e-29 */
    RTFLOAT128U_INIT_C(0, 0x1eed8eff8d89, 0x7b544da987acfe84, 0x3fe2),
    /* a12
     * base-10: 1.60590438368216145993923771701549472e-10
     * base-16: b.092309d43684be51c198e91d7b40@-9
     * base-2 : 1.0110000100100100011000010011101010000110110100001001011111001010001110000011001100011101001000111010111101101000e-33 */
    RTFLOAT128U_INIT_C(0, 0x6124613a86d0, 0x97ca38331d23af68, 0x3fde),
    /* a13
     * base-10: 1.14707455977297247138516979786821043e-11
     * base-16: c.9cba54603e4e905d6f8a2efd1f20@-10
     * base-2 : 1.1001001110010111010010101000110000000111110010011101001000001011101011011111000101000101110111111010001111100100e-37 */
    RTFLOAT128U_INIT_C(0, 0x93974a8c07c9, 0xd20badf145dfa3e4, 0x3fda),
    /* a14
     * base-10: 7.64716373181981647590113198578806964e-13
     * base-16: d.73f9f399dc0f88ec32b587746578@-11
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-41 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd6),
    /* a15
     * base-10: 4.77947733238738529743820749111754352e-14
     * base-16: d.73f9f399dc0f88ec32b587746578@-12
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-45 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd2),
    /* a16
     * base-10: 2.81145725434552076319894558301031970e-15
     * base-16: c.a963b81856a53593028cbbb8d7f8@-13
     * base-2 : 1.1001010100101100011101110000001100001010110101001010011010110010011000000101000110010111011101110001101011111111e-49 */
    RTFLOAT128U_INIT_C(0, 0x952c77030ad4, 0xa6b2605197771aff, 0x3fce),
    /* a17
     * base-10: 1.56192069685862264622163643500573321e-16
     * base-16: b.413c31dcbecbbdd8024435161550@-14
     * base-2 : 1.0110100000100111100001100011101110010111110110010111011110111011000000000100100010000110101000101100001010101010e-53 */
    RTFLOAT128U_INIT_C(0, 0x6827863b97d9, 0x77bb004886a2c2aa, 0x3fca),
    /* a18
     * base-10: 8.22063524662432971695598123687227980e-18
     * base-16: 9.7a4da340a0ab92650f61dbdcb3a0@-15
     * base-2 : 1.0010111101001001101101000110100000010100000101010111001001001100101000011110110000111011011110111001011001110100e-57 */
    RTFLOAT128U_INIT_C(0, 0x2f49b4681415, 0x724ca1ec3b7b9674, 0x3fc6),
    /* a19
     * base-10: 4.11031762331216485847799061843614006e-19
     * base-16: 7.950ae900808941ea72b4afe3c2e8@-16
     * base-2 : 1.1110010101000010101110100100000000100000001000100101000001111010100111001010110100101011111110001111000010111010e-62 */
    RTFLOAT128U_INIT_C(0, 0xe542ba402022, 0x507a9cad2bf8f0ba, 0x3fc1),
    /* a20
     * base-10: 7.04351638180413298434020229233492164e-20
     * base-16: 1.4c9ee35db1d1f3c946fdcd48fd88@-16
     * base-2 : 1.0100110010011110111000110101110110110001110100011111001111001001010001101111110111001101010010001111110110001000e-64 */
    RTFLOAT128U_INIT_C(0, 0x4c9ee35db1d1, 0xf3c946fdcd48fd88, 0x3fbf),
    /* a21
     * base-10: 5.81527769640186708776361513365257702e-20
     * base-16: 1.129e64bff606a2b9c9fc624481cd@-16
     * base-2 : 1.0001001010011110011001001011111111110110000001101010001010111001110010011111110001100010010001001000000111001101e-64 */
    RTFLOAT128U_INIT_C(0, 0x129e64bff606, 0xa2b9c9fc624481cd, 0x3fbf),
};


/*
 * There are a few 64-bit on 32-bit things we'd rather do in C.  Actually, doing
 * it all in C is probably safer atm., optimize what's necessary later, maybe.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
}
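
/* Carry-detection note (illustrative): with unsigned wrap-around arithmetic
   an ADD carried out of the top bit exactly when the result is smaller than
   either input, hence the "uResult < uDst" CF expression above.  E.g. 8-bit
   0xff + 0x02 wraps to 0x01, and 0x01 < 0xff, so CF = 1. */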

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
    }
}
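
/* Why "uResult <= uDst" in the CF-set branch (illustrative): with the carry
   in we effectively add uSrc + 1, so even a result equal to uDst means the
   addition wrapped all the way around (uSrc was the type's maximum value).
   E.g. 8-bit ADC 0x10 + 0xff + CF yields 0x10 again, with a carry out. */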

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
}
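
/* Overflow-source trick (illustrative): SUB reuses the ADD overflow rule by
   flipping the sign bit of uSrc via "uSrc ^ RT_BIT_64(63)" rather than
   negating it, since (0 - uSrc) would mishandle the most negative value.
   Borrow (CF) falls out of the simple unsigned compare "uDst < uSrc". */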

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation (width-generic). */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)


#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }
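
/* Expansion sketch (illustrative, what EMIT_LOCKED_BIN_OP(add, 64) yields
   modulo formatting): the plain worker is re-run on private copies of the
   destination and incoming EFLAGS until the compare-and-swap manages to
   publish the result atomically. */
#if 0
IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uOld = ASMAtomicUoReadU64(puDst);
    uint64_t uTmp;
    uint32_t fEflTmp;
    do
    {
        uTmp    = uOld;                            /* work on a private copy of the destination */
        fEflTmp = *pfEFlags;                       /* and of the incoming flags                 */
        iemAImpl_add_u64(&uTmp, uSrc, &fEflTmp);   /* the ordinary non-atomic worker            */
    } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld)); /* retry if *puDst changed      */
    *pfEFlags = fEflTmp;
}
#endif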

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}
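
/* Usage note (illustrative): BT only reports the selected bit in CF and
   leaves the destination untouched.  E.g. with *puDst == 0x10 and uSrc == 4
   the bit is set, so CF becomes 1 and *puDst is still 0x10 afterwards.  The
   callers are expected to have reduced uSrc modulo the operand width
   already, hence the Assert(). */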

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  However, it seems they're
             not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF.  We set them as after a
             logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Helpers for BSR and BSF.
 *
 * Note! "undefined" flags: OF, SF, AF, PF, CF.
 *       Intel behavior modelled on 10980xe, AMD on 3990X.  Other
 *       microarchitectures may produce different results (see
 *       https://www.sandpile.org/x86/flags.htm), but we restrict ourselves
 *       to emulating these recent microarchitectures.
 */
#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, a_iBit) do { \
        unsigned iBit = (a_iBit); \
        uint32_t fEfl = *pfEFlags & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
        if (iBit) \
        { \
            *puDst = --iBit; \
            fEfl  |= g_afParity[iBit]; \
        } \
        else \
            fEfl  |= X86_EFL_ZF | X86_EFL_PF; \
        *pfEFlags = fEfl; \
    } while (0)
#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, a_iBit) do { \
        unsigned const iBit = (a_iBit); \
        if (iBit) \
        { \
            *puDst = iBit - 1; \
            *pfEFlags &= ~X86_EFL_ZF; \
        } \
        else \
            *pfEFlags |= X86_EFL_ZF; \
    } while (0)
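
/* Behavioral sketch (illustrative): for BSF with uSrc == 0x18 the helpers
   receive a_iBit == 4 (the 1-based index of the least significant set bit),
   so both variants store 3 in *puDst and clear ZF.  The Intel variant also
   clears OF/SF/AF/CF and recomputes PF from the result, whereas the AMD
   variant leaves all flags other than ZF exactly as they were. */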


/*
 * BSF - first (least significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSR - last (most significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
{
#if ARCH_BITS >= 64
    *puReg = ASMAtomicXchgU64(puMem, *puReg);
#else
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
#endif
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
{
    *puReg = ASMAtomicXchgU32(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
{
    *puReg = ASMAtomicXchgU16(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
{
    *puReg = ASMAtomicXchgU8(puMem, *puReg);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/* Unlocked variants for fDisregardLock mode: */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
{
    uint64_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
{
    uint32_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
{
    uint16_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
{
    uint8_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */
#define EMIT_XADD(a_cBitsWidth, a_Type) \
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uDst    = *puDst; \
    a_Type uResult = uDst; \
    iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
    *puDst = uResult; \
    *puReg = uDst; \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type   uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
    a_Type   uResult; \
    uint32_t fEflTmp; \
    do \
    { \
        uResult = uOld; \
        fEflTmp = *pfEFlags; \
        iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
    } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
    *puReg    = uOld; \
    *pfEFlags = fEflTmp; \
}
EMIT_XADD(64, uint64_t)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_XADD(32, uint32_t)
EMIT_XADD(16, uint16_t)
EMIT_XADD(8, uint8_t)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
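
/* Semantics sketch (illustrative): XADD is an exchange-and-add, so after
   iemAImpl_xadd_u8(&uMem, &uReg, &fEfl) with uMem == 1 and uReg == 2 the
   memory operand holds 3 and the register holds the old memory value 1,
   with EFLAGS updated exactly as for the corresponding ADD. */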

#endif

/*
 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
 *
 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
 *       instructions are emulated as locked.
 */
#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    uint8_t uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
        Assert(*puAl == uOld);
    iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    uint16_t uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
        Assert(*puAx == uOld);
    iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    uint32_t uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
        Assert(*puEax == uOld);
    iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
        Assert(*puRax == uOld);
    iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
1647
1648
1649# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
1650IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1651 uint32_t *pEFlags))
1652{
1653# ifdef VBOX_STRICT
1654 RTUINT128U const uOld = *pu128RaxRdx;
1655# endif
1656# if defined(RT_ARCH_AMD64)
1657 if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
1658 &pu128RaxRdx->u))
1659# else
1660 if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
1661# endif
1662 {
1663 Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
1664 *pEFlags |= X86_EFL_ZF;
1665 }
1666 else
1667 *pEFlags &= ~X86_EFL_ZF;
1668}
1669# endif
1670
1671#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1672
1673#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
1674IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1675 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1676{
1677 RTUINT128U u128Tmp = *pu128Dst;
1678 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1679 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1680 {
1681 *pu128Dst = *pu128RbxRcx;
1682 *pEFlags |= X86_EFL_ZF;
1683 }
1684 else
1685 {
1686 *pu128RaxRdx = u128Tmp;
1687 *pEFlags &= ~X86_EFL_ZF;
1688 }
1689}
1690#endif /* !RT_ARCH_ARM64 */
1691
1692#if defined(IEM_WITHOUT_ASSEMBLY)
1693
1694/* Unlocked versions mapped to the locked ones: */
1695
1696IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1697{
1698 iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
1699}
1700
1701
1702IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1703{
1704 iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
1705}
1706
1707
1708IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1709{
1710 iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
1711}
1712
1713
1714# if ARCH_BITS == 32
1715IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1716{
1717 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
1718}
1719# else
1720IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1721{
1722 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
1723}
1724# endif
1725
1726
1727IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
1728{
1729 iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
1730}
1731
1732
1733IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1734 uint32_t *pEFlags))
1735{
1736 iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
1737}
1738
1739#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1740
1741#if (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) \
1742 && !defined(DOXYGEN_RUNNING) /* Doxygen has some grokking issues here and ends up mixing up the input. Not worth tracking down now. */
1743
1744/*
1745 * MUL, IMUL, DIV and IDIV helpers.
1746 *
1747 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
1748 * division step so we can select between using C operators and
1749 * RTUInt128DivRem/RTUInt128MulU64ByU64.
1750 *
1751 * - The U8 versions return the output in AL + AH instead of xDX + xAX, with the
1752 *   IDIV/DIV taking all the input in AX too. This means we have to abstract some
1753 * input loads and the result storing.
1754 */
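
/* A hypothetical model of the U8 register layout being abstracted
   (illustration only; overflow/#DE handling omitted): DIV r/m8 divides all
   of AX, putting the quotient in AL and the remainder in AH, which is what
   the DIV_LOAD_U8 and DIV_STORE_U8 macros below encode. */
#if 0
static void div8Model(uint16_t *puAX, uint8_t uDivisor)
{
    uint16_t const uDividend = *puAX;                   /* the whole of AX is the dividend */
    *puAX = (uint8_t)(uDividend / uDivisor)             /* AL = quotient */
          | (uint16_t)((uDividend % uDivisor) << 8);    /* AH = remainder */
}
#endif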
1755
1756DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
1757{
1758# ifdef __GNUC__ /* GCC can be really annoying with maybe-uninitialized warnings in this function. */
1759 pQuotient->s.Lo = 0;
1760 pQuotient->s.Hi = 0;
1761# endif
1762 RTUINT128U Divisor;
1763 Divisor.s.Lo = u64Divisor;
1764 Divisor.s.Hi = 0;
1765 RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
1766}
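
/* A usage sketch for the helper above (hypothetical, compiled out):
   dividing 2^64 by 3 has to go through the 128-bit path since the dividend
   doesn't fit in 64 bits. */
#if 0
static void divRem128Sketch(void)
{
    RTUINT128U Dividend, Quotient, Remainder;
    Dividend.s.Hi = 1;                                  /* Dividend = 2^64 */
    Dividend.s.Lo = 0;
    RTUInt128DivRemByU64(&Quotient, &Remainder, &Dividend, 3);
    /* 2^64 = 3 * 0x5555555555555555 + 1 */
    Assert(Quotient.s.Hi == 0 && Quotient.s.Lo == UINT64_C(0x5555555555555555) && Remainder.s.Lo == 1);
}
#endif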
1767
1768# define DIV_LOAD(a_Dividend) \
1769 a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
1770# define DIV_LOAD_U8(a_Dividend) \
1771 a_Dividend.u = *puAX
1772
1773# define DIV_STORE(a_Quotient, a_uRemainder)    *puA = (a_Quotient), *puD = (a_uRemainder)
1774# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
1775
1776# define MUL_LOAD_F1() *puA
1777# define MUL_LOAD_F1_U8() ((uint8_t)*puAX)
1778
1779# define MUL_STORE(a_Result) *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
1780# define MUL_STORE_U8(a_Result) *puAX = a_Result.u
1781
1782# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
1783 (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
1784# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
1785 RTUInt128AssignNeg(&(a_Value))
1786
1787# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1788 (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
1789# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1790 RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);
1791
1792# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1793 a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
1794 a_Remainder.u = (a_Dividend).u % (a_uDivisor)
1795# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1796 RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
1797
1798
1799/*
1800 * MUL
1801 */
1802# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
1803IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
1804{ \
1805 RTUINT ## a_cBitsWidth2x ## U Result; \
1806 a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
1807 a_fnStore(Result); \
1808 \
1809 /* Calc EFLAGS: */ \
1810 uint32_t fEfl = *pfEFlags; \
1811 if (a_fIntelFlags) \
1812 { /* Intel: 6700K and 10980XE behavior */ \
1813 fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
1814 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1815 fEfl |= X86_EFL_SF; \
1816 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1817 if (Result.s.Hi != 0) \
1818 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1819 } \
1820 else \
1821 { /* AMD: 3990X */ \
1822 if (Result.s.Hi != 0) \
1823 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1824 else \
1825 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
1826 } \
1827 *pfEFlags = fEfl; \
1828 return 0; \
1829} \
1830
1831# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
1832 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
1833 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
1834 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \
1835
1836# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1837EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1838 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
1839# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1840EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1841 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1842EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1843 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1844EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
1845 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
1846# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1847# endif /* !DOXYGEN_RUNNING */
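
/* A worked example for the emitters above (hypothetical test snippet,
   compiled out): MUL r/m8 with AL=0xFF and a factor of 0xFF. */
#if 0
static void mulU8Sketch(void)
{
    uint16_t uAX  = 0xFF;                   /* AL = 0xFF */
    uint32_t fEfl = 0;
    iemAImpl_mul_u8(&uAX, 0xFF, &fEfl);     /* AX = 0xFF * 0xFF = 0xFE01 */
    /* The upper half (AH) is non-zero, so both vendors set CF and OF: */
    Assert(uAX == 0xFE01 && (fEfl & (X86_EFL_CF | X86_EFL_OF)) == (X86_EFL_CF | X86_EFL_OF));
}
#endif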
1848
1849
1850/*
1851 * IMUL
1852 *
1853 * The SF, ZF, AF and PF flags are "undefined". AMD (3990X) leaves these
1854 * flags as-is, whereas Intel Skylake (6700K and 10980XE (Cascade Lake)) always
1855 * clears AF and ZF and calculates SF and PF from the lower half of the result.
1856 */
1857# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
1858 a_Suffix, a_fIntelFlags) \
1859IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
1860{ \
1861 RTUINT ## a_cBitsWidth2x ## U Result; \
1862 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
1863 \
1864 uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
1865 if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
1866 { \
1867 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1868 { \
1869 a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
1870 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1871 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1872 } \
1873 else \
1874 { \
1875 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1876 a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1877 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1878 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1879 a_fnNeg(Result, a_cBitsWidth2x); \
1880 } \
1881 } \
1882 else \
1883 { \
1884 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1885 { \
1886 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1887 a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
1888 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1889 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1890 a_fnNeg(Result, a_cBitsWidth2x); \
1891 } \
1892 else \
1893 { \
1894 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1895 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1896 a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1897 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1898 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1899 } \
1900 } \
1901 a_fnStore(Result); \
1902 \
1903 if (a_fIntelFlags) \
1904 { \
1905 fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
1906 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1907 fEfl |= X86_EFL_SF; \
1908 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1909 } \
1910 *pfEFlags = fEfl; \
1911 return 0; \
1912}
1913# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
1914 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
1915 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
1916 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)
1917
1918# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1919EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1920 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
1921# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1922EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1923 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1924EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1925 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1926EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
1927 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
1928# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1929# endif /* !DOXYGEN_RUNNING */
1930
1931
1932/*
1933 * IMUL with two operands is mapped onto the three-operand variant, ignoring
1934 * the high part of the product.
1935 */
1936# define EMIT_IMUL_TWO(a_cBits, a_uType) \
1937IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1938{ \
1939 a_uType uIgn; \
1940 iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
1941} \
1942\
1943IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1944{ \
1945 a_uType uIgn; \
1946 iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
1947} \
1948\
1949IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1950{ \
1951 a_uType uIgn; \
1952 iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
1953}
1954
1955EMIT_IMUL_TWO(64, uint64_t)
1956# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1957EMIT_IMUL_TWO(32, uint32_t)
1958EMIT_IMUL_TWO(16, uint16_t)
1959# endif
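
/* A usage sketch for the two-operand mapping (hypothetical, compiled out):
   the high half is discarded, so an overflowing product only shows up in
   CF/OF. */
#if 0
static void imulTwoU16Sketch(void)
{
    uint16_t uDst = 0x4000;
    uint32_t fEfl = 0;
    iemAImpl_imul_two_u16(&uDst, 4, &fEfl); /* 0x4000 * 4 = 0x10000, truncated to 0 */
    Assert(uDst == 0 && (fEfl & (X86_EFL_CF | X86_EFL_OF)) == (X86_EFL_CF | X86_EFL_OF));
}
#endif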
1960
1961
1962/*
1963 * DIV
1964 */
1965# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
1966 a_Suffix, a_fIntelFlags) \
1967IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
1968{ \
1969 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1970 a_fnLoad(Dividend); \
1971 if ( uDivisor != 0 \
1972 && Dividend.s.Hi < uDivisor) \
1973 { \
1974 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1975 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1976 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1977 \
1978        /* Calc EFLAGS: Intel 6700K and 10980XE leave them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
1979 if (!a_fIntelFlags) \
1980 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1981 return 0; \
1982 } \
1983 /* #DE */ \
1984 return -1; \
1985}
1986# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
1987 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
1988 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
1989 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)
1990
1991# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1992EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1993 DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1994# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1995EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1996 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1997EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1998 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1999EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2000 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
2001# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2002# endif /* !DOXYGEN_RUNNING */
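
/* The guard above rejects divide-by-zero as well as quotient overflow; a
   hypothetical snippet (compiled out) showing the overflow case: */
#if 0
static void divU16OverflowSketch(void)
{
    uint16_t uAX = 0x0000, uDX = 0x0001;    /* dividend DX:AX = 0x10000 */
    uint32_t fEfl = 0;
    /* The quotient 0x10000 doesn't fit in AX: Dividend.s.Hi (1) is not below
       the divisor (1), so -1 is returned and the caller raises #DE. */
    Assert(iemAImpl_div_u16(&uAX, &uDX, 1, &fEfl) == -1);
}
#endif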
2003
2004
2005/*
2006 * IDIV
2007 *
2008 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
2009 * set AF and clear PF, ZF and SF just like it does for DIV.
2010 *
2011 */
2012# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
2013 a_Suffix, a_fIntelFlags) \
2014IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
2015{ \
2016 /* Note! Skylake leaves all flags alone. */ \
2017 \
2018 /** @todo overflow checks */ \
2019 if (uDivisor != 0) \
2020 { \
2021 /* \
2022 * Convert to unsigned division. \
2023 */ \
2024 RTUINT ## a_cBitsWidth2x ## U Dividend; \
2025 a_fnLoad(Dividend); \
2026 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
2027 if (fSignedDividend) \
2028 a_fnNeg(Dividend, a_cBitsWidth2x); \
2029 \
2030 uint ## a_cBitsWidth ## _t uDivisorPositive; \
2031 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2032 uDivisorPositive = uDivisor; \
2033 else \
2034 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
2035 \
2036 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
2037 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
2038 \
2039 /* \
2040         * Set up the result, checking for overflows. \
2041 */ \
2042 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2043 { \
2044 if (!fSignedDividend) \
2045 { \
2046 /* Positive divisor, positive dividend => result positive. */ \
2047 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2048 { \
2049 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
2050 if (!a_fIntelFlags) \
2051 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2052 return 0; \
2053 } \
2054 } \
2055 else \
2056 { \
2057 /* Positive divisor, negative dividend => result negative. */ \
2058 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2059 { \
2060 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2061 if (!a_fIntelFlags) \
2062 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2063 return 0; \
2064 } \
2065 } \
2066 } \
2067 else \
2068 { \
2069 if (!fSignedDividend) \
2070 { \
2071 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
2072 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2073 { \
2074 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
2075 if (!a_fIntelFlags) \
2076 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2077 return 0; \
2078 } \
2079 } \
2080 else \
2081 { \
2082 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
2083 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2084 { \
2085 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2086 if (!a_fIntelFlags) \
2087 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2088 return 0; \
2089 } \
2090 } \
2091 } \
2092 } \
2093 /* #DE */ \
2094 return -1; \
2095}
2096# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
2097 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
2098 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
2099 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)
2100
2101# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
2102EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2103 DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
2104# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2105EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2106 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2107EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2108 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2109EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2110 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
2111# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2112# endif /* !DOXYGEN_RUNNING */
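
/* A worked example for the signed path above (hypothetical, compiled out):
   -7 / 2 with IDIV r/m8. The dividend is negated, divided unsigned, and the
   quotient/remainder are negated back, so the remainder's sign follows the
   dividend. */
#if 0
static void idivU8Sketch(void)
{
    uint16_t uAX  = (uint16_t)-7;           /* AX = 0xFFF9 */
    uint32_t fEfl = 0;
    Assert(iemAImpl_idiv_u8(&uAX, 2, &fEfl) == 0);
    Assert(uAX == 0xFFFD);                  /* AL = quotient -3 (0xFD), AH = remainder -1 (0xFF) */
}
#endif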
2113
2114#endif /* (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) && !defined(DOXYGEN_RUNNING) */
2115
2116
2117/*********************************************************************************************************************************
2118* Unary operations. *
2119*********************************************************************************************************************************/
2120#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2121
2122/** @def IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC
2123 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2124 *
2125 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2126 * borrowing in arithmetic loops on the Intel 8008).
2127 *
2128 * @returns Status bits.
2129 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2130 * @param a_uResult Unsigned result value.
2131 * @param a_uDst The original destination value (for AF calc).
2132 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2133 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2134 */
2135#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2136 do { \
2137 uint32_t fEflTmp = *(a_pfEFlags); \
2138 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
2139 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2140 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2141 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2142 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2143 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
2144 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
2145 *(a_pfEFlags) = fEflTmp; \
2146 } while (0)
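
/* A concrete flag example for the macro above (hypothetical, compiled out):
   incrementing 0x0F flips bit 4, so the (result ^ dst) & X86_EFL_AF term
   yields a set AF, while CF is carried over untouched. */
#if 0
static void incU8AfSketch(void)
{
    uint8_t  uDst = 0x0F;
    uint32_t fEfl = 0;
    iemAImpl_inc_u8(&uDst, &fEfl);          /* 0x0F + 1 = 0x10 */
    Assert(uDst == 0x10 && (fEfl & X86_EFL_AF) && !(fEfl & X86_EFL_CF));
}
#endif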
2147
2148/*
2149 * INC
2150 */
2151
2152IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2153{
2154 uint64_t uDst = *puDst;
2155 uint64_t uResult = uDst + 1;
2156 *puDst = uResult;
2157 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2158}
2159
2160# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2161
2162IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2163{
2164 uint32_t uDst = *puDst;
2165 uint32_t uResult = uDst + 1;
2166 *puDst = uResult;
2167 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2168}
2169
2170
2171IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2172{
2173 uint16_t uDst = *puDst;
2174 uint16_t uResult = uDst + 1;
2175 *puDst = uResult;
2176 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2177}
2178
2179IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2180{
2181 uint8_t uDst = *puDst;
2182 uint8_t uResult = uDst + 1;
2183 *puDst = uResult;
2184 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2185}
2186
2187# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2188
2189
2190/*
2191 * DEC
2192 */
2193
2194IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2195{
2196 uint64_t uDst = *puDst;
2197 uint64_t uResult = uDst - 1;
2198 *puDst = uResult;
2199    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2200}
2201
2202# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2203
2204IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2205{
2206 uint32_t uDst = *puDst;
2207 uint32_t uResult = uDst - 1;
2208 *puDst = uResult;
2209    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2210}
2211
2212
2213IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2214{
2215 uint16_t uDst = *puDst;
2216 uint16_t uResult = uDst - 1;
2217 *puDst = uResult;
2218    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2219}
2220
2221
2222IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2223{
2224 uint8_t uDst = *puDst;
2225 uint8_t uResult = uDst - 1;
2226 *puDst = uResult;
2227    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2228}
2229
2230# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2231
2232
2233/*
2234 * NOT
2235 */
2236
2237IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2238{
2239 uint64_t uDst = *puDst;
2240 uint64_t uResult = ~uDst;
2241 *puDst = uResult;
2242 /* EFLAGS are not modified. */
2243 RT_NOREF_PV(pfEFlags);
2244}
2245
2246# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2247
2248IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2249{
2250 uint32_t uDst = *puDst;
2251 uint32_t uResult = ~uDst;
2252 *puDst = uResult;
2253 /* EFLAGS are not modified. */
2254 RT_NOREF_PV(pfEFlags);
2255}
2256
2257IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2258{
2259 uint16_t uDst = *puDst;
2260 uint16_t uResult = ~uDst;
2261 *puDst = uResult;
2262 /* EFLAGS are not modified. */
2263 RT_NOREF_PV(pfEFlags);
2264}
2265
2266IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2267{
2268 uint8_t uDst = *puDst;
2269 uint8_t uResult = ~uDst;
2270 *puDst = uResult;
2271 /* EFLAGS are not modified. */
2272 RT_NOREF_PV(pfEFlags);
2273}
2274
2275# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2276
2277
2278/*
2279 * NEG
2280 */
2281
2282/**
2283 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
2284 *
2285 * @returns Status bits.
2286 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2287 * @param a_uResult Unsigned result value.
2288 * @param a_uDst The original destination value (for AF calc).
2289 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2290 */
2291#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2292 do { \
2293 uint32_t fEflTmp = *(a_pfEFlags); \
2294 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2295 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2296 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2297 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2298 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2299 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2300 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2301 *(a_pfEFlags) = fEflTmp; \
2302 } while (0)
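
/* A concrete example of the CF rule above (hypothetical, compiled out):
   NEG clears CF for a zero operand and sets it for everything else. */
#if 0
static void negU8CarrySketch(void)
{
    uint8_t  uDst = 0;
    uint32_t fEfl = X86_EFL_CF;
    iemAImpl_neg_u8(&uDst, &fEfl);          /* NEG 0: CF cleared, ZF set */
    Assert(!(fEfl & X86_EFL_CF) && (fEfl & X86_EFL_ZF));
    uDst = 1;
    iemAImpl_neg_u8(&uDst, &fEfl);          /* NEG 1 = 0xFF: CF set */
    Assert(uDst == 0xFF && (fEfl & X86_EFL_CF));
}
#endif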
2303
2304IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2305{
2306 uint64_t uDst = *puDst;
2307 uint64_t uResult = (uint64_t)0 - uDst;
2308 *puDst = uResult;
2309 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2310}
2311
2312# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2313
2314IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2315{
2316 uint32_t uDst = *puDst;
2317 uint32_t uResult = (uint32_t)0 - uDst;
2318 *puDst = uResult;
2319 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2320}
2321
2322
2323IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2324{
2325 uint16_t uDst = *puDst;
2326 uint16_t uResult = (uint16_t)0 - uDst;
2327 *puDst = uResult;
2328 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2329}
2330
2331
2332IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2333{
2334 uint8_t uDst = *puDst;
2335 uint8_t uResult = (uint8_t)0 - uDst;
2336 *puDst = uResult;
2337 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2338}
2339
2340# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2341
2342/*
2343 * Locked variants.
2344 */
2345
2346/** Emit a function for doing a locked unary operand operation. */
2347# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2348 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2349 uint32_t *pfEFlags)) \
2350 { \
2351 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2352 uint ## a_cBitsWidth ## _t uTmp; \
2353 uint32_t fEflTmp; \
2354 do \
2355 { \
2356 uTmp = uOld; \
2357 fEflTmp = *pfEFlags; \
2358 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2359 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2360 *pfEFlags = fEflTmp; \
2361 }
2362
2363EMIT_LOCKED_UNARY_OP(inc, 64)
2364EMIT_LOCKED_UNARY_OP(dec, 64)
2365EMIT_LOCKED_UNARY_OP(not, 64)
2366EMIT_LOCKED_UNARY_OP(neg, 64)
2367# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2368EMIT_LOCKED_UNARY_OP(inc, 32)
2369EMIT_LOCKED_UNARY_OP(dec, 32)
2370EMIT_LOCKED_UNARY_OP(not, 32)
2371EMIT_LOCKED_UNARY_OP(neg, 32)
2372
2373EMIT_LOCKED_UNARY_OP(inc, 16)
2374EMIT_LOCKED_UNARY_OP(dec, 16)
2375EMIT_LOCKED_UNARY_OP(not, 16)
2376EMIT_LOCKED_UNARY_OP(neg, 16)
2377
2378EMIT_LOCKED_UNARY_OP(inc, 8)
2379EMIT_LOCKED_UNARY_OP(dec, 8)
2380EMIT_LOCKED_UNARY_OP(not, 8)
2381EMIT_LOCKED_UNARY_OP(neg, 8)
2382# endif
2383
2384#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2385
2386
2387/*********************************************************************************************************************************
2388* Shifting and Rotating *
2389*********************************************************************************************************************************/
2390
2391/*
2392 * ROL
2393 */
2394#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2395IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2396{ \
2397 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2398 if (cShift) \
2399 { \
2400 if (a_cBitsWidth < 32) \
2401 cShift &= a_cBitsWidth - 1; \
2402 a_uType const uDst = *puDst; \
2403 a_uType const uResult = a_fnHlp(uDst, cShift); \
2404 *puDst = uResult; \
2405 \
2406        /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2407           it the same way as for 1-bit shifts. */ \
2408 AssertCompile(X86_EFL_CF_BIT == 0); \
2409 uint32_t fEfl = *pfEFlags; \
2410 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2411 uint32_t const fCarry = (uResult & X86_EFL_CF); \
2412 fEfl |= fCarry; \
2413 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2414 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2415 else /* Intel 10980XE: According to the first sub-shift: */ \
2416 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2417 *pfEFlags = fEfl; \
2418 } \
2419}
2420
2421#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2422EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
2423#endif
2424EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
2425EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)
2426
2427#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2428EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
2429#endif
2430EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
2431EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2432
2433DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2434{
2435 return (uValue << cShift) | (uValue >> (16 - cShift));
2436}
2437#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2438EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
2439#endif
2440EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
2441EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2442
2443DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2444{
2445 return (uValue << cShift) | (uValue >> (8 - cShift));
2446}
2447#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2448EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
2449#endif
2450EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
2451EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
2452
2453
2454/*
2455 * ROR
2456 */
2457#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2458IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2459{ \
2460 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2461 if (cShift) \
2462 { \
2463 if (a_cBitsWidth < 32) \
2464 cShift &= a_cBitsWidth - 1; \
2465 a_uType const uDst = *puDst; \
2466 a_uType const uResult = a_fnHlp(uDst, cShift); \
2467 *puDst = uResult; \
2468 \
2469 /* Calc EFLAGS: */ \
2470 AssertCompile(X86_EFL_CF_BIT == 0); \
2471 uint32_t fEfl = *pfEFlags; \
2472 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2473 uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2474 fEfl |= fCarry; \
2475 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2476 fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2477 else /* Intel 10980XE: According to the first sub-shift: */ \
2478 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
2479 *pfEFlags = fEfl; \
2480 } \
2481}
2482
2483#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2484EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
2485#endif
2486EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
2487EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)
2488
2489#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2490EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
2491#endif
2492EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
2493EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2494
2495DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2496{
2497 return (uValue >> cShift) | (uValue << (16 - cShift));
2498}
2499#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2500EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
2501#endif
2502EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
2503EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2504
2505DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2506{
2507 return (uValue >> cShift) | (uValue << (8 - cShift));
2508}
2509#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2510EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
2511#endif
2512EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
2513EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2514
2515
2516/*
2517 * RCL
2518 */
2519#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2520IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2521{ \
2522 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2523 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2524 cShift %= a_cBitsWidth + 1; \
2525 if (cShift) \
2526 { \
2527 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2528 cShift %= a_cBitsWidth + 1; \
2529 a_uType const uDst = *puDst; \
2530 a_uType uResult = uDst << cShift; \
2531 if (cShift > 1) \
2532 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2533 \
2534 AssertCompile(X86_EFL_CF_BIT == 0); \
2535 uint32_t fEfl = *pfEFlags; \
2536 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2537 uResult |= (a_uType)fInCarry << (cShift - 1); \
2538 \
2539 *puDst = uResult; \
2540 \
2541 /* Calc EFLAGS. */ \
2542 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2543 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2544 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
2545 fEfl |= fOutCarry; \
2546 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2547 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
2548 else /* Intel 10980XE: According to the first sub-shift: */ \
2549 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2550 *pfEFlags = fEfl; \
2551 } \
2552}
2553
2554#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2555EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
2556#endif
2557EMIT_RCL(64, uint64_t, _intel, 1)
2558EMIT_RCL(64, uint64_t, _amd, 0)
2559
2560#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2561EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
2562#endif
2563EMIT_RCL(32, uint32_t, _intel, 1)
2564EMIT_RCL(32, uint32_t, _amd, 0)
2565
2566#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2567EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
2568#endif
2569EMIT_RCL(16, uint16_t, _intel, 1)
2570EMIT_RCL(16, uint16_t, _amd, 0)
2571
2572#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2573EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
2574#endif
2575EMIT_RCL(8, uint8_t, _intel, 1)
2576EMIT_RCL(8, uint8_t, _amd, 0)
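
/* RCL effectively rotates CF:operand as one (width + 1)-bit quantity; a
   hypothetical snippet (compiled out) for the 8-bit case: */
#if 0
static void rclU8Sketch(void)
{
    uint8_t  uDst = 0x80;
    uint32_t fEfl = 0;                      /* CF = 0 */
    iemAImpl_rcl_u8(&uDst, 1, &fEfl);       /* 9-bit rotate of 0:10000000b */
    Assert(uDst == 0x00 && (fEfl & X86_EFL_CF)); /* the old MSB lands in CF */
}
#endif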
2577
2578
2579/*
2580 * RCR
2581 */
2582#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2583IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2584{ \
2585 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2586 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2587 cShift %= a_cBitsWidth + 1; \
2588 if (cShift) \
2589 { \
2590 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2591 cShift %= a_cBitsWidth + 1; \
2592 a_uType const uDst = *puDst; \
2593 a_uType uResult = uDst >> cShift; \
2594 if (cShift > 1) \
2595 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2596 \
2597 AssertCompile(X86_EFL_CF_BIT == 0); \
2598 uint32_t fEfl = *pfEFlags; \
2599 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2600 uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
2601 *puDst = uResult; \
2602 \
2603        /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2604           it the same way as for 1-bit shifts. */ \
2605 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2606 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2607 ? (uDst >> (cShift - 1)) & X86_EFL_CF : fInCarry; \
2608 fEfl |= fOutCarry; \
2609        if (!a_fIntelFlags) /* AMD 3990X: XOR the two most significant bits of the result: */ \
2610 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
2611 else /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
2612 fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2613 *pfEFlags = fEfl; \
2614 } \
2615}
2616
2617#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2618EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2619#endif
2620EMIT_RCR(64, uint64_t, _intel, 1)
2621EMIT_RCR(64, uint64_t, _amd, 0)
2622
2623#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2624EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2625#endif
2626EMIT_RCR(32, uint32_t, _intel, 1)
2627EMIT_RCR(32, uint32_t, _amd, 0)
2628
2629#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2630EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2631#endif
2632EMIT_RCR(16, uint16_t, _intel, 1)
2633EMIT_RCR(16, uint16_t, _amd, 0)
2634
2635#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2636EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2637#endif
2638EMIT_RCR(8, uint8_t, _intel, 1)
2639EMIT_RCR(8, uint8_t, _amd, 0)
2640
2641
2642/*
2643 * SHL
2644 */
2645#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2646IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2647{ \
2648 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2649 if (cShift) \
2650 { \
2651 a_uType const uDst = *puDst; \
2652 a_uType uResult = uDst << cShift; \
2653 *puDst = uResult; \
2654 \
2655 /* Calc EFLAGS. */ \
2656 AssertCompile(X86_EFL_CF_BIT == 0); \
2657 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2658 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2659 fEfl |= fCarry; \
2660 if (!a_fIntelFlags) \
2661 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
2662 else \
2663 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
2664 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2665 fEfl |= X86_EFL_CALC_ZF(uResult); \
2666 fEfl |= g_afParity[uResult & 0xff]; \
2667 if (!a_fIntelFlags) \
2668            fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2669 *pfEFlags = fEfl; \
2670 } \
2671}
2672
2673#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2674EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
2675#endif
2676EMIT_SHL(64, uint64_t, _intel, 1)
2677EMIT_SHL(64, uint64_t, _amd, 0)
2678
2679#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2680EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
2681#endif
2682EMIT_SHL(32, uint32_t, _intel, 1)
2683EMIT_SHL(32, uint32_t, _amd, 0)
2684
2685#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2686EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
2687#endif
2688EMIT_SHL(16, uint16_t, _intel, 1)
2689EMIT_SHL(16, uint16_t, _amd, 0)
2690
2691#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2692EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
2693#endif
2694EMIT_SHL(8, uint8_t, _intel, 1)
2695EMIT_SHL(8, uint8_t, _amd, 0)
2696
2697
2698/*
2699 * SHR
2700 */
2701#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2702IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2703{ \
2704 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2705 if (cShift) \
2706 { \
2707 a_uType const uDst = *puDst; \
2708 a_uType uResult = uDst >> cShift; \
2709 *puDst = uResult; \
2710 \
2711 /* Calc EFLAGS. */ \
2712 AssertCompile(X86_EFL_CF_BIT == 0); \
2713 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2714 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2715        if (a_fIntelFlags || cShift == 1) /* AMD 3990X does what Intel documents; Intel 10980XE does this for all shift counts. */ \
2716 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2717 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2718 fEfl |= X86_EFL_CALC_ZF(uResult); \
2719 fEfl |= g_afParity[uResult & 0xff]; \
2720 if (!a_fIntelFlags) \
2721            fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2722 *pfEFlags = fEfl; \
2723 } \
2724}
2725
2726#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2727EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
2728#endif
2729EMIT_SHR(64, uint64_t, _intel, 1)
2730EMIT_SHR(64, uint64_t, _amd, 0)
2731
2732#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2733EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
2734#endif
2735EMIT_SHR(32, uint32_t, _intel, 1)
2736EMIT_SHR(32, uint32_t, _amd, 0)
2737
2738#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2739EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
2740#endif
2741EMIT_SHR(16, uint16_t, _intel, 1)
2742EMIT_SHR(16, uint16_t, _amd, 0)
2743
2744#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2745EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
2746#endif
2747EMIT_SHR(8, uint8_t, _intel, 1)
2748EMIT_SHR(8, uint8_t, _amd, 0)
2749
2750
2751/*
2752 * SAR
2753 */
2754#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
2755IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2756{ \
2757 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2758 if (cShift) \
2759 { \
2760 a_iType const iDst = (a_iType)*puDst; \
2761 a_uType uResult = iDst >> cShift; \
2762 *puDst = uResult; \
2763 \
2764 /* Calc EFLAGS. \
2765           Note! The OF flag is always zero because the sign of the result never differs from that of the input. */ \
2766 AssertCompile(X86_EFL_CF_BIT == 0); \
2767 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2768 fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; \
2769 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2770 fEfl |= X86_EFL_CALC_ZF(uResult); \
2771 fEfl |= g_afParity[uResult & 0xff]; \
2772 if (!a_fIntelFlags) \
2773            fEfl |= X86_EFL_AF; /* AMD 3990X sets it unconditionally, Intel 10980XE does the opposite */ \
2774 *pfEFlags = fEfl; \
2775 } \
2776}
2777
2778#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2779EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
2780#endif
2781EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
2782EMIT_SAR(64, uint64_t, int64_t, _amd, 0)
2783
2784#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2785EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
2786#endif
2787EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
2788EMIT_SAR(32, uint32_t, int32_t, _amd, 0)
2789
2790#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2791EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
2792#endif
2793EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
2794EMIT_SAR(16, uint16_t, int16_t, _amd, 0)
2795
2796#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2797EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
2798#endif
2799EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
2800EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
2801
2802
2803/*
2804 * SHLD
2805 *
2806 * - CF is the last bit shifted out of puDst.
2807 * - AF is always cleared by Intel 10980XE.
2808 * - AF is always set by AMD 3990X.
2809 * - OF is set according to the first shift on Intel 10980XE, it seems.
2810 * - OF is set according to the last sub-shift on AMD 3990X.
2811 * - ZF, SF and PF are calculated according to the result by both vendors.
2812 *
2813 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2814 * pick either the source register or the destination register for input bits
2815 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2816 * Intel has changed behaviour here several times. We implement what current
2817 * Skylake-based parts do for now; we can extend this later as needed.
2818 */
2819#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2820IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
2821 uint32_t *pfEFlags)) \
2822{ \
2823 cShift &= a_cBitsWidth - 1; \
2824 if (cShift) \
2825 { \
2826 a_uType const uDst = *puDst; \
2827 a_uType uResult = uDst << cShift; \
2828 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2829 *puDst = uResult; \
2830 \
2831 /* CALC EFLAGS: */ \
2832 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2833 if (a_fIntelFlags) \
2834 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2835 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2836 else \
2837 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2838 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
2839 fEfl |= X86_EFL_AF; \
2840 } \
2841 AssertCompile(X86_EFL_CF_BIT == 0); \
2842 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
2843 fEfl |= g_afParity[uResult & 0xff]; \
2844 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2845 fEfl |= X86_EFL_CALC_ZF(uResult); \
2846 *pfEFlags = fEfl; \
2847 } \
2848}
2849
2850#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2851EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
2852#endif
2853EMIT_SHLD(64, uint64_t, _intel, 1)
2854EMIT_SHLD(64, uint64_t, _amd, 0)
2855
2856#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2857EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
2858#endif
2859EMIT_SHLD(32, uint32_t, _intel, 1)
2860EMIT_SHLD(32, uint32_t, _amd, 0)
2861
2862#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
2863IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2864{ \
2865 cShift &= 31; \
2866 if (cShift) \
2867 { \
2868 uint16_t const uDst = *puDst; \
2869 uint64_t const uTmp = a_fIntelFlags \
2870 ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
2871 : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
2872 uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
2873 *puDst = uResult; \
2874 \
2875 /* CALC EFLAGS: */ \
2876 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2877 AssertCompile(X86_EFL_CF_BIT == 0); \
2878 if (a_fIntelFlags) \
2879 { \
2880 fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
2881            /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
2882 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
2883 } \
2884 else \
2885 { \
2886 /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
2887 if (cShift < 16) \
2888 { \
2889 fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
2890 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
2891 } \
2892 else \
2893 { \
2894 if (cShift == 16) \
2895 fEfl |= uDst & X86_EFL_CF; \
2896 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
2897 } \
2898 fEfl |= X86_EFL_AF; \
2899 } \
2900 fEfl |= g_afParity[uResult & 0xff]; \
2901 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2902 fEfl |= X86_EFL_CALC_ZF(uResult); \
2903 *pfEFlags = fEfl; \
2904 } \
2905}
2906
2907#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2908EMIT_SHLD_16(RT_NOTHING, 1)
2909#endif
2910EMIT_SHLD_16(_intel, 1)
2911EMIT_SHLD_16(_amd, 0)
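
/* A worked example for the emitters above (hypothetical, compiled out):
   SHLD shifts puDst left and fills the vacated low bits from the top of
   uSrc. */
#if 0
static void shldU16Sketch(void)
{
    uint16_t uDst = 0xF000;
    uint32_t fEfl = 0;
    iemAImpl_shld_u16(&uDst, 0x8001 /*uSrc*/, 4, &fEfl);
    /* The result is 0x0008 (the top nibble of uSrc) and CF holds the last
       bit shifted out of uDst (1). */
    Assert(uDst == 0x0008 && (fEfl & X86_EFL_CF));
}
#endif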
2912
2913
2914/*
2915 * SHRD
2916 *
2917 * EFLAGS behaviour seems to be the same as with SHLD:
2918 * - CF is the last bit shifted out of puDst.
2919 * - AF is always cleared by Intel 10980XE.
2920 * - AF is always set by AMD 3990X.
2921 * - OF is set according to the first shift on Intel 10980XE, it seems.
2922 * - OF is set according to the last sub-shift on AMD 3990X.
2923 * - ZF, SF and PF are calculated according to the result by both vendors.
2924 *
2925 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2926 * pick either the source register or the destination register for input bits
2927 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2928 * Intel has changed behaviour here several times. We implement what current
2929 * Skylake-based parts do for now; we can extend this later as needed.
2930 */
2931#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2932IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2933{ \
2934 cShift &= a_cBitsWidth - 1; \
2935 if (cShift) \
2936 { \
2937 a_uType const uDst = *puDst; \
2938 a_uType uResult = uDst >> cShift; \
2939 uResult |= uSrc << (a_cBitsWidth - cShift); \
2940 *puDst = uResult; \
2941 \
2942 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2943 AssertCompile(X86_EFL_CF_BIT == 0); \
2944 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2945 if (a_fIntelFlags) \
2946 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2947 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
2948 else \
2949 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2950 if (cShift > 1) /* Set according to last shift. */ \
2951 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
2952 else \
2953 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
2954 fEfl |= X86_EFL_AF; \
2955 } \
2956 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2957 fEfl |= X86_EFL_CALC_ZF(uResult); \
2958 fEfl |= g_afParity[uResult & 0xff]; \
2959 *pfEFlags = fEfl; \
2960 } \
2961}
2962
2963#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2964EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
2965#endif
2966EMIT_SHRD(64, uint64_t, _intel, 1)
2967EMIT_SHRD(64, uint64_t, _amd, 0)
2968
2969#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2970EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
2971#endif
2972EMIT_SHRD(32, uint32_t, _intel, 1)
2973EMIT_SHRD(32, uint32_t, _amd, 0)
2974
2975#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
2976IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2977{ \
2978 cShift &= 31; \
2979 if (cShift) \
2980 { \
2981 uint16_t const uDst = *puDst; \
2982 uint64_t const uTmp = a_fIntelFlags \
2983 ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
2984 : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
2985 uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
2986 *puDst = uResult; \
2987 \
2988 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2989 AssertCompile(X86_EFL_CF_BIT == 0); \
2990 if (a_fIntelFlags) \
2991 { \
2992            /* Intel 10980XE: The CF is the last bit shifted out of the combined uTmp operand. */ \
2993 fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
2994 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2995 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
2996 } \
2997 else \
2998 { \
2999 /* AMD 3990X: CF flag seems to be last bit shifted out of uDst, not the combined uSrc:uSrc:uDst operand. */ \
3000 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
3001 /* AMD 3990X: Set according to last shift. AF always set. */ \
3002 if (cShift > 1) /* Set according to last shift. */ \
3003 fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
3004 else \
3005 fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
3006 fEfl |= X86_EFL_AF; \
3007 } \
3008 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
3009 fEfl |= X86_EFL_CALC_ZF(uResult); \
3010 fEfl |= g_afParity[uResult & 0xff]; \
3011 *pfEFlags = fEfl; \
3012 } \
3013}
3014
3015#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
3016EMIT_SHRD_16(RT_NOTHING, 1)
3017#endif
3018EMIT_SHRD_16(_intel, 1)
3019EMIT_SHRD_16(_amd, 0)
3020
3021
3022#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
3023
3024# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
3025/*
3026 * BSWAP
3027 */
3028
3029IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
3030{
3031 *puDst = ASMByteSwapU64(*puDst);
3032}
3033
3034
3035IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
3036{
3037 *puDst = ASMByteSwapU32(*puDst);
3038}
3039
3040
3041/* Note! Undocumented, so 32-bit argument. */
3042IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
3043{
3044#if 0
3045 *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
3046#else
3047    /* This is the behaviour of the AMD 3990X (64-bit mode): */
3048 *(uint16_t *)puDst = 0;
3049#endif
3050}
3051
3052# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
3053
3054
3055
3056# if defined(IEM_WITHOUT_ASSEMBLY)
3057
3058/*
3059 * LFENCE, SFENCE & MFENCE.
3060 */
3061
3062IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
3063{
3064 ASMReadFence();
3065}
3066
3067
3068IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
3069{
3070 ASMWriteFence();
3071}
3072
3073
3074IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
3075{
3076 ASMMemoryFence();
3077}
3078
3079
3080# ifndef RT_ARCH_ARM64
3081IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
3082{
3083 ASMMemoryFence();
3084}
3085# endif
3086
3087# endif
3088
3089#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
3090
3091
3092IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
3093{
3094 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
3095 {
3096 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
3097 *pu16Dst |= u16Src & X86_SEL_RPL;
3098
3099 *pfEFlags |= X86_EFL_ZF;
3100 }
3101 else
3102 *pfEFlags &= ~X86_EFL_ZF;
3103}
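
/* Illustrative ARPL example (hypothetical values): with *pu16Dst=0x0008
 * (RPL 0) and u16Src=0x0003 (RPL 3), the destination RPL is raised to the
 * source RPL, giving 0x000b with ZF set; when the destination RPL is already
 * greater or equal, the selector is left untouched and ZF is cleared. */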
3104
3105
3106#if defined(IEM_WITHOUT_ASSEMBLY)
3107
3108/*********************************************************************************************************************************
3109* x87 FPU Loads *
3110*********************************************************************************************************************************/
3111
3112IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
3113{
3114 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3115 if (RTFLOAT32U_IS_NORMAL(pr32Val))
3116 {
3117 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3118 pFpuRes->r80Result.sj64.fInteger = 1;
3119 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3120 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
3121 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
3122 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
3123 }
3124 else if (RTFLOAT32U_IS_ZERO(pr32Val))
3125 {
3126 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
3127 pFpuRes->r80Result.s.uExponent = 0;
3128 pFpuRes->r80Result.s.uMantissa = 0;
3129 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
3130 }
3131 else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
3132 {
3133        /* Subnormal values get normalized. */
3134 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3135 pFpuRes->r80Result.sj64.fInteger = 1;
3136 unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
3137 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3138 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
3139 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
3140 pFpuRes->FSW |= X86_FSW_DE;
3141 if (!(pFpuState->FCW & X86_FCW_DM))
3142 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
3143 }
3144 else if (RTFLOAT32U_IS_INF(pr32Val))
3145 {
3146 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
3147 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
3148 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
3149 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
3150 }
3151 else
3152 {
3153 /* Signalling and quiet NaNs, both turn into quiet ones when loaded (weird). */
3154 Assert(RTFLOAT32U_IS_NAN(pr32Val));
3155 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3156 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
3157 pFpuRes->r80Result.sj64.fInteger = 1;
3158 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3159 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
3160 if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
3161 {
3162 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
3163 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3164 pFpuRes->FSW |= X86_FSW_IE;
3165
3166 if (!(pFpuState->FCW & X86_FCW_IM))
3167 {
3168 /* The value is not pushed. */
3169 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3170 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3171 pFpuRes->r80Result.au64[0] = 0;
3172 pFpuRes->r80Result.au16[4] = 0;
3173 }
3174 }
3175 else
3176 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3177 }
3178}
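
/* Worked example for the subnormal path above (illustrative): an r32 input
 * with uFraction=0x400 (only bit 10 set) gives ASMBitLastSetU32() = 11 and
 * cExtraShift = 23 - 11 = 12, so the leading fraction bit is normalized into
 * the integer position and the biased 80-bit exponent becomes
 * 0 - 127 + 16383 - 12 = 16244, i.e. 1.0 * 2^-139, matching the r32 value
 * 0x400 * 2^-149. */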
3179
3180
3181IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
3182{
3183 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3184 if (RTFLOAT64U_IS_NORMAL(pr64Val))
3185 {
3186 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3187 pFpuRes->r80Result.sj64.fInteger = 1;
3188 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3189 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
3190 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
3191 }
3192 else if (RTFLOAT64U_IS_ZERO(pr64Val))
3193 {
3194 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3195 pFpuRes->r80Result.s.uExponent = 0;
3196 pFpuRes->r80Result.s.uMantissa = 0;
3197 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
3198 }
3199 else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
3200 {
3201        /* Subnormal values get normalized. */
3202 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3203 pFpuRes->r80Result.sj64.fInteger = 1;
3204 unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
3205 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction
3206 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
3207 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
3208 pFpuRes->FSW |= X86_FSW_DE;
3209 if (!(pFpuState->FCW & X86_FCW_DM))
3210 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
3211 }
3212 else if (RTFLOAT64U_IS_INF(pr64Val))
3213 {
3214 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3215 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
3216 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
3217 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
3218 }
3219 else
3220 {
3221 /* Signalling and quiet NaNs, both turn into quiet ones when loaded (weird). */
3222 Assert(RTFLOAT64U_IS_NAN(pr64Val));
3223 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3224 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
3225 pFpuRes->r80Result.sj64.fInteger = 1;
3226 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3227 if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
3228 {
3229 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
3230 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3231 pFpuRes->FSW |= X86_FSW_IE;
3232
3233 if (!(pFpuState->FCW & X86_FCW_IM))
3234 {
3235 /* The value is not pushed. */
3236 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3237 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3238 pFpuRes->r80Result.au64[0] = 0;
3239 pFpuRes->r80Result.au16[4] = 0;
3240 }
3241 }
3242 else
3243 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3244 }
3245}
3246
3247
3248IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
3249{
3250 pFpuRes->r80Result.au64[0] = pr80Val->au64[0];
3251 pFpuRes->r80Result.au16[4] = pr80Val->au16[4];
3252 /* Raises no exceptions. */
3253 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3254}
3255
3256
3257IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3258{
3259 pFpuRes->r80Result.sj64.fSign = 0;
3260 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3261 pFpuRes->r80Result.sj64.fInteger = 1;
3262 pFpuRes->r80Result.sj64.uFraction = 0;
3263
3264 /*
3265 * FPU status word:
3266 * - TOP is irrelevant, but we must match x86 assembly version.
3267 * - C1 is always cleared as we don't have any stack overflows.
3268 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3269 */
3270 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3271}
3272
3273
3274IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3275{
3276 pFpuRes->r80Result.sj64.fSign = 0;
3277 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3278 pFpuRes->r80Result.sj64.fInteger = 1;
3279 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3280 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3281 ? UINT64_C(0x38aa3b295c17f0bc) : UINT64_C(0x38aa3b295c17f0bb);
3282 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3283}
3284
3285
3286IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3287{
3288 pFpuRes->r80Result.sj64.fSign = 0;
3289 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3290 pFpuRes->r80Result.sj64.fInteger = 1;
3291 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) != X86_FCW_RC_UP
3292 ? UINT64_C(0x549a784bcd1b8afe) : UINT64_C(0x549a784bcd1b8aff);
3293 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3294}
3295
3296
3297IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3298{
3299 pFpuRes->r80Result.sj64.fSign = 0;
3300 pFpuRes->r80Result.sj64.uExponent = -2 + 16383;
3301 pFpuRes->r80Result.sj64.fInteger = 1;
3302 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3303 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3304 ? UINT64_C(0x1a209a84fbcff799) : UINT64_C(0x1a209a84fbcff798);
3305 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3306}
3307
3308
3309IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3310{
3311 pFpuRes->r80Result.sj64.fSign = 0;
3312 pFpuRes->r80Result.sj64.uExponent = -1 + 16383;
3313 pFpuRes->r80Result.sj64.fInteger = 1;
3314 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3315 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3316 ? UINT64_C(0x317217f7d1cf79ac) : UINT64_C(0x317217f7d1cf79ab);
3317 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3318}
3319
3320
3321IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3322{
3323 pFpuRes->r80Result.sj64.fSign = 0;
3324 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3325 pFpuRes->r80Result.sj64.fInteger = 1;
3326 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3327 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3328 ? UINT64_C(0x490fdaa22168c235) : UINT64_C(0x490fdaa22168c234);
3329 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3330}
3331
3332
3333IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3334{
3335 pFpuRes->r80Result.s.fSign = 0;
3336 pFpuRes->r80Result.s.uExponent = 0;
3337 pFpuRes->r80Result.s.uMantissa = 0;
3338 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3339}
3340
3341#define EMIT_FILD(a_cBits) \
3342IEM_DECL_IMPL_DEF(void, iemAImpl_fild_r80_from_i ## a_cBits,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, \
3343 int ## a_cBits ## _t const *piVal)) \
3344{ \
3345 int ## a_cBits ## _t iVal = *piVal; \
3346 if (iVal == 0) \
3347 { \
3348 pFpuRes->r80Result.s.fSign = 0; \
3349 pFpuRes->r80Result.s.uExponent = 0; \
3350 pFpuRes->r80Result.s.uMantissa = 0; \
3351 } \
3352 else \
3353 { \
3354 if (iVal > 0) \
3355 pFpuRes->r80Result.s.fSign = 0; \
3356 else \
3357 { \
3358 pFpuRes->r80Result.s.fSign = 1; \
3359 iVal = -iVal; \
3360 } \
3361 unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
3362 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
3363 pFpuRes->r80Result.s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
3364 } \
3365 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */ \
3366}
3367EMIT_FILD(16)
3368EMIT_FILD(32)
3369EMIT_FILD(64)
3370
3371
3372IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_d80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTPBCD80U pd80Val))
3373{
3374 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3375 if ( pd80Val->s.abPairs[0] == 0
3376 && pd80Val->s.abPairs[1] == 0
3377 && pd80Val->s.abPairs[2] == 0
3378 && pd80Val->s.abPairs[3] == 0
3379 && pd80Val->s.abPairs[4] == 0
3380 && pd80Val->s.abPairs[5] == 0
3381 && pd80Val->s.abPairs[6] == 0
3382 && pd80Val->s.abPairs[7] == 0
3383 && pd80Val->s.abPairs[8] == 0)
3384 {
3385 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3386 pFpuRes->r80Result.s.uExponent = 0;
3387 pFpuRes->r80Result.s.uMantissa = 0;
3388 }
3389 else
3390 {
3391 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3392
3393 size_t cPairs = RT_ELEMENTS(pd80Val->s.abPairs);
3394 while (cPairs > 0 && pd80Val->s.abPairs[cPairs - 1] == 0)
3395 cPairs--;
3396
3397 uint64_t uVal = 0;
3398 uint64_t uFactor = 1;
3399 for (size_t iPair = 0; iPair < cPairs; iPair++, uFactor *= 100)
3400 uVal += RTPBCD80U_LO_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor
3401 + RTPBCD80U_HI_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor * 10;
3402
3403 unsigned const cBits = ASMBitLastSetU64(uVal);
3404 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS;
3405 pFpuRes->r80Result.s.uMantissa = uVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits);
3406 }
3407}
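
/* Worked example for the BCD accumulation loop above (illustrative):
 * abPairs[] = { 0x45, 0x23, 0x01, 0, ... } holds the decimal digits 12345,
 * and the loop sums 5*1 + 4*10 + 3*100 + 2*1000 + 1*10000 = 12345 before the
 * value is normalized into the mantissa just like the FILD cases above. */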
3408
3409
3410/*********************************************************************************************************************************
3411* x87 FPU Stores *
3412*********************************************************************************************************************************/
3413
3414/**
3415 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3416 *
3417 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3418 *
3419 * @returns Updated FPU status word value.
3420 * @param fSignIn Incoming sign indicator.
3421 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3422 * @param iExponentIn Unbiased exponent.
3423 * @param fFcw The FPU control word.
3424 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3425 * @param pr32Dst Where to return the output value, if one should be
3426 * returned.
3427 *
3428 * @note Tailored as a helper for iemAImpl_fst_r80_to_r32 right now.
3429 * @note Exact same logic as iemAImpl_StoreNormalR80AsR64.
3430 */
3431static uint16_t iemAImpl_StoreNormalR80AsR32(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3432 uint16_t fFcw, uint16_t fFsw, PRTFLOAT32U pr32Dst)
3433{
3434    uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS) - 1; /* 0xffffffffff */
3435 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3436                                  ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS - 1) /* 0x8000000000 */
3437 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3438 ? fRoundingOffMask
3439 : 0;
3440 uint64_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3441
3442 /*
3443 * Deal with potential overflows/underflows first, optimizing for none.
3444 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3445 */
3446 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT32U_EXP_BIAS;
3447 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT32U_EXP_MAX - 3))
3448 { /* likely? */ }
3449 /*
3450     * Underflow if the exponent is zero or negative. Where possible this is
3451     * mapped to a subnormal number, with some additional trickery of course.
3452 */
3453 else if (iExponentOut <= 0)
3454 {
3455 bool const fIsTiny = iExponentOut < 0
3456 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3457 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3458 /* Note! 754-1985 sec 7.4 has something about bias adjust of 192 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3459 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3460
3461 if (iExponentOut <= 0)
3462 {
3463 uMantissaIn = iExponentOut <= -63
3464 ? uMantissaIn != 0
3465 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3466 fRoundedOff = uMantissaIn & fRoundingOffMask;
3467 if (fRoundedOff && fIsTiny)
3468 fFsw |= X86_FSW_UE;
3469 iExponentOut = 0;
3470 }
3471 }
3472 /*
3473 * Overflow if at or above max exponent value or if we will reach max
3474 * when rounding. Will return +/-zero or +/-max value depending on
3475 * whether we're rounding or not.
3476 */
3477 else if ( iExponentOut >= RTFLOAT32U_EXP_MAX
3478 || ( iExponentOut == RTFLOAT32U_EXP_MAX - 1
3479 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3480 {
3481 fFsw |= X86_FSW_OE;
3482 if (!(fFcw & X86_FCW_OM))
3483 return fFsw | X86_FSW_ES | X86_FSW_B;
3484 fFsw |= X86_FSW_PE;
3485 if (uRoundingAdd)
3486 fFsw |= X86_FSW_C1;
3487 if (!(fFcw & X86_FCW_PM))
3488 fFsw |= X86_FSW_ES | X86_FSW_B;
3489
3490 pr32Dst->s.fSign = fSignIn;
3491 if (uRoundingAdd)
3492 { /* Zero */
3493 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3494 pr32Dst->s.uFraction = 0;
3495 }
3496 else
3497 { /* Max */
3498 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX - 1;
3499 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1;
3500 }
3501 return fFsw;
3502 }
3503
3504 /*
3505 * Normal or subnormal number.
3506 */
3507 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3508 uint64_t uMantissaOut = uMantissaIn;
3509 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3510 || (uMantissaIn & RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS))
3511 || fRoundedOff != uRoundingAdd)
3512 {
3513 uMantissaOut = uMantissaIn + uRoundingAdd;
3514 if (uMantissaOut >= uMantissaIn)
3515 { /* likely */ }
3516 else
3517 {
3518 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3519 iExponentOut++;
3520 Assert(iExponentOut < RTFLOAT32U_EXP_MAX); /* checked above */
3521 fFsw |= X86_FSW_C1;
3522 }
3523 }
3524 else
3525 uMantissaOut = uMantissaIn;
3526
3527 /* Truncate the mantissa and set the return value. */
3528 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS;
3529
3530 pr32Dst->s.uFraction = (uint32_t)uMantissaOut; /* Note! too big for bitfield if normal. */
3531 pr32Dst->s.uExponent = iExponentOut;
3532 pr32Dst->s.fSign = fSignIn;
3533
3534    /* Set status flags related to rounding. */
3535 if (fRoundedOff)
3536 {
3537 fFsw |= X86_FSW_PE;
3538 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS)))
3539 fFsw |= X86_FSW_C1;
3540 if (!(fFcw & X86_FCW_PM))
3541 fFsw |= X86_FSW_ES | X86_FSW_B;
3542 }
3543
3544 return fFsw;
3545}
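
/* Rounding sketch for the helper above (illustrative): with RC_NEAREST the
 * 40 bits dropped from the 63-bit fraction round to nearest, except when they
 * equal uRoundingAdd exactly (precisely midway) and the lowest kept bit is
 * clear, in which case the value is merely truncated (round-to-even); a carry
 * out of the addition halves the mantissa and bumps the exponent, setting C1. */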
3546
3547
3548/**
3549 * @note Exact same logic as iemAImpl_fst_r80_to_r64.
3550 */
3551IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3552 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
3553{
3554 uint16_t const fFcw = pFpuState->FCW;
3555 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3556 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3557 fFsw = iemAImpl_StoreNormalR80AsR32(pr80Src->s.fSign, pr80Src->s.uMantissa,
3558 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr32Dst);
3559 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3560 {
3561 pr32Dst->s.fSign = pr80Src->s.fSign;
3562 pr32Dst->s.uExponent = 0;
3563 pr32Dst->s.uFraction = 0;
3564 Assert(RTFLOAT32U_IS_ZERO(pr32Dst));
3565 }
3566 else if (RTFLOAT80U_IS_INF(pr80Src))
3567 {
3568 pr32Dst->s.fSign = pr80Src->s.fSign;
3569 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3570 pr32Dst->s.uFraction = 0;
3571 Assert(RTFLOAT32U_IS_INF(pr32Dst));
3572 }
3573 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3574 {
3575 /* Mapped to +/-QNaN */
3576 pr32Dst->s.fSign = pr80Src->s.fSign;
3577 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3578 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3579 }
3580 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3581 {
3582 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3583 if (fFcw & X86_FCW_IM)
3584 {
3585 pr32Dst->s.fSign = 1;
3586 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3587 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3588 fFsw |= X86_FSW_IE;
3589 }
3590 else
3591        fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3592 }
3593 else if (RTFLOAT80U_IS_NAN(pr80Src))
3594 {
3595 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3596 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3597 {
3598 pr32Dst->s.fSign = pr80Src->s.fSign;
3599 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3600 pr32Dst->s.uFraction = (uint32_t)(pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS));
3601 pr32Dst->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3602 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3603 fFsw |= X86_FSW_IE;
3604 }
3605 else
3606 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3607 }
3608 else
3609 {
3610        /* Denormal values cause both an underflow and a precision exception. */
3611 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3612 if (fFcw & X86_FCW_UM)
3613 {
3614 pr32Dst->s.fSign = pr80Src->s.fSign;
3615 pr32Dst->s.uExponent = 0;
3616 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3617 {
3618 pr32Dst->s.uFraction = 1;
3619 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3620 if (!(fFcw & X86_FCW_PM))
3621 fFsw |= X86_FSW_ES | X86_FSW_B;
3622 }
3623 else
3624 {
3625 pr32Dst->s.uFraction = 0;
3626 fFsw |= X86_FSW_UE | X86_FSW_PE;
3627 if (!(fFcw & X86_FCW_PM))
3628 fFsw |= X86_FSW_ES | X86_FSW_B;
3629 }
3630 }
3631 else
3632 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3633 }
3634 *pu16FSW = fFsw;
3635}
3636
3637
3638/**
3639 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3640 *
3641 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3642 *
3643 * @returns Updated FPU status word value.
3644 * @param fSignIn Incoming sign indicator.
3645 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3646 * @param iExponentIn Unbiased exponent.
3647 * @param fFcw The FPU control word.
3648 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3649 * @param pr64Dst Where to return the output value, if one should be
3650 * returned.
3651 *
3652 * @note Tailored as a helper for iemAImpl_fst_r80_to_r64 right now.
3653 * @note Exact same logic as iemAImpl_StoreNormalR80AsR32.
3654 */
3655static uint16_t iemAImpl_StoreNormalR80AsR64(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3656 uint16_t fFcw, uint16_t fFsw, PRTFLOAT64U pr64Dst)
3657{
3658 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS) - 1; /* 0x7ff */
3659 uint32_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3660 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS - 1) /* 0x400 */
3661 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3662 ? fRoundingOffMask
3663 : 0;
3664 uint32_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3665
3666 /*
3667 * Deal with potential overflows/underflows first, optimizing for none.
3668 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3669 */
3670 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT64U_EXP_BIAS;
3671 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT64U_EXP_MAX - 3))
3672 { /* likely? */ }
3673 /*
3674     * Underflow if the exponent is zero or negative. Where possible this is
3675     * mapped to a subnormal number, with some additional trickery of course.
3676 */
3677 else if (iExponentOut <= 0)
3678 {
3679 bool const fIsTiny = iExponentOut < 0
3680 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3681 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3682 /* Note! 754-1985 sec 7.4 has something about bias adjust of 1536 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3683 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3684
3685 if (iExponentOut <= 0)
3686 {
3687 uMantissaIn = iExponentOut <= -63
3688 ? uMantissaIn != 0
3689 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3690 fRoundedOff = uMantissaIn & fRoundingOffMask;
3691 if (fRoundedOff && fIsTiny)
3692 fFsw |= X86_FSW_UE;
3693 iExponentOut = 0;
3694 }
3695 }
3696 /*
3697 * Overflow if at or above max exponent value or if we will reach max
3698 * when rounding. Will return +/-zero or +/-max value depending on
3699 * whether we're rounding or not.
3700 */
3701 else if ( iExponentOut >= RTFLOAT64U_EXP_MAX
3702 || ( iExponentOut == RTFLOAT64U_EXP_MAX - 1
3703 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3704 {
3705 fFsw |= X86_FSW_OE;
3706 if (!(fFcw & X86_FCW_OM))
3707 return fFsw | X86_FSW_ES | X86_FSW_B;
3708 fFsw |= X86_FSW_PE;
3709 if (uRoundingAdd)
3710 fFsw |= X86_FSW_C1;
3711 if (!(fFcw & X86_FCW_PM))
3712 fFsw |= X86_FSW_ES | X86_FSW_B;
3713
3714 pr64Dst->s64.fSign = fSignIn;
3715 if (uRoundingAdd)
3716 { /* Zero */
3717 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3718 pr64Dst->s64.uFraction = 0;
3719 }
3720 else
3721 { /* Max */
3722 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX - 1;
3723 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1;
3724 }
3725 return fFsw;
3726 }
3727
3728 /*
3729 * Normal or subnormal number.
3730 */
3731 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3732 uint64_t uMantissaOut = uMantissaIn;
3733 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3734 || (uMantissaIn & RT_BIT_32(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS))
3735 || fRoundedOff != uRoundingAdd)
3736 {
3737 uMantissaOut = uMantissaIn + uRoundingAdd;
3738 if (uMantissaOut >= uMantissaIn)
3739 { /* likely */ }
3740 else
3741 {
3742 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3743 iExponentOut++;
3744 Assert(iExponentOut < RTFLOAT64U_EXP_MAX); /* checked above */
3745 fFsw |= X86_FSW_C1;
3746 }
3747 }
3748 else
3749 uMantissaOut = uMantissaIn;
3750
3751 /* Truncate the mantissa and set the return value. */
3752 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS;
3753
3754 pr64Dst->s64.uFraction = uMantissaOut; /* Note! too big for bitfield if normal. */
3755 pr64Dst->s64.uExponent = iExponentOut;
3756 pr64Dst->s64.fSign = fSignIn;
3757
3758    /* Set status flags related to rounding. */
3759 if (fRoundedOff)
3760 {
3761 fFsw |= X86_FSW_PE;
3762 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS)))
3763 fFsw |= X86_FSW_C1;
3764 if (!(fFcw & X86_FCW_PM))
3765 fFsw |= X86_FSW_ES | X86_FSW_B;
3766 }
3767
3768 return fFsw;
3769}
3770
3771
3772/**
3773 * @note Exact same logic as iemAImpl_fst_r80_to_r32.
3774 */
3775IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3776 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
3777{
3778 uint16_t const fFcw = pFpuState->FCW;
3779 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3780 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3781 fFsw = iemAImpl_StoreNormalR80AsR64(pr80Src->s.fSign, pr80Src->s.uMantissa,
3782 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr64Dst);
3783 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3784 {
3785 pr64Dst->s64.fSign = pr80Src->s.fSign;
3786 pr64Dst->s64.uExponent = 0;
3787 pr64Dst->s64.uFraction = 0;
3788 Assert(RTFLOAT64U_IS_ZERO(pr64Dst));
3789 }
3790 else if (RTFLOAT80U_IS_INF(pr80Src))
3791 {
3792 pr64Dst->s64.fSign = pr80Src->s.fSign;
3793 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3794 pr64Dst->s64.uFraction = 0;
3795 Assert(RTFLOAT64U_IS_INF(pr64Dst));
3796 }
3797 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3798 {
3799 /* Mapped to +/-QNaN */
3800 pr64Dst->s64.fSign = pr80Src->s.fSign;
3801 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3802 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3803 }
3804 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3805 {
3806 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3807 if (fFcw & X86_FCW_IM)
3808 {
3809 pr64Dst->s64.fSign = 1;
3810 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3811 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3812 fFsw |= X86_FSW_IE;
3813 }
3814 else
3815        fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3816 }
3817 else if (RTFLOAT80U_IS_NAN(pr80Src))
3818 {
3819 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3820 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3821 {
3822 pr64Dst->s64.fSign = pr80Src->s.fSign;
3823 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3824 pr64Dst->s64.uFraction = pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3825 pr64Dst->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3826 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3827 fFsw |= X86_FSW_IE;
3828 }
3829 else
3830 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3831 }
3832 else
3833 {
3834        /* Denormal values cause both an underflow and a precision exception. */
3835 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3836 if (fFcw & X86_FCW_UM)
3837 {
3838 pr64Dst->s64.fSign = pr80Src->s.fSign;
3839 pr64Dst->s64.uExponent = 0;
3840 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3841 {
3842 pr64Dst->s64.uFraction = 1;
3843 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3844 if (!(fFcw & X86_FCW_PM))
3845 fFsw |= X86_FSW_ES | X86_FSW_B;
3846 }
3847 else
3848 {
3849 pr64Dst->s64.uFraction = 0;
3850 fFsw |= X86_FSW_UE | X86_FSW_PE;
3851 if (!(fFcw & X86_FCW_PM))
3852 fFsw |= X86_FSW_ES | X86_FSW_B;
3853 }
3854 }
3855 else
3856 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3857 }
3858 *pu16FSW = fFsw;
3859}
3860
3861
3862IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3863 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
3864{
3865 /*
3866 * FPU status word:
3867 * - TOP is irrelevant, but we must match x86 assembly version (0).
3868 * - C1 is always cleared as we don't have any stack overflows.
3869 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3870 */
3871 *pu16FSW = pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3); /* see iemAImpl_fld1 */
3872 *pr80Dst = *pr80Src;
3873}
3874
3875
3876/*
3877 *
3878 * Mantissa:
3879 * 63 56 48 40 32 24 16 8 0
3880 * v v v v v v v v v
3881 * 1[.]111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000
3882 * \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
3883 * Exp: 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60
3884 *
3885 * int64_t has the same width, only bit 63 is the sign bit. So, the max we can map over
3886 * are bits 1 thru 63, dropping off bit 0, with an exponent of 62. The number of bits we
3887 * drop off from the mantissa increases with decreasing exponent, till an exponent of 0
3888 * where we'll drop off all but bit 63.
3889 */
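/* Worked example (illustrative): 5.0 is stored with mantissa
 * 0xa000000000000000 and unbiased exponent 2, so cShiftOff = 63 - 2 = 61
 * below and uMantissa >> 61 = 5 with nothing rounded off. */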
3890#define EMIT_FIST(a_cBits, a_iType, a_iTypeMin, a_iTypeIndefinite) \
3891IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i ## a_cBits,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
3892 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
3893{ \
3894 uint16_t const fFcw = pFpuState->FCW; \
3895 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
3896 bool const fSignIn = pr80Val->s.fSign; \
3897 \
3898 /* \
3899 * Deal with normal numbers first. \
3900 */ \
3901 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
3902 { \
3903 uint64_t uMantissa = pr80Val->s.uMantissa; \
3904 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
3905 \
3906 if ((uint32_t)iExponent <= a_cBits - 2) \
3907 { \
3908 unsigned const cShiftOff = 63 - iExponent; \
3909 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
3910 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST \
3911 ? RT_BIT_64(cShiftOff - 1) \
3912 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP) \
3913 ? fRoundingOffMask \
3914 : 0; \
3915 uint64_t fRoundedOff = uMantissa & fRoundingOffMask; \
3916 \
3917 uMantissa >>= cShiftOff; \
3918 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff; \
3919 uMantissa += uRounding; \
3920 if (!(uMantissa & RT_BIT_64(a_cBits - 1))) \
3921 { \
3922 if (fRoundedOff) \
3923 { \
3924 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd) \
3925 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */ \
3926 else if (uRounding) \
3927 fFsw |= X86_FSW_C1; \
3928 fFsw |= X86_FSW_PE; \
3929 if (!(fFcw & X86_FCW_PM)) \
3930 fFsw |= X86_FSW_ES | X86_FSW_B; \
3931 } \
3932 \
3933 if (!fSignIn) \
3934 *piDst = (a_iType)uMantissa; \
3935 else \
3936 *piDst = -(a_iType)uMantissa; \
3937 } \
3938 else \
3939 { \
3940 /* overflowed after rounding. */ \
3941 AssertMsg(iExponent == a_cBits - 2 && uMantissa == RT_BIT_64(a_cBits - 1), \
3942 ("e=%d m=%#RX64 (org %#RX64) s=%d; shift=%d ro=%#RX64 rm=%#RX64 ra=%#RX64\n", iExponent, uMantissa, \
3943 pr80Val->s.uMantissa, fSignIn, cShiftOff, fRoundedOff, fRoundingOffMask, uRoundingAdd)); \
3944 \
3945 /* Special case for the integer minimum value. */ \
3946 if (fSignIn) \
3947 { \
3948 *piDst = a_iTypeMin; \
3949 fFsw |= X86_FSW_PE | X86_FSW_C1; \
3950 if (!(fFcw & X86_FCW_PM)) \
3951 fFsw |= X86_FSW_ES | X86_FSW_B; \
3952 } \
3953 else \
3954 { \
3955 fFsw |= X86_FSW_IE; \
3956 if (fFcw & X86_FCW_IM) \
3957 *piDst = a_iTypeMin; \
3958 else \
3959 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3960 } \
3961 } \
3962 } \
3963 /* \
3964 * Tiny sub-zero numbers. \
3965 */ \
3966 else if (iExponent < 0) \
3967 { \
3968 if (!fSignIn) \
3969 { \
3970 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3971 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3972 { \
3973 *piDst = 1; \
3974 fFsw |= X86_FSW_C1; \
3975 } \
3976 else \
3977 *piDst = 0; \
3978 } \
3979 else \
3980 { \
3981 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3982 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO \
3983 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3984 *piDst = 0; \
3985 else \
3986 { \
3987 *piDst = -1; \
3988 fFsw |= X86_FSW_C1; \
3989 } \
3990 } \
3991 fFsw |= X86_FSW_PE; \
3992 if (!(fFcw & X86_FCW_PM)) \
3993 fFsw |= X86_FSW_ES | X86_FSW_B; \
3994 } \
3995 /* \
3996 * Special MIN case. \
3997 */ \
3998 else if ( fSignIn && iExponent == a_cBits - 1 \
3999 && ( a_cBits < 64 && (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_DOWN \
4000 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
4001 : uMantissa == RT_BIT_64(63))) \
4002 { \
4003 *piDst = a_iTypeMin; \
4004 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4005 { \
4006 fFsw |= X86_FSW_PE; \
4007 if (!(fFcw & X86_FCW_PM)) \
4008 fFsw |= X86_FSW_ES | X86_FSW_B; \
4009 } \
4010 } \
4011 /* \
4012 * Too large/small number outside the target integer range. \
4013 */ \
4014 else \
4015 { \
4016 fFsw |= X86_FSW_IE; \
4017 if (fFcw & X86_FCW_IM) \
4018 *piDst = a_iTypeIndefinite; \
4019 else \
4020 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4021 } \
4022 } \
4023 /* \
4024 * Map both +0 and -0 to integer zero (signless/+). \
4025 */ \
4026 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
4027 *piDst = 0; \
4028 /* \
4029 * Denormals are just really tiny sub-zero numbers that are either rounded \
4030 * to zero, 1 or -1 depending on sign and rounding control. \
4031 */ \
4032 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4033 { \
4034 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)) \
4035 *piDst = 0; \
4036 else \
4037 { \
4038 *piDst = fSignIn ? -1 : 1; \
4039 fFsw |= X86_FSW_C1; \
4040 } \
4041 fFsw |= X86_FSW_PE; \
4042 if (!(fFcw & X86_FCW_PM)) \
4043 fFsw |= X86_FSW_ES | X86_FSW_B; \
4044 } \
4045 /* \
4046 * All other special values are considered invalid arguments and result \
4047 * in an IE exception and indefinite value if masked. \
4048 */ \
4049 else \
4050 { \
4051 fFsw |= X86_FSW_IE; \
4052 if (fFcw & X86_FCW_IM) \
4053 *piDst = a_iTypeIndefinite; \
4054 else \
4055 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4056 } \
4057 *pu16FSW = fFsw; \
4058}
4059EMIT_FIST(64, int64_t, INT64_MIN, X86_FPU_INT64_INDEFINITE)
4060EMIT_FIST(32, int32_t, INT32_MIN, X86_FPU_INT32_INDEFINITE)
4061EMIT_FIST(16, int16_t, INT16_MIN, X86_FPU_INT16_INDEFINITE)
4062
4063#endif /*IEM_WITHOUT_ASSEMBLY */
4064
4065
4066/*
4067 * The FISTT instruction was added with SSE3 and is a lot simpler than FIST.
4068 *
4069 * The 16-bit version is a bit peculiar, though, as it seems to be raising IE
4070 * as if it was the 32-bit version (i.e. starting with exp 31 instead of 15),
4071 * thus the @a a_cBitsIn.
4072 */
4073#define EMIT_FISTT(a_cBits, a_cBitsIn, a_iType, a_iTypeMin, a_iTypeMax, a_iTypeIndefinite, a_Suffix, a_fIntelVersion) \
4074IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_fistt_r80_to_i,a_cBits,a_Suffix),(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
4075 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
4076{ \
4077 uint16_t const fFcw = pFpuState->FCW; \
4078 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
4079 bool const fSignIn = pr80Val->s.fSign; \
4080 \
4081 /* \
4082 * Deal with normal numbers first. \
4083 */ \
4084 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
4085 { \
4086 uint64_t uMantissa = pr80Val->s.uMantissa; \
4087 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
4088 \
4089 if ((uint32_t)iExponent <= a_cBitsIn - 2) \
4090 { \
4091 unsigned const cShiftOff = 63 - iExponent; \
4092 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
4093 uint64_t const fRoundedOff = uMantissa & fRoundingOffMask; \
4094 uMantissa >>= cShiftOff; \
4095 /*Assert(!(uMantissa & RT_BIT_64(a_cBits - 1)));*/ \
4096 if (!fSignIn) \
4097 *piDst = (a_iType)uMantissa; \
4098 else \
4099 *piDst = -(a_iType)uMantissa; \
4100 \
4101 if (fRoundedOff) \
4102 { \
4103 fFsw |= X86_FSW_PE; \
4104 if (!(fFcw & X86_FCW_PM)) \
4105 fFsw |= X86_FSW_ES | X86_FSW_B; \
4106 } \
4107 } \
4108 /* \
4109 * Tiny sub-zero numbers. \
4110 */ \
4111 else if (iExponent < 0) \
4112 { \
4113 *piDst = 0; \
4114 fFsw |= X86_FSW_PE; \
4115 if (!(fFcw & X86_FCW_PM)) \
4116 fFsw |= X86_FSW_ES | X86_FSW_B; \
4117 } \
4118 /* \
4119 * Special MIN case. \
4120 */ \
4121 else if ( fSignIn && iExponent == a_cBits - 1 \
4122 && (a_cBits < 64 \
4123 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
4124 : uMantissa == RT_BIT_64(63)) ) \
4125 { \
4126 *piDst = a_iTypeMin; \
4127 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4128 { \
4129 fFsw |= X86_FSW_PE; \
4130 if (!(fFcw & X86_FCW_PM)) \
4131 fFsw |= X86_FSW_ES | X86_FSW_B; \
4132 } \
4133 } \
4134 /* \
4135     * Weirdness that still needs figuring out; the test below is currently disabled. \
4136 */ \
4137 else if (0 /* huh? gone? */ && a_cBits == 16 && fSignIn && iExponent == 31 && uMantissa < UINT64_C(0x8000100000000000) ) \
4138 { \
4139 *piDst = 0; \
4140 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4141 { \
4142 fFsw |= X86_FSW_PE; \
4143 if (!(fFcw & X86_FCW_PM)) \
4144 fFsw |= X86_FSW_ES | X86_FSW_B; \
4145 } \
4146 } \
4147 /* \
4148 * Too large/small number outside the target integer range. \
4149 */ \
4150 else \
4151 { \
4152 fFsw |= X86_FSW_IE; \
4153 if (fFcw & X86_FCW_IM) \
4154 *piDst = a_iTypeIndefinite; \
4155 else \
4156 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4157 } \
4158 } \
4159 /* \
4160 * Map both +0 and -0 to integer zero (signless/+). \
4161 */ \
4162 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
4163 *piDst = 0; \
4164 /* \
4165     * Denormals are just really tiny sub-zero numbers that are truncated to zero. \
4166 */ \
4167 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4168 { \
4169 *piDst = 0; \
4170 fFsw |= X86_FSW_PE; \
4171 if (!(fFcw & X86_FCW_PM)) \
4172 fFsw |= X86_FSW_ES | X86_FSW_B; \
4173 } \
4174 /* \
4175 * All other special values are considered invalid arguments and result \
4176 * in an IE exception and indefinite value if masked. \
4177 */ \
4178 else \
4179 { \
4180 fFsw |= X86_FSW_IE; \
4181 if (fFcw & X86_FCW_IM) \
4182 *piDst = a_iTypeIndefinite; \
4183 else \
4184 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4185 } \
4186 *pu16FSW = fFsw; \
4187}
4188#if defined(IEM_WITHOUT_ASSEMBLY)
4189EMIT_FISTT(64, 64, int64_t, INT64_MIN, INT64_MAX, X86_FPU_INT64_INDEFINITE, RT_NOTHING, 1)
4190EMIT_FISTT(32, 32, int32_t, INT32_MIN, INT32_MAX, X86_FPU_INT32_INDEFINITE, RT_NOTHING, 1)
4191EMIT_FISTT(16, 16, int16_t, INT16_MIN, INT16_MAX, X86_FPU_INT16_INDEFINITE, RT_NOTHING, 1)
4192#endif
4193EMIT_FISTT(16, 16, int16_t, INT16_MIN, INT16_MAX, X86_FPU_INT16_INDEFINITE, _intel, 1)
4194EMIT_FISTT(16, 16, int16_t, INT16_MIN, INT16_MAX, X86_FPU_INT16_INDEFINITE, _amd, 0)
4195
4196
4197#if defined(IEM_WITHOUT_ASSEMBLY)
4198
4199IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
4200 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
4201{
4202 /*static RTPBCD80U const s_ad80MaxMin[2] = { RTPBCD80U_INIT_MAX(), RTPBCD80U_INIT_MIN() };*/
4203 static RTPBCD80U const s_ad80Zeros[2] = { RTPBCD80U_INIT_ZERO(0), RTPBCD80U_INIT_ZERO(1) };
4204 static RTPBCD80U const s_ad80One[2] = { RTPBCD80U_INIT_C(0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1),
4205 RTPBCD80U_INIT_C(1, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1) };
4206 static RTPBCD80U const s_d80Indefinite = RTPBCD80U_INIT_INDEFINITE();
4207
4208 uint16_t const fFcw = pFpuState->FCW;
4209 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
4210 bool const fSignIn = pr80Src->s.fSign;
4211
4212 /*
4213 * Deal with normal numbers first.
4214 */
4215 if (RTFLOAT80U_IS_NORMAL(pr80Src))
4216 {
4217 uint64_t uMantissa = pr80Src->s.uMantissa;
4218 int32_t iExponent = (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS;
4219 if ( (uint32_t)iExponent <= 58
4220 || ((uint32_t)iExponent == 59 && uMantissa <= UINT64_C(0xde0b6b3a763fffff)) )
4221 {
4222 unsigned const cShiftOff = 63 - iExponent;
4223 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4224 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4225 ? RT_BIT_64(cShiftOff - 1)
4226 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4227 ? fRoundingOffMask
4228 : 0;
4229 uint64_t fRoundedOff = uMantissa & fRoundingOffMask;
4230
4231 uMantissa >>= cShiftOff;
4232 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff;
4233 uMantissa += uRounding;
4234 if (uMantissa <= (uint64_t)RTPBCD80U_MAX)
4235 {
4236 if (fRoundedOff)
4237 {
4238 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd)
4239 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */
4240 else if (uRounding)
4241 fFsw |= X86_FSW_C1;
4242 fFsw |= X86_FSW_PE;
4243 if (!(fFcw & X86_FCW_PM))
4244 fFsw |= X86_FSW_ES | X86_FSW_B;
4245 }
4246
4247 pd80Dst->s.fSign = fSignIn;
4248 pd80Dst->s.uPad = 0;
4249 for (size_t iPair = 0; iPair < RT_ELEMENTS(pd80Dst->s.abPairs); iPair++)
4250 {
4251 unsigned const uDigits = uMantissa % 100;
4252 uMantissa /= 100;
4253 uint8_t const bLo = uDigits % 10;
4254 uint8_t const bHi = uDigits / 10;
4255 pd80Dst->s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(bHi, bLo);
4256 }
4257 }
4258 else
4259 {
4260 /* overflowed after rounding. */
4261 fFsw |= X86_FSW_IE;
4262 if (fFcw & X86_FCW_IM)
4263 *pd80Dst = s_d80Indefinite;
4264 else
4265 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4266 }
4267 }
4268 /*
4269 * Tiny sub-zero numbers.
4270 */
4271 else if (iExponent < 0)
4272 {
4273 if (!fSignIn)
4274 {
4275 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4276 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4277 {
4278 *pd80Dst = s_ad80One[fSignIn];
4279 fFsw |= X86_FSW_C1;
4280 }
4281 else
4282 *pd80Dst = s_ad80Zeros[fSignIn];
4283 }
4284 else
4285 {
4286 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4287 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO
4288 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4289 *pd80Dst = s_ad80Zeros[fSignIn];
4290 else
4291 {
4292 *pd80Dst = s_ad80One[fSignIn];
4293 fFsw |= X86_FSW_C1;
4294 }
4295 }
4296 fFsw |= X86_FSW_PE;
4297 if (!(fFcw & X86_FCW_PM))
4298 fFsw |= X86_FSW_ES | X86_FSW_B;
4299 }
4300 /*
4301 * Too large/small number outside the target integer range.
4302 */
4303 else
4304 {
4305 fFsw |= X86_FSW_IE;
4306 if (fFcw & X86_FCW_IM)
4307 *pd80Dst = s_d80Indefinite;
4308 else
4309 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4310 }
4311 }
4312 /*
4313 * Map both +0 and -0 to integer zero (signless/+).
4314 */
4315 else if (RTFLOAT80U_IS_ZERO(pr80Src))
4316 *pd80Dst = s_ad80Zeros[fSignIn];
4317 /*
4318 * Denormals are just really tiny sub-zero numbers that are either rounded
4319 * to zero, 1 or -1 depending on sign and rounding control.
4320 */
4321 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src) || RTFLOAT80U_IS_DENORMAL(pr80Src))
4322 {
4323 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP))
4324 *pd80Dst = s_ad80Zeros[fSignIn];
4325 else
4326 {
4327 *pd80Dst = s_ad80One[fSignIn];
4328 fFsw |= X86_FSW_C1;
4329 }
4330 fFsw |= X86_FSW_PE;
4331 if (!(fFcw & X86_FCW_PM))
4332 fFsw |= X86_FSW_ES | X86_FSW_B;
4333 }
4334 /*
4335 * All other special values are considered invalid arguments and result
4336 * in an IE exception and indefinite value if masked.
4337 */
4338 else
4339 {
4340 fFsw |= X86_FSW_IE;
4341 if (fFcw & X86_FCW_IM)
4342 *pd80Dst = s_d80Indefinite;
4343 else
4344 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4345 }
4346 *pu16FSW = fFsw;
4347}
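
/* Worked example for the BCD packing loop above (illustrative): a rounded
 * uMantissa of 12345 packs as abPairs[0]=0x45, abPairs[1]=0x23,
 * abPairs[2]=0x01 and zero for the remaining pairs, with the sign flag in
 * the top byte. */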
4348
4349
4350/*********************************************************************************************************************************
4351* FPU Helpers *
4352*********************************************************************************************************************************/
4353AssertCompileSize(RTFLOAT128U, 16);
4354AssertCompileSize(RTFLOAT80U, 10);
4355AssertCompileSize(RTFLOAT64U, 8);
4356AssertCompileSize(RTFLOAT32U, 4);
4357
4358/**
4359 * Normalizes a possible pseudo-denormal value.
4360 *
4361 * Pseudo-denormal values are some oddities from the 8087 & 287 days. They are
4362 * denormals with the J-bit set, so they can simply be rewritten as 2**-16382,
4363 * i.e. changing uExponent from 0 to 1.
4364 *
4365 * This macro will declare a RTFLOAT80U with the name given by
4366 * @a a_r80ValNormalized and update the @a a_pr80Val variable to point to it if
4367 * a normalization was performed.
4368 *
4369 * @note This must be applied before calling SoftFloat with a value that could be
4370 * a pseudo-denormal, as SoftFloat doesn't handle pseudo-denormals
4371 * correctly.
4372 */
4373#define IEM_NORMALIZE_PSEUDO_DENORMAL(a_pr80Val, a_r80ValNormalized) \
4374 RTFLOAT80U a_r80ValNormalized; \
4375 if (RTFLOAT80U_IS_PSEUDO_DENORMAL(a_pr80Val)) \
4376 { \
4377 a_r80ValNormalized = *a_pr80Val; \
4378 a_r80ValNormalized.s.uExponent = 1; \
4379 a_pr80Val = &a_r80ValNormalized; \
4380 } else do {} while (0)
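
/* Usage sketch for the macro above (illustrative only; the input pointer and
 * the follow-up SoftFloat call are hypothetical stand-ins): */
#if 0
 PCRTFLOAT80U pr80Val = pr80Input; /* hypothetical input operand */
 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val, r80ValNormalized);
 /* pr80Val now points at either the original value or the normalized copy
    (uExponent = 1) and can safely be handed to SoftFloat. */
#endif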
4381
4382#ifdef IEM_WITH_FLOAT128_FOR_FPU
4383
4384DECLINLINE(int) iemFpuF128SetRounding(uint16_t fFcw)
4385{
4386 int fNew;
4387 switch (fFcw & X86_FCW_RC_MASK)
4388 {
4389 default:
4390 case X86_FCW_RC_NEAREST: fNew = FE_TONEAREST; break;
4391 case X86_FCW_RC_ZERO: fNew = FE_TOWARDZERO; break;
4392 case X86_FCW_RC_UP: fNew = FE_UPWARD; break;
4393 case X86_FCW_RC_DOWN: fNew = FE_DOWNWARD; break;
4394 }
4395 int fOld = fegetround();
4396 fesetround(fNew);
4397 return fOld;
4398}
4399
4400
4401DECLINLINE(void) iemFpuF128RestoreRounding(int fOld)
4402{
4403 fesetround(fOld);
4404}
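
/* Usage sketch (illustrative; rd128Val1 and rd128Val2 are hypothetical): the
 * pair above brackets a native _Float128 operation so that it honours the
 * guest rounding control from the FCW. */
# if 0
 int const fOldRounding = iemFpuF128SetRounding(pFpuState->FCW);
 _Float128 rd128Result = rd128Val1 * rd128Val2; /* rounded per X86_FCW_RC_MASK */
 iemFpuF128RestoreRounding(fOldRounding);
# endif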
4405
4406DECLINLINE(_Float128) iemFpuF128FromFloat80(PCRTFLOAT80U pr80Val, uint16_t fFcw)
4407{
4408 RT_NOREF(fFcw);
4409 RTFLOAT128U Tmp;
4410 Tmp.s2.uSignAndExponent = pr80Val->s2.uSignAndExponent;
4411    Tmp.s2.uFractionHigh = (uint16_t)((pr80Val->s2.uMantissa & (RT_BIT_64(63) - 1)) >> 47); /* The 63 fraction bits are left */
4412    Tmp.s2.uFractionMid  = (uint32_t)(pr80Val->s2.uMantissa >> 15);                         /* aligned in the 112-bit fraction (<< 49), */
4413    Tmp.s2.uFractionLow  = pr80Val->s2.uMantissa << 49;                                     /* matching the >> 49 in iemFpuF128ToFloat80. */
4414 if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
4415 {
4416 Assert(Tmp.s.uExponent == 0);
4417 Tmp.s2.uSignAndExponent++;
4418 }
4419 return *(_Float128 *)&Tmp;
4420}
4421
4422
4423DECLINLINE(uint16_t) iemFpuF128ToFloat80(PRTFLOAT80U pr80Dst, _Float128 rd128ValSrc, uint16_t fFcw, uint16_t fFsw)
4424{
4425 RT_NOREF(fFcw);
4426 RTFLOAT128U Tmp;
4427 *(_Float128 *)&Tmp = rd128ValSrc;
4428 ASMCompilerBarrier();
4429 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4430 {
4431 pr80Dst->s.fSign = Tmp.s64.fSign;
4432 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4433 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4434 | Tmp.s64.uFractionLo >> (64 - 15);
4435
4436 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4437 unsigned const cShiftOff = 64 - 15;
4438 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4439 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4440 if (uRoundedOff)
4441 {
4442 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4443 ? RT_BIT_64(cShiftOff - 1)
4444 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4445 ? fRoundingOffMask
4446 : 0;
4447 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4448 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4449 || uRoundedOff != uRoundingAdd)
4450 {
4451 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4452 {
4453 uFraction += 1;
4454 if (!(uFraction & RT_BIT_64(63)))
4455 { /* likely */ }
4456 else
4457 {
4458 uFraction >>= 1;
4459 pr80Dst->s.uExponent++;
4460                        if (pr80Dst->s.uExponent == RTFLOAT80U_EXP_MAX) /* 15-bit exponent here, not the 11-bit r64 one. */
4461 return fFsw;
4462 }
4463 fFsw |= X86_FSW_C1;
4464 }
4465 }
4466 fFsw |= X86_FSW_PE;
4467 if (!(fFcw & X86_FCW_PM))
4468 fFsw |= X86_FSW_ES | X86_FSW_B;
4469 }
4470 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4471 }
4472 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4473 {
4474 pr80Dst->s.fSign = Tmp.s64.fSign;
4475 pr80Dst->s.uExponent = 0;
4476 pr80Dst->s.uMantissa = 0;
4477 }
4478 else if (RTFLOAT128U_IS_INF(&Tmp))
4479 {
4480 pr80Dst->s.fSign = Tmp.s64.fSign;
4481        pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
4482        pr80Dst->s.uMantissa = RT_BIT_64(63); /* The J-bit must be set for an 80-bit infinity. */
4483 }
4484 return fFsw;
4485}
4486
4487
4488#else /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4489
4490/** Initializer for the SoftFloat state structure. */
4491# define IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(a_fFcw) \
4492 { \
4493 softfloat_tininess_afterRounding, \
4494 ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST ? (uint8_t)softfloat_round_near_even \
4495 : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_UP ? (uint8_t)softfloat_round_max \
4496 : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_DOWN ? (uint8_t)softfloat_round_min \
4497 : (uint8_t)softfloat_round_minMag, \
4498 0, \
4499 (uint8_t)((a_fFcw) & X86_FCW_XCPT_MASK), \
4500 ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_53 ? (uint8_t)64 \
4501 : ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_24 ? (uint8_t)32 : (uint8_t)80 \
4502 }
4503
4504/** Returns updated FSW from a SoftFloat state and exception mask (FCW). */
4505# define IEM_SOFTFLOAT_STATE_TO_FSW(a_fFsw, a_pSoftState, a_fFcw) \
4506 ( (a_fFsw) \
4507 | (uint16_t)(((a_pSoftState)->exceptionFlags & softfloat_flag_c1) << 2) \
4508 | ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) \
4509 | ( ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) & (~(a_fFcw) & X86_FSW_XCPT_MASK) \
4510 ? X86_FSW_ES | X86_FSW_B : 0) )
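
/* Usage sketch for the two macros above (illustrative; r128Val1 and r128Val2
 * are hypothetical): */
# if 0
 softfloat_state_t SoftState   = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
 float128_t        r128Result  = f128_mul(r128Val1, r128Val2, &SoftState);
 fFsw = IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
# endif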
4511
4512
4513DECLINLINE(float128_t) iemFpuSoftF128Precision(float128_t r128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4514{
4515 RT_NOREF(fFcw);
4516 Assert(cBits > 64);
4517# if 0 /* rounding does not seem to help */
4518 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4519 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4520 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4521 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4522 {
4523 uint64_t uOld = r128.v[0];
4524 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4525 if (r128.v[0] < uOld)
4526 r128.v[1] += 1;
4527 }
4528# else
4529 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4530# endif
4531 return r128;
4532}
4533
4534
4535DECLINLINE(float128_t) iemFpuSoftF128PrecisionIprt(PCRTFLOAT128U pr128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4536{
4537 RT_NOREF(fFcw);
4538 Assert(cBits > 64);
4539# if 0 /* rounding does not seem to help, not even on constants */
4540 float128_t r128 = { pr128->au64[0], pr128->au64[1] };
4541 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4542 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4543 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4544 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4545 {
4546 uint64_t uOld = r128.v[0];
4547 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4548 if (r128.v[0] < uOld)
4549 r128.v[1] += 1;
4550 }
4551 return r128;
4552# else
4553 float128_t r128 = { { pr128->au64[0] & ~(RT_BIT_64(1 + 112 - cBits) - 1), pr128->au64[1] } };
4554 return r128;
4555# endif
4556}
4557
4558
4559# if 0 /* unused */
4560DECLINLINE(float128_t) iemFpuSoftF128FromIprt(PCRTFLOAT128U pr128)
4561{
4562 float128_t r128 = { { pr128->au64[0], pr128->au64[1] } };
4563 return r128;
4564}
4565# endif
4566
4567
4568/** Converts a 80-bit floating point value to SoftFloat 128-bit floating point. */
4569DECLINLINE(float128_t) iemFpuSoftF128FromFloat80(PCRTFLOAT80U pr80Val)
4570{
4571 extFloat80_t Tmp;
4572 Tmp.signExp = pr80Val->s2.uSignAndExponent;
4573 Tmp.signif = pr80Val->s2.uMantissa;
4574 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
4575 return extF80_to_f128(Tmp, &Ignored);
4576}
4577
4578
4579/**
4580 * Converts from the packed IPRT 80-bit floating point (RTFLOAT80U) format to
4581 * the SoftFloat extended 80-bit floating point format (extFloat80_t).
4582 *
4583 * This is only a structure format conversion, nothing else.
4584 */
4585DECLINLINE(extFloat80_t) iemFpuSoftF80FromIprt(PCRTFLOAT80U pr80Val)
4586{
4587 extFloat80_t Tmp;
4588 Tmp.signExp = pr80Val->s2.uSignAndExponent;
4589 Tmp.signif = pr80Val->s2.uMantissa;
4590 return Tmp;
4591}
4592
4593
4594/**
4595 * Converts from SoftFloat extended 80-bit floating point format (extFloat80_t)
4596 * to the packed IPRT 80-bit floating point (RTFLOAT80U) format.
4597 *
4598 * This is only a structure format conversion, nothing else.
4599 */
4600DECLINLINE(PRTFLOAT80U) iemFpuSoftF80ToIprt(PRTFLOAT80U pr80Dst, extFloat80_t const r80XSrc)
4601{
4602 pr80Dst->s2.uSignAndExponent = r80XSrc.signExp;
4603 pr80Dst->s2.uMantissa = r80XSrc.signif;
4604 return pr80Dst;
4605}
4606
4607
4608DECLINLINE(uint16_t) iemFpuSoftF128ToFloat80(PRTFLOAT80U pr80Dst, float128_t r128Src, uint16_t fFcw, uint16_t fFsw)
4609{
4610 RT_NOREF(fFcw);
4611 RTFLOAT128U Tmp;
4612 *(float128_t *)&Tmp = r128Src;
4613 ASMCompilerBarrier();
4614
4615 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4616 {
4617 pr80Dst->s.fSign = Tmp.s64.fSign;
4618 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4619 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4620 | Tmp.s64.uFractionLo >> (64 - 15);
4621
4622 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4623 unsigned const cShiftOff = 64 - 15;
4624 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4625 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4626 if (uRoundedOff)
4627 {
4628 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4629 ? RT_BIT_64(cShiftOff - 1)
4630 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4631 ? fRoundingOffMask
4632 : 0;
4633 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4634 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4635 || uRoundedOff != uRoundingAdd)
4636 {
4637 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4638 {
4639 uFraction += 1;
4640 if (!(uFraction & RT_BIT_64(63)))
4641 { /* likely */ }
4642 else
4643 {
4644 uFraction >>= 1;
4645 pr80Dst->s.uExponent++;
4646                        if (pr80Dst->s.uExponent == RTFLOAT80U_EXP_MAX) /* 15-bit exponent here, not the 11-bit r64 one. */
4647 return fFsw;
4648 }
4649 fFsw |= X86_FSW_C1;
4650 }
4651 }
4652 fFsw |= X86_FSW_PE;
4653 if (!(fFcw & X86_FCW_PM))
4654 fFsw |= X86_FSW_ES | X86_FSW_B;
4655 }
4656
4657 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4658 }
4659 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4660 {
4661 pr80Dst->s.fSign = Tmp.s64.fSign;
4662 pr80Dst->s.uExponent = 0;
4663 pr80Dst->s.uMantissa = 0;
4664 }
4665 else if (RTFLOAT128U_IS_INF(&Tmp))
4666 {
4667 pr80Dst->s.fSign = Tmp.s64.fSign;
4668        pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
4669        pr80Dst->s.uMantissa = RT_BIT_64(63); /* The J-bit must be set for an 80-bit infinity. */
4670 }
4671 return fFsw;
4672}
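
/* Bit-budget note for the conversion above: an f128 fraction is 112 bits wide
   (48 bits in the high qword, 64 in the low), while RTFLOAT80U keeps 63
   fraction bits plus the explicit integer bit.  So 112 - 63 = 49 = cShiftOff
   bits fall off the bottom and are only accounted for by the rounding code. */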
4673
4674
4675/**
4676 * Helper for transferring exceptions and C1 to FSW and setting the result value
4677 * accordingly.
4678 *
4679 * @returns Updated FSW.
4680 * @param pSoftState The SoftFloat state following the operation.
4681 * @param r80XResult The result of the SoftFloat operation.
4682 * @param pr80Result Where to store the result for IEM.
4683 * @param fFcw The FPU control word.
4684 * @param fFsw The FSW before the operation, with necessary bits
4685 * cleared and such.
4686 * @param pr80XcptResult Alternative return value for use when an unmasked \#IE
4687 * is raised.
4688 */
4689DECLINLINE(uint16_t) iemFpuSoftStateAndF80ToFswAndIprtResult(softfloat_state_t const *pSoftState, extFloat80_t r80XResult,
4690 PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw,
4691 PCRTFLOAT80U pr80XcptResult)
4692{
4693 fFsw |= (pSoftState->exceptionFlags & X86_FSW_XCPT_MASK)
4694 | (uint16_t)((pSoftState->exceptionFlags & softfloat_flag_c1) << 2);
4695 if (fFsw & ~fFcw & X86_FSW_XCPT_MASK)
4696 fFsw |= X86_FSW_ES | X86_FSW_B;
4697
4698 if (!(fFsw & ~fFcw & (X86_FSW_IE | X86_FSW_DE)))
4699 iemFpuSoftF80ToIprt(pr80Result, r80XResult);
4700 else
4701 {
4702 fFsw &= ~(X86_FSW_OE | X86_FSW_UE | X86_FSW_PE | X86_FSW_ZE | X86_FSW_C1);
4703 *pr80Result = *pr80XcptResult;
4704 }
4705 return fFsw;
4706}
4707
4708
4709/**
4710 * Helper doing polynomial evaluation using Horner's method.
4711 *
4712 * See https://en.wikipedia.org/wiki/Horner%27s_method for details.
4713 */
4714float128_t iemFpuSoftF128HornerPoly(float128_t z, PCRTFLOAT128U g_par128HornerConsts, size_t cHornerConsts,
4715 unsigned cPrecision, softfloat_state_t *pSoftState)
4716{
4717 Assert(cHornerConsts > 1);
4718 size_t i = cHornerConsts - 1;
4719 float128_t r128Result = iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision);
4720 while (i-- > 0)
4721 {
4722 r128Result = iemFpuSoftF128Precision(f128_mul(r128Result, z, pSoftState), cPrecision);
4723 r128Result = f128_add(r128Result, iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision), pSoftState);
4724 r128Result = iemFpuSoftF128Precision(r128Result, cPrecision);
4725 }
4726 return r128Result;
4727}
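
/*
 * Illustration only (kept disabled like the unused helpers above): the same
 * Horner recurrence sketched with plain doubles, so the loop structure of
 * iemFpuSoftF128HornerPoly is easier to follow.  hornerPolyDemo is a made-up
 * name; it evaluates c[0] + c[1]*z + ... + c[cConsts-1]*z^(cConsts-1).
 */
# if 0
static double hornerPolyDemo(double z, const double *paConsts, size_t cConsts)
{
    size_t i       = cConsts - 1;
    double rResult = paConsts[i];            /* start at the highest coefficient */
    while (i-- > 0)
        rResult = rResult * z + paConsts[i]; /* one multiply and one add per coefficient */
    return rResult;
}
# endif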
4728
4729#endif /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4730
4731
4732/**
4733 * Composes a normalized and rounded RTFLOAT80U result from a 192-bit wide
4734 * mantissa, exponent and sign.
4735 *
4736 * @returns Updated FSW.
4737 * @param pr80Dst Where to return the composed value.
4738 * @param fSign The sign.
4739 * @param puMantissa The mantissa as a 256-bit type; the top 64 bits are
4740 * ignored and should be zero. This will probably be
4741 * modified during normalization and rounding.
4742 * @param iExponent Unbiased exponent.
4743 * @param fFcw The FPU control word.
4744 * @param fFsw The FPU status word.
4745 */
4746static uint16_t iemFpuFloat80RoundAndComposeFrom192(PRTFLOAT80U pr80Dst, bool fSign, PRTUINT256U puMantissa,
4747 int32_t iExponent, uint16_t fFcw, uint16_t fFsw)
4748{
4749 AssertStmt(puMantissa->QWords.qw3 == 0, puMantissa->QWords.qw3 = 0);
4750
4751 iExponent += RTFLOAT80U_EXP_BIAS;
4752
4753 /* Do normalization if necessary and possible. */
4754 if (!(puMantissa->QWords.qw2 & RT_BIT_64(63)))
4755 {
4756 int cShift = 192 - RTUInt256BitCount(puMantissa);
4757 if (iExponent > cShift)
4758 iExponent -= cShift;
4759 else
4760 {
4761 if (fFcw & X86_FCW_UM)
4762 {
4763 if (iExponent > 0)
4764 cShift = --iExponent;
4765 else
4766 cShift = 0;
4767 }
4768 iExponent -= cShift;
4769 }
4770 RTUInt256AssignShiftLeft(puMantissa, cShift);
4771 }
4772
4773 /* Do rounding. */
4774 uint64_t uMantissa = puMantissa->QWords.qw2;
4775 if (puMantissa->QWords.qw1 || puMantissa->QWords.qw0)
4776 {
4777 bool fAdd;
4778 switch (fFcw & X86_FCW_RC_MASK)
4779 {
4780            default: /* (for the simple-minded MSC which otherwise thinks fAdd would be used uninitialized) */
4781 case X86_FCW_RC_NEAREST:
4782 if (puMantissa->QWords.qw1 & RT_BIT_64(63))
4783 {
4784 if ( (uMantissa & 1)
4785 || puMantissa->QWords.qw0 != 0
4786 || puMantissa->QWords.qw1 != RT_BIT_64(63))
4787 {
4788 fAdd = true;
4789 break;
4790 }
4791 uMantissa &= ~(uint64_t)1;
4792 }
4793 fAdd = false;
4794 break;
4795 case X86_FCW_RC_ZERO:
4796 fAdd = false;
4797 break;
4798 case X86_FCW_RC_UP:
4799 fAdd = !fSign;
4800 break;
4801 case X86_FCW_RC_DOWN:
4802 fAdd = fSign;
4803 break;
4804 }
4805 if (fAdd)
4806 {
4807 uint64_t const uTmp = uMantissa;
4808 uMantissa = uTmp + 1;
4809 if (uMantissa < uTmp)
4810 {
4811 uMantissa >>= 1;
4812 uMantissa |= RT_BIT_64(63);
4813 iExponent++;
4814 }
4815 fFsw |= X86_FSW_C1;
4816 }
4817 fFsw |= X86_FSW_PE;
4818 if (!(fFcw & X86_FCW_PM))
4819 fFsw |= X86_FSW_ES | X86_FSW_B;
4820 }
4821
4822 /* Check for underflow (denormals). */
4823 if (iExponent <= 0)
4824 {
4825 if (fFcw & X86_FCW_UM)
4826 {
4827 if (uMantissa & RT_BIT_64(63))
4828 uMantissa >>= 1;
4829 iExponent = 0;
4830 }
4831 else
4832 {
4833 iExponent += RTFLOAT80U_EXP_BIAS_ADJUST;
4834 fFsw |= X86_FSW_ES | X86_FSW_B;
4835 }
4836 fFsw |= X86_FSW_UE;
4837 }
4838    /* Check for overflow - not expected to happen here, so only asserted. */
4839 else if (iExponent >= RTFLOAT80U_EXP_MAX)
4840 {
4841 Assert(iExponent < RTFLOAT80U_EXP_MAX);
4842 }
4843
4844 /* Compose the result. */
4845 pr80Dst->s.uMantissa = uMantissa;
4846 pr80Dst->s.uExponent = iExponent;
4847 pr80Dst->s.fSign = fSign;
4848 return fFsw;
4849}
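
/* Rounding example for the X86_FCW_RC_NEAREST case above: with
   qw2 = 0x8000000000000000, qw1 = RT_BIT_64(63) and qw0 = 0 the discarded
   bits are exactly halfway; since bit 0 of qw2 is already zero (even),
   nothing is added and only X86_FSW_PE is set - classic ties-to-even. */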
4850
4851
4852/**
4853 * See also iemAImpl_fld_r80_from_r32
4854 */
4855static uint16_t iemAImplConvertR32ToR80(PCRTFLOAT32U pr32Val, PRTFLOAT80U pr80Dst)
4856{
4857 uint16_t fFsw = 0;
4858 if (RTFLOAT32U_IS_NORMAL(pr32Val))
4859 {
4860 pr80Dst->sj64.fSign = pr32Val->s.fSign;
4861 pr80Dst->sj64.fInteger = 1;
4862 pr80Dst->sj64.uFraction = (uint64_t)pr32Val->s.uFraction
4863 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
4864 pr80Dst->sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
4865 Assert(RTFLOAT80U_IS_NORMAL(pr80Dst));
4866 }
4867 else if (RTFLOAT32U_IS_ZERO(pr32Val))
4868 {
4869 pr80Dst->s.fSign = pr32Val->s.fSign;
4870 pr80Dst->s.uExponent = 0;
4871 pr80Dst->s.uMantissa = 0;
4872 Assert(RTFLOAT80U_IS_ZERO(pr80Dst));
4873 }
4874 else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
4875 {
4876 /* Subnormal -> normalized + X86_FSW_DE return. */
4877 pr80Dst->sj64.fSign = pr32Val->s.fSign;
4878 pr80Dst->sj64.fInteger = 1;
4879 unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
4880 pr80Dst->sj64.uFraction = (uint64_t)pr32Val->s.uFraction
4881 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
4882 pr80Dst->sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
4883 fFsw = X86_FSW_DE;
4884 }
4885 else if (RTFLOAT32U_IS_INF(pr32Val))
4886 {
4887 pr80Dst->s.fSign = pr32Val->s.fSign;
4888 pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
4889 pr80Dst->s.uMantissa = RT_BIT_64(63);
4890 Assert(RTFLOAT80U_IS_INF(pr80Dst));
4891 }
4892 else
4893 {
4894 Assert(RTFLOAT32U_IS_NAN(pr32Val));
4895 pr80Dst->sj64.fSign = pr32Val->s.fSign;
4896 pr80Dst->sj64.uExponent = RTFLOAT80U_EXP_MAX;
4897 pr80Dst->sj64.fInteger = 1;
4898 pr80Dst->sj64.uFraction = (uint64_t)pr32Val->s.uFraction
4899 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
4900 Assert(RTFLOAT80U_IS_NAN(pr80Dst));
4901 Assert(RTFLOAT80U_IS_SIGNALLING_NAN(pr80Dst) == RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val));
4902 }
4903 return fFsw;
4904}
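
/* Note on the subnormal case above (the r64 variant below works the same):
   the leading set bit of the fraction is shifted up to the integer bit
   position, where it drops out of the 63-bit uFraction field and is
   represented by fInteger=1 instead, while the exponent is lowered by
   cExtraShift to compensate - i.e. the value is normalized and #DE raised. */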
4905
4906
4907/**
4908 * See also iemAImpl_fld_r80_from_r64
4909 */
4910static uint16_t iemAImplConvertR64ToR80(PCRTFLOAT64U pr64Val, PRTFLOAT80U pr80Dst)
4911{
4912 uint16_t fFsw = 0;
4913 if (RTFLOAT64U_IS_NORMAL(pr64Val))
4914 {
4915 pr80Dst->sj64.fSign = pr64Val->s.fSign;
4916 pr80Dst->sj64.fInteger = 1;
4917 pr80Dst->sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
4918 pr80Dst->sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
4919 Assert(RTFLOAT80U_IS_NORMAL(pr80Dst));
4920 }
4921 else if (RTFLOAT64U_IS_ZERO(pr64Val))
4922 {
4923 pr80Dst->s.fSign = pr64Val->s.fSign;
4924 pr80Dst->s.uExponent = 0;
4925 pr80Dst->s.uMantissa = 0;
4926 Assert(RTFLOAT80U_IS_ZERO(pr80Dst));
4927 }
4928 else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
4929 {
4930        /* Subnormal values get normalized. */
4931 pr80Dst->sj64.fSign = pr64Val->s.fSign;
4932 pr80Dst->sj64.fInteger = 1;
4933 unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
4934 pr80Dst->sj64.uFraction = pr64Val->s64.uFraction
4935 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
4936 pr80Dst->sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
4937 fFsw = X86_FSW_DE;
4938 }
4939 else if (RTFLOAT64U_IS_INF(pr64Val))
4940 {
4941 pr80Dst->s.fSign = pr64Val->s.fSign;
4942 pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
4943 pr80Dst->s.uMantissa = RT_BIT_64(63);
4944 Assert(RTFLOAT80U_IS_INF(pr80Dst));
4945 }
4946 else
4947 {
4948        /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
4949 Assert(RTFLOAT64U_IS_NAN(pr64Val));
4950 pr80Dst->sj64.fSign = pr64Val->s.fSign;
4951 pr80Dst->sj64.uExponent = RTFLOAT80U_EXP_MAX;
4952 pr80Dst->sj64.fInteger = 1;
4953 pr80Dst->sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
4954 Assert(RTFLOAT80U_IS_NAN(pr80Dst));
4955 Assert(RTFLOAT80U_IS_SIGNALLING_NAN(pr80Dst) == RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val));
4956 }
4957 return fFsw;
4958}
4959
4960
4961/**
4962 * See also EMIT_FILD.
4963 */
4964#define EMIT_CONVERT_IXX_TO_R80(a_cBits) \
4965static PRTFLOAT80U iemAImplConvertI ## a_cBits ## ToR80(int ## a_cBits ## _t iVal, PRTFLOAT80U pr80Dst) \
4966{ \
4967 if (iVal == 0) \
4968 { \
4969 pr80Dst->s.fSign = 0; \
4970 pr80Dst->s.uExponent = 0; \
4971 pr80Dst->s.uMantissa = 0; \
4972 } \
4973 else \
4974 { \
4975 if (iVal > 0) \
4976 pr80Dst->s.fSign = 0; \
4977 else \
4978 { \
4979 pr80Dst->s.fSign = 1; \
4980 iVal = -iVal; \
4981 } \
4982 unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
4983 pr80Dst->s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
4984 pr80Dst->s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
4985 } \
4986 return pr80Dst; \
4987}
4988EMIT_CONVERT_IXX_TO_R80(16)
4989EMIT_CONVERT_IXX_TO_R80(32)
4990//EMIT_CONVERT_IXX_TO_R80(64)
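
/* Worked example for the 16-bit variant: iVal = 6 (binary 110) gives
   cBits = 3, so uExponent = 2 + RTFLOAT80U_EXP_BIAS and
   uMantissa = 6 << 61 = 0xC000000000000000, i.e. 1.1b * 2^2 = 6. */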
4991
4992/** For implementing iemAImpl_fmul_r80_by_r64 and such. */
4993#define EMIT_R80_BY_R64(a_Name, a_fnR80ByR80, a_DenormalException) \
4994IEM_DECL_IMPL_DEF(void, a_Name,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2)) \
4995{ \
4996 RTFLOAT80U r80Val2; \
4997 uint16_t fFsw = iemAImplConvertR64ToR80(pr64Val2, &r80Val2); \
4998 Assert(!fFsw || fFsw == X86_FSW_DE); \
4999 if (fFsw) \
5000 { \
5001 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_NAN(pr80Val1) || (a_DenormalException)) \
5002 fFsw = 0; \
5003 else if (!(pFpuState->FCW & X86_FCW_DM)) \
5004 { \
5005 pFpuRes->r80Result = *pr80Val1; \
5006 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT) \
5007 | X86_FSW_DE | X86_FSW_ES | X86_FSW_B; \
5008 return; \
5009 } \
5010 } \
5011 a_fnR80ByR80(pFpuState, pFpuRes, pr80Val1, &r80Val2); \
5012 pFpuRes->FSW = (pFpuRes->FSW & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT) | fFsw; \
5013}
5014
5015/** For implementing iemAImpl_fmul_r80_by_r32 and such. */
5016#define EMIT_R80_BY_R32(a_Name, a_fnR80ByR80, a_DenormalException) \
5017IEM_DECL_IMPL_DEF(void, a_Name,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2)) \
5018{ \
5019 RTFLOAT80U r80Val2; \
5020 uint16_t fFsw = iemAImplConvertR32ToR80(pr32Val2, &r80Val2); \
5021 Assert(!fFsw || fFsw == X86_FSW_DE); \
5022 if (fFsw) \
5023 { \
5024 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_NAN(pr80Val1) || (a_DenormalException)) \
5025 fFsw = 0; \
5026 else if (!(pFpuState->FCW & X86_FCW_DM)) \
5027 { \
5028 pFpuRes->r80Result = *pr80Val1; \
5029 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT) \
5030 | X86_FSW_DE | X86_FSW_ES | X86_FSW_B; \
5031 return; \
5032 } \
5033 } \
5034 a_fnR80ByR80(pFpuState, pFpuRes, pr80Val1, &r80Val2); \
5035 pFpuRes->FSW = (pFpuRes->FSW & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT) | fFsw; \
5036}
5037
5038/** For implementing iemAImpl_fimul_r80_by_i32 and such. */
5039#define EMIT_R80_BY_I32(a_Name, a_fnR80ByR80) \
5040IEM_DECL_IMPL_DEF(void, a_Name,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2)) \
5041{ \
5042 RTFLOAT80U r80Val2; \
5043 a_fnR80ByR80(pFpuState, pFpuRes, pr80Val1, iemAImplConvertI32ToR80(*pi32Val2, &r80Val2)); \
5044 pFpuRes->FSW = (pFpuRes->FSW & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT); \
5045}
5046
5047/** For implementing iemAImpl_fimul_r80_by_i16 and such. */
5048#define EMIT_R80_BY_I16(a_Name, a_fnR80ByR80) \
5049IEM_DECL_IMPL_DEF(void, a_Name,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2)) \
5050{ \
5051 RTFLOAT80U r80Val2; \
5052 a_fnR80ByR80(pFpuState, pFpuRes, pr80Val1, iemAImplConvertI16ToR80(*pi16Val2, &r80Val2)); \
5053 pFpuRes->FSW = (pFpuRes->FSW & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT); \
5054}
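
/* Unlike the r32/r64 wrappers above, the integer wrappers cannot raise #DE:
   iemAImplConvertI16ToR80 and iemAImplConvertI32ToR80 always produce an
   exact zero or normal value, so there is no conversion FSW to merge here. */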
5055
5056
5057
5058/*********************************************************************************************************************************
5059* x87 FPU Division Operations *
5060*********************************************************************************************************************************/
5061
5062/** Worker for iemAImpl_fdiv_r80_by_r80 & iemAImpl_fdivr_r80_by_r80. */
5063static uint16_t iemAImpl_fdiv_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5064 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5065{
5066 if (!RTFLOAT80U_IS_ZERO(pr80Val2) || RTFLOAT80U_IS_NAN(pr80Val1) || RTFLOAT80U_IS_INF(pr80Val1))
5067 {
5068 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5069 extFloat80_t r80XResult = extF80_div(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5070 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5071 }
5072 if (!RTFLOAT80U_IS_ZERO(pr80Val1))
5073 { /* Div by zero. */
5074 if (fFcw & X86_FCW_ZM)
5075 *pr80Result = g_ar80Infinity[pr80Val1->s.fSign != pr80Val2->s.fSign];
5076 else
5077 {
5078 *pr80Result = *pr80Val1Org;
5079 fFsw |= X86_FSW_ES | X86_FSW_B;
5080 }
5081 fFsw |= X86_FSW_ZE;
5082 }
5083 else
5084 { /* Invalid operand */
5085 if (fFcw & X86_FCW_IM)
5086 *pr80Result = g_r80Indefinite;
5087 else
5088 {
5089 *pr80Result = *pr80Val1Org;
5090 fFsw |= X86_FSW_ES | X86_FSW_B;
5091 }
5092 fFsw |= X86_FSW_IE;
5093 }
5094 return fFsw;
5095}
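
/* Outcome summary for the worker above, assuming masked exceptions:
     finite non-zero / zero -> signed infinity (sign1 ^ sign2) + #ZE
     zero / zero            -> real indefinite QNaN + #IE
     everything else        -> handled by SoftFloat's extF80_div. */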
5096
5097
5098IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5099 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5100{
5101 uint16_t const fFcw = pFpuState->FCW;
5102 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5103
5104    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5105 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5106 {
5107 if (fFcw & X86_FCW_IM)
5108 pFpuRes->r80Result = g_r80Indefinite;
5109 else
5110 {
5111 pFpuRes->r80Result = *pr80Val1;
5112 fFsw |= X86_FSW_ES | X86_FSW_B;
5113 }
5114 fFsw |= X86_FSW_IE;
5115 }
5116    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
5117 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2) && !RTFLOAT80U_IS_ZERO(pr80Val2))
5118 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5119 {
5120 if (fFcw & X86_FCW_DM)
5121 {
5122 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5123 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5124 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5125 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5126 }
5127 else
5128 {
5129 pFpuRes->r80Result = *pr80Val1;
5130 fFsw |= X86_FSW_ES | X86_FSW_B;
5131 }
5132 fFsw |= X86_FSW_DE;
5133 }
5134 /* SoftFloat can handle the rest: */
5135 else
5136 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5137
5138 pFpuRes->FSW = fFsw;
5139}
5140
5141
5142EMIT_R80_BY_R64(iemAImpl_fdiv_r80_by_r64, iemAImpl_fdiv_r80_by_r80, 0)
5143EMIT_R80_BY_R32(iemAImpl_fdiv_r80_by_r32, iemAImpl_fdiv_r80_by_r80, 0)
5144EMIT_R80_BY_I32(iemAImpl_fidiv_r80_by_i32, iemAImpl_fdiv_r80_by_r80)
5145EMIT_R80_BY_I16(iemAImpl_fidiv_r80_by_i16, iemAImpl_fdiv_r80_by_r80)
5146
5147
5148IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5149 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5150{
5151 uint16_t const fFcw = pFpuState->FCW;
5152 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5153
5154    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5155 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5156 {
5157 if (fFcw & X86_FCW_IM)
5158 pFpuRes->r80Result = g_r80Indefinite;
5159 else
5160 {
5161 pFpuRes->r80Result = *pr80Val1;
5162 fFsw |= X86_FSW_ES | X86_FSW_B;
5163 }
5164 fFsw |= X86_FSW_IE;
5165 }
5166    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
5167 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5168 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_ZERO(pr80Val1)) )
5169 {
5170 if (fFcw & X86_FCW_DM)
5171 {
5172 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5173 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5174 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5175 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5176 }
5177 else
5178 {
5179 pFpuRes->r80Result = *pr80Val1;
5180 fFsw |= X86_FSW_ES | X86_FSW_B;
5181 }
5182 fFsw |= X86_FSW_DE;
5183 }
5184 /* SoftFloat can handle the rest: */
5185 else
5186 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5187
5188 pFpuRes->FSW = fFsw;
5189}
5190
5191
5192EMIT_R80_BY_R64(iemAImpl_fdivr_r80_by_r64, iemAImpl_fdivr_r80_by_r80, RTFLOAT80U_IS_ZERO(pr80Val1))
5193EMIT_R80_BY_R32(iemAImpl_fdivr_r80_by_r32, iemAImpl_fdivr_r80_by_r80, RTFLOAT80U_IS_ZERO(pr80Val1))
5194EMIT_R80_BY_I32(iemAImpl_fidivr_r80_by_i32, iemAImpl_fdivr_r80_by_r80)
5195EMIT_R80_BY_I16(iemAImpl_fidivr_r80_by_i16, iemAImpl_fdivr_r80_by_r80)
5196
5197
5198/** Worker for iemAImpl_fprem_r80_by_r80 & iemAImpl_fprem1_r80_by_r80. */
5199static uint16_t iemAImpl_fprem_fprem1_r80_by_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5200 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org, bool fLegacyInstr)
5201{
5202 if (!RTFLOAT80U_IS_ZERO(pr80Val2) || RTFLOAT80U_IS_NAN(pr80Val1) || RTFLOAT80U_IS_INF(pr80Val1))
5203 {
5204 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5205 uint16_t fCxFlags = 0;
5206 extFloat80_t r80XResult = extF80_partialRem(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2),
5207 fLegacyInstr ? softfloat_round_minMag : softfloat_round_near_even,
5208 &fCxFlags, &SoftState);
5209 Assert(!(fCxFlags & ~X86_FSW_C_MASK));
5210 fFsw = iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5211 if ( !(fFsw & X86_FSW_IE)
5212 && !RTFLOAT80U_IS_NAN(pr80Result)
5213 && !RTFLOAT80U_IS_INDEFINITE(pr80Result))
5214 {
5215 fFsw &= ~(uint16_t)X86_FSW_C_MASK;
5216 fFsw |= fCxFlags & X86_FSW_C_MASK;
5217 }
5218 return fFsw;
5219 }
5220
5221 /* Invalid operand */
5222 if (fFcw & X86_FCW_IM)
5223 *pr80Result = g_r80Indefinite;
5224 else
5225 {
5226 *pr80Result = *pr80Val1Org;
5227 fFsw |= X86_FSW_ES | X86_FSW_B;
5228 }
5229 return fFsw | X86_FSW_IE;
5230}
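
/* Background on the fCxFlags handling above: when the reduction is complete
   C2 is clear and C0/C3/C1 hold bits 2/1/0 of the quotient; when the operand
   exponents are too far apart, only a partial reduction is performed and C2
   is set, telling the guest to execute another FPREM/FPREM1 round. */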
5231
5232
5233static void iemAImpl_fprem_fprem1_r80_by_r80(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5234 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fLegacyInstr)
5235{
5236 uint16_t const fFcw = pFpuState->FCW;
5237 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 /*| X86_FSW_C2*/ | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5238
5239    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals.
5240       In addition, we'd like to handle zero ST(1) here as SoftFloat returns Inf instead
5241       of Indefinite.  (Note! There is no #Z, despite what the footnotes to tables 3-31
5242       and 3-32 for the FPREM & FPREM1 instructions in the Intel reference manual claim!) */
5243 if ( RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2)
5244 || (RTFLOAT80U_IS_ZERO(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_INDEFINITE(pr80Val1)))
5245 {
5246 if (fFcw & X86_FCW_IM)
5247 pFpuRes->r80Result = g_r80Indefinite;
5248 else
5249 {
5250 pFpuRes->r80Result = *pr80Val1;
5251 fFsw |= X86_FSW_ES | X86_FSW_B;
5252 }
5253 fFsw |= X86_FSW_IE;
5254 }
5255    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
5256 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2) && !RTFLOAT80U_IS_ZERO(pr80Val2))
5257 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_INF(pr80Val1)) )
5258 {
5259 if (fFcw & X86_FCW_DM)
5260 {
5261 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5262 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5263 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5264 fFsw = iemAImpl_fprem_fprem1_r80_by_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw,
5265 pr80Val1Org, fLegacyInstr);
5266 }
5267 else
5268 {
5269 pFpuRes->r80Result = *pr80Val1;
5270 fFsw |= X86_FSW_ES | X86_FSW_B;
5271 }
5272 fFsw |= X86_FSW_DE;
5273 }
5274 /* SoftFloat can handle the rest: */
5275 else
5276 fFsw = iemAImpl_fprem_fprem1_r80_by_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw,
5277 pr80Val1, fLegacyInstr);
5278
5279 pFpuRes->FSW = fFsw;
5280}
5281
5282
5283IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5284 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5285{
5286 iemAImpl_fprem_fprem1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2, true /*fLegacyInstr*/);
5287}
5288
5289
5290IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5291 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5292{
5293 iemAImpl_fprem_fprem1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2, false /*fLegacyInstr*/);
5294}
5295
5296
5297/*********************************************************************************************************************************
5298* x87 FPU Multiplication Operations *
5299*********************************************************************************************************************************/
5300
5301/** Worker for iemAImpl_fmul_r80_by_r80. */
5302static uint16_t iemAImpl_fmul_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5303 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5304{
5305 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5306 extFloat80_t r80XResult = extF80_mul(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5307 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5308}
5309
5310
5311IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5312 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5313{
5314 uint16_t const fFcw = pFpuState->FCW;
5315 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5316
5317    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5318 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5319 {
5320 if (fFcw & X86_FCW_IM)
5321 pFpuRes->r80Result = g_r80Indefinite;
5322 else
5323 {
5324 pFpuRes->r80Result = *pr80Val1;
5325 fFsw |= X86_FSW_ES | X86_FSW_B;
5326 }
5327 fFsw |= X86_FSW_IE;
5328 }
5329    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5330 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5331 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5332 {
5333 if (fFcw & X86_FCW_DM)
5334 {
5335 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5336 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5337 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5338 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5339 }
5340 else
5341 {
5342 pFpuRes->r80Result = *pr80Val1;
5343 fFsw |= X86_FSW_ES | X86_FSW_B;
5344 }
5345 fFsw |= X86_FSW_DE;
5346 }
5347 /* SoftFloat can handle the rest: */
5348 else
5349 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5350
5351 pFpuRes->FSW = fFsw;
5352}
5353
5354
5355EMIT_R80_BY_R64(iemAImpl_fmul_r80_by_r64, iemAImpl_fmul_r80_by_r80, 0)
5356EMIT_R80_BY_R32(iemAImpl_fmul_r80_by_r32, iemAImpl_fmul_r80_by_r80, 0)
5357EMIT_R80_BY_I32(iemAImpl_fimul_r80_by_i32, iemAImpl_fmul_r80_by_r80)
5358EMIT_R80_BY_I16(iemAImpl_fimul_r80_by_i16, iemAImpl_fmul_r80_by_r80)
5359
5360
5361/*********************************************************************************************************************************
5362* x87 FPU Addition *
5363*********************************************************************************************************************************/
5364
5365/** Worker for iemAImpl_fadd_r80_by_r80. */
5366static uint16_t iemAImpl_fadd_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5367 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5368{
5369 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5370 extFloat80_t r80XResult = extF80_add(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5371 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5372}
5373
5374
5375IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5376 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5377{
5378 uint16_t const fFcw = pFpuState->FCW;
5379 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5380
5381    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5382 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5383 {
5384 if (fFcw & X86_FCW_IM)
5385 pFpuRes->r80Result = g_r80Indefinite;
5386 else
5387 {
5388 pFpuRes->r80Result = *pr80Val1;
5389 fFsw |= X86_FSW_ES | X86_FSW_B;
5390 }
5391 fFsw |= X86_FSW_IE;
5392 }
5393    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5394 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5395 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5396 {
5397 if (fFcw & X86_FCW_DM)
5398 {
5399 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5400 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5401 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5402 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5403 }
5404 else
5405 {
5406 pFpuRes->r80Result = *pr80Val1;
5407 fFsw |= X86_FSW_ES | X86_FSW_B;
5408 }
5409 fFsw |= X86_FSW_DE;
5410 }
5411 /* SoftFloat can handle the rest: */
5412 else
5413 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5414
5415 pFpuRes->FSW = fFsw;
5416}
5417
5418
5419EMIT_R80_BY_R64(iemAImpl_fadd_r80_by_r64, iemAImpl_fadd_r80_by_r80, 0)
5420EMIT_R80_BY_R32(iemAImpl_fadd_r80_by_r32, iemAImpl_fadd_r80_by_r80, 0)
5421EMIT_R80_BY_I32(iemAImpl_fiadd_r80_by_i32, iemAImpl_fadd_r80_by_r80)
5422EMIT_R80_BY_I16(iemAImpl_fiadd_r80_by_i16, iemAImpl_fadd_r80_by_r80)
5423
5424
5425/*********************************************************************************************************************************
5426* x87 FPU Subtraction *
5427*********************************************************************************************************************************/
5428
5429/** Worker for iemAImpl_fsub_r80_by_r80 and iemAImpl_fsubr_r80_by_r80. */
5430static uint16_t iemAImpl_fsub_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5431 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5432{
5433 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5434 extFloat80_t r80XResult = extF80_sub(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5435 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5436}
5437
5438
5439IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5440 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5441{
5442 uint16_t const fFcw = pFpuState->FCW;
5443 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5444
5445    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5446 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5447 {
5448 if (fFcw & X86_FCW_IM)
5449 pFpuRes->r80Result = g_r80Indefinite;
5450 else
5451 {
5452 pFpuRes->r80Result = *pr80Val1;
5453 fFsw |= X86_FSW_ES | X86_FSW_B;
5454 }
5455 fFsw |= X86_FSW_IE;
5456 }
5457    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5458 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5459 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5460 {
5461 if (fFcw & X86_FCW_DM)
5462 {
5463 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5464 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5465 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5466 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5467 }
5468 else
5469 {
5470 pFpuRes->r80Result = *pr80Val1;
5471 fFsw |= X86_FSW_ES | X86_FSW_B;
5472 }
5473 fFsw |= X86_FSW_DE;
5474 }
5475 /* SoftFloat can handle the rest: */
5476 else
5477 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5478
5479 pFpuRes->FSW = fFsw;
5480}
5481
5482
5483EMIT_R80_BY_R64(iemAImpl_fsub_r80_by_r64, iemAImpl_fsub_r80_by_r80, 0)
5484EMIT_R80_BY_R32(iemAImpl_fsub_r80_by_r32, iemAImpl_fsub_r80_by_r80, 0)
5485EMIT_R80_BY_I32(iemAImpl_fisub_r80_by_i32, iemAImpl_fsub_r80_by_r80)
5486EMIT_R80_BY_I16(iemAImpl_fisub_r80_by_i16, iemAImpl_fsub_r80_by_r80)
5487
5488
5489/* Same as iemAImpl_fsub_r80_by_r80, but with input operands switched. */
5490IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5491 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5492{
5493 uint16_t const fFcw = pFpuState->FCW;
5494 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5495
5496    /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5497 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5498 {
5499 if (fFcw & X86_FCW_IM)
5500 pFpuRes->r80Result = g_r80Indefinite;
5501 else
5502 {
5503 pFpuRes->r80Result = *pr80Val1;
5504 fFsw |= X86_FSW_ES | X86_FSW_B;
5505 }
5506 fFsw |= X86_FSW_IE;
5507 }
5508    /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5509 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5510 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5511 {
5512 if (fFcw & X86_FCW_DM)
5513 {
5514 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5515 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5516 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5517 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5518 }
5519 else
5520 {
5521 pFpuRes->r80Result = *pr80Val1;
5522 fFsw |= X86_FSW_ES | X86_FSW_B;
5523 }
5524 fFsw |= X86_FSW_DE;
5525 }
5526 /* SoftFloat can handle the rest: */
5527 else
5528 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5529
5530 pFpuRes->FSW = fFsw;
5531}
5532
5533
5534EMIT_R80_BY_R64(iemAImpl_fsubr_r80_by_r64, iemAImpl_fsubr_r80_by_r80, 0)
5535EMIT_R80_BY_R32(iemAImpl_fsubr_r80_by_r32, iemAImpl_fsubr_r80_by_r80, 0)
5536EMIT_R80_BY_I32(iemAImpl_fisubr_r80_by_i32, iemAImpl_fsubr_r80_by_r80)
5537EMIT_R80_BY_I16(iemAImpl_fisubr_r80_by_i16, iemAImpl_fsubr_r80_by_r80)
5538
5539
5540/*********************************************************************************************************************************
5541* x87 FPU Trigonometric Operations *
5542*********************************************************************************************************************************/
5543
5544
5545IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5546 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5547{
5548 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5549 AssertReleaseFailed();
5550}
5551
5552#endif /* IEM_WITHOUT_ASSEMBLY */
5553
5554IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5555 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5556{
5557 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5558}
5559
5560IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5561 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5562{
5563 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5564}
5565
5566
5567#if defined(IEM_WITHOUT_ASSEMBLY)
5568IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5569{
5570 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5571 AssertReleaseFailed();
5572}
5573#endif /* IEM_WITHOUT_ASSEMBLY */
5574
5575IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5576{
5577 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5578}
5579
5580IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5581{
5582 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5583}
5584
5585
5586#ifdef IEM_WITHOUT_ASSEMBLY
5587IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5588{
5589 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5590 AssertReleaseFailed();
5591}
5592#endif /* IEM_WITHOUT_ASSEMBLY */
5593
5594IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5595{
5596 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5597}
5598
5599IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5600{
5601 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5602}
5603
5604#ifdef IEM_WITHOUT_ASSEMBLY
5605IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5606{
5607 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5608 AssertReleaseFailed();
5609}
5610#endif /* IEM_WITHOUT_ASSEMBLY */
5611
5612IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5613{
5614 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5615}
5616
5617IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5618{
5619 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5620}
5621
5622
5623#ifdef IEM_WITHOUT_ASSEMBLY
5624IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5625{
5626 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5627 AssertReleaseFailed();
5628}
5629#endif /* IEM_WITHOUT_ASSEMBLY */
5630
5631IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5632{
5633 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5634}
5635
5636IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5637{
5638 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5639}
5640
5641#ifdef IEM_WITHOUT_ASSEMBLY
5642
5643
5644/*********************************************************************************************************************************
5645* x87 FPU Compare and Testing Operations *
5646*********************************************************************************************************************************/
5647
5648IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5649{
5650 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5651
5652 if (RTFLOAT80U_IS_ZERO(pr80Val))
5653 fFsw |= X86_FSW_C3;
5654 else if (RTFLOAT80U_IS_NORMAL(pr80Val) || RTFLOAT80U_IS_INF(pr80Val))
5655 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 : 0;
5656 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5657 {
5658 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 | X86_FSW_DE : X86_FSW_DE;
5659 if (!(pFpuState->FCW & X86_FCW_DM))
5660 fFsw |= X86_FSW_ES | X86_FSW_B;
5661 }
5662 else
5663 {
5664 fFsw |= X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
5665 if (!(pFpuState->FCW & X86_FCW_IM))
5666 fFsw |= X86_FSW_ES | X86_FSW_B;
5667 }
5668
5669 *pu16Fsw = fFsw;
5670}
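
/* FTST condition code summary: ST(0) > 0 -> C0/C2/C3 all clear;
   ST(0) < 0 -> C0; ST(0) = 0 -> C3; denormals compare like normals but
   add #DE; NaNs, indefinites and 387 invalids -> C0|C2|C3 plus #IE. */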
5671
5672
5673IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5674{
5676 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5677
5678    /* C1 = sign bit (always, even for empty registers according to Intel). */
5679 if (pr80Val->s.fSign)
5680 fFsw |= X86_FSW_C1;
5681
5682 /* Classify the value in C0, C2, C3. */
5683 if (!(pFpuState->FTW & RT_BIT_32(X86_FSW_TOP_GET(pFpuState->FSW))))
5684 fFsw |= X86_FSW_C0 | X86_FSW_C3; /* empty */
5685 else if (RTFLOAT80U_IS_NORMAL(pr80Val))
5686 fFsw |= X86_FSW_C2;
5687 else if (RTFLOAT80U_IS_ZERO(pr80Val))
5688 fFsw |= X86_FSW_C3;
5689 else if (RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(pr80Val))
5690 fFsw |= X86_FSW_C0;
5691 else if (RTFLOAT80U_IS_INF(pr80Val))
5692 fFsw |= X86_FSW_C0 | X86_FSW_C2;
5693 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5694 fFsw |= X86_FSW_C2 | X86_FSW_C3;
5695 /* whatever else: 0 */
5696
5697 *pu16Fsw = fFsw;
5698}
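
/* Resulting C3.C2.C0 classification (with C1 = sign):
     000 = unsupported, 001 = NaN, 010 = normal, 011 = infinity,
     100 = zero, 101 = empty, 110 = denormal. */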
5699
5700
5701/**
5702 * Worker for fcom, fucom, and friends.
5703 */
5704static uint16_t iemAImpl_fcom_r80_by_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2,
5705 uint16_t fFcw, uint16_t fFsw, bool fIeOnAllNaNs)
5706{
5707 /*
5708 * Unpack the values.
5709 */
5710 bool const fSign1 = pr80Val1->s.fSign;
5711 int32_t iExponent1 = pr80Val1->s.uExponent;
5712 uint64_t uMantissa1 = pr80Val1->s.uMantissa;
5713
5714 bool const fSign2 = pr80Val2->s.fSign;
5715 int32_t iExponent2 = pr80Val2->s.uExponent;
5716 uint64_t uMantissa2 = pr80Val2->s.uMantissa;
5717
5718 /*
5719 * Check for invalid inputs.
5720 */
5721 if ( RTFLOAT80U_IS_387_INVALID_EX(uMantissa1, iExponent1)
5722 || RTFLOAT80U_IS_387_INVALID_EX(uMantissa2, iExponent2))
5723 {
5724 if (!(fFcw & X86_FCW_IM))
5725 fFsw |= X86_FSW_ES | X86_FSW_B;
5726 return fFsw | X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
5727 }
5728
5729 /*
5730     * Check for NaNs and indefinites; they are all unordered and trump #DE.
5731 */
5732 if ( RTFLOAT80U_IS_INDEFINITE_OR_QUIET_OR_SIGNALLING_NAN_EX(uMantissa1, iExponent1)
5733 || RTFLOAT80U_IS_INDEFINITE_OR_QUIET_OR_SIGNALLING_NAN_EX(uMantissa2, iExponent2))
5734 {
5735 if ( fIeOnAllNaNs
5736 || RTFLOAT80U_IS_SIGNALLING_NAN_EX(uMantissa1, iExponent1)
5737 || RTFLOAT80U_IS_SIGNALLING_NAN_EX(uMantissa2, iExponent2))
5738 {
5739 fFsw |= X86_FSW_IE;
5740 if (!(fFcw & X86_FCW_IM))
5741 fFsw |= X86_FSW_ES | X86_FSW_B;
5742 }
5743 return fFsw | X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3;
5744 }
5745
5746 /*
5747 * Normalize the values.
5748 */
5749 if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL_EX(uMantissa1, iExponent1))
5750 {
5751 if (RTFLOAT80U_IS_PSEUDO_DENORMAL_EX(uMantissa1, iExponent1))
5752 iExponent1 = 1;
5753 else
5754 {
5755 iExponent1 = 64 - ASMBitLastSetU64(uMantissa1);
5756 uMantissa1 <<= iExponent1;
5757 iExponent1 = 1 - iExponent1;
5758 }
5759 fFsw |= X86_FSW_DE;
5760 if (!(fFcw & X86_FCW_DM))
5761 fFsw |= X86_FSW_ES | X86_FSW_B;
5762 }
5763
5764 if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL_EX(uMantissa2, iExponent2))
5765 {
5766 if (RTFLOAT80U_IS_PSEUDO_DENORMAL_EX(uMantissa2, iExponent2))
5767 iExponent2 = 1;
5768 else
5769 {
5770 iExponent2 = 64 - ASMBitLastSetU64(uMantissa2);
5771 uMantissa2 <<= iExponent2;
5772 iExponent2 = 1 - iExponent2;
5773 }
5774 fFsw |= X86_FSW_DE;
5775 if (!(fFcw & X86_FCW_DM))
5776 fFsw |= X86_FSW_ES | X86_FSW_B;
5777 }
5778
5779 /*
5780 * Test if equal (val1 == val2):
5781 */
5782 if ( uMantissa1 == uMantissa2
5783 && iExponent1 == iExponent2
5784 && ( fSign1 == fSign2
5785 || (uMantissa1 == 0 && iExponent1 == 0) /* ignore sign for zero */ ) )
5786 fFsw |= X86_FSW_C3;
5787 /*
5788 * Test if less than (val1 < val2):
5789 */
5790 else if (fSign1 && !fSign2)
5791 fFsw |= X86_FSW_C0;
5792 else if (fSign1 == fSign2)
5793 {
5794        /* Zeros are problematic; however, at most one can be zero here. */
5795 if (RTFLOAT80U_IS_ZERO_EX(uMantissa1, iExponent1))
5796 return !fSign1 ? fFsw | X86_FSW_C0 : fFsw;
5797 if (RTFLOAT80U_IS_ZERO_EX(uMantissa2, iExponent2))
5798 return fSign1 ? fFsw | X86_FSW_C0 : fFsw;
5799
5800 if ( fSign1
5801 ^ ( iExponent1 < iExponent2
5802 || ( iExponent1 == iExponent2
5803 && uMantissa1 < uMantissa2 ) ) )
5804 fFsw |= X86_FSW_C0;
5805 }
5806 /* else: No flags set if greater. */
5807
5808 return fFsw;
5809}
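
/* Condition code summary for the worker above:
     ST(0) > SRC -> C0/C2/C3 all clear
     ST(0) < SRC -> C0
     ST(0) = SRC -> C3 (the sign of zeros is ignored)
     unordered   -> C0|C2|C3 (NaNs, indefinites and 387 invalids). */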
5810
5811
5812IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5813 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5814{
5815 *pfFsw = iemAImpl_fcom_r80_by_r80_worker(pr80Val1, pr80Val2, pFpuState->FCW, 6 << X86_FSW_TOP_SHIFT, true /*fIeOnAllNaNs*/);
5816}
5817
5818
5819
5820
5821IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5822 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5823{
5824 *pfFsw = iemAImpl_fcom_r80_by_r80_worker(pr80Val1, pr80Val2, pFpuState->FCW, 6 << X86_FSW_TOP_SHIFT, false /*fIeOnAllNaNs*/);
5825}
5826
5827
5828IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5829 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5830{
5831 RTFLOAT80U r80Val2;
5832 uint16_t fFsw = iemAImplConvertR64ToR80(pr64Val2, &r80Val2);
5833 Assert(!fFsw || fFsw == X86_FSW_DE);
5834 *pfFsw = iemAImpl_fcom_r80_by_r80_worker(pr80Val1, &r80Val2, pFpuState->FCW, 7 << X86_FSW_TOP_SHIFT, true /*fIeOnAllNaNs*/);
5835 if (fFsw != 0 && !(*pfFsw & X86_FSW_IE))
5836 {
5837 if (!(pFpuState->FCW & X86_FCW_DM))
5838 fFsw |= X86_FSW_ES | X86_FSW_B;
5839 *pfFsw |= fFsw;
5840 }
5841}
5842
5843
5844IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5845 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5846{
5847 RTFLOAT80U r80Val2;
5848 uint16_t fFsw = iemAImplConvertR32ToR80(pr32Val2, &r80Val2);
5849 Assert(!fFsw || fFsw == X86_FSW_DE);
5850 *pfFsw = iemAImpl_fcom_r80_by_r80_worker(pr80Val1, &r80Val2, pFpuState->FCW, 7 << X86_FSW_TOP_SHIFT, true /*fIeOnAllNaNs*/);
5851 if (fFsw != 0 && !(*pfFsw & X86_FSW_IE))
5852 {
5853 if (!(pFpuState->FCW & X86_FCW_DM))
5854 fFsw |= X86_FSW_ES | X86_FSW_B;
5855 *pfFsw |= fFsw;
5856 }
5857}
5858
5859
5860IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5861 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5862{
5863 RTFLOAT80U r80Val2;
5864 iemAImpl_fcom_r80_by_r80(pFpuState, pfFsw, pr80Val1, iemAImplConvertI32ToR80(*pi32Val2, &r80Val2));
5865 *pfFsw = (*pfFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
5866}
5867
5868
5869IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5870 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5871{
5872 RTFLOAT80U r80Val2;
5873 iemAImpl_fcom_r80_by_r80(pFpuState, pfFsw, pr80Val1, iemAImplConvertI16ToR80(*pi16Val2, &r80Val2));
5874 *pfFsw = (*pfFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
5875}
5876
5877
5878/**
5879 * Worker for fcomi & fucomi.
5880 */
5881static uint32_t iemAImpl_fcomi_r80_by_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2,
5882 uint16_t fFcw, uint16_t fFswIn, bool fIeOnAllNaNs, uint16_t *pfFsw)
5883{
5884 uint16_t fFsw = iemAImpl_fcom_r80_by_r80_worker(pr80Val1, pr80Val2, fFcw, 6 << X86_FSW_TOP_SHIFT, fIeOnAllNaNs);
5885 uint32_t fEflags = ((fFsw & X86_FSW_C3) >> (X86_FSW_C3_BIT - X86_EFL_ZF_BIT))
5886 | ((fFsw & X86_FSW_C2) >> (X86_FSW_C2_BIT - X86_EFL_PF_BIT))
5887 | ((fFsw & X86_FSW_C0) >> (X86_FSW_C0_BIT - X86_EFL_CF_BIT));
5888
5889    /* Note! Contrary to the docs, C1 is not cleared; all condition codes are preserved. */
5890 *pfFsw = (fFsw & ~X86_FSW_C_MASK) | (fFswIn & X86_FSW_C_MASK);
5891 return fEflags | X86_EFL_IF | X86_EFL_RA1_MASK;
5892}
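
/* The shifts above implement the documented condition-code-to-EFLAGS mapping
   for FCOMI/FUCOMI: ZF <- C3, PF <- C2, CF <- C0; OF/SF/AF come out as zero
   in the value returned here. */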
5893
5894
5895IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5896 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5897{
5898 return iemAImpl_fcomi_r80_by_r80_worker(pr80Val1, pr80Val2, pFpuState->FCW, pFpuState->FSW, true /*fIeOnAllNaNs*/, pfFsw);
5899}
5900
5901
5902IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pfFsw,
5903 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5904{
5905 return iemAImpl_fcomi_r80_by_r80_worker(pr80Val1, pr80Val2, pFpuState->FCW, pFpuState->FSW, false /*fIeOnAllNaNs*/, pfFsw);
5906}
5907
5908
5909/*********************************************************************************************************************************
5910* x87 FPU Other Operations *
5911*********************************************************************************************************************************/
5912
5913/**
5914 * Helper for iemAImpl_frndint_r80, called both on normal and denormal numbers.
5915 */
5916static uint16_t iemAImpl_frndint_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5917{
5918 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5919 iemFpuSoftF80ToIprt(pr80Result, extF80_roundToInt(iemFpuSoftF80FromIprt(pr80Val), SoftState.roundingMode,
5920 true /*exact / generate #PE */, &SoftState));
5921 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5922}
5923
5924
5925IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5926{
5927 uint16_t const fFcw = pFpuState->FCW;
5928 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5929
5930 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5931 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5932 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5933 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5934 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
5935 || RTFLOAT80U_IS_INF(pr80Val))
5936 pFpuRes->r80Result = *pr80Val;
5937 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5938 {
5939 fFsw |= X86_FSW_DE;
5940 if (fFcw & X86_FCW_DM)
5941 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5942 else
5943 {
5944 pFpuRes->r80Result = *pr80Val;
5945 fFsw |= X86_FSW_ES | X86_FSW_B;
5946 }
5947 }
5948 else
5949 {
5950 if (fFcw & X86_FCW_IM)
5951 {
5952 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
5953 pFpuRes->r80Result = g_r80Indefinite;
5954 else
5955 {
5956 pFpuRes->r80Result = *pr80Val;
5957 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5958 }
5959 }
5960 else
5961 {
5962 pFpuRes->r80Result = *pr80Val;
5963 fFsw |= X86_FSW_ES | X86_FSW_B;
5964 }
5965 fFsw |= X86_FSW_IE;
5966 }
5967 pFpuRes->FSW = fFsw;
5968}
5969
5970
5971IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5972 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5973{
5974 /* The SoftFloat worker function extF80_scale_extF80 is of our creation, so
5975 it does everything we need it to do. */
5976 uint16_t const fFcw = pFpuState->FCW;
5977 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5978 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5979 extFloat80_t r80XResult = extF80_scale_extF80(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5980 pFpuRes->FSW = iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5981}
5982
5983
5984/**
5985 * Helper for iemAImpl_fsqrt_r80, called both on normal and denormal numbers.
5986 */
5987static uint16_t iemAImpl_fsqrt_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5988{
5989 Assert(!pr80Val->s.fSign);
5990 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5991 iemFpuSoftF80ToIprt(pr80Result, extF80_sqrt(iemFpuSoftF80FromIprt(pr80Val), &SoftState));
5992 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5993}
5994
5995
5996IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5997{
5998 uint16_t const fFcw = pFpuState->FCW;
5999 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
6000
6001 if (RTFLOAT80U_IS_NORMAL(pr80Val) && !pr80Val->s.fSign)
6002 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
6003 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
6004 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
6005 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
6006 || (RTFLOAT80U_IS_INF(pr80Val) && !pr80Val->s.fSign))
6007 pFpuRes->r80Result = *pr80Val;
6008 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val) && !pr80Val->s.fSign) /* Negative denormals only generate #IE! */
6009 {
6010 fFsw |= X86_FSW_DE;
6011 if (fFcw & X86_FCW_DM)
6012 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
6013 else
6014 {
6015 pFpuRes->r80Result = *pr80Val;
6016 fFsw |= X86_FSW_ES | X86_FSW_B;
6017 }
6018 }
6019 else
6020 {
6021 if (fFcw & X86_FCW_IM)
6022 {
6023 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
6024 pFpuRes->r80Result = g_r80Indefinite;
6025 else
6026 {
6027 pFpuRes->r80Result = *pr80Val;
6028 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
6029 }
6030 }
6031 else
6032 {
6033 pFpuRes->r80Result = *pr80Val;
6034 fFsw |= X86_FSW_ES | X86_FSW_B;
6035 }
6036 fFsw |= X86_FSW_IE;
6037 }
6038 pFpuRes->FSW = fFsw;
6039}
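
/* Note that sqrt(-0.0) takes the zero path above and thus returns -0.0 as
   required, while negative normals, negative infinity and negative denormals
   all end up on the #IE path. */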
6040
6041
6042/**
6043 * @code{.unparsed}
6044 *          x           x * ln2
6045 * f(x) = 2   - 1  =  e         - 1
6046 *
6047 * @endcode
6048 *
6049 * We can approximate e^x by a Taylor/Maclaurin series (see
6050 * https://en.wikipedia.org/wiki/Taylor_series#Exponential_function):
6051 * @code{.unparsed}
6052 *          n       0     1     2     3     4
6053 *  inf    x       x     x     x     x     x
6054 *  SUM  ----- =  --- + --- + --- + --- + --- + ...
6055 *  n=0    n!      0!    1!    2!    3!    4!
6056 *
6057 *                       2     3     4
6058 *                      x     x     x
6059 *      = 1 + x  +     --- + --- + --- + ...
6060 *                      2!    3!    4!
6061 * @endcode
6062 *
6063 * Given z = x * ln2, we get:
6064 * @code{.unparsed}
6065 *               2     3     4           n
6066 *  z           z     z     z           z
6067 * e  - 1 = z + --- + --- + --- + ... + ---
6068 *               2!    3!    4!          n!
6069 * @endcode
6070 *
6071 * Wanting to use Horner's method, we move one z outside and get:
6072 * @code{.unparsed}
6073 *                  2     3          (n-1)
6074 *            z    z     z          z
6075 * = z ( 1 + --- + --- + --- + ... + ------- )
6076 *            2!    3!    4!          n!
6077 * @endcode
6078 *
6079 * The constants we need for using Horner's method are 1 and 1 / n!.
6080 *
6081 * For very tiny x values, we can get away with f(x) = x * ln 2, because
6082 * we don't have the necessary precision to represent 1.0 + z/2 + ... and
6083 * can approximate it as 1.0. For a visual demonstration of this, check
6084 * out https://www.desmos.com/calculator/vidcdxizd9 (for as long as it
6085 * remains valid), plotting f(x) = 2^x - 1 and f(x) = x * ln2.
6086 *
6087 *
6088 * As far as constant accuracy goes, figure 0.1 "80387 Block Diagram" in the "80387
6089 * Data Sheet" (order 231920-002; Appendix E in 80387 PRM 231917-001; Military
6090 * i387SX 271166-002), indicates that constants are 67-bit (constant ROM block)
6091 * and the internal mantissa size is 68-bit (mantissa adder & barrel shifter
6092 * blocks). (The one bit difference is probably an implicit one missing from
6093 * the constant ROM.) A paper on division and sqrt on the AMD-K7 by Stuart F.
6094 * Oberman states that it internally used a 68-bit mantissa with an 18-bit
6095 * exponent.
6096 *
6097 * However, even when sticking to 67-bit constants / 68-bit mantissas, I have
6098 * not yet successfully reproduced the exact results from an Intel 10980XE;
6099 * there is always a portion of rounding differences. Not going to spend too
6100 * much time on getting this 100% the same, at least not now.
6101 *
6102 * P.S. If someone is really curious about the 8087 and its constants:
6103 * http://www.righto.com/2020/05/extracting-rom-constants-from-8087-math.html
6104 *
6105 *
6106 * @param pr80Val The exponent value (x), less than 1.0, greater than
6107 * -1.0 and not zero. This can be a normal, denormal
6108 * or pseudo-denormal value.
6109 * @param pr80Result Where to return the result.
6110 * @param fFcw FPU control word.
6111 * @param fFsw FPU status word.
6112 */
6113static uint16_t iemAImpl_f2xm1_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
6114{
6115 /* As mentioned above, we can skip the expensive polynomial calculation
6116 as it will be close enough to 1.0 that it makes no difference.
6117
6118       The cutoff point for the Intel 10980XE is exponents >= -69. Intel
6119 also seems to be using a 67-bit or 68-bit constant value, and we get
6120 a smattering of rounding differences if we go for higher precision. */
6121 if (pr80Val->s.uExponent <= RTFLOAT80U_EXP_BIAS - 69)
6122 {
6123 RTUINT256U u256;
6124 RTUInt128MulByU64Ex(&u256, &g_u128Ln2MantissaIntel, pr80Val->s.uMantissa);
6125 u256.QWords.qw0 |= 1; /* force #PE */
6126 fFsw = iemFpuFloat80RoundAndComposeFrom192(pr80Result, pr80Val->s.fSign, &u256,
6127 !RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) && !RTFLOAT80U_IS_DENORMAL(pr80Val)
6128 ? (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS
6129 : 1 - RTFLOAT80U_EXP_BIAS,
6130 fFcw, fFsw);
6131 }
6132 else
6133 {
6134#ifdef IEM_WITH_FLOAT128_FOR_FPU
6135    /* This approach is not good enough for small values; we end up with zero. */
6136 int const fOldRounding = iemFpuF128SetRounding(fFcw);
6137 _Float128 rd128Val = iemFpuF128FromFloat80(pr80Val, fFcw);
6138 _Float128 rd128Result = powf128(2.0L, rd128Val);
6139 rd128Result -= 1.0L;
6140 fFsw = iemFpuF128ToFloat80(pr80Result, rd128Result, fFcw, fFsw);
6141 iemFpuF128RestoreRounding(fOldRounding);
6142
6143# else
6144 softfloat_state_t SoftState = SOFTFLOAT_STATE_INIT_DEFAULTS();
6145 float128_t const x = iemFpuSoftF128FromFloat80(pr80Val);
6146
6147 /* As mentioned above, enforce 68-bit internal mantissa width to better
6148 match the Intel 10980XE results. */
6149 unsigned const cPrecision = 68;
6150
6151 /* first calculate z = x * ln2 */
6152 float128_t z = iemFpuSoftF128Precision(f128_mul(x, iemFpuSoftF128PrecisionIprt(&g_r128Ln2, cPrecision), &SoftState),
6153 cPrecision);
6154
6155 /* Then do the polynomial evaluation. */
6156 float128_t r = iemFpuSoftF128HornerPoly(z, g_ar128F2xm1HornerConsts, RT_ELEMENTS(g_ar128F2xm1HornerConsts),
6157 cPrecision, &SoftState);
6158 r = f128_mul(z, r, &SoftState);
6159
6160 /* Output the result. */
6161 fFsw = iemFpuSoftF128ToFloat80(pr80Result, r, fFcw, fFsw);
6162# endif
6163 }
6164 return fFsw;
6165}


IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    uint16_t const fFcw = pFpuState->FCW;
    uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);

    if (RTFLOAT80U_IS_NORMAL(pr80Val))
    {
        if (pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS)
            fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
        else
        {
            /* Special case:
                   2^+1.0 - 1.0 = 1.0
                   2^-1.0 - 1.0 = -0.5 */
            if (   pr80Val->s.uExponent == RTFLOAT80U_EXP_BIAS
                && pr80Val->s.uMantissa == RT_BIT_64(63))
            {
                pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
                pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_BIAS - pr80Val->s.fSign;
                pFpuRes->r80Result.s.fSign     = pr80Val->s.fSign;
            }
            /* ST(0) > 1.0 || ST(0) < -1.0: undefined behavior */
            /** @todo 287 is documented to only accept values 0 <= ST(0) <= 0.5. */
            else
                pFpuRes->r80Result = *pr80Val;
            fFsw |= X86_FSW_PE;
            if (!(fFcw & X86_FCW_PM))
                fFsw |= X86_FSW_ES | X86_FSW_B;
        }
    }
    else if (   RTFLOAT80U_IS_ZERO(pr80Val)
             || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
             || RTFLOAT80U_IS_INDEFINITE(pr80Val))
        pFpuRes->r80Result = *pr80Val;
    else if (RTFLOAT80U_IS_INF(pr80Val))
        pFpuRes->r80Result = pr80Val->s.fSign ? g_ar80One[1] : *pr80Val;
    else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
    {
        fFsw |= X86_FSW_DE;
        if (fFcw & X86_FCW_DM)
            fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
        else
        {
            pFpuRes->r80Result = *pr80Val;
            fFsw |= X86_FSW_ES | X86_FSW_B;
        }
    }
    else
    {
        if (   (   RTFLOAT80U_IS_UNNORMAL(pr80Val)
                || RTFLOAT80U_IS_PSEUDO_NAN(pr80Val))
            && (fFcw & X86_FCW_IM))
            pFpuRes->r80Result = g_r80Indefinite;
        else
        {
            pFpuRes->r80Result = *pr80Val;
            if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val) && (fFcw & X86_FCW_IM))
                pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
        }
        fFsw |= X86_FSW_IE;
        if (!(fFcw & X86_FCW_IM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }
    pFpuRes->FSW = fFsw;
}
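
/* A minimal usage sketch (illustrative only, never compiled): how a test
   harness might drive the worker above to compute f2xm1(0.5) = sqrt(2) - 1
   ~= 0.41421.  The FCW value 0x037f (everything masked, 64-bit precision,
   round to nearest, i.e. the x87 power-up control word) and the helper name
   are assumptions of this sketch, not definitions from this file. */
#if 0
static void f2xm1UsageSketch(void)
{
    X86FXSTATE   FpuState;
    IEMFPURESULT Res;
    RTFLOAT80U   Val;
    RT_ZERO(FpuState);

    FpuState.FCW    = 0x037f;                  /* assumed power-up control word */
    Val.s.fSign     = 0;
    Val.s.uExponent = RTFLOAT80U_EXP_BIAS - 1; /* unbiased exponent -1 */
    Val.s.uMantissa = RT_BIT_64(63);           /* 1.0 * 2^-1 = 0.5 */

    iemAImpl_f2xm1_r80(&FpuState, &Res, &Val);
    /* Res.r80Result should now hold ~0.41421356, with #PE set in Res.FSW. */
}
#endif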

#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
}

#ifdef IEM_WITHOUT_ASSEMBLY

IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
    pFpuRes->r80Result = *pr80Val;
    pFpuRes->r80Result.s.fSign = 0;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
    pFpuRes->r80Result = *pr80Val;
    pFpuRes->r80Result.s.fSign = !pr80Val->s.fSign;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    uint16_t const fFcw = pFpuState->FCW;
    uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);

    if (RTFLOAT80U_IS_NORMAL(pr80Val))
    {
        softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
        iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80((int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS, &Ignored));

        pFpuResTwo->r80Result2.s.fSign     = pr80Val->s.fSign;
        pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
        pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
    }
    else if (RTFLOAT80U_IS_ZERO(pr80Val))
    {
        fFsw |= X86_FSW_ZE;
        if (fFcw & X86_FCW_ZM)
        {
            pFpuResTwo->r80Result1 = g_ar80Infinity[1];
            pFpuResTwo->r80Result2 = *pr80Val;
        }
        else
        {
            pFpuResTwo->r80Result2 = *pr80Val;
            fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
        }
    }
    else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
    {
        fFsw |= X86_FSW_DE;
        if (fFcw & X86_FCW_DM)
        {
            pFpuResTwo->r80Result2.s.fSign     = pr80Val->s.fSign;
            pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
            pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
            int32_t iExponent = -16382;
            while (!(pFpuResTwo->r80Result2.s.uMantissa & RT_BIT_64(63)))
            {
                pFpuResTwo->r80Result2.s.uMantissa <<= 1;
                iExponent--;
            }

            softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
            iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80(iExponent, &Ignored));
        }
        else
        {
            pFpuResTwo->r80Result2 = *pr80Val;
            fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
        }
    }
    else if (   RTFLOAT80U_IS_QUIET_NAN(pr80Val)
             || RTFLOAT80U_IS_INDEFINITE(pr80Val))
    {
        pFpuResTwo->r80Result1 = *pr80Val;
        pFpuResTwo->r80Result2 = *pr80Val;
    }
    else if (RTFLOAT80U_IS_INF(pr80Val))
    {
        pFpuResTwo->r80Result1 = g_ar80Infinity[0];
        pFpuResTwo->r80Result2 = *pr80Val;
    }
    else
    {
        if (fFcw & X86_FCW_IM)
        {
            if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
                pFpuResTwo->r80Result1 = g_r80Indefinite;
            else
            {
                pFpuResTwo->r80Result1 = *pr80Val;
                pFpuResTwo->r80Result1.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
            }
            pFpuResTwo->r80Result2 = pFpuResTwo->r80Result1;
        }
        else
        {
            pFpuResTwo->r80Result2 = *pr80Val;
            fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
        }
        fFsw |= X86_FSW_IE;
    }
    pFpuResTwo->FSW = fFsw;
}
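
/* For intuition: for a normal input fxtract satisfies
   value = r80Result2 * 2^r80Result1, with the significand r80Result2
   normalized into [1.0, 2.0).  E.g. 6.5 = 1.625 * 2^2 gives an exponent
   result of 2.0 and a significand result of 1.625. */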


IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                         PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                       PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}

#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                     PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                           PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                         PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}


/*********************************************************************************************************************************
*   MMX, SSE & AVX                                                                                                               *
*********************************************************************************************************************************/

IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[0];
    puDst->au32[1] = puSrc->au32[0];
    puDst->au32[2] = puSrc->au32[2];
    puDst->au32[3] = puSrc->au32[2];
}
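
/* Lane mapping above: {f0,f1,f2,f3} -> {f0,f0,f2,f2}; each even
   single-precision lane is duplicated into the odd lane above it. */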

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1]       = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2]       = pSrc->au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3]       = pSrc->au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
}

#endif /* IEM_WITH_VEX */


IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[1];
    puDst->au32[1] = puSrc->au32[1];
    puDst->au32[2] = puSrc->au32[3];
    puDst->au32[3] = puSrc->au32[3];
}
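
/* Lane mapping above: {f0,f1,f2,f3} -> {f1,f1,f3,f3}; each odd lane is
   duplicated into the even lane below it. */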


IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
{
    RT_NOREF(pFpuState);
    puDst->au64[0] = uSrc;
    puDst->au64[1] = uSrc;
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
}

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0]       = pSrc->au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1]       = pSrc->au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
}

#endif /* IEM_WITH_VEX */

#ifdef IEM_WITHOUT_ASSEMBLY

IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
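
/* The pcmpeqX, pxor, pmovmskb, pshufX and punpckX workers in this block are
   still unimplemented placeholders that assert; real builds supply them in
   assembly.  Purely as an illustration (an assumed shape, not code that is
   built), a portable 128-bit pcmpeqb could be a per-byte compare that
   writes all-ones on a match: */
#if 0
IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128_sketch,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState);
    for (unsigned i = 0; i < RT_ELEMENTS(pu128Dst->au8); i++)
        pu128Dst->au8[i] = pu128Dst->au8[i] == pu128Src->au8[i] ? 0xff : 0;
}
#endif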


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
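
/* Illustration only (assumed shape, not built): pxor is a plain 128-bit
   exclusive-or of destination and source: */
#if 0
    pu128Dst->au64[0] ^= pu128Src->au64[0];
    pu128Dst->au64[1] ^= pu128Src->au64[1];
#endif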


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu128Src);
    AssertReleaseFailed();
}
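
/* Illustration only (assumed shape, not built): pmovmskb gathers the most
   significant bit of each source byte into the low bits of the destination: */
#if 0
    uint64_t fMask = 0;
    for (unsigned i = 0; i < RT_ELEMENTS(pu128Src->au8); i++)
        fMask |= (uint64_t)(pu128Src->au8[i] >> 7) << i; /* bit i := sign bit of byte i */
    *pu64Dst = fMask;
#endif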


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}
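
/* Illustration only (assumed shape, not built): pshufd picks each destination
   dword via a 2-bit index from the immediate, dst[i] = src[(bEvil >> (i * 2)) & 3];
   the local copy lets destination and source overlap: */
#if 0
    RTUINT128U const uSrc = *pu128Src;
    for (unsigned i = 0; i < RT_ELEMENTS(pu128Dst->au32); i++)
        pu128Dst->au32[i] = uSrc.au32[(bEvil >> (i * 2)) & 3];
#endif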

/* PUNPCKHxxx */
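/* These interleave the high halves of the two operands; e.g. punpckhbw on
   {a0..a7} and {b0..b7} yields {a4,b4,a5,b5,a6,b6,a7,b7}. */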

IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

/* PUNPCKLxxx */
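/* These interleave the low halves, reading only the low half of the source
   (note the narrower source operand types below); e.g. punpcklbw on
   {a0..a7} and {b0..b7} yields {a0,b0,a1,b1,a2,b2,a3,b3}. */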

IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */