1/* $Id: IEMAllAImplC.cpp 94640 2022-04-19 21:24:15Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <iprt/errcore.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27#include <iprt/uint256.h>
28
29RT_C_DECLS_BEGIN
30#include <softfloat.h>
31RT_C_DECLS_END
32
33
34/*********************************************************************************************************************************
35* Defined Constants And Macros *
36*********************************************************************************************************************************/
37/** @def IEM_WITHOUT_ASSEMBLY
38 * Enables all the code in this file.
39 */
40#if !defined(IEM_WITHOUT_ASSEMBLY)
41# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
42# define IEM_WITHOUT_ASSEMBLY
43# endif
44#endif
45/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
46#ifdef IEM_WITH_ASSEMBLY
47# undef IEM_WITHOUT_ASSEMBLY
48#endif
49
50/**
51 * Calculates the sign flag value given a result and its bit width.
52 *
53 * The sign flag (SF) is a duplication of the most significant bit in the
54 * result.
55 *
56 * @returns X86_EFL_SF or 0.
57 * @param a_uResult Unsigned result value.
58 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
59 */
60#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
61 ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
62
63/**
64 * Calculates the zero flag value given a result.
65 *
66 * The zero flag (ZF) indicates whether the result is zero or not.
67 *
68 * @returns X86_EFL_ZF or 0.
69 * @param a_uResult Unsigned result value.
70 */
71#define X86_EFL_CALC_ZF(a_uResult) \
72 ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )
73
74/**
75 * Extracts the OF flag from an OF calculation result.
76 *
77 * These are typically used by concatenating with a bit count. The catch is
78 * that 8-bit values need shifting in the opposite direction from the others.
79 */
80#define X86_EFL_GET_OF_8(a_uValue) (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
81#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
82#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
83#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
84
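/* Illustrative sketch (not part of the original source) of what the flag
   helper macros above produce for sample 8-bit results:
   @code
        uint32_t fSf = X86_EFL_CALC_SF(UINT8_C(0x80), 8); // sign bit set    -> X86_EFL_SF
        uint32_t fZf = X86_EFL_CALC_ZF(UINT8_C(0x00));    // zero result     -> X86_EFL_ZF
        uint32_t fOf = X86_EFL_GET_OF_8(UINT8_C(0x80));   // bit 7 -> bit 11 -> X86_EFL_OF
   @endcode */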
85/**
86 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
87 *
88 * @returns Status bits.
89 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
90 * @param a_uResult Unsigned result value.
91 * @param a_uDst The original destination value (for AF calc).
92 * @param a_uSrc The source value (for AF calc).
93 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
94 * @param a_CfExpr Bool expression for the carry flag (CF).
95 * @param a_uSrcOf The a_uSrc value to use for overflow calculation.
96 */
97#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
98 do { \
99 uint32_t fEflTmp = *(a_pfEFlags); \
100 fEflTmp &= ~X86_EFL_STATUS_BITS; \
101 fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
102 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
103 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
104 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
105 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
106 \
107 /* Overflow during ADDition happens when both inputs have the same sign \
108 bit value and the result has a different sign bit value. \
109 \
110 Since subtraction can be rewritten as addition: 2 - 1 == 2 + -1, it \
111 follows that for SUBtraction the sign bit value must differ between \
112 the two inputs and the result's sign bit must differ from the first input. \
113 Note! Must xor with sign bit to convert, not do (0 - a_uSrc). \
114 \
115 See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
116 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ( ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
117 & RT_BIT_64(a_cBitsWidth - 1)) \
118 & ((a_uResult) ^ (a_uDst)) ); \
119 *(a_pfEFlags) = fEflTmp; \
120 } while (0)
121
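/* Worked 8-bit example for the OF expression above (illustrative, not part of
   the original source): 0x7f + 0x01 = 0x80 overflows because both inputs have
   a clear sign bit while the result has it set.
   @code
        // a_uDst=0x7f, a_uSrcOf=0x01, a_uResult=0x80, a_cBitsWidth=8:
        //   ~(0x7f ^ 0x01) & RT_BIT_64(7) = 0x80   - the input sign bits agree
        //   0x80 & (0x80 ^ 0x7f)          = 0x80   - the result sign differs from a_uDst
        //   X86_EFL_GET_OF_8(0x80)        = X86_EFL_OF
   @endcode */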
122/**
123 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
124 *
125 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
126 * undefined. We do not set AF, as that seems to make the most sense (which
127 * probably makes it the most wrong in real life).
128 *
129 * @returns Status bits.
130 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
131 * @param a_uResult Unsigned result value.
132 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
133 * @param a_fExtra Additional bits to set.
134 */
135#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
136 do { \
137 uint32_t fEflTmp = *(a_pfEFlags); \
138 fEflTmp &= ~X86_EFL_STATUS_BITS; \
139 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
140 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
141 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
142 fEflTmp |= (a_fExtra); \
143 *(a_pfEFlags) = fEflTmp; \
144 } while (0)
145
146
147/*********************************************************************************************************************************
148* Global Variables *
149*********************************************************************************************************************************/
150/**
151 * Parity calculation table.
152 *
153 * This is also used by iemAllAImpl.asm.
154 *
155 * The generator code:
156 * @code
157 * #include <stdio.h>
158 *
159 * int main()
160 * {
161 * unsigned b;
162 * for (b = 0; b < 256; b++)
163 * {
164 * int cOnes = ( b & 1)
165 * + ((b >> 1) & 1)
166 * + ((b >> 2) & 1)
167 * + ((b >> 3) & 1)
168 * + ((b >> 4) & 1)
169 * + ((b >> 5) & 1)
170 * + ((b >> 6) & 1)
171 * + ((b >> 7) & 1);
172 * printf(" /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
173 * b,
174 * (b >> 7) & 1,
175 * (b >> 6) & 1,
176 * (b >> 5) & 1,
177 * (b >> 4) & 1,
178 * (b >> 3) & 1,
179 * (b >> 2) & 1,
180 * (b >> 1) & 1,
181 * b & 1,
182 * cOnes & 1 ? "0" : "X86_EFL_PF");
183 * }
184 * return 0;
185 * }
186 * @endcode
187 */
188uint8_t const g_afParity[256] =
189{
190 /* 0x00 = 00000000b */ X86_EFL_PF,
191 /* 0x01 = 00000001b */ 0,
192 /* 0x02 = 00000010b */ 0,
193 /* 0x03 = 00000011b */ X86_EFL_PF,
194 /* 0x04 = 00000100b */ 0,
195 /* 0x05 = 00000101b */ X86_EFL_PF,
196 /* 0x06 = 00000110b */ X86_EFL_PF,
197 /* 0x07 = 00000111b */ 0,
198 /* 0x08 = 00001000b */ 0,
199 /* 0x09 = 00001001b */ X86_EFL_PF,
200 /* 0x0a = 00001010b */ X86_EFL_PF,
201 /* 0x0b = 00001011b */ 0,
202 /* 0x0c = 00001100b */ X86_EFL_PF,
203 /* 0x0d = 00001101b */ 0,
204 /* 0x0e = 00001110b */ 0,
205 /* 0x0f = 00001111b */ X86_EFL_PF,
206 /* 0x10 = 00010000b */ 0,
207 /* 0x11 = 00010001b */ X86_EFL_PF,
208 /* 0x12 = 00010010b */ X86_EFL_PF,
209 /* 0x13 = 00010011b */ 0,
210 /* 0x14 = 00010100b */ X86_EFL_PF,
211 /* 0x15 = 00010101b */ 0,
212 /* 0x16 = 00010110b */ 0,
213 /* 0x17 = 00010111b */ X86_EFL_PF,
214 /* 0x18 = 00011000b */ X86_EFL_PF,
215 /* 0x19 = 00011001b */ 0,
216 /* 0x1a = 00011010b */ 0,
217 /* 0x1b = 00011011b */ X86_EFL_PF,
218 /* 0x1c = 00011100b */ 0,
219 /* 0x1d = 00011101b */ X86_EFL_PF,
220 /* 0x1e = 00011110b */ X86_EFL_PF,
221 /* 0x1f = 00011111b */ 0,
222 /* 0x20 = 00100000b */ 0,
223 /* 0x21 = 00100001b */ X86_EFL_PF,
224 /* 0x22 = 00100010b */ X86_EFL_PF,
225 /* 0x23 = 00100011b */ 0,
226 /* 0x24 = 00100100b */ X86_EFL_PF,
227 /* 0x25 = 00100101b */ 0,
228 /* 0x26 = 00100110b */ 0,
229 /* 0x27 = 00100111b */ X86_EFL_PF,
230 /* 0x28 = 00101000b */ X86_EFL_PF,
231 /* 0x29 = 00101001b */ 0,
232 /* 0x2a = 00101010b */ 0,
233 /* 0x2b = 00101011b */ X86_EFL_PF,
234 /* 0x2c = 00101100b */ 0,
235 /* 0x2d = 00101101b */ X86_EFL_PF,
236 /* 0x2e = 00101110b */ X86_EFL_PF,
237 /* 0x2f = 00101111b */ 0,
238 /* 0x30 = 00110000b */ X86_EFL_PF,
239 /* 0x31 = 00110001b */ 0,
240 /* 0x32 = 00110010b */ 0,
241 /* 0x33 = 00110011b */ X86_EFL_PF,
242 /* 0x34 = 00110100b */ 0,
243 /* 0x35 = 00110101b */ X86_EFL_PF,
244 /* 0x36 = 00110110b */ X86_EFL_PF,
245 /* 0x37 = 00110111b */ 0,
246 /* 0x38 = 00111000b */ 0,
247 /* 0x39 = 00111001b */ X86_EFL_PF,
248 /* 0x3a = 00111010b */ X86_EFL_PF,
249 /* 0x3b = 00111011b */ 0,
250 /* 0x3c = 00111100b */ X86_EFL_PF,
251 /* 0x3d = 00111101b */ 0,
252 /* 0x3e = 00111110b */ 0,
253 /* 0x3f = 00111111b */ X86_EFL_PF,
254 /* 0x40 = 01000000b */ 0,
255 /* 0x41 = 01000001b */ X86_EFL_PF,
256 /* 0x42 = 01000010b */ X86_EFL_PF,
257 /* 0x43 = 01000011b */ 0,
258 /* 0x44 = 01000100b */ X86_EFL_PF,
259 /* 0x45 = 01000101b */ 0,
260 /* 0x46 = 01000110b */ 0,
261 /* 0x47 = 01000111b */ X86_EFL_PF,
262 /* 0x48 = 01001000b */ X86_EFL_PF,
263 /* 0x49 = 01001001b */ 0,
264 /* 0x4a = 01001010b */ 0,
265 /* 0x4b = 01001011b */ X86_EFL_PF,
266 /* 0x4c = 01001100b */ 0,
267 /* 0x4d = 01001101b */ X86_EFL_PF,
268 /* 0x4e = 01001110b */ X86_EFL_PF,
269 /* 0x4f = 01001111b */ 0,
270 /* 0x50 = 01010000b */ X86_EFL_PF,
271 /* 0x51 = 01010001b */ 0,
272 /* 0x52 = 01010010b */ 0,
273 /* 0x53 = 01010011b */ X86_EFL_PF,
274 /* 0x54 = 01010100b */ 0,
275 /* 0x55 = 01010101b */ X86_EFL_PF,
276 /* 0x56 = 01010110b */ X86_EFL_PF,
277 /* 0x57 = 01010111b */ 0,
278 /* 0x58 = 01011000b */ 0,
279 /* 0x59 = 01011001b */ X86_EFL_PF,
280 /* 0x5a = 01011010b */ X86_EFL_PF,
281 /* 0x5b = 01011011b */ 0,
282 /* 0x5c = 01011100b */ X86_EFL_PF,
283 /* 0x5d = 01011101b */ 0,
284 /* 0x5e = 01011110b */ 0,
285 /* 0x5f = 01011111b */ X86_EFL_PF,
286 /* 0x60 = 01100000b */ X86_EFL_PF,
287 /* 0x61 = 01100001b */ 0,
288 /* 0x62 = 01100010b */ 0,
289 /* 0x63 = 01100011b */ X86_EFL_PF,
290 /* 0x64 = 01100100b */ 0,
291 /* 0x65 = 01100101b */ X86_EFL_PF,
292 /* 0x66 = 01100110b */ X86_EFL_PF,
293 /* 0x67 = 01100111b */ 0,
294 /* 0x68 = 01101000b */ 0,
295 /* 0x69 = 01101001b */ X86_EFL_PF,
296 /* 0x6a = 01101010b */ X86_EFL_PF,
297 /* 0x6b = 01101011b */ 0,
298 /* 0x6c = 01101100b */ X86_EFL_PF,
299 /* 0x6d = 01101101b */ 0,
300 /* 0x6e = 01101110b */ 0,
301 /* 0x6f = 01101111b */ X86_EFL_PF,
302 /* 0x70 = 01110000b */ 0,
303 /* 0x71 = 01110001b */ X86_EFL_PF,
304 /* 0x72 = 01110010b */ X86_EFL_PF,
305 /* 0x73 = 01110011b */ 0,
306 /* 0x74 = 01110100b */ X86_EFL_PF,
307 /* 0x75 = 01110101b */ 0,
308 /* 0x76 = 01110110b */ 0,
309 /* 0x77 = 01110111b */ X86_EFL_PF,
310 /* 0x78 = 01111000b */ X86_EFL_PF,
311 /* 0x79 = 01111001b */ 0,
312 /* 0x7a = 01111010b */ 0,
313 /* 0x7b = 01111011b */ X86_EFL_PF,
314 /* 0x7c = 01111100b */ 0,
315 /* 0x7d = 01111101b */ X86_EFL_PF,
316 /* 0x7e = 01111110b */ X86_EFL_PF,
317 /* 0x7f = 01111111b */ 0,
318 /* 0x80 = 10000000b */ 0,
319 /* 0x81 = 10000001b */ X86_EFL_PF,
320 /* 0x82 = 10000010b */ X86_EFL_PF,
321 /* 0x83 = 10000011b */ 0,
322 /* 0x84 = 10000100b */ X86_EFL_PF,
323 /* 0x85 = 10000101b */ 0,
324 /* 0x86 = 10000110b */ 0,
325 /* 0x87 = 10000111b */ X86_EFL_PF,
326 /* 0x88 = 10001000b */ X86_EFL_PF,
327 /* 0x89 = 10001001b */ 0,
328 /* 0x8a = 10001010b */ 0,
329 /* 0x8b = 10001011b */ X86_EFL_PF,
330 /* 0x8c = 10001100b */ 0,
331 /* 0x8d = 10001101b */ X86_EFL_PF,
332 /* 0x8e = 10001110b */ X86_EFL_PF,
333 /* 0x8f = 10001111b */ 0,
334 /* 0x90 = 10010000b */ X86_EFL_PF,
335 /* 0x91 = 10010001b */ 0,
336 /* 0x92 = 10010010b */ 0,
337 /* 0x93 = 10010011b */ X86_EFL_PF,
338 /* 0x94 = 10010100b */ 0,
339 /* 0x95 = 10010101b */ X86_EFL_PF,
340 /* 0x96 = 10010110b */ X86_EFL_PF,
341 /* 0x97 = 10010111b */ 0,
342 /* 0x98 = 10011000b */ 0,
343 /* 0x99 = 10011001b */ X86_EFL_PF,
344 /* 0x9a = 10011010b */ X86_EFL_PF,
345 /* 0x9b = 10011011b */ 0,
346 /* 0x9c = 10011100b */ X86_EFL_PF,
347 /* 0x9d = 10011101b */ 0,
348 /* 0x9e = 10011110b */ 0,
349 /* 0x9f = 10011111b */ X86_EFL_PF,
350 /* 0xa0 = 10100000b */ X86_EFL_PF,
351 /* 0xa1 = 10100001b */ 0,
352 /* 0xa2 = 10100010b */ 0,
353 /* 0xa3 = 10100011b */ X86_EFL_PF,
354 /* 0xa4 = 10100100b */ 0,
355 /* 0xa5 = 10100101b */ X86_EFL_PF,
356 /* 0xa6 = 10100110b */ X86_EFL_PF,
357 /* 0xa7 = 10100111b */ 0,
358 /* 0xa8 = 10101000b */ 0,
359 /* 0xa9 = 10101001b */ X86_EFL_PF,
360 /* 0xaa = 10101010b */ X86_EFL_PF,
361 /* 0xab = 10101011b */ 0,
362 /* 0xac = 10101100b */ X86_EFL_PF,
363 /* 0xad = 10101101b */ 0,
364 /* 0xae = 10101110b */ 0,
365 /* 0xaf = 10101111b */ X86_EFL_PF,
366 /* 0xb0 = 10110000b */ 0,
367 /* 0xb1 = 10110001b */ X86_EFL_PF,
368 /* 0xb2 = 10110010b */ X86_EFL_PF,
369 /* 0xb3 = 10110011b */ 0,
370 /* 0xb4 = 10110100b */ X86_EFL_PF,
371 /* 0xb5 = 10110101b */ 0,
372 /* 0xb6 = 10110110b */ 0,
373 /* 0xb7 = 10110111b */ X86_EFL_PF,
374 /* 0xb8 = 10111000b */ X86_EFL_PF,
375 /* 0xb9 = 10111001b */ 0,
376 /* 0xba = 10111010b */ 0,
377 /* 0xbb = 10111011b */ X86_EFL_PF,
378 /* 0xbc = 10111100b */ 0,
379 /* 0xbd = 10111101b */ X86_EFL_PF,
380 /* 0xbe = 10111110b */ X86_EFL_PF,
381 /* 0xbf = 10111111b */ 0,
382 /* 0xc0 = 11000000b */ X86_EFL_PF,
383 /* 0xc1 = 11000001b */ 0,
384 /* 0xc2 = 11000010b */ 0,
385 /* 0xc3 = 11000011b */ X86_EFL_PF,
386 /* 0xc4 = 11000100b */ 0,
387 /* 0xc5 = 11000101b */ X86_EFL_PF,
388 /* 0xc6 = 11000110b */ X86_EFL_PF,
389 /* 0xc7 = 11000111b */ 0,
390 /* 0xc8 = 11001000b */ 0,
391 /* 0xc9 = 11001001b */ X86_EFL_PF,
392 /* 0xca = 11001010b */ X86_EFL_PF,
393 /* 0xcb = 11001011b */ 0,
394 /* 0xcc = 11001100b */ X86_EFL_PF,
395 /* 0xcd = 11001101b */ 0,
396 /* 0xce = 11001110b */ 0,
397 /* 0xcf = 11001111b */ X86_EFL_PF,
398 /* 0xd0 = 11010000b */ 0,
399 /* 0xd1 = 11010001b */ X86_EFL_PF,
400 /* 0xd2 = 11010010b */ X86_EFL_PF,
401 /* 0xd3 = 11010011b */ 0,
402 /* 0xd4 = 11010100b */ X86_EFL_PF,
403 /* 0xd5 = 11010101b */ 0,
404 /* 0xd6 = 11010110b */ 0,
405 /* 0xd7 = 11010111b */ X86_EFL_PF,
406 /* 0xd8 = 11011000b */ X86_EFL_PF,
407 /* 0xd9 = 11011001b */ 0,
408 /* 0xda = 11011010b */ 0,
409 /* 0xdb = 11011011b */ X86_EFL_PF,
410 /* 0xdc = 11011100b */ 0,
411 /* 0xdd = 11011101b */ X86_EFL_PF,
412 /* 0xde = 11011110b */ X86_EFL_PF,
413 /* 0xdf = 11011111b */ 0,
414 /* 0xe0 = 11100000b */ 0,
415 /* 0xe1 = 11100001b */ X86_EFL_PF,
416 /* 0xe2 = 11100010b */ X86_EFL_PF,
417 /* 0xe3 = 11100011b */ 0,
418 /* 0xe4 = 11100100b */ X86_EFL_PF,
419 /* 0xe5 = 11100101b */ 0,
420 /* 0xe6 = 11100110b */ 0,
421 /* 0xe7 = 11100111b */ X86_EFL_PF,
422 /* 0xe8 = 11101000b */ X86_EFL_PF,
423 /* 0xe9 = 11101001b */ 0,
424 /* 0xea = 11101010b */ 0,
425 /* 0xeb = 11101011b */ X86_EFL_PF,
426 /* 0xec = 11101100b */ 0,
427 /* 0xed = 11101101b */ X86_EFL_PF,
428 /* 0xee = 11101110b */ X86_EFL_PF,
429 /* 0xef = 11101111b */ 0,
430 /* 0xf0 = 11110000b */ X86_EFL_PF,
431 /* 0xf1 = 11110001b */ 0,
432 /* 0xf2 = 11110010b */ 0,
433 /* 0xf3 = 11110011b */ X86_EFL_PF,
434 /* 0xf4 = 11110100b */ 0,
435 /* 0xf5 = 11110101b */ X86_EFL_PF,
436 /* 0xf6 = 11110110b */ X86_EFL_PF,
437 /* 0xf7 = 11110111b */ 0,
438 /* 0xf8 = 11111000b */ 0,
439 /* 0xf9 = 11111001b */ X86_EFL_PF,
440 /* 0xfa = 11111010b */ X86_EFL_PF,
441 /* 0xfb = 11111011b */ 0,
442 /* 0xfc = 11111100b */ X86_EFL_PF,
443 /* 0xfd = 11111101b */ 0,
444 /* 0xfe = 11111110b */ 0,
445 /* 0xff = 11111111b */ X86_EFL_PF,
446};
447
448/* for clang: */
449extern const RTFLOAT80U g_ar80Zero[];
450extern const RTFLOAT80U g_ar80One[];
451extern const RTFLOAT80U g_r80Indefinite;
452extern const RTFLOAT80U g_ar80Infinity[];
453extern const RTFLOAT128U g_r128Ln2;
454extern const RTUINT128U g_u128Ln2Mantissa;
455extern const RTUINT128U g_u128Ln2MantissaIntel;
456extern const RTFLOAT128U g_ar128F2xm1HornerConsts[];
457
458/** Zero values (indexed by fSign). */
459RTFLOAT80U const g_ar80Zero[] = { RTFLOAT80U_INIT_ZERO(0), RTFLOAT80U_INIT_ZERO(1) };
460
461/** One values (indexed by fSign). */
462RTFLOAT80U const g_ar80One[] =
463{ RTFLOAT80U_INIT(0, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS), RTFLOAT80U_INIT(1, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS) };
464
465/** Indefinite (negative). */
466RTFLOAT80U const g_r80Indefinite = RTFLOAT80U_INIT_INDEFINITE(1);
467
468/** Infinities (indexed by fSign). */
469RTFLOAT80U const g_ar80Infinity[] = { RTFLOAT80U_INIT_INF(0), RTFLOAT80U_INIT_INF(1) };
470
471#if 0
472/** 128-bit floating point constant: 2.0 */
473const RTFLOAT128U g_r128Two = RTFLOAT128U_INIT_C(0, 0, 0, RTFLOAT128U_EXP_BIAS + 1);
474#endif
475
476
477/* The next section is generated by tools/IEMGenFpuConstants: */
478
479/** The ln2 constant as 128-bit floating point value.
480 * base-10: 6.93147180559945309417232121458176575e-1
481 * base-16: b.17217f7d1cf79abc9e3b39803f30@-1
482 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100110e-1 */
483//const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf35793c7673007e6, 0x3ffe);
484const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf357900000000000, 0x3ffe);
485/** High precision ln2 value.
486 * base-10: 6.931471805599453094172321214581765680747e-1
487 * base-16: b.17217f7d1cf79abc9e3b39803f2f6af0@-1
488 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100101111011010101111e-1 */
489const RTUINT128U g_u128Ln2Mantissa = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc9e3b39803f2f6af);
490/** High precision ln2 value, compatible with f2xm1 results on the Intel 10980XE.
491 * base-10: 6.931471805599453094151379470289064954613e-1
492 * base-16: b.17217f7d1cf79abc0000000000000000@-1
493 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100000000000000000000000000000000000000000000000000000000000000e-1 */
494const RTUINT128U g_u128Ln2MantissaIntel = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc000000000000000);
495
496/** Horner scheme coefficients for f2xm1.  The leading terms match the 1/n! coefficients of the exponential series; the final two deviate and look like fitted correction terms. */
497const RTFLOAT128U g_ar128F2xm1HornerConsts[] =
498{
499 /* a0
500 * base-10: 1.00000000000000000000000000000000000e0
501 * base-16: 1.0000000000000000000000000000@0
502 * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e0 */
503 RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3fff),
504 /* a1
505 * base-10: 5.00000000000000000000000000000000000e-1
506 * base-16: 8.0000000000000000000000000000@-1
507 * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-1 */
508 RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3ffe),
509 /* a2
510 * base-10: 1.66666666666666666666666666666666658e-1
511 * base-16: 2.aaaaaaaaaaaaaaaaaaaaaaaaaaaa@-1
512 * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-3 */
513 RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffc),
514 /* a3
515 * base-10: 4.16666666666666666666666666666666646e-2
516 * base-16: a.aaaaaaaaaaaaaaaaaaaaaaaaaaa8@-2
517 * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-5 */
518 RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffa),
519 /* a4
520 * base-10: 8.33333333333333333333333333333333323e-3
521 * base-16: 2.2222222222222222222222222222@-2
522 * base-2 : 1.0001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001e-7 */
523 RTFLOAT128U_INIT_C(0, 0x111111111111, 0x1111111111111111, 0x3ff8),
524 /* a5
525 * base-10: 1.38888888888888888888888888888888874e-3
526 * base-16: 5.b05b05b05b05b05b05b05b05b058@-3
527 * base-2 : 1.0110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110e-10 */
528 RTFLOAT128U_INIT_C(0, 0x6c16c16c16c1, 0x6c16c16c16c16c16, 0x3ff5),
529 /* a6
530 * base-10: 1.98412698412698412698412698412698412e-4
531 * base-16: d.00d00d00d00d00d00d00d00d00d0@-4
532 * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-13 */
533 RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3ff2),
534 /* a7
535 * base-10: 2.48015873015873015873015873015873015e-5
536 * base-16: 1.a01a01a01a01a01a01a01a01a01a@-4
537 * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-16 */
538 RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3fef),
539 /* a8
540 * base-10: 2.75573192239858906525573192239858902e-6
541 * base-16: 2.e3bc74aad8e671f5583911ca002e@-5
542 * base-2 : 1.0111000111011110001110100101010101101100011100110011100011111010101011000001110010001000111001010000000000010111e-19 */
543 RTFLOAT128U_INIT_C(0, 0x71de3a556c73, 0x38faac1c88e50017, 0x3fec),
544 /* a9
545 * base-10: 2.75573192239858906525573192239858865e-7
546 * base-16: 4.9f93edde27d71cbbc05b4fa999e0@-6
547 * base-2 : 1.0010011111100100111110110111011110001001111101011100011100101110111100000001011011010011111010100110011001111000e-22 */
548 RTFLOAT128U_INIT_C(0, 0x27e4fb7789f5, 0xc72ef016d3ea6678, 0x3fe9),
549 /* a10
550 * base-10: 2.50521083854417187750521083854417184e-8
551 * base-16: 6.b99159fd5138e3f9d1f92e0df71c@-7
552 * base-2 : 1.1010111001100100010101100111111101010100010011100011100011111110011101000111111001001011100000110111110111000111e-26 */
553 RTFLOAT128U_INIT_C(0, 0xae64567f544e, 0x38fe747e4b837dc7, 0x3fe5),
554 /* a11
555 * base-10: 2.08767569878680989792100903212014296e-9
556 * base-16: 8.f76c77fc6c4bdaa26d4c3d67f420@-8
557 * base-2 : 1.0001111011101101100011101111111110001101100010010111101101010100010011011010100110000111101011001111111010000100e-29 */
558 RTFLOAT128U_INIT_C(0, 0x1eed8eff8d89, 0x7b544da987acfe84, 0x3fe2),
559 /* a12
560 * base-10: 1.60590438368216145993923771701549472e-10
561 * base-16: b.092309d43684be51c198e91d7b40@-9
562 * base-2 : 1.0110000100100100011000010011101010000110110100001001011111001010001110000011001100011101001000111010111101101000e-33 */
563 RTFLOAT128U_INIT_C(0, 0x6124613a86d0, 0x97ca38331d23af68, 0x3fde),
564 /* a13
565 * base-10: 1.14707455977297247138516979786821043e-11
566 * base-16: c.9cba54603e4e905d6f8a2efd1f20@-10
567 * base-2 : 1.1001001110010111010010101000110000000111110010011101001000001011101011011111000101000101110111111010001111100100e-37 */
568 RTFLOAT128U_INIT_C(0, 0x93974a8c07c9, 0xd20badf145dfa3e4, 0x3fda),
569 /* a14
570 * base-10: 7.64716373181981647590113198578806964e-13
571 * base-16: d.73f9f399dc0f88ec32b587746578@-11
572 * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-41 */
573 RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd6),
574 /* a15
575 * base-10: 4.77947733238738529743820749111754352e-14
576 * base-16: d.73f9f399dc0f88ec32b587746578@-12
577 * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-45 */
578 RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd2),
579 /* a16
580 * base-10: 2.81145725434552076319894558301031970e-15
581 * base-16: c.a963b81856a53593028cbbb8d7f8@-13
582 * base-2 : 1.1001010100101100011101110000001100001010110101001010011010110010011000000101000110010111011101110001101011111111e-49 */
583 RTFLOAT128U_INIT_C(0, 0x952c77030ad4, 0xa6b2605197771aff, 0x3fce),
584 /* a17
585 * base-10: 1.56192069685862264622163643500573321e-16
586 * base-16: b.413c31dcbecbbdd8024435161550@-14
587 * base-2 : 1.0110100000100111100001100011101110010111110110010111011110111011000000000100100010000110101000101100001010101010e-53 */
588 RTFLOAT128U_INIT_C(0, 0x6827863b97d9, 0x77bb004886a2c2aa, 0x3fca),
589 /* a18
590 * base-10: 8.22063524662432971695598123687227980e-18
591 * base-16: 9.7a4da340a0ab92650f61dbdcb3a0@-15
592 * base-2 : 1.0010111101001001101101000110100000010100000101010111001001001100101000011110110000111011011110111001011001110100e-57 */
593 RTFLOAT128U_INIT_C(0, 0x2f49b4681415, 0x724ca1ec3b7b9674, 0x3fc6),
594 /* a19
595 * base-10: 4.11031762331216485847799061843614006e-19
596 * base-16: 7.950ae900808941ea72b4afe3c2e8@-16
597 * base-2 : 1.1110010101000010101110100100000000100000001000100101000001111010100111001010110100101011111110001111000010111010e-62 */
598 RTFLOAT128U_INIT_C(0, 0xe542ba402022, 0x507a9cad2bf8f0ba, 0x3fc1),
599 /* a20
600 * base-10: 7.04351638180413298434020229233492164e-20
601 * base-16: 1.4c9ee35db1d1f3c946fdcd48fd88@-16
602 * base-2 : 1.0100110010011110111000110101110110110001110100011111001111001001010001101111110111001101010010001111110110001000e-64 */
603 RTFLOAT128U_INIT_C(0, 0x4c9ee35db1d1, 0xf3c946fdcd48fd88, 0x3fbf),
604 /* a21
605 * base-10: 5.81527769640186708776361513365257702e-20
606 * base-16: 1.129e64bff606a2b9c9fc624481cd@-16
607 * base-2 : 1.0001001010011110011001001011111111110110000001101010001010111001110010011111110001100010010001001000000111001101e-64 */
608 RTFLOAT128U_INIT_C(0, 0x129e64bff606, 0xa2b9c9fc624481cd, 0x3fbf),
609};
610
611
612/*
613 * There are a few 64-bit-on-32-bit things we'd rather do in C.  Actually, doing
614 * it all in C is probably safer for now; we can optimize what's necessary later.
615 */
616#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
617
618
619/*********************************************************************************************************************************
620* Binary Operations *
621*********************************************************************************************************************************/
622
623/*
624 * ADD
625 */
626
627IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
628{
629 uint64_t uDst = *puDst;
630 uint64_t uResult = uDst + uSrc;
631 *puDst = uResult;
632 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
633}
634
635# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
636
637IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
638{
639 uint32_t uDst = *puDst;
640 uint32_t uResult = uDst + uSrc;
641 *puDst = uResult;
642 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
643}
644
645
646IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
647{
648 uint16_t uDst = *puDst;
649 uint16_t uResult = uDst + uSrc;
650 *puDst = uResult;
651 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
652}
653
654
655IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
656{
657 uint8_t uDst = *puDst;
658 uint8_t uResult = uDst + uSrc;
659 *puDst = uResult;
660 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
661}
662
663# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
664
665/*
666 * ADC
667 */
668
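/* Note! Unlike ADD, the carry-out test below is uResult <= uDst: with the
   extra +1, uDst + uSrc + 1 wraps around to exactly uDst when uSrc == ~0,
   so equality also indicates a carry. */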
669IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
670{
671 if (!(*pfEFlags & X86_EFL_CF))
672 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
673 else
674 {
675 uint64_t uDst = *puDst;
676 uint64_t uResult = uDst + uSrc + 1;
677 *puDst = uResult;
678 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
679 }
680}
681
682# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
683
684IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
685{
686 if (!(*pfEFlags & X86_EFL_CF))
687 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
688 else
689 {
690 uint32_t uDst = *puDst;
691 uint32_t uResult = uDst + uSrc + 1;
692 *puDst = uResult;
693 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
694 }
695}
696
697
698IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
699{
700 if (!(*pfEFlags & X86_EFL_CF))
701 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
702 else
703 {
704 uint16_t uDst = *puDst;
705 uint16_t uResult = uDst + uSrc + 1;
706 *puDst = uResult;
707 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
708 }
709}
710
711
712IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
713{
714 if (!(*pfEFlags & X86_EFL_CF))
715 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
716 else
717 {
718 uint8_t uDst = *puDst;
719 uint8_t uResult = uDst + uSrc + 1;
720 *puDst = uResult;
721 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
722 }
723}
724
725# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
726
727/*
728 * SUB
729 */
730
731IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
732{
733 uint64_t uDst = *puDst;
734 uint64_t uResult = uDst - uSrc;
735 *puDst = uResult;
736 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
737}
738
739# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
740
741IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
742{
743 uint32_t uDst = *puDst;
744 uint32_t uResult = uDst - uSrc;
745 *puDst = uResult;
746 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
747}
748
749
750IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
751{
752 uint16_t uDst = *puDst;
753 uint16_t uResult = uDst - uSrc;
754 *puDst = uResult;
755 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
756}
757
758
759IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
760{
761 uint8_t uDst = *puDst;
762 uint8_t uResult = uDst - uSrc;
763 *puDst = uResult;
764 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
765}
766
767# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
768
769/*
770 * SBB
771 */
772
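/* Note! As with ADC, the borrow test below becomes uDst <= uSrc when a
   borrow-in of 1 is present, since uDst - uSrc - 1 underflows even when
   uDst == uSrc. */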
773IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
774{
775 if (!(*pfEFlags & X86_EFL_CF))
776 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
777 else
778 {
779 uint64_t uDst = *puDst;
780 uint64_t uResult = uDst - uSrc - 1;
781 *puDst = uResult;
782 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
783 }
784}
785
786# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
787
788IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
789{
790 if (!(*pfEFlags & X86_EFL_CF))
791 iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
792 else
793 {
794 uint32_t uDst = *puDst;
795 uint32_t uResult = uDst - uSrc - 1;
796 *puDst = uResult;
797 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
798 }
799}
800
801
802IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
803{
804 if (!(*pfEFlags & X86_EFL_CF))
805 iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
806 else
807 {
808 uint16_t uDst = *puDst;
809 uint16_t uResult = uDst - uSrc - 1;
810 *puDst = uResult;
811 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
812 }
813}
814
815
816IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
817{
818 if (!(*pfEFlags & X86_EFL_CF))
819 iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
820 else
821 {
822 uint8_t uDst = *puDst;
823 uint8_t uResult = uDst - uSrc - 1;
824 *puDst = uResult;
825 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
826 }
827}
828
829# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
830
831
832/*
833 * OR
834 */
835
836IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
837{
838 uint64_t uResult = *puDst | uSrc;
839 *puDst = uResult;
840 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
841}
842
843# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
844
845IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
846{
847 uint32_t uResult = *puDst | uSrc;
848 *puDst = uResult;
849 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
850}
851
852
853IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
854{
855 uint16_t uResult = *puDst | uSrc;
856 *puDst = uResult;
857 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
858}
859
860
861IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
862{
863 uint8_t uResult = *puDst | uSrc;
864 *puDst = uResult;
865 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
866}
867
868# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
869
870/*
871 * XOR
872 */
873
874IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
875{
876 uint64_t uResult = *puDst ^ uSrc;
877 *puDst = uResult;
878 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
879}
880
881# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
882
883IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
884{
885 uint32_t uResult = *puDst ^ uSrc;
886 *puDst = uResult;
887 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
888}
889
890
891IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
892{
893 uint16_t uResult = *puDst ^ uSrc;
894 *puDst = uResult;
895 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
896}
897
898
899IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
900{
901 uint8_t uResult = *puDst ^ uSrc;
902 *puDst = uResult;
903 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
904}
905
906# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
907
908/*
909 * AND
910 */
911
912IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
913{
914 uint64_t uResult = *puDst & uSrc;
915 *puDst = uResult;
916 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
917}
918
919# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
920
921IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
922{
923 uint32_t uResult = *puDst & uSrc;
924 *puDst = uResult;
925 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
926}
927
928
929IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
930{
931 uint16_t uResult = *puDst & uSrc;
932 *puDst = uResult;
933 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
934}
935
936
937IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
938{
939 uint8_t uResult = *puDst & uSrc;
940 *puDst = uResult;
941 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
942}
943
944# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
945
946/*
947 * CMP
948 */
949
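/* CMP computes the same flags as SUB but discards the result, hence the
   subtraction below is done on a local copy of the destination. */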
950IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
951{
952 uint64_t uDstTmp = *puDst;
953 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
954}
955
956# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
957
958IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
959{
960 uint32_t uDstTmp = *puDst;
961 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
962}
963
964
965IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
966{
967 uint16_t uDstTmp = *puDst;
968 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
969}
970
971
972IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
973{
974 uint8_t uDstTmp = *puDst;
975 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
976}
977
978# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
979
980/*
981 * TEST
982 */
983
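/* TEST computes the flags of AND without writing the result back to the
   destination. */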
984IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
985{
986 uint64_t uResult = *puDst & uSrc;
987 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
988}
989
990# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
991
992IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
993{
994 uint32_t uResult = *puDst & uSrc;
995 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
996}
997
998
999IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1000{
1001 uint16_t uResult = *puDst & uSrc;
1002 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
1003}
1004
1005
1006IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
1007{
1008 uint8_t uResult = *puDst & uSrc;
1009 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
1010}
1011
1012# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1013
1014
1015/*
1016 * LOCK prefixed variants of the above
1017 */
1018
1019/** Locked binary operand operation for the given bit width. */
1020# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
1021 do { \
1022 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
1023 uint ## a_cBitsWidth ## _t uTmp; \
1024 uint32_t fEflTmp; \
1025 do \
1026 { \
1027 uTmp = uOld; \
1028 fEflTmp = *pfEFlags; \
1029 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
1030 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
1031 *pfEFlags = fEflTmp; \
1032 } while (0)
1033
1034
1035#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
1036 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
1037 uint ## a_cBitsWidth ## _t uSrc, \
1038 uint32_t *pfEFlags)) \
1039 { \
1040 DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
1041 }
1042
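/* Illustrative sketch (not part of the original source): the two macros above
   expand EMIT_LOCKED_BIN_OP(add, 64) into roughly the following
   compare-exchange retry loop:
   @code
        IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64_locked,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
        {
            uint64_t uOld = ASMAtomicUoReadU64(puDst);
            uint64_t uTmp;
            uint32_t fEflTmp;
            do
            {
                uTmp    = uOld;                          // start from the last observed value
                fEflTmp = *pfEFlags;                     // flags are recomputed on every retry
                iemAImpl_add_u64(&uTmp, uSrc, &fEflTmp); // plain worker does the arithmetic
            } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld));
            *pfEFlags = fEflTmp;
        }
   @endcode */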
1043EMIT_LOCKED_BIN_OP(add, 64)
1044EMIT_LOCKED_BIN_OP(adc, 64)
1045EMIT_LOCKED_BIN_OP(sub, 64)
1046EMIT_LOCKED_BIN_OP(sbb, 64)
1047EMIT_LOCKED_BIN_OP(or, 64)
1048EMIT_LOCKED_BIN_OP(xor, 64)
1049EMIT_LOCKED_BIN_OP(and, 64)
1050# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1051EMIT_LOCKED_BIN_OP(add, 32)
1052EMIT_LOCKED_BIN_OP(adc, 32)
1053EMIT_LOCKED_BIN_OP(sub, 32)
1054EMIT_LOCKED_BIN_OP(sbb, 32)
1055EMIT_LOCKED_BIN_OP(or, 32)
1056EMIT_LOCKED_BIN_OP(xor, 32)
1057EMIT_LOCKED_BIN_OP(and, 32)
1058
1059EMIT_LOCKED_BIN_OP(add, 16)
1060EMIT_LOCKED_BIN_OP(adc, 16)
1061EMIT_LOCKED_BIN_OP(sub, 16)
1062EMIT_LOCKED_BIN_OP(sbb, 16)
1063EMIT_LOCKED_BIN_OP(or, 16)
1064EMIT_LOCKED_BIN_OP(xor, 16)
1065EMIT_LOCKED_BIN_OP(and, 16)
1066
1067EMIT_LOCKED_BIN_OP(add, 8)
1068EMIT_LOCKED_BIN_OP(adc, 8)
1069EMIT_LOCKED_BIN_OP(sub, 8)
1070EMIT_LOCKED_BIN_OP(sbb, 8)
1071EMIT_LOCKED_BIN_OP(or, 8)
1072EMIT_LOCKED_BIN_OP(xor, 8)
1073EMIT_LOCKED_BIN_OP(and, 8)
1074# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1075
1076
1077/*
1078 * Bit operations (same signature as above).
1079 */
1080
1081/*
1082 * BT
1083 */
1084
1085IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1086{
1087 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1088 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1089 Assert(uSrc < 64);
1090 uint64_t uDst = *puDst;
1091 if (uDst & RT_BIT_64(uSrc))
1092 *pfEFlags |= X86_EFL_CF;
1093 else
1094 *pfEFlags &= ~X86_EFL_CF;
1095}
1096
1097# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1098
1099IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1100{
1101 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1102 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1103 Assert(uSrc < 32);
1104 uint32_t uDst = *puDst;
1105 if (uDst & RT_BIT_32(uSrc))
1106 *pfEFlags |= X86_EFL_CF;
1107 else
1108 *pfEFlags &= ~X86_EFL_CF;
1109}
1110
1111IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1112{
1113 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1114 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1115 Assert(uSrc < 16);
1116 uint16_t uDst = *puDst;
1117 if (uDst & RT_BIT_32(uSrc))
1118 *pfEFlags |= X86_EFL_CF;
1119 else
1120 *pfEFlags &= ~X86_EFL_CF;
1121}
1122
1123# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1124
1125/*
1126 * BTC
1127 */
1128
1129IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1130{
1131 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1132 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1133 Assert(uSrc < 64);
1134 uint64_t fMask = RT_BIT_64(uSrc);
1135 uint64_t uDst = *puDst;
1136 if (uDst & fMask)
1137 {
1138 uDst &= ~fMask;
1139 *puDst = uDst;
1140 *pfEFlags |= X86_EFL_CF;
1141 }
1142 else
1143 {
1144 uDst |= fMask;
1145 *puDst = uDst;
1146 *pfEFlags &= ~X86_EFL_CF;
1147 }
1148}
1149
1150# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1151
1152IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1153{
1154 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1155 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1156 Assert(uSrc < 32);
1157 uint32_t fMask = RT_BIT_32(uSrc);
1158 uint32_t uDst = *puDst;
1159 if (uDst & fMask)
1160 {
1161 uDst &= ~fMask;
1162 *puDst = uDst;
1163 *pfEFlags |= X86_EFL_CF;
1164 }
1165 else
1166 {
1167 uDst |= fMask;
1168 *puDst = uDst;
1169 *pfEFlags &= ~X86_EFL_CF;
1170 }
1171}
1172
1173
1174IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1175{
1176 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1177 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1178 Assert(uSrc < 16);
1179 uint16_t fMask = RT_BIT_32(uSrc);
1180 uint16_t uDst = *puDst;
1181 if (uDst & fMask)
1182 {
1183 uDst &= ~fMask;
1184 *puDst = uDst;
1185 *pfEFlags |= X86_EFL_CF;
1186 }
1187 else
1188 {
1189 uDst |= fMask;
1190 *puDst = uDst;
1191 *pfEFlags &= ~X86_EFL_CF;
1192 }
1193}
1194
1195# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1196
1197/*
1198 * BTR
1199 */
1200
1201IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1202{
1203 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1204 logical operation (AND/OR/whatever). */
1205 Assert(uSrc < 64);
1206 uint64_t fMask = RT_BIT_64(uSrc);
1207 uint64_t uDst = *puDst;
1208 if (uDst & fMask)
1209 {
1210 uDst &= ~fMask;
1211 *puDst = uDst;
1212 *pfEFlags |= X86_EFL_CF;
1213 }
1214 else
1215 *pfEFlags &= ~X86_EFL_CF;
1216}
1217
1218# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1219
1220IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1221{
1222 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1223 logical operation (AND/OR/whatever). */
1224 Assert(uSrc < 32);
1225 uint32_t fMask = RT_BIT_32(uSrc);
1226 uint32_t uDst = *puDst;
1227 if (uDst & fMask)
1228 {
1229 uDst &= ~fMask;
1230 *puDst = uDst;
1231 *pfEFlags |= X86_EFL_CF;
1232 }
1233 else
1234 *pfEFlags &= ~X86_EFL_CF;
1235}
1236
1237
1238IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1239{
1240 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1241 logical operation (AND/OR/whatever). */
1242 Assert(uSrc < 16);
1243 uint16_t fMask = RT_BIT_32(uSrc);
1244 uint16_t uDst = *puDst;
1245 if (uDst & fMask)
1246 {
1247 uDst &= ~fMask;
1248 *puDst = uDst;
1249 *pfEFlags |= X86_EFL_CF;
1250 }
1251 else
1252 *pfEFlags &= ~X86_EFL_CF;
1253}
1254
1255# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1256
1257/*
1258 * BTS
1259 */
1260
1261IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1262{
1263 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1264 logical operation (AND/OR/whatever). */
1265 Assert(uSrc < 64);
1266 uint64_t fMask = RT_BIT_64(uSrc);
1267 uint64_t uDst = *puDst;
1268 if (uDst & fMask)
1269 *pfEFlags |= X86_EFL_CF;
1270 else
1271 {
1272 uDst |= fMask;
1273 *puDst = uDst;
1274 *pfEFlags &= ~X86_EFL_CF;
1275 }
1276}
1277
1278# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1279
1280IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1281{
1282 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1283 logical operation (AND/OR/whatever). */
1284 Assert(uSrc < 32);
1285 uint32_t fMask = RT_BIT_32(uSrc);
1286 uint32_t uDst = *puDst;
1287 if (uDst & fMask)
1288 *pfEFlags |= X86_EFL_CF;
1289 else
1290 {
1291 uDst |= fMask;
1292 *puDst = uDst;
1293 *pfEFlags &= ~X86_EFL_CF;
1294 }
1295}
1296
1297
1298IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1299{
1300 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
1301 logical operation (AND/OR/whatever). */
1302 Assert(uSrc < 16);
1303 uint16_t fMask = RT_BIT_32(uSrc);
1304 uint16_t uDst = *puDst;
1305 if (uDst & fMask)
1306 *pfEFlags |= X86_EFL_CF;
1307 else
1308 {
1309 uDst |= fMask;
1310 *puDst = uDst;
1311 *pfEFlags &= ~X86_EFL_CF;
1312 }
1313}
1314
1315# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1316
1317
1318EMIT_LOCKED_BIN_OP(btc, 64)
1319EMIT_LOCKED_BIN_OP(btr, 64)
1320EMIT_LOCKED_BIN_OP(bts, 64)
1321# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1322EMIT_LOCKED_BIN_OP(btc, 32)
1323EMIT_LOCKED_BIN_OP(btr, 32)
1324EMIT_LOCKED_BIN_OP(bts, 32)
1325
1326EMIT_LOCKED_BIN_OP(btc, 16)
1327EMIT_LOCKED_BIN_OP(btr, 16)
1328EMIT_LOCKED_BIN_OP(bts, 16)
1329# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1330
1331
1332/*
1333 * Helpers for BSR and BSF.
1334 *
1335 * Note! "undefined" flags: OF, SF, AF, PF, CF.
1336 * Intel behavior modelled on the 10980XE, AMD on the 3990X. Other
1337 * microarchitectures may produce different results (see
1338 * https://www.sandpile.org/x86/flags.htm), but we restrict ourselves to emulating these recent parts.
1339 */
1340#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlag, a_iBit) do { \
1341 unsigned iBit = (a_iBit); \
1342 uint32_t fEfl = *pfEFlags & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
1343 if (iBit) \
1344 { \
1345 *puDst = --iBit; \
1346 fEfl |= g_afParity[iBit]; \
1347 } \
1348 else \
1349 fEfl |= X86_EFL_ZF | X86_EFL_PF; \
1350 *pfEFlags = fEfl; \
1351 } while (0)
1352#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlag, a_iBit) do { \
1353 unsigned const iBit = (a_iBit); \
1354 if (iBit) \
1355 { \
1356 *puDst = iBit - 1; \
1357 *pfEFlags &= ~X86_EFL_ZF; \
1358 } \
1359 else \
1360 *pfEFlags |= X86_EFL_ZF; \
1361 } while (0)
1362
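/* Illustrative sketch (not part of the original source) of how the two
   helpers above diverge for BSF on a 64-bit source:
   @code
        uint64_t uDst; uint32_t fEfl = 0;
        iemAImpl_bsf_u64_intel(&uDst, UINT64_C(0x10), &fEfl); // uDst=4; ZF cleared, PF set per the parity of the index, OF/SF/AF/CF cleared
        iemAImpl_bsf_u64_amd(  &uDst, UINT64_C(0x10), &fEfl); // uDst=4; only ZF is touched (cleared), other flags left as-is
        iemAImpl_bsf_u64_intel(&uDst, 0,              &fEfl); // zero source: ZF and PF set, uDst left unmodified
   @endcode */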
1363
1364/*
1365 * BSF - first (least significant) bit set
1366 */
1367IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1368{
1369 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1370}
1371
1372IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1373{
1374 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1375}
1376
1377IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1378{
1379 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
1380}
1381
1382# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1383
1384IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1385{
1386 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1387}
1388
1389IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1390{
1391 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1392}
1393
1394IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1395{
1396 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
1397}
1398
1399
1400IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1401{
1402 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1403}
1404
1405IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1406{
1407 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1408}
1409
1410IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1411{
1412 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
1413}
1414
1415# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1416
1417
1418/*
1419 * BSR - last (most significant) bit set
1420 */
1421IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1422{
1423 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1424}
1425
1426IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1427{
1428 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1429}
1430
1431IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1432{
1433 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
1434}
1435
1436# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1437
1438IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1439{
1440 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1441}
1442
1443IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1444{
1445 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1446}
1447
1448IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1449{
1450 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
1451}
1452
1453
1454IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1455{
1456 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1457}
1458
1459IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1460{
1461 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1462}
1463
1464IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1465{
1466 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
1467}
1468
1469# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1470
1471
1472/*
1473 * XCHG
1474 */
1475
1476IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
1477{
1478#if ARCH_BITS >= 64
1479 *puReg = ASMAtomicXchgU64(puMem, *puReg);
1480#else
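 /* 32-bit host: no native 64-bit atomic xchg, so emulate it with a
    cmpxchg retry loop. */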
1481 uint64_t uOldMem = *puMem;
1482 while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
1483 ASMNopPause();
1484 *puReg = uOldMem;
1485#endif
1486}
1487
1488# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1489
1490IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
1491{
1492 *puReg = ASMAtomicXchgU32(puMem, *puReg);
1493}
1494
1495
1496IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
1497{
1498 *puReg = ASMAtomicXchgU16(puMem, *puReg);
1499}
1500
1501
1502IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
1503{
1504 *puReg = ASMAtomicXchgU8(puMem, *puReg);
1505}
1506
1507# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1508
1509
1510/* Unlocked variants for fDisregardLock mode: */
1511
1512IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
1513{
1514 uint64_t const uOld = *puMem;
1515 *puMem = *puReg;
1516 *puReg = uOld;
1517}
1518
1519# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1520
1521IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
1522{
1523 uint32_t const uOld = *puMem;
1524 *puMem = *puReg;
1525 *puReg = uOld;
1526}
1527
1528
1529IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
1530{
1531 uint16_t const uOld = *puMem;
1532 *puMem = *puReg;
1533 *puReg = uOld;
1534}
1535
1536
1537IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
1538{
1539 uint8_t const uOld = *puMem;
1540 *puMem = *puReg;
1541 *puReg = uOld;
1542}
1543
1544# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1545
1546
1547/*
1548 * XADD and LOCK XADD.
1549 */
1550#define EMIT_XADD(a_cBitsWidth, a_Type) \
1551IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
1552{ \
1553 a_Type uDst = *puDst; \
1554 a_Type uResult = uDst; \
1555 iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
1556 *puDst = uResult; \
1557 *puReg = uDst; \
1558} \
1559\
1560IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
1561{ \
1562 a_Type uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
1563 a_Type uResult; \
1564 uint32_t fEflTmp; \
1565 do \
1566 { \
1567 uResult = uOld; \
1568 fEflTmp = *pfEFlags; \
1569 iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
1570 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
1571 *puReg = uOld; \
1572 *pfEFlags = fEflTmp; \
1573}
1574EMIT_XADD(64, uint64_t)
1575# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1576EMIT_XADD(32, uint32_t)
1577EMIT_XADD(16, uint16_t)
1578EMIT_XADD(8, uint8_t)
1579# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
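/* Illustrative sketch (not part of the original source): XADD stores the sum
   in the destination and returns the old destination value in the register,
   e.g. for the unlocked 32-bit variant generated above:
   @code
        uint32_t uMem = 5, uReg = 2, fEfl = 0;
        iemAImpl_xadd_u32(&uMem, &uReg, &fEfl);
        // uMem == 7 (5 + 2), uReg == 5 (old memory value), fEfl holds the ADD status flags
   @endcode */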
1580
1581#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
1582
1583/*
1584 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
1585 *
1586 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
1587 * instructions are emulated as locked.
1588 */
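/* Illustrative sketch (not part of the original source) of the CMPXCHG
   semantics implemented below: the accumulator is compared with the
   destination; on a match the destination receives the new value and ZF is
   set, otherwise the accumulator receives the destination value and ZF is
   cleared (the trailing CMP recomputes all status flags accordingly):
   @code
        uint8_t uMem = 7, uAl = 7; uint32_t fEfl = 0;
        iemAImpl_cmpxchg_u8_locked(&uMem, &uAl, 9, &fEfl);
        // match: uMem == 9, uAl still 7, ZF set in fEfl
   @endcode */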
1589#if defined(IEM_WITHOUT_ASSEMBLY)
1590
1591IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1592{
1593 uint8_t uOld = *puAl;
1594 if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
1595 Assert(*puAl == uOld);
1596 iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
1597}
1598
1599
1600IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1601{
1602 uint16_t uOld = *puAx;
1603 if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
1604 Assert(*puAx == uOld);
1605 iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
1606}
1607
1608
1609IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1610{
1611 uint32_t uOld = *puEax;
1612 if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
1613 Assert(*puEax == uOld);
1614 iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
1615}
1616
1617
1618# if ARCH_BITS == 32
1619IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1620# else
1621IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1622# endif
1623{
1624# if ARCH_BITS == 32
1625 uint64_t const uSrcReg = *puSrcReg;
1626# endif
1627 uint64_t uOld = *puRax;
1628 if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
1629 Assert(*puRax == uOld);
1630 iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
1631}
1632
1633
1634IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
1635 uint32_t *pEFlags))
1636{
1637 uint64_t const uNew = pu64EbxEcx->u;
1638 uint64_t const uOld = pu64EaxEdx->u;
1639 if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
1640 {
1641 Assert(pu64EaxEdx->u == uOld);
1642 *pEFlags |= X86_EFL_ZF;
1643 }
1644 else
1645 *pEFlags &= ~X86_EFL_ZF;
1646}
1647
1648
1649# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
1650IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1651 uint32_t *pEFlags))
1652{
1653# ifdef VBOX_STRICT
1654 RTUINT128U const uOld = *pu128RaxRdx;
1655# endif
1656# if defined(RT_ARCH_AMD64)
1657 if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
1658 &pu128RaxRdx->u))
1659# else
1660 if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
1661# endif
1662 {
1663 Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
1664 *pEFlags |= X86_EFL_ZF;
1665 }
1666 else
1667 *pEFlags &= ~X86_EFL_ZF;
1668}
1669# endif
1670
1671#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1672
1673#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
1674IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1675 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1676{
1677 RTUINT128U u128Tmp = *pu128Dst;
1678 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1679 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1680 {
1681 *pu128Dst = *pu128RbxRcx;
1682 *pEFlags |= X86_EFL_ZF;
1683 }
1684 else
1685 {
1686 *pu128RaxRdx = u128Tmp;
1687 *pEFlags &= ~X86_EFL_ZF;
1688 }
1689}
1690#endif /* !RT_ARCH_ARM64 */
1691
1692#if defined(IEM_WITHOUT_ASSEMBLY)
1693
1694/* Unlocked versions mapped to the locked ones: */
1695
1696IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1697{
1698 iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
1699}
1700
1701
1702IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1703{
1704 iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
1705}
1706
1707
1708IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1709{
1710 iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
1711}
1712
1713
1714# if ARCH_BITS == 32
1715IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1716{
1717 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
1718}
1719# else
1720IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1721{
1722 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
1723}
1724# endif
1725
1726
1727IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
1728{
1729 iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
1730}
1731
1732
1733IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1734 uint32_t *pEFlags))
1735{
1736 iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
1737}
1738
1739#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1740
1741#if (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) \
1742 && !defined(DOXYGEN_RUNNING) /* Doxygen has some grokking issues here and ends up mixing up input. Not worth tracking down now. */
1743
1744/*
1745 * MUL, IMUL, DIV and IDIV helpers.
1746 *
1747 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
1748 * division step so we can select between using C operators and
1749 * RTUInt128DivRem/RTUInt128MulU64ByU64.
1750 *
1751 * - The U8 versions return their output in AL + AH instead of xDX + xAX, with
1752 * IDIV/DIV taking all their input from AX too. This means we have to abstract
1753 * some of the input loads and the result storing.
1754 */
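
/*
 * For instance, DIV with a 64-bit operand divides the 128-bit value RDX:RAX
 * by that operand: with RDX=1, RAX=0 (i.e. 2^64) and a divisor of 2 the
 * quotient is 2^63 = 0x8000000000000000, which still fits in RAX; a divisor
 * of 1 would need a 65-bit quotient, so it must raise #DE instead. That is
 * exactly what the Dividend.s.Hi < uDivisor check in EMIT_DIV below rejects.
 */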
1755
1756DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
1757{
1758# ifdef __GNUC__ /* GCC can be really annoying in this function otherwise. */
1759 pQuotient->s.Lo = 0;
1760 pQuotient->s.Hi = 0;
1761# endif
1762 RTUINT128U Divisor;
1763 Divisor.s.Lo = u64Divisor;
1764 Divisor.s.Hi = 0;
1765 RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
1766}
1767
1768# define DIV_LOAD(a_Dividend) \
1769 a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
1770# define DIV_LOAD_U8(a_Dividend) \
1771 a_Dividend.u = *puAX
1772
1773# define DIV_STORE(a_Quotient, a_uRemainder) *puA = (a_Quotient), *puD = (a_uRemainder)
1774# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
1775
1776# define MUL_LOAD_F1() *puA
1777# define MUL_LOAD_F1_U8() ((uint8_t)*puAX)
1778
1779# define MUL_STORE(a_Result) *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
1780# define MUL_STORE_U8(a_Result) *puAX = a_Result.u
1781
1782# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
1783 (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
1784# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
1785 RTUInt128AssignNeg(&(a_Value))
1786
1787# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1788 (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
1789# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1790 RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);
1791
1792# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1793 a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
1794 a_Remainder.u = (a_Dividend).u % (a_uDivisor)
1795# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1796 RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
1797
1798
1799/*
1800 * MUL
1801 */
1802# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
1803IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
1804{ \
1805 RTUINT ## a_cBitsWidth2x ## U Result; \
1806 a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
1807 a_fnStore(Result); \
1808 \
1809 /* Calc EFLAGS: */ \
1810 uint32_t fEfl = *pfEFlags; \
1811 if (a_fIntelFlags) \
1812 { /* Intel: 6700K and 10980XE behavior */ \
1813 fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
1814 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1815 fEfl |= X86_EFL_SF; \
1816 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1817 if (Result.s.Hi != 0) \
1818 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1819 } \
1820 else \
1821 { /* AMD: 3990X */ \
1822 if (Result.s.Hi != 0) \
1823 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1824 else \
1825 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
1826 } \
1827 *pfEFlags = fEfl; \
1828 return 0; \
1829} \
1830
1831# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
1832 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
1833 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
1834 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \
1835
1836# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1837EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1838 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
1839# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1840EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1841 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1842EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1843 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1844EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
1845 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
1846# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1847# endif /* !DOXYGEN_RUNNING */
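
/*
 * Sanity-check example for the CF/OF rule above: mul_u8 with AL=0xc8 (200)
 * and a factor of 2 yields AX=0x0190 (400); AH is non-zero, so CF=OF=1.
 * With AL=0x64 (100) the product 0x00c8 leaves AH zero and CF=OF=0. On
 * Intel, SF/ZF/PF are additionally recomputed from the low byte of the
 * result, while AMD leaves them untouched.
 */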
1848
1849
1850/*
1851 * IMUL
1852 *
1853 * The SF, ZF, AF and PF flags are "undefined". AMD (3990X) leaves these
1854 * flags as-is, whereas Intel Skylake (6700K and 10980XE (Cascade Lake)) always
1855 * clears AF and ZF and calculates SF and PF from the lower half of the result.
1856 */
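
/*
 * Worked example of the sign handling below: imul_u8 with AL=0x9c (-100) and
 * a factor of 2 goes via |uFactor1| * |uFactor2| = 200, sees 200 > 0x80 and
 * sets CF=OF=1 (the product won't fit in AL), then negates and stores
 * AX=0xff38 (-200). With AL=0xd8 (-40) the magnitude 80 fits, so CF=OF=0
 * and AX=0xffb0 (-80).
 */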
1857# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
1858 a_Suffix, a_fIntelFlags) \
1859IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
1860{ \
1861 RTUINT ## a_cBitsWidth2x ## U Result; \
1862 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
1863 \
1864 uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
1865 if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
1866 { \
1867 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1868 { \
1869 a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
1870 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1871 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1872 } \
1873 else \
1874 { \
1875 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1876 a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1877 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1878 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1879 a_fnNeg(Result, a_cBitsWidth2x); \
1880 } \
1881 } \
1882 else \
1883 { \
1884 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1885 { \
1886 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1887 a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
1888 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1889 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1890 a_fnNeg(Result, a_cBitsWidth2x); \
1891 } \
1892 else \
1893 { \
1894 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1895 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1896 a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1897 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1898 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1899 } \
1900 } \
1901 a_fnStore(Result); \
1902 \
1903 if (a_fIntelFlags) \
1904 { \
1905 fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
1906 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1907 fEfl |= X86_EFL_SF; \
1908 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1909 } \
1910 *pfEFlags = fEfl; \
1911 return 0; \
1912}
1913# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
1914 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
1915 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
1916 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)
1917
1918# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1919EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1920 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
1921# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1922EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1923 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1924EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1925 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1926EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
1927 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
1928# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1929# endif /* !DOXYGEN_RUNNING */
1930
1931
1932/*
1933 * IMUL with two operands are mapped onto the three operand variant, ignoring
1934 * the high part of the product.
1935 */
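
/*
 * E.g. the two-operand iemAImpl_imul_two_u16 on uDst=0x4000 with uSrc=4: the
 * full product is 0x00010000, so 0x0000 is stored back into uDst, the high
 * word lands in the ignored local, and CF=OF=1 reports the truncation.
 */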
1936# define EMIT_IMUL_TWO(a_cBits, a_uType) \
1937IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1938{ \
1939 a_uType uIgn; \
1940 iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
1941} \
1942\
1943IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1944{ \
1945 a_uType uIgn; \
1946 iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
1947} \
1948\
1949IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1950{ \
1951 a_uType uIgn; \
1952 iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
1953}
1954
1955EMIT_IMUL_TWO(64, uint64_t)
1956# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1957EMIT_IMUL_TWO(32, uint32_t)
1958EMIT_IMUL_TWO(16, uint16_t)
1959# endif
1960
1961
1962/*
1963 * DIV
1964 */
1965# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
1966 a_Suffix, a_fIntelFlags) \
1967IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
1968{ \
1969 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1970 a_fnLoad(Dividend); \
1971 if ( uDivisor != 0 \
1972 && Dividend.s.Hi < uDivisor) \
1973 { \
1974 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1975 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1976 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1977 \
1978 /* Calc EFLAGS: Intel 6700K and 10980XE leaves them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
1979 if (!a_fIntelFlags) \
1980 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1981 return 0; \
1982 } \
1983 /* #DE */ \
1984 return -1; \
1985}
1986# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
1987 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
1988 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
1989 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)
1990
1991# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1992EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1993 DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1994# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1995EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1996 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1997EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1998 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1999EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2000 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
2001# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2002# endif /* !DOXYGEN_RUNNING */
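
/*
 * The Dividend.s.Hi < uDivisor guard above is the exact "quotient fits"
 * criterion. E.g. div_u16 with DX:AX = 0x0001:0x0000 (65536): a divisor of 2
 * passes (1 < 2) and stores AX=0x8000, DX=0, while a divisor of 1 fails
 * (1 is not < 1) because the quotient would need 17 bits, so the helper
 * returns -1 to raise #DE, just like a zero divisor does.
 */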
2003
2004
2005/*
2006 * IDIV
2007 *
2008 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
2009 * set AF and clear PF, ZF and SF just like it does for DIV.
2010 *
2011 */
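
/*
 * Classic overflow case caught by the range checks below: idiv_u8 with
 * AX=0x8000 (-32768) and a divisor of 0xff (-1). Both operands get negated
 * (dividend 32768, divisor 1), the unsigned division yields quotient 32768,
 * and the negative/negative branch then fails its Hi == 0 / <= INT8_MAX
 * check, so we return -1 (#DE) rather than store an unrepresentable +32768.
 */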
2012# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
2013 a_Suffix, a_fIntelFlags) \
2014IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
2015{ \
2016 /* Note! Skylake leaves all flags alone. */ \
2017 \
2018 /** @todo overflow checks */ \
2019 if (uDivisor != 0) \
2020 { \
2021 /* \
2022 * Convert to unsigned division. \
2023 */ \
2024 RTUINT ## a_cBitsWidth2x ## U Dividend; \
2025 a_fnLoad(Dividend); \
2026 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
2027 if (fSignedDividend) \
2028 a_fnNeg(Dividend, a_cBitsWidth2x); \
2029 \
2030 uint ## a_cBitsWidth ## _t uDivisorPositive; \
2031 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2032 uDivisorPositive = uDivisor; \
2033 else \
2034 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
2035 \
2036 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
2037 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
2038 \
2039 /* \
2040 * Setup the result, checking for overflows. \
2041 */ \
2042 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2043 { \
2044 if (!fSignedDividend) \
2045 { \
2046 /* Positive divisor, positive dividend => result positive. */ \
2047 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2048 { \
2049 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
2050 if (!a_fIntelFlags) \
2051 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2052 return 0; \
2053 } \
2054 } \
2055 else \
2056 { \
2057 /* Positive divisor, negative dividend => result negative. */ \
2058 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2059 { \
2060 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2061 if (!a_fIntelFlags) \
2062 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2063 return 0; \
2064 } \
2065 } \
2066 } \
2067 else \
2068 { \
2069 if (!fSignedDividend) \
2070 { \
2071 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
2072 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2073 { \
2074 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
2075 if (!a_fIntelFlags) \
2076 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2077 return 0; \
2078 } \
2079 } \
2080 else \
2081 { \
2082 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
2083 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2084 { \
2085 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2086 if (!a_fIntelFlags) \
2087 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2088 return 0; \
2089 } \
2090 } \
2091 } \
2092 } \
2093 /* #DE */ \
2094 return -1; \
2095}
2096# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
2097 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
2098 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
2099 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)
2100
2101# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
2102EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2103 DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
2104# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2105EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2106 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2107EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2108 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2109EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2110 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
2111# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2112# endif /* !DOXYGEN_RUNNING */
2113
2114#endif /* (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) && !defined(DOXYGEN_RUNNING) */
2115
2116
2117/*********************************************************************************************************************************
2118* Unary operations. *
2119*********************************************************************************************************************************/
2120#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2121
2122/** @def IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC
2123 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2124 *
2125 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2126 * borrowing in arithmetic loops on the Intel 8008).
2127 *
2128 * @returns Status bits.
2129 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2130 * @param a_uResult Unsigned result value.
2131 * @param a_uDst The original destination value (for AF calc).
2132 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2133 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2134 */
2135#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2136 do { \
2137 uint32_t fEflTmp = *(a_pfEFlags); \
2138 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
2139 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2140 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2141 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2142 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2143 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
2144 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
2145 *(a_pfEFlags) = fEflTmp; \
2146 } while (0)
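
/* OF sanity check for the two methods: inc_u8(0x7f) -> 0x80 gives
   ((0x7f ^ 0x80) & 0x80) = 0x80, i.e. OF=1, and dec_u8(0x80) -> 0x7f gives
   (0x80 & (0x7f ^ 0x80)) = 0x80, i.e. OF=1 as well; for a non-boundary
   value like inc_u8(0x41) the expression is zero and OF stays clear. */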
2147
2148/*
2149 * INC
2150 */
2151
2152IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2153{
2154 uint64_t uDst = *puDst;
2155 uint64_t uResult = uDst + 1;
2156 *puDst = uResult;
2157 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2158}
2159
2160# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2161
2162IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2163{
2164 uint32_t uDst = *puDst;
2165 uint32_t uResult = uDst + 1;
2166 *puDst = uResult;
2167 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2168}
2169
2170
2171IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2172{
2173 uint16_t uDst = *puDst;
2174 uint16_t uResult = uDst + 1;
2175 *puDst = uResult;
2176 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2177}
2178
2179IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2180{
2181 uint8_t uDst = *puDst;
2182 uint8_t uResult = uDst + 1;
2183 *puDst = uResult;
2184 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2185}
2186
2187# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2188
2189
2190/*
2191 * DEC
2192 */
2193
2194IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2195{
2196 uint64_t uDst = *puDst;
2197 uint64_t uResult = uDst - 1;
2198 *puDst = uResult;
2199 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2200}
2201
2202# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2203
2204IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2205{
2206 uint32_t uDst = *puDst;
2207 uint32_t uResult = uDst - 1;
2208 *puDst = uResult;
2209 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2210}
2211
2212
2213IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2214{
2215 uint16_t uDst = *puDst;
2216 uint16_t uResult = uDst - 1;
2217 *puDst = uResult;
2218 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2219}
2220
2221
2222IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2223{
2224 uint8_t uDst = *puDst;
2225 uint8_t uResult = uDst - 1;
2226 *puDst = uResult;
2227 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2228}
2229
2230# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2231
2232
2233/*
2234 * NOT
2235 */
2236
2237IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2238{
2239 uint64_t uDst = *puDst;
2240 uint64_t uResult = ~uDst;
2241 *puDst = uResult;
2242 /* EFLAGS are not modified. */
2243 RT_NOREF_PV(pfEFlags);
2244}
2245
2246# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2247
2248IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2249{
2250 uint32_t uDst = *puDst;
2251 uint32_t uResult = ~uDst;
2252 *puDst = uResult;
2253 /* EFLAGS are not modified. */
2254 RT_NOREF_PV(pfEFlags);
2255}
2256
2257IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2258{
2259 uint16_t uDst = *puDst;
2260 uint16_t uResult = ~uDst;
2261 *puDst = uResult;
2262 /* EFLAGS are not modified. */
2263 RT_NOREF_PV(pfEFlags);
2264}
2265
2266IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2267{
2268 uint8_t uDst = *puDst;
2269 uint8_t uResult = ~uDst;
2270 *puDst = uResult;
2271 /* EFLAGS are not modified. */
2272 RT_NOREF_PV(pfEFlags);
2273}
2274
2275# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2276
2277
2278/*
2279 * NEG
2280 */
2281
2282/**
2283 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an NEG instruction.
2284 *
2285 * @returns Status bits.
2286 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2287 * @param a_uResult Unsigned result value.
2288 * @param a_uDst The original destination value (for AF calc).
2289 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2290 */
2291#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2292 do { \
2293 uint32_t fEflTmp = *(a_pfEFlags); \
2294 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2295 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2296 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2297 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2298 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2299 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2300 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2301 *(a_pfEFlags) = fEflTmp; \
2302 } while (0)
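
/* E.g. neg_u8(0x80): the result is again 0x80, CF=1 (the source was
   non-zero) and (uDst & uResult) = 0x80 has the sign bit set, so OF=1,
   flagging that -(-128) is unrepresentable; neg_u8(0x01) gives 0xff with
   CF=1, OF=0, and neg_u8(0x00) keeps 0x00 with CF=0. */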
2303
2304IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2305{
2306 uint64_t uDst = *puDst;
2307 uint64_t uResult = (uint64_t)0 - uDst;
2308 *puDst = uResult;
2309 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2310}
2311
2312# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2313
2314IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2315{
2316 uint32_t uDst = *puDst;
2317 uint32_t uResult = (uint32_t)0 - uDst;
2318 *puDst = uResult;
2319 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2320}
2321
2322
2323IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2324{
2325 uint16_t uDst = *puDst;
2326 uint16_t uResult = (uint16_t)0 - uDst;
2327 *puDst = uResult;
2328 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2329}
2330
2331
2332IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2333{
2334 uint8_t uDst = *puDst;
2335 uint8_t uResult = (uint8_t)0 - uDst;
2336 *puDst = uResult;
2337 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2338}
2339
2340# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2341
2342/*
2343 * Locked variants.
2344 */
2345
2346/** Emit a function for doing a locked unary operand operation. */
2347# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2348 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2349 uint32_t *pfEFlags)) \
2350 { \
2351 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2352 uint ## a_cBitsWidth ## _t uTmp; \
2353 uint32_t fEflTmp; \
2354 do \
2355 { \
2356 uTmp = uOld; \
2357 fEflTmp = *pfEFlags; \
2358 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2359 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2360 *pfEFlags = fEflTmp; \
2361 }
2362
2363EMIT_LOCKED_UNARY_OP(inc, 64)
2364EMIT_LOCKED_UNARY_OP(dec, 64)
2365EMIT_LOCKED_UNARY_OP(not, 64)
2366EMIT_LOCKED_UNARY_OP(neg, 64)
2367# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2368EMIT_LOCKED_UNARY_OP(inc, 32)
2369EMIT_LOCKED_UNARY_OP(dec, 32)
2370EMIT_LOCKED_UNARY_OP(not, 32)
2371EMIT_LOCKED_UNARY_OP(neg, 32)
2372
2373EMIT_LOCKED_UNARY_OP(inc, 16)
2374EMIT_LOCKED_UNARY_OP(dec, 16)
2375EMIT_LOCKED_UNARY_OP(not, 16)
2376EMIT_LOCKED_UNARY_OP(neg, 16)
2377
2378EMIT_LOCKED_UNARY_OP(inc, 8)
2379EMIT_LOCKED_UNARY_OP(dec, 8)
2380EMIT_LOCKED_UNARY_OP(not, 8)
2381EMIT_LOCKED_UNARY_OP(neg, 8)
2382# endif
2383
2384#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2385
2386
2387/*********************************************************************************************************************************
2388* Shifting and Rotating *
2389*********************************************************************************************************************************/
2390
2391/*
2392 * ROL
2393 */
2394#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2395IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2396{ \
2397 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2398 if (cShift) \
2399 { \
2400 if (a_cBitsWidth < 32) \
2401 cShift &= a_cBitsWidth - 1; \
2402 a_uType const uDst = *puDst; \
2403 a_uType const uResult = a_fnHlp(uDst, cShift); \
2404 *puDst = uResult; \
2405 \
2406 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2407 it the same way as for 1-bit shifts. */ \
2408 AssertCompile(X86_EFL_CF_BIT == 0); \
2409 uint32_t fEfl = *pfEFlags; \
2410 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2411 uint32_t const fCarry = (uResult & X86_EFL_CF); \
2412 fEfl |= fCarry; \
2413 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2414 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2415 else /* Intel 10980XE: According to the first sub-shift: */ \
2416 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2417 *pfEFlags = fEfl; \
2418 } \
2419}
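
/*
 * Single-bit check of the two OF variants above: rol_u8(0x81, 1) -> 0x03
 * with CF=1 (bit 0 of the result). AMD computes OF = msb(result) ^ CF
 * = 0 ^ 1 = 1; Intel takes bit 7 of uDst ^ (uDst << 1), also 1 here. The
 * two only diverge for cShift > 1.
 */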
2420
2421#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2422EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
2423#endif
2424EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
2425EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)
2426
2427#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2428EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
2429#endif
2430EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
2431EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2432
2433DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2434{
2435 return (uValue << cShift) | (uValue >> (16 - cShift));
2436}
2437#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2438EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
2439#endif
2440EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
2441EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2442
2443DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2444{
2445 return (uValue << cShift) | (uValue >> (8 - cShift));
2446}
2447#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2448EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
2449#endif
2450EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
2451EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
2452
2453
2454/*
2455 * ROR
2456 */
2457#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2458IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2459{ \
2460 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2461 if (cShift) \
2462 { \
2463 if (a_cBitsWidth < 32) \
2464 cShift &= a_cBitsWidth - 1; \
2465 a_uType const uDst = *puDst; \
2466 a_uType const uResult = a_fnHlp(uDst, cShift); \
2467 *puDst = uResult; \
2468 \
2469 /* Calc EFLAGS: */ \
2470 AssertCompile(X86_EFL_CF_BIT == 0); \
2471 uint32_t fEfl = *pfEFlags; \
2472 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2473 uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2474 fEfl |= fCarry; \
2475 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2476 fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2477 else /* Intel 10980XE: According to the first sub-shift: */ \
2478 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
2479 *pfEFlags = fEfl; \
2480 } \
2481}
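
/*
 * E.g. ror_u8(0x01, 1) -> 0x80 with CF = new msb = 1. AMD computes
 * OF = bit 6 of the result ^ CF = 0 ^ 1 = 1; Intel takes bit 7 of
 * uDst ^ (uDst << 7) = 0x01 ^ 0x80, also 1, matching the documented
 * msb ^ (msb-1) rule for 1-bit rotates.
 */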
2482
2483#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2484EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
2485#endif
2486EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
2487EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)
2488
2489#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2490EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
2491#endif
2492EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
2493EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2494
2495DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2496{
2497 return (uValue >> cShift) | (uValue << (16 - cShift));
2498}
2499#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2500EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
2501#endif
2502EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
2503EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2504
2505DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2506{
2507 return (uValue >> cShift) | (uValue << (8 - cShift));
2508}
2509#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2510EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
2511#endif
2512EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
2513EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2514
2515
2516/*
2517 * RCL
2518 */
2519#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2520IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2521{ \
2522 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2523 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2524 cShift %= a_cBitsWidth + 1; \
2525 if (cShift) \
2526 { \
2527 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2528 cShift %= a_cBitsWidth + 1; \
2529 a_uType const uDst = *puDst; \
2530 a_uType uResult = uDst << cShift; \
2531 if (cShift > 1) \
2532 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2533 \
2534 AssertCompile(X86_EFL_CF_BIT == 0); \
2535 uint32_t fEfl = *pfEFlags; \
2536 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2537 uResult |= (a_uType)fInCarry << (cShift - 1); \
2538 \
2539 *puDst = uResult; \
2540 \
2541 /* Calc EFLAGS. */ \
2542 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2543 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2544 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
2545 fEfl |= fOutCarry; \
2546 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2547 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
2548 else /* Intel 10980XE: According to the first sub-shift: */ \
2549 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2550 *pfEFlags = fEfl; \
2551 } \
2552}
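
/*
 * RCL effectively rotates a (width+1)-bit value made up of CF:operand. E.g.
 * rcl_u8(0x80, 1) with CF=0: the old msb moves out into CF (fOutCarry=1),
 * the old CF moves into bit 0, the result is 0x00, and both vendors yield
 * OF = msb(result) ^ new CF = 0 ^ 1 = 1 for this 1-bit case.
 */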
2553
2554#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2555EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
2556#endif
2557EMIT_RCL(64, uint64_t, _intel, 1)
2558EMIT_RCL(64, uint64_t, _amd, 0)
2559
2560#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2561EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
2562#endif
2563EMIT_RCL(32, uint32_t, _intel, 1)
2564EMIT_RCL(32, uint32_t, _amd, 0)
2565
2566#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2567EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
2568#endif
2569EMIT_RCL(16, uint16_t, _intel, 1)
2570EMIT_RCL(16, uint16_t, _amd, 0)
2571
2572#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2573EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
2574#endif
2575EMIT_RCL(8, uint8_t, _intel, 1)
2576EMIT_RCL(8, uint8_t, _amd, 0)
2577
2578
2579/*
2580 * RCR
2581 */
2582#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2583IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2584{ \
2585 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2586 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2587 cShift %= a_cBitsWidth + 1; \
2588 if (cShift) \
2589 { \
2590 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2591 cShift %= a_cBitsWidth + 1; \
2592 a_uType const uDst = *puDst; \
2593 a_uType uResult = uDst >> cShift; \
2594 if (cShift > 1) \
2595 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2596 \
2597 AssertCompile(X86_EFL_CF_BIT == 0); \
2598 uint32_t fEfl = *pfEFlags; \
2599 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2600 uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
2601 *puDst = uResult; \
2602 \
2603 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2604 it the same way as for 1-bit shifts. */ \
2605 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2606 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2607 ? (uDst >> (cShift - 1)) & X86_EFL_CF : fInCarry; \
2608 fEfl |= fOutCarry; \
2609 if (!a_fIntelFlags) /* AMD 3990X: XOR the two most significant bits of the result: */ \
2610 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
2611 else /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
2612 fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2613 *pfEFlags = fEfl; \
2614 } \
2615}
2616
2617#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2618EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2619#endif
2620EMIT_RCR(64, uint64_t, _intel, 1)
2621EMIT_RCR(64, uint64_t, _amd, 0)
2622
2623#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2624EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2625#endif
2626EMIT_RCR(32, uint32_t, _intel, 1)
2627EMIT_RCR(32, uint32_t, _amd, 0)
2628
2629#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2630EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2631#endif
2632EMIT_RCR(16, uint16_t, _intel, 1)
2633EMIT_RCR(16, uint16_t, _amd, 0)
2634
2635#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2636EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2637#endif
2638EMIT_RCR(8, uint8_t, _intel, 1)
2639EMIT_RCR(8, uint8_t, _amd, 0)
2640
2641
2642/*
2643 * SHL
2644 */
2645#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2646IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2647{ \
2648 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2649 if (cShift) \
2650 { \
2651 a_uType const uDst = *puDst; \
2652 a_uType uResult = uDst << cShift; \
2653 *puDst = uResult; \
2654 \
2655 /* Calc EFLAGS. */ \
2656 AssertCompile(X86_EFL_CF_BIT == 0); \
2657 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2658 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2659 fEfl |= fCarry; \
2660 if (!a_fIntelFlags) \
2661 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
2662 else \
2663 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
2664 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2665 fEfl |= X86_EFL_CALC_ZF(uResult); \
2666 fEfl |= g_afParity[uResult & 0xff]; \
2667 if (!a_fIntelFlags) \
2668 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2669 *pfEFlags = fEfl; \
2670 } \
2671}
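
/*
 * E.g. shl_u8(0xc0, 1) -> 0x80: CF = old bit 7 = 1 and both OF variants
 * give msb(result) ^ CF = 1 ^ 1 = 0 (the sign effectively survived),
 * while shl_u8(0x40, 1) -> 0x80 gives CF=0 and OF=1.
 */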
2672
2673#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2674EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
2675#endif
2676EMIT_SHL(64, uint64_t, _intel, 1)
2677EMIT_SHL(64, uint64_t, _amd, 0)
2678
2679#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2680EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
2681#endif
2682EMIT_SHL(32, uint32_t, _intel, 1)
2683EMIT_SHL(32, uint32_t, _amd, 0)
2684
2685#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2686EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
2687#endif
2688EMIT_SHL(16, uint16_t, _intel, 1)
2689EMIT_SHL(16, uint16_t, _amd, 0)
2690
2691#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2692EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
2693#endif
2694EMIT_SHL(8, uint8_t, _intel, 1)
2695EMIT_SHL(8, uint8_t, _amd, 0)
2696
2697
2698/*
2699 * SHR
2700 */
2701#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2702IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2703{ \
2704 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2705 if (cShift) \
2706 { \
2707 a_uType const uDst = *puDst; \
2708 a_uType uResult = uDst >> cShift; \
2709 *puDst = uResult; \
2710 \
2711 /* Calc EFLAGS. */ \
2712 AssertCompile(X86_EFL_CF_BIT == 0); \
2713 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2714 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2715 if (a_fIntelFlags || cShift == 1) /* AMD 3990x does what intel documents; Intel 10980XE does this for all shift counts. */ \
2716 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2717 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2718 fEfl |= X86_EFL_CALC_ZF(uResult); \
2719 fEfl |= g_afParity[uResult & 0xff]; \
2720 if (!a_fIntelFlags) \
2721 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2722 *pfEFlags = fEfl; \
2723 } \
2724}
2725
2726#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2727EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
2728#endif
2729EMIT_SHR(64, uint64_t, _intel, 1)
2730EMIT_SHR(64, uint64_t, _amd, 0)
2731
2732#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2733EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
2734#endif
2735EMIT_SHR(32, uint32_t, _intel, 1)
2736EMIT_SHR(32, uint32_t, _amd, 0)
2737
2738#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2739EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
2740#endif
2741EMIT_SHR(16, uint16_t, _intel, 1)
2742EMIT_SHR(16, uint16_t, _amd, 0)
2743
2744#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2745EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
2746#endif
2747EMIT_SHR(8, uint8_t, _intel, 1)
2748EMIT_SHR(8, uint8_t, _amd, 0)
2749
2750
2751/*
2752 * SAR
2753 */
2754#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
2755IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2756{ \
2757 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2758 if (cShift) \
2759 { \
2760 a_iType const iDst = (a_iType)*puDst; \
2761 a_uType uResult = iDst >> cShift; \
2762 *puDst = uResult; \
2763 \
2764 /* Calc EFLAGS. \
2765 Note! The OF flag is always zero because the sign of the result never differs from the sign of the input. */ \
2766 AssertCompile(X86_EFL_CF_BIT == 0); \
2767 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2768 fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; \
2769 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2770 fEfl |= X86_EFL_CALC_ZF(uResult); \
2771 fEfl |= g_afParity[uResult & 0xff]; \
2772 if (!a_fIntelFlags) \
2773 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2774 *pfEFlags = fEfl; \
2775 } \
2776}
2777
2778#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2779EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
2780#endif
2781EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
2782EMIT_SAR(64, uint64_t, int64_t, _amd, 0)
2783
2784#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2785EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
2786#endif
2787EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
2788EMIT_SAR(32, uint32_t, int32_t, _amd, 0)
2789
2790#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2791EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
2792#endif
2793EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
2794EMIT_SAR(16, uint16_t, int16_t, _amd, 0)
2795
2796#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2797EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
2798#endif
2799EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
2800EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
2801
2802
2803/*
2804 * SHLD
2805 *
2806 * - CF is the last bit shifted out of puDst.
2807 * - AF is always cleared by Intel 10980XE.
2808 * - AF is always set by AMD 3990X.
2809 * - OF is set according to the first shift on Intel 10980XE, it seems.
2810 * - OF is set according to the last sub-shift on AMD 3990X.
2811 * - ZF, SF and PF are calculated according to the result by both vendors.
2812 *
2813 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2814 * pick either the source register or the destination register for input bits
2815 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2816 * Intel has changed behaviour here several times. We implement what current
2817 * Skylake-based CPUs do for now; we can extend this later as needed.
2818 */
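
/*
 * Illustration of the >16 counts with the Skylake behaviour implemented
 * below: shld_u16 shifts a 48-bit uDst:uSrc:uDst composite (AMD uses
 * uDst:uSrc:uSrc) and keeps bits 47:32. E.g. uDst=0x8000, uSrc=0x0001,
 * cShift=17: ((0x800000018000 << 17) >> 32) & 0xffff = 0x0003, i.e. for
 * counts beyond 16 the low uDst copy starts feeding bits back in.
 */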
2819#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2820IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
2821 uint32_t *pfEFlags)) \
2822{ \
2823 cShift &= a_cBitsWidth - 1; \
2824 if (cShift) \
2825 { \
2826 a_uType const uDst = *puDst; \
2827 a_uType uResult = uDst << cShift; \
2828 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2829 *puDst = uResult; \
2830 \
2831 /* CALC EFLAGS: */ \
2832 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2833 if (a_fIntelFlags) \
2834 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2835 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2836 else \
2837 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2838 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
2839 fEfl |= X86_EFL_AF; \
2840 } \
2841 AssertCompile(X86_EFL_CF_BIT == 0); \
2842 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
2843 fEfl |= g_afParity[uResult & 0xff]; \
2844 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2845 fEfl |= X86_EFL_CALC_ZF(uResult); \
2846 *pfEFlags = fEfl; \
2847 } \
2848}
2849
2850#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2851EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
2852#endif
2853EMIT_SHLD(64, uint64_t, _intel, 1)
2854EMIT_SHLD(64, uint64_t, _amd, 0)
2855
2856#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2857EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
2858#endif
2859EMIT_SHLD(32, uint32_t, _intel, 1)
2860EMIT_SHLD(32, uint32_t, _amd, 0)
2861
2862#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
2863IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2864{ \
2865 cShift &= 31; \
2866 if (cShift) \
2867 { \
2868 uint16_t const uDst = *puDst; \
2869 uint64_t const uTmp = a_fIntelFlags \
2870 ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
2871 : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
2872 uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
2873 *puDst = uResult; \
2874 \
2875 /* CALC EFLAGS: */ \
2876 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2877 AssertCompile(X86_EFL_CF_BIT == 0); \
2878 if (a_fIntelFlags) \
2879 { \
2880 fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
2881 /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
2882 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
2883 } \
2884 else \
2885 { \
2886 /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
2887 if (cShift < 16) \
2888 { \
2889 fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
2890 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
2891 } \
2892 else \
2893 { \
2894 if (cShift == 16) \
2895 fEfl |= uDst & X86_EFL_CF; \
2896 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
2897 } \
2898 fEfl |= X86_EFL_AF; \
2899 } \
2900 fEfl |= g_afParity[uResult & 0xff]; \
2901 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2902 fEfl |= X86_EFL_CALC_ZF(uResult); \
2903 *pfEFlags = fEfl; \
2904 } \
2905}
2906
2907#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2908EMIT_SHLD_16(RT_NOTHING, 1)
2909#endif
2910EMIT_SHLD_16(_intel, 1)
2911EMIT_SHLD_16(_amd, 0)
2912
2913
2914/*
2915 * SHRD
2916 *
2917 * EFLAGS behaviour seems to be the same as with SHLD:
2918 * - CF is the last bit shifted out of puDst.
2919 * - AF is always cleared by Intel 10980XE.
2920 * - AF is always set by AMD 3990X.
2921 * - OF is set according to the first shift on Intel 10980XE, it seems.
2922 * - OF is set according to the last sub-shift on AMD 3990X.
2923 * - ZF, SF and PF are calculated according to the result by both vendors.
2924 *
2925 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2926 * pick either the source register or the destination register for input bits
2927 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2928 * Intel has changed behaviour here several times. We implement what current
2929 * Skylake-based CPUs do for now; we can extend this later as needed.
2930 */
2931#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2932IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2933{ \
2934 cShift &= a_cBitsWidth - 1; \
2935 if (cShift) \
2936 { \
2937 a_uType const uDst = *puDst; \
2938 a_uType uResult = uDst >> cShift; \
2939 uResult |= uSrc << (a_cBitsWidth - cShift); \
2940 *puDst = uResult; \
2941 \
2942 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2943 AssertCompile(X86_EFL_CF_BIT == 0); \
2944 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2945 if (a_fIntelFlags) \
2946 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2947 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
2948 else \
2949 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2950 if (cShift > 1) /* Set according to last shift. */ \
2951 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
2952 else \
2953 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
2954 fEfl |= X86_EFL_AF; \
2955 } \
2956 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2957 fEfl |= X86_EFL_CALC_ZF(uResult); \
2958 fEfl |= g_afParity[uResult & 0xff]; \
2959 *pfEFlags = fEfl; \
2960 } \
2961}
2962
2963#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2964EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
2965#endif
2966EMIT_SHRD(64, uint64_t, _intel, 1)
2967EMIT_SHRD(64, uint64_t, _amd, 0)
2968
2969#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2970EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
2971#endif
2972EMIT_SHRD(32, uint32_t, _intel, 1)
2973EMIT_SHRD(32, uint32_t, _amd, 0)
2974
2975#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
2976IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2977{ \
2978 cShift &= 31; \
2979 if (cShift) \
2980 { \
2981 uint16_t const uDst = *puDst; \
2982 uint64_t const uTmp = a_fIntelFlags \
2983 ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
2984 : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
2985 uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
2986 *puDst = uResult; \
2987 \
2988 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2989 AssertCompile(X86_EFL_CF_BIT == 0); \
2990 if (a_fIntelFlags) \
2991 { \
2992 /* Intel 10980XE: The CF is the last bit shifted out of the combined uTmp operand. */ \
2993 fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
2994 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2995 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
2996 } \
2997 else \
2998 { \
2999 /* AMD 3990X: CF flag seems to be last bit shifted out of uDst, not the combined uSrc:uSrc:uDst operand. */ \
3000 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
3001 /* AMD 3990X: Set according to last shift. AF always set. */ \
3002 if (cShift > 1) /* Set according to last shift. */ \
3003 fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
3004 else \
3005 fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
3006 fEfl |= X86_EFL_AF; \
3007 } \
3008 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
3009 fEfl |= X86_EFL_CALC_ZF(uResult); \
3010 fEfl |= g_afParity[uResult & 0xff]; \
3011 *pfEFlags = fEfl; \
3012 } \
3013}
3014
3015#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
3016EMIT_SHRD_16(RT_NOTHING, 1)
3017#endif
3018EMIT_SHRD_16(_intel, 1)
3019EMIT_SHRD_16(_amd, 0)
3020
3021
3022#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
3023
3024# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
3025/*
3026 * BSWAP
3027 */
3028
3029IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
3030{
3031 *puDst = ASMByteSwapU64(*puDst);
3032}
3033
3034
3035IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
3036{
3037 *puDst = ASMByteSwapU32(*puDst);
3038}
3039
3040
3041/* Note! Undocumented, so 32-bit arg. */
3042IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
3043{
3044#if 0
3045 *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
3046#else
3047 /* This is the behaviour of the AMD 3990x (64-bit mode): */
3048 *(uint16_t *)puDst = 0;
3049#endif
3050}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */



# if defined(IEM_WITHOUT_ASSEMBLY)

/*
 * LFENCE, SFENCE & MFENCE.
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
{
    ASMReadFence();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
{
    ASMWriteFence();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
{
    ASMMemoryFence();
}


# ifndef RT_ARCH_ARM64
IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
{
    ASMMemoryFence();
}
# endif

# endif

#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */


IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
{
    if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
    {
        *pu16Dst &= X86_SEL_MASK_OFF_RPL;
        *pu16Dst |= u16Src & X86_SEL_RPL;

        *pfEFlags |= X86_EFL_ZF;
    }
    else
        *pfEFlags &= ~X86_EFL_ZF;
}
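/* Example: *pu16Dst=0x0008 (RPL=0) with a u16Src whose RPL=3 yields
   *pu16Dst=0x000B and ZF=1; if the destination RPL is already >= the source
   RPL, the selector is left untouched and ZF is cleared. */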


#if defined(IEM_WITHOUT_ASSEMBLY)

/*********************************************************************************************************************************
*   x87 FPU Loads                                                                                                                *
*********************************************************************************************************************************/

IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
{
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
    if (RTFLOAT32U_IS_NORMAL(pr32Val))
    {
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
        pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
        Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
    }
    else if (RTFLOAT32U_IS_ZERO(pr32Val))
    {
        pFpuRes->r80Result.s.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = 0;
        pFpuRes->r80Result.s.uMantissa = 0;
        Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
    }
    else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
    {
        /* Subnormal values get normalized. */
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
        pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
        pFpuRes->FSW |= X86_FSW_DE;
        if (!(pFpuState->FCW & X86_FCW_DM))
            pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
    }
    else if (RTFLOAT32U_IS_INF(pr32Val))
    {
        pFpuRes->r80Result.s.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
        Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
    }
    else
    {
        /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
        Assert(RTFLOAT32U_IS_NAN(pr32Val));
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
        if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
        {
            pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
            pFpuRes->FSW |= X86_FSW_IE;

            if (!(pFpuState->FCW & X86_FCW_IM))
            {
                /* The value is not pushed. */
                pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
                pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
                pFpuRes->r80Result.au64[0] = 0;
                pFpuRes->r80Result.au16[4] = 0;
            }
        }
        else
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
    }
}
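/* Example: loading 1.5f (0x3fc00000) yields fSign=0, uExponent=16383 and
   uMantissa=0xc000000000000000, i.e. the 23-bit fraction shifted up 40 bits
   with the integer bit set explicitly. */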


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
{
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
    if (RTFLOAT64U_IS_NORMAL(pr64Val))
    {
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
        pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
        Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
    }
    else if (RTFLOAT64U_IS_ZERO(pr64Val))
    {
        pFpuRes->r80Result.s.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = 0;
        pFpuRes->r80Result.s.uMantissa = 0;
        Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
    }
    else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
    {
        /* Subnormal values get normalized. */
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
        pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
        pFpuRes->FSW |= X86_FSW_DE;
        if (!(pFpuState->FCW & X86_FCW_DM))
            pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
    }
    else if (RTFLOAT64U_IS_INF(pr64Val))
    {
        pFpuRes->r80Result.s.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
        Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
    }
    else
    {
        /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
        Assert(RTFLOAT64U_IS_NAN(pr64Val));
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
        if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
        {
            pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
            pFpuRes->FSW |= X86_FSW_IE;

            if (!(pFpuState->FCW & X86_FCW_IM))
            {
                /* The value is not pushed. */
                pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
                pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
                pFpuRes->r80Result.au64[0] = 0;
                pFpuRes->r80Result.au16[4] = 0;
            }
        }
        else
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    pFpuRes->r80Result.au64[0] = pr80Val->au64[0];
    pFpuRes->r80Result.au16[4] = pr80Val->au16[4];
    /* Raises no exceptions. */
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction = 0;

    /*
     * FPU status word:
     *     - TOP is irrelevant, but we must match x86 assembly version.
     *     - C1 is always cleared as we don't have any stack overflows.
     *     - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
     */
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction =    (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                        || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                                      ? UINT64_C(0x38aa3b295c17f0bc) : UINT64_C(0x38aa3b295c17f0bb);
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) != X86_FCW_RC_UP
                                      ? UINT64_C(0x549a784bcd1b8afe) : UINT64_C(0x549a784bcd1b8aff);
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}
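/* Note: for fldl2t the round-to-nearest fraction happens to coincide with the
   truncated one, so only RC_UP selects the incremented value; for the other
   constants here, nearest and up share the larger value. */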


IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = -2 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction =    (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                        || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                                      ? UINT64_C(0x1a209a84fbcff799) : UINT64_C(0x1a209a84fbcff798);
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = -1 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction =    (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                        || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                                      ? UINT64_C(0x317217f7d1cf79ac) : UINT64_C(0x317217f7d1cf79ab);
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.sj64.fSign     = 0;
    pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
    pFpuRes->r80Result.sj64.fInteger  = 1;
    pFpuRes->r80Result.sj64.uFraction =    (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                        || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                                      ? UINT64_C(0x490fdaa22168c235) : UINT64_C(0x490fdaa22168c234);
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
{
    pFpuRes->r80Result.s.fSign     = 0;
    pFpuRes->r80Result.s.uExponent = 0;
    pFpuRes->r80Result.s.uMantissa = 0;
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
}

#define EMIT_FILD(a_cBits) \
IEM_DECL_IMPL_DEF(void, iemAImpl_fild_r80_from_i ## a_cBits,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, \
                                                             int ## a_cBits ## _t const *piVal)) \
{ \
    int ## a_cBits ## _t iVal = *piVal; \
    if (iVal == 0) \
    { \
        pFpuRes->r80Result.s.fSign     = 0; \
        pFpuRes->r80Result.s.uExponent = 0; \
        pFpuRes->r80Result.s.uMantissa = 0; \
    } \
    else \
    { \
        if (iVal > 0) \
            pFpuRes->r80Result.s.fSign = 0; \
        else \
        { \
            pFpuRes->r80Result.s.fSign = 1; \
            iVal = -iVal; \
        } \
        unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
        pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
        pFpuRes->r80Result.s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
    } \
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */ \
}
EMIT_FILD(16)
EMIT_FILD(32)
EMIT_FILD(64)


IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_d80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTPBCD80U pd80Val))
{
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
    if (   pd80Val->s.abPairs[0] == 0
        && pd80Val->s.abPairs[1] == 0
        && pd80Val->s.abPairs[2] == 0
        && pd80Val->s.abPairs[3] == 0
        && pd80Val->s.abPairs[4] == 0
        && pd80Val->s.abPairs[5] == 0
        && pd80Val->s.abPairs[6] == 0
        && pd80Val->s.abPairs[7] == 0
        && pd80Val->s.abPairs[8] == 0)
    {
        pFpuRes->r80Result.s.fSign     = pd80Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = 0;
        pFpuRes->r80Result.s.uMantissa = 0;
    }
    else
    {
        pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;

        size_t cPairs = RT_ELEMENTS(pd80Val->s.abPairs);
        while (cPairs > 0 && pd80Val->s.abPairs[cPairs - 1] == 0)
            cPairs--;

        uint64_t uVal    = 0;
        uint64_t uFactor = 1;
        for (size_t iPair = 0; iPair < cPairs; iPair++, uFactor *= 100)
            uVal += RTPBCD80U_LO_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor
                  + RTPBCD80U_HI_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor * 10;

        unsigned const cBits = ASMBitLastSetU64(uVal);
        pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS;
        pFpuRes->r80Result.s.uMantissa = uVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits);
    }
}
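/* Example: packed BCD 1234 arrives as abPairs[0]=0x34 and abPairs[1]=0x12,
   so the loop above accumulates 4*1 + 3*10 + (2 + 1*10)*100 = 1234 before
   the usual binary normalization kicks in. */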


/*********************************************************************************************************************************
*   x87 FPU Stores                                                                                                               *
*********************************************************************************************************************************/

/**
 * Helper for storing a deconstructed and normal R80 value as a 32-bit one.
 *
 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
 *
 * @returns Updated FPU status word value.
 * @param   fSignIn     Incoming sign indicator.
 * @param   uMantissaIn Incoming mantissa (dot between bit 63 and 62).
 * @param   iExponentIn Unbiased exponent.
 * @param   fFcw        The FPU control word.
 * @param   fFsw        Prepped FPU status word, i.e. exceptions and C1 clear.
 * @param   pr32Dst     Where to return the output value, if one should be
 *                      returned.
 *
 * @note    Tailored as a helper for iemAImpl_fst_r80_to_r32 right now.
 * @note    Exact same logic as iemAImpl_StoreNormalR80AsR64.
 */
static uint16_t iemAImpl_StoreNormalR80AsR32(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
                                             uint16_t fFcw, uint16_t fFsw, PRTFLOAT32U pr32Dst)
{
    uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS) - 1; /* 0xffffffffff */
    uint64_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                    ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS - 1) /* 0x8000000000 */
                                    : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
                                    ? fRoundingOffMask
                                    : 0;
    uint64_t       fRoundedOff      = uMantissaIn & fRoundingOffMask;

    /*
     * Deal with potential overflows/underflows first, optimizing for none.
     * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
     */
    int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT32U_EXP_BIAS;
    if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT32U_EXP_MAX - 3))
    { /* likely? */ }
    /*
     * Underflow if the exponent is zero or negative.  We try to map this to a
     * subnormal number when possible, with some additional trickery of course.
     */
    else if (iExponentOut <= 0)
    {
        bool const fIsTiny = iExponentOut < 0
                          || UINT64_MAX - uMantissaIn > uRoundingAdd;
        if (!(fFcw & X86_FCW_UM) && fIsTiny)
            /* Note! 754-1985 sec 7.4 has something about bias adjust of 192 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
            return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;

        if (iExponentOut <= 0)
        {
            uMantissaIn = iExponentOut <= -63
                        ? uMantissaIn != 0
                        : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
            fRoundedOff = uMantissaIn & fRoundingOffMask;
            if (fRoundedOff && fIsTiny)
                fFsw |= X86_FSW_UE;
            iExponentOut = 0;
        }
    }
    /*
     * Overflow if at or above max exponent value or if we will reach max
     * when rounding.  Will return +/-zero or +/-max value depending on
     * whether we're rounding or not.
     */
    else if (   iExponentOut >= RTFLOAT32U_EXP_MAX
             || (   iExponentOut == RTFLOAT32U_EXP_MAX - 1
                 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
    {
        fFsw |= X86_FSW_OE;
        if (!(fFcw & X86_FCW_OM))
            return fFsw | X86_FSW_ES | X86_FSW_B;
        fFsw |= X86_FSW_PE;
        if (uRoundingAdd)
            fFsw |= X86_FSW_C1;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;

        pr32Dst->s.fSign = fSignIn;
        if (uRoundingAdd)
        {   /* Zero */
            pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
            pr32Dst->s.uFraction = 0;
        }
        else
        {   /* Max */
            pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX - 1;
            pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1;
        }
        return fFsw;
    }

    /*
     * Normal or subnormal number.
     */
    /* Do rounding - just truncate in near mode when midway on an even outcome. */
    uint64_t uMantissaOut = uMantissaIn;
    if (   (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
        || (uMantissaIn & RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS))
        || fRoundedOff != uRoundingAdd)
    {
        uMantissaOut = uMantissaIn + uRoundingAdd;
        if (uMantissaOut >= uMantissaIn)
        { /* likely */ }
        else
        {
            uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
            iExponentOut++;
            Assert(iExponentOut < RTFLOAT32U_EXP_MAX); /* checked above */
            fFsw |= X86_FSW_C1;
        }
    }
    else
        uMantissaOut = uMantissaIn;

    /* Truncate the mantissa and set the return value. */
    uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS;

    pr32Dst->s.uFraction = (uint32_t)uMantissaOut; /* Note! too big for bitfield if normal. */
    pr32Dst->s.uExponent = iExponentOut;
    pr32Dst->s.fSign     = fSignIn;

    /* Set status flags related to rounding. */
    if (fRoundedOff)
    {
        fFsw |= X86_FSW_PE;
        if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS)))
            fFsw |= X86_FSW_C1;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }

    return fFsw;
}
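/* Example: storing 1 + 2^-24 (uMantissaIn=0x8000008000000000, iExponentIn=0)
   with RC_NEAREST: fRoundedOff equals uRoundingAdd (bit 39) while bit 40 is
   clear, so the ties-to-even rule truncates to 1.0f and only PE is set. */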


/**
 * @note Exact same logic as iemAImpl_fst_r80_to_r64.
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
{
    uint16_t const fFcw = pFpuState->FCW;
    uint16_t       fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
    if (RTFLOAT80U_IS_NORMAL(pr80Src))
        fFsw = iemAImpl_StoreNormalR80AsR32(pr80Src->s.fSign, pr80Src->s.uMantissa,
                                            (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr32Dst);
    else if (RTFLOAT80U_IS_ZERO(pr80Src))
    {
        pr32Dst->s.fSign     = pr80Src->s.fSign;
        pr32Dst->s.uExponent = 0;
        pr32Dst->s.uFraction = 0;
        Assert(RTFLOAT32U_IS_ZERO(pr32Dst));
    }
    else if (RTFLOAT80U_IS_INF(pr80Src))
    {
        pr32Dst->s.fSign     = pr80Src->s.fSign;
        pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
        pr32Dst->s.uFraction = 0;
        Assert(RTFLOAT32U_IS_INF(pr32Dst));
    }
    else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
    {
        /* Mapped to +/-QNaN */
        pr32Dst->s.fSign     = pr80Src->s.fSign;
        pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
        pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
    }
    else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
    {
        /* Pseudo-Inf / Pseudo-NaN / Unnormal -> QNaN (during load, probably) */
        if (fFcw & X86_FCW_IM)
        {
            pr32Dst->s.fSign     = 1;
            pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
            pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
            fFsw |= X86_FSW_IE;
        }
        else
            fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
    }
    else if (RTFLOAT80U_IS_NAN(pr80Src))
    {
        /* IM applies to signalled NaN input only.  Everything is converted to quiet NaN. */
        if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
        {
            pr32Dst->s.fSign      = pr80Src->s.fSign;
            pr32Dst->s.uExponent  = RTFLOAT32U_EXP_MAX;
            pr32Dst->s.uFraction  = (uint32_t)(pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS));
            pr32Dst->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
            if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
                fFsw |= X86_FSW_IE;
        }
        else
            fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
    }
    else
    {
        /* Denormal values cause both an underflow and a precision exception. */
        Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
        if (fFcw & X86_FCW_UM)
        {
            pr32Dst->s.fSign     = pr80Src->s.fSign;
            pr32Dst->s.uExponent = 0;
            if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
            {
                pr32Dst->s.uFraction = 1;
                fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
                if (!(fFcw & X86_FCW_PM))
                    fFsw |= X86_FSW_ES | X86_FSW_B;
            }
            else
            {
                pr32Dst->s.uFraction = 0;
                fFsw |= X86_FSW_UE | X86_FSW_PE;
                if (!(fFcw & X86_FCW_PM))
                    fFsw |= X86_FSW_ES | X86_FSW_B;
            }
        }
        else
            fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
    }
    *pu16FSW = fFsw;
}


/**
 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
 *
 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
 *
 * @returns Updated FPU status word value.
 * @param   fSignIn     Incoming sign indicator.
 * @param   uMantissaIn Incoming mantissa (dot between bit 63 and 62).
 * @param   iExponentIn Unbiased exponent.
 * @param   fFcw        The FPU control word.
 * @param   fFsw        Prepped FPU status word, i.e. exceptions and C1 clear.
 * @param   pr64Dst     Where to return the output value, if one should be
 *                      returned.
 *
 * @note    Tailored as a helper for iemAImpl_fst_r80_to_r64 right now.
 * @note    Exact same logic as iemAImpl_StoreNormalR80AsR32.
 */
static uint16_t iemAImpl_StoreNormalR80AsR64(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
                                             uint16_t fFcw, uint16_t fFsw, PRTFLOAT64U pr64Dst)
{
    uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS) - 1; /* 0x7ff */
    uint32_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                    ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS - 1) /* 0x400 */
                                    : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
                                    ? fRoundingOffMask
                                    : 0;
    uint32_t       fRoundedOff      = uMantissaIn & fRoundingOffMask;

    /*
     * Deal with potential overflows/underflows first, optimizing for none.
     * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
     */
    int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT64U_EXP_BIAS;
    if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT64U_EXP_MAX - 3))
    { /* likely? */ }
    /*
     * Underflow if the exponent is zero or negative.  We try to map this to a
     * subnormal number when possible, with some additional trickery of course.
     */
    else if (iExponentOut <= 0)
    {
        bool const fIsTiny = iExponentOut < 0
                          || UINT64_MAX - uMantissaIn > uRoundingAdd;
        if (!(fFcw & X86_FCW_UM) && fIsTiny)
            /* Note! 754-1985 sec 7.4 has something about bias adjust of 1536 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
            return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;

        if (iExponentOut <= 0)
        {
            uMantissaIn = iExponentOut <= -63
                        ? uMantissaIn != 0
                        : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
            fRoundedOff = uMantissaIn & fRoundingOffMask;
            if (fRoundedOff && fIsTiny)
                fFsw |= X86_FSW_UE;
            iExponentOut = 0;
        }
    }
    /*
     * Overflow if at or above max exponent value or if we will reach max
     * when rounding.  Will return +/-zero or +/-max value depending on
     * whether we're rounding or not.
     */
    else if (   iExponentOut >= RTFLOAT64U_EXP_MAX
             || (   iExponentOut == RTFLOAT64U_EXP_MAX - 1
                 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
    {
        fFsw |= X86_FSW_OE;
        if (!(fFcw & X86_FCW_OM))
            return fFsw | X86_FSW_ES | X86_FSW_B;
        fFsw |= X86_FSW_PE;
        if (uRoundingAdd)
            fFsw |= X86_FSW_C1;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;

        pr64Dst->s64.fSign = fSignIn;
        if (uRoundingAdd)
        {   /* Zero */
            pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
            pr64Dst->s64.uFraction = 0;
        }
        else
        {   /* Max */
            pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX - 1;
            pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1;
        }
        return fFsw;
    }

    /*
     * Normal or subnormal number.
     */
    /* Do rounding - just truncate in near mode when midway on an even outcome. */
    uint64_t uMantissaOut = uMantissaIn;
    if (   (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
        || (uMantissaIn & RT_BIT_32(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS))
        || fRoundedOff != uRoundingAdd)
    {
        uMantissaOut = uMantissaIn + uRoundingAdd;
        if (uMantissaOut >= uMantissaIn)
        { /* likely */ }
        else
        {
            uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
            iExponentOut++;
            Assert(iExponentOut < RTFLOAT64U_EXP_MAX); /* checked above */
            fFsw |= X86_FSW_C1;
        }
    }
    else
        uMantissaOut = uMantissaIn;

    /* Truncate the mantissa and set the return value. */
    uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS;

    pr64Dst->s64.uFraction = uMantissaOut; /* Note! too big for bitfield if normal. */
    pr64Dst->s64.uExponent = iExponentOut;
    pr64Dst->s64.fSign     = fSignIn;

    /* Set status flags related to rounding. */
    if (fRoundedOff)
    {
        fFsw |= X86_FSW_PE;
        if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS)))
            fFsw |= X86_FSW_C1;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }

    return fFsw;
}


/**
 * @note Exact same logic as iemAImpl_fst_r80_to_r32.
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
{
    uint16_t const fFcw = pFpuState->FCW;
    uint16_t       fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
    if (RTFLOAT80U_IS_NORMAL(pr80Src))
        fFsw = iemAImpl_StoreNormalR80AsR64(pr80Src->s.fSign, pr80Src->s.uMantissa,
                                            (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr64Dst);
    else if (RTFLOAT80U_IS_ZERO(pr80Src))
    {
        pr64Dst->s64.fSign     = pr80Src->s.fSign;
        pr64Dst->s64.uExponent = 0;
        pr64Dst->s64.uFraction = 0;
        Assert(RTFLOAT64U_IS_ZERO(pr64Dst));
    }
    else if (RTFLOAT80U_IS_INF(pr80Src))
    {
        pr64Dst->s64.fSign     = pr80Src->s.fSign;
        pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
        pr64Dst->s64.uFraction = 0;
        Assert(RTFLOAT64U_IS_INF(pr64Dst));
    }
    else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
    {
        /* Mapped to +/-QNaN */
        pr64Dst->s64.fSign     = pr80Src->s.fSign;
        pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
        pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
    }
    else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
    {
        /* Pseudo-Inf / Pseudo-NaN / Unnormal -> QNaN (during load, probably) */
        if (fFcw & X86_FCW_IM)
        {
            pr64Dst->s64.fSign     = 1;
            pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
            pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
            fFsw |= X86_FSW_IE;
        }
        else
            fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
    }
    else if (RTFLOAT80U_IS_NAN(pr80Src))
    {
        /* IM applies to signalled NaN input only.  Everything is converted to quiet NaN. */
        if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
        {
            pr64Dst->s64.fSign      = pr80Src->s.fSign;
            pr64Dst->s64.uExponent  = RTFLOAT64U_EXP_MAX;
            pr64Dst->s64.uFraction  = pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
            pr64Dst->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
            if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
                fFsw |= X86_FSW_IE;
        }
        else
            fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
    }
    else
    {
        /* Denormal values cause both an underflow and a precision exception. */
        Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
        if (fFcw & X86_FCW_UM)
        {
            pr64Dst->s64.fSign     = pr80Src->s.fSign;
            pr64Dst->s64.uExponent = 0;
            if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
            {
                pr64Dst->s64.uFraction = 1;
                fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
                if (!(fFcw & X86_FCW_PM))
                    fFsw |= X86_FSW_ES | X86_FSW_B;
            }
            else
            {
                pr64Dst->s64.uFraction = 0;
                fFsw |= X86_FSW_UE | X86_FSW_PE;
                if (!(fFcw & X86_FCW_PM))
                    fFsw |= X86_FSW_ES | X86_FSW_B;
            }
        }
        else
            fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
    }
    *pu16FSW = fFsw;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
{
    /*
     * FPU status word:
     *     - TOP is irrelevant, but we must match x86 assembly version (0).
     *     - C1 is always cleared as we don't have any stack overflows.
     *     - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
     */
    *pu16FSW = pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3); /* see iemAImpl_fld1 */
    *pr80Dst = *pr80Src;
}


/*
 *
 * Mantissa:
 * 63    56      48      40      32      24      16      8       0
 *  v     v       v       v       v       v       v      v       v
 * 1[.]111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000
 *    \    \    \    \    \    \    \    \    \    \    \    \    \    \    \    \
 * Exp: 0   4    8   12   16   20   24   28   32   36   40   44   48   52   56   60
 *
 * int64_t has the same width, only bit 63 is the sign bit.  So, the max we can map over
 * are bits 1 thru 63, dropping off bit 0, with an exponent of 62.  The number of bits we
 * drop off from the mantissa increases with decreasing exponent, till an exponent of 0
 * where we'll drop off all but bit 63.
 */
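/* Example: 10.0 has uMantissa=0xa000000000000000 and iExponent=3, so
   cShiftOff=60 and uMantissa >> 60 = 10 with nothing rounded off. */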
#define EMIT_FIST(a_cBits, a_iType, a_iTypeMin, a_iTypeIndefinite) \
IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i ## a_cBits,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
                                                           a_iType *piDst, PCRTFLOAT80U pr80Val)) \
{ \
    uint16_t const fFcw    = pFpuState->FCW; \
    uint16_t       fFsw    = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
    bool const     fSignIn = pr80Val->s.fSign; \
    \
    /* \
     * Deal with normal numbers first. \
     */ \
    if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
    { \
        uint64_t uMantissa = pr80Val->s.uMantissa; \
        int32_t  iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
        \
        if ((uint32_t)iExponent <= a_cBits - 2) \
        { \
            unsigned const cShiftOff        = 63 - iExponent; \
            uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
            uint64_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST \
                                            ? RT_BIT_64(cShiftOff - 1) \
                                            : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP) \
                                            ? fRoundingOffMask \
                                            : 0; \
            uint64_t       fRoundedOff      = uMantissa & fRoundingOffMask; \
            \
            uMantissa >>= cShiftOff; \
            uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff; \
            uMantissa += uRounding; \
            if (!(uMantissa & RT_BIT_64(a_cBits - 1))) \
            { \
                if (fRoundedOff) \
                { \
                    if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd) \
                        uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */ \
                    else if (uRounding) \
                        fFsw |= X86_FSW_C1; \
                    fFsw |= X86_FSW_PE; \
                    if (!(fFcw & X86_FCW_PM)) \
                        fFsw |= X86_FSW_ES | X86_FSW_B; \
                } \
                \
                if (!fSignIn) \
                    *piDst = (a_iType)uMantissa; \
                else \
                    *piDst = -(a_iType)uMantissa; \
            } \
            else \
            { \
                /* overflowed after rounding. */ \
                AssertMsg(iExponent == a_cBits - 2 && uMantissa == RT_BIT_64(a_cBits - 1), \
                          ("e=%d m=%#RX64 (org %#RX64) s=%d; shift=%d ro=%#RX64 rm=%#RX64 ra=%#RX64\n", iExponent, uMantissa, \
                           pr80Val->s.uMantissa, fSignIn, cShiftOff, fRoundedOff, fRoundingOffMask, uRoundingAdd)); \
                \
                /* Special case for the integer minimum value. */ \
                if (fSignIn) \
                { \
                    *piDst = a_iTypeMin; \
                    fFsw |= X86_FSW_PE | X86_FSW_C1; \
                    if (!(fFcw & X86_FCW_PM)) \
                        fFsw |= X86_FSW_ES | X86_FSW_B; \
                } \
                else \
                { \
                    fFsw |= X86_FSW_IE; \
                    if (fFcw & X86_FCW_IM) \
                        *piDst = a_iTypeMin; \
                    else \
                        fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
                } \
            } \
        } \
        /* \
         * Tiny sub-zero numbers. \
         */ \
        else if (iExponent < 0) \
        { \
            if (!fSignIn) \
            { \
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
                    || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
                { \
                    *piDst = 1; \
                    fFsw |= X86_FSW_C1; \
                } \
                else \
                    *piDst = 0; \
            } \
            else \
            { \
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
                    || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO \
                    || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
                    *piDst = 0; \
                else \
                { \
                    *piDst = -1; \
                    fFsw |= X86_FSW_C1; \
                } \
            } \
            fFsw |= X86_FSW_PE; \
            if (!(fFcw & X86_FCW_PM)) \
                fFsw |= X86_FSW_ES | X86_FSW_B; \
        } \
        /* \
         * Special MIN case. \
         */ \
        else if (   fSignIn && iExponent == a_cBits - 1 \
                 && (   a_cBits < 64 && (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_DOWN \
                     ?  uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
                     :  uMantissa == RT_BIT_64(63))) \
        { \
            *piDst = a_iTypeMin; \
            if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
            { \
                fFsw |= X86_FSW_PE; \
                if (!(fFcw & X86_FCW_PM)) \
                    fFsw |= X86_FSW_ES | X86_FSW_B; \
            } \
        } \
        /* \
         * Too large/small number outside the target integer range. \
         */ \
        else \
        { \
            fFsw |= X86_FSW_IE; \
            if (fFcw & X86_FCW_IM) \
                *piDst = a_iTypeIndefinite; \
            else \
                fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
        } \
    } \
    /* \
     * Map both +0 and -0 to integer zero (signless/+). \
     */ \
    else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
        *piDst = 0; \
    /* \
     * Denormals are just really tiny sub-zero numbers that are either rounded \
     * to zero, 1 or -1 depending on sign and rounding control. \
     */ \
    else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
    { \
        if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)) \
            *piDst = 0; \
        else \
        { \
            *piDst = fSignIn ? -1 : 1; \
            fFsw |= X86_FSW_C1; \
        } \
        fFsw |= X86_FSW_PE; \
        if (!(fFcw & X86_FCW_PM)) \
            fFsw |= X86_FSW_ES | X86_FSW_B; \
    } \
    /* \
     * All other special values are considered invalid arguments and result \
     * in an IE exception and indefinite value if masked. \
     */ \
    else \
    { \
        fFsw |= X86_FSW_IE; \
        if (fFcw & X86_FCW_IM) \
            *piDst = a_iTypeIndefinite; \
        else \
            fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
    } \
    *pu16FSW = fFsw; \
}
EMIT_FIST(64, int64_t, INT64_MIN, X86_FPU_INT64_INDEFINITE)
EMIT_FIST(32, int32_t, INT32_MIN, X86_FPU_INT32_INDEFINITE)
EMIT_FIST(16, int16_t, INT16_MIN, X86_FPU_INT16_INDEFINITE)

#endif /* IEM_WITHOUT_ASSEMBLY */


/*
 * The FISTT instruction was added with SSE3 and is a lot simpler than FIST.
 *
 * The 16-bit version is a bit peculiar, though, as it seems to be raising IE
 * as if it were the 32-bit version (i.e. starting with exp 31 instead of 15),
 * thus the @a a_cBitsIn.
 */
#define EMIT_FISTT(a_cBits, a_cBitsIn, a_iType, a_iTypeMin, a_iTypeMax, a_iTypeIndefinite, a_Suffix, a_fIntelVersion) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_fistt_r80_to_i,a_cBits,a_Suffix),(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
                                                                              a_iType *piDst, PCRTFLOAT80U pr80Val)) \
{ \
    uint16_t const fFcw    = pFpuState->FCW; \
    uint16_t       fFsw    = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
    bool const     fSignIn = pr80Val->s.fSign; \
    \
    /* \
     * Deal with normal numbers first. \
     */ \
    if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
    { \
        uint64_t uMantissa = pr80Val->s.uMantissa; \
        int32_t  iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
        \
        if ((uint32_t)iExponent <= a_cBitsIn - 2) \
        { \
            unsigned const cShiftOff        = 63 - iExponent; \
            uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
            uint64_t const fRoundedOff      = uMantissa & fRoundingOffMask; \
            uMantissa >>= cShiftOff; \
            /*Assert(!(uMantissa & RT_BIT_64(a_cBits - 1)));*/ \
            if (!fSignIn) \
                *piDst = (a_iType)uMantissa; \
            else \
                *piDst = -(a_iType)uMantissa; \
            \
            if (fRoundedOff) \
            { \
                fFsw |= X86_FSW_PE; \
                if (!(fFcw & X86_FCW_PM)) \
                    fFsw |= X86_FSW_ES | X86_FSW_B; \
            } \
        } \
        /* \
         * Tiny sub-zero numbers. \
         */ \
        else if (iExponent < 0) \
        { \
            *piDst = 0; \
            fFsw |= X86_FSW_PE; \
            if (!(fFcw & X86_FCW_PM)) \
                fFsw |= X86_FSW_ES | X86_FSW_B; \
        } \
        /* \
         * Special MIN case. \
         */ \
        else if (   fSignIn && iExponent == a_cBits - 1 \
                 && (a_cBits < 64 \
                     ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
                     : uMantissa == RT_BIT_64(63)) ) \
        { \
            *piDst = a_iTypeMin; \
            if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
            { \
                fFsw |= X86_FSW_PE; \
                if (!(fFcw & X86_FCW_PM)) \
                    fFsw |= X86_FSW_ES | X86_FSW_B; \
            } \
        } \
        /* \
         * Figure this weirdness. \
         */ \
        else if (a_cBits == 16 && fSignIn && iExponent == 31 && uMantissa < UINT64_C(0x8000100000000000) ) \
        { \
            *piDst = 0; \
            if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
            { \
                fFsw |= X86_FSW_PE; \
                if (!(fFcw & X86_FCW_PM)) \
                    fFsw |= X86_FSW_ES | X86_FSW_B; \
            } \
        } \
        /* \
         * Too large/small number outside the target integer range. \
         */ \
        else \
        { \
            fFsw |= X86_FSW_IE; \
            if (fFcw & X86_FCW_IM) \
                *piDst = a_iTypeIndefinite; \
            else \
                fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
        } \
    } \
    /* \
     * Map both +0 and -0 to integer zero (signless/+). \
     */ \
    else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
        *piDst = 0; \
    /* \
     * Denormals are just really tiny sub-zero numbers that are truncated to zero. \
     */ \
    else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
    { \
        *piDst = 0; \
        fFsw |= X86_FSW_PE; \
        if (!(fFcw & X86_FCW_PM)) \
            fFsw |= X86_FSW_ES | X86_FSW_B; \
    } \
    /* \
     * All other special values are considered invalid arguments and result \
     * in an IE exception and indefinite value if masked. \
     */ \
    else \
    { \
        fFsw |= X86_FSW_IE; \
        if (fFcw & X86_FCW_IM) \
            *piDst = a_iTypeIndefinite; \
        else \
            fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
    } \
    *pu16FSW = fFsw; \
}
#if defined(IEM_WITHOUT_ASSEMBLY)
EMIT_FISTT(64, 64, int64_t, INT64_MIN, INT64_MAX, X86_FPU_INT64_INDEFINITE, RT_NOTHING, 1)
EMIT_FISTT(32, 32, int32_t, INT32_MIN, INT32_MAX, X86_FPU_INT32_INDEFINITE, RT_NOTHING, 1)
EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, RT_NOTHING, 1)
#endif
EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _intel, 1)
EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _amd, 0)


#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
{
    /*static RTPBCD80U const s_ad80MaxMin[2] = { RTPBCD80U_INIT_MAX(), RTPBCD80U_INIT_MIN() };*/
    static RTPBCD80U const s_ad80Zeros[2]  = { RTPBCD80U_INIT_ZERO(0), RTPBCD80U_INIT_ZERO(1) };
    static RTPBCD80U const s_ad80One[2]    = { RTPBCD80U_INIT_C(0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1),
                                               RTPBCD80U_INIT_C(1, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1) };
    static RTPBCD80U const s_d80Indefinite = RTPBCD80U_INIT_INDEFINITE();

    uint16_t const fFcw    = pFpuState->FCW;
    uint16_t       fFsw    = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
    bool const     fSignIn = pr80Src->s.fSign;

    /*
     * Deal with normal numbers first.
     */
    if (RTFLOAT80U_IS_NORMAL(pr80Src))
    {
        uint64_t uMantissa = pr80Src->s.uMantissa;
        int32_t  iExponent = (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS;
        if (   (uint32_t)iExponent <= 58
            || ((uint32_t)iExponent == 59 && uMantissa <= UINT64_C(0xde0b6b3a763fffff)) )
        {
            unsigned const cShiftOff        = 63 - iExponent;
            uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
            uint64_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                            ? RT_BIT_64(cShiftOff - 1)
                                            : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
                                            ? fRoundingOffMask
                                            : 0;
            uint64_t       fRoundedOff      = uMantissa & fRoundingOffMask;

            uMantissa >>= cShiftOff;
            uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff;
            uMantissa += uRounding;
            if (uMantissa <= (uint64_t)RTPBCD80U_MAX)
            {
                if (fRoundedOff)
                {
                    if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd)
                        uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */
                    else if (uRounding)
                        fFsw |= X86_FSW_C1;
                    fFsw |= X86_FSW_PE;
                    if (!(fFcw & X86_FCW_PM))
                        fFsw |= X86_FSW_ES | X86_FSW_B;
                }

                pd80Dst->s.fSign = fSignIn;
                pd80Dst->s.uPad  = 0;
                for (size_t iPair = 0; iPair < RT_ELEMENTS(pd80Dst->s.abPairs); iPair++)
                {
                    unsigned const uDigits = uMantissa % 100;
                    uMantissa /= 100;
                    uint8_t const bLo = uDigits % 10;
                    uint8_t const bHi = uDigits / 10;
                    pd80Dst->s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(bHi, bLo);
                }
            }
            else
            {
                /* overflowed after rounding. */
                fFsw |= X86_FSW_IE;
                if (fFcw & X86_FCW_IM)
                    *pd80Dst = s_d80Indefinite;
                else
                    fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
            }
        }
        /*
         * Tiny sub-zero numbers.
         */
        else if (iExponent < 0)
        {
            if (!fSignIn)
            {
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                    || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
                {
                    *pd80Dst = s_ad80One[fSignIn];
                    fFsw |= X86_FSW_C1;
                }
                else
                    *pd80Dst = s_ad80Zeros[fSignIn];
            }
            else
            {
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                    || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO
                    || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
                    *pd80Dst = s_ad80Zeros[fSignIn];
                else
                {
                    *pd80Dst = s_ad80One[fSignIn];
                    fFsw |= X86_FSW_C1;
                }
            }
            fFsw |= X86_FSW_PE;
            if (!(fFcw & X86_FCW_PM))
                fFsw |= X86_FSW_ES | X86_FSW_B;
        }
        /*
         * Too large/small number outside the target integer range.
         */
        else
        {
            fFsw |= X86_FSW_IE;
            if (fFcw & X86_FCW_IM)
                *pd80Dst = s_d80Indefinite;
            else
                fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
        }
    }
    /*
     * Map both +0 and -0 to integer zero (signless/+).
     */
    else if (RTFLOAT80U_IS_ZERO(pr80Src))
        *pd80Dst = s_ad80Zeros[fSignIn];
    /*
     * Denormals are just really tiny sub-zero numbers that are either rounded
     * to zero, 1 or -1 depending on sign and rounding control.
     */
    else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src) || RTFLOAT80U_IS_DENORMAL(pr80Src))
    {
        if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP))
            *pd80Dst = s_ad80Zeros[fSignIn];
        else
        {
            *pd80Dst = s_ad80One[fSignIn];
            fFsw |= X86_FSW_C1;
        }
        fFsw |= X86_FSW_PE;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }
    /*
     * All other special values are considered invalid arguments and result
     * in an IE exception and indefinite value if masked.
     */
    else
    {
        fFsw |= X86_FSW_IE;
        if (fFcw & X86_FCW_IM)
            *pd80Dst = s_d80Indefinite;
        else
            fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
    }
    *pu16FSW = fFsw;
}
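/* Example: 1234.5 (uMantissa=0x9a50000000000000, iExponent=10) stored with
   RC_NEAREST rounds up to 1235 first, then the ties-to-even clause clears
   the low bit again, giving packed BCD 1234 with PE set. */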


/*********************************************************************************************************************************
*   FPU Helpers                                                                                                                  *
*********************************************************************************************************************************/
AssertCompileSize(RTFLOAT128U, 16);
AssertCompileSize(RTFLOAT80U, 10);
AssertCompileSize(RTFLOAT64U, 8);
AssertCompileSize(RTFLOAT32U, 4);

/**
 * Normalizes a possible pseudo-denormal value.
 *
 * Pseudo-denormal values are some oddities from the 8087 & 287 days.  They are
 * denormals with the J-bit set, so they can simply be rewritten as normal
 * values with an unbiased exponent of -16382, i.e. changing uExponent from 0
 * to 1.
 *
 * This macro will declare a RTFLOAT80U with the name given by
 * @a a_r80ValNormalized and update the @a a_pr80Val variable to point to it if
 * a normalization was performed.
 *
 * @note    This must be applied before calling SoftFloat with a value that
 *          could be a pseudo-denormal, as SoftFloat doesn't handle
 *          pseudo-denormals correctly.
 */
#define IEM_NORMALIZE_PSEUDO_DENORMAL(a_pr80Val, a_r80ValNormalized) \
    RTFLOAT80U a_r80ValNormalized; \
    if (RTFLOAT80U_IS_PSEUDO_DENORMAL(a_pr80Val)) \
    { \
        a_r80ValNormalized = *a_pr80Val; \
        a_r80ValNormalized.s.uExponent = 1; \
        a_pr80Val = &a_r80ValNormalized; \
    } else do {} while (0)
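/* Minimal usage sketch (hypothetical caller) - sanitize the input before it
   reaches SoftFloat:
        IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val, r80Normalized);
        float128_t const x = iemFpuSoftF128FromFloat80(pr80Val);
   The macro only redirects pr80Val when a normalization was actually done. */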

#ifdef IEM_WITH_FLOAT128_FOR_FPU

DECLINLINE(int) iemFpuF128SetRounding(uint16_t fFcw)
{
    int fNew;
    switch (fFcw & X86_FCW_RC_MASK)
    {
        default:
        case X86_FCW_RC_NEAREST: fNew = FE_TONEAREST; break;
        case X86_FCW_RC_ZERO:    fNew = FE_TOWARDZERO; break;
        case X86_FCW_RC_UP:      fNew = FE_UPWARD; break;
        case X86_FCW_RC_DOWN:    fNew = FE_DOWNWARD; break;
    }
    int fOld = fegetround();
    fesetround(fNew);
    return fOld;
}


DECLINLINE(void) iemFpuF128RestoreRounding(int fOld)
{
    fesetround(fOld);
}
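/* Typical save/set/restore pattern (hypothetical caller):
        int const fOldRound = iemFpuF128SetRounding(pFpuState->FCW);
        _Float128 const rResult = rVal1 + rVal2; // runs with the guest rounding mode
        iemFpuF128RestoreRounding(fOldRound);                                       */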

DECLINLINE(_Float128) iemFpuF128FromFloat80(PCRTFLOAT80U pr80Val, uint16_t fFcw)
{
    RT_NOREF(fFcw);
    RTFLOAT128U Tmp;
    Tmp.s2.uSignAndExponent = pr80Val->s2.uSignAndExponent;
    Tmp.s2.uFractionHigh    = (uint16_t)((pr80Val->s2.uMantissa & (RT_BIT_64(63) - 1)) >> 48);
    Tmp.s2.uFractionMid     = (uint32_t)(pr80Val->s2.uMantissa >> 16); /* fraction bits 47..16; masking with UINT32_MAX before shifting would drop bits 47..32 */
    Tmp.s2.uFractionLow     = pr80Val->s2.uMantissa << 48;
    if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
    {
        Assert(Tmp.s.uExponent == 0);
        Tmp.s2.uSignAndExponent++;
    }
    return *(_Float128 *)&Tmp;
}


DECLINLINE(uint16_t) iemFpuF128ToFloat80(PRTFLOAT80U pr80Dst, _Float128 rd128ValSrc, uint16_t fFcw, uint16_t fFsw)
{
    RT_NOREF(fFcw);
    RTFLOAT128U Tmp;
    *(_Float128 *)&Tmp = rd128ValSrc;
    ASMCompilerBarrier();
    if (RTFLOAT128U_IS_NORMAL(&Tmp))
    {
        pr80Dst->s.fSign     = Tmp.s64.fSign;
        pr80Dst->s.uExponent = Tmp.s64.uExponent;
        uint64_t uFraction   = Tmp.s64.uFractionHi << (63 - 48)
                             | Tmp.s64.uFractionLo >> (64 - 15);

        /* Do rounding - just truncate in near mode when midway on an even outcome. */
        unsigned const cShiftOff        = 64 - 15;
        uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
        uint64_t const uRoundedOff      = Tmp.s64.uFractionLo & fRoundingOffMask;
        if (uRoundedOff)
        {
            uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                        ? RT_BIT_64(cShiftOff - 1)
                                        : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
                                        ? fRoundingOffMask
                                        : 0;
            if (   (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
                || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
                || uRoundedOff != uRoundingAdd)
            {
                if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
                {
                    uFraction += 1;
                    if (!(uFraction & RT_BIT_64(63)))
                    { /* likely */ }
                    else
                    {
                        uFraction >>= 1;
                        pr80Dst->s.uExponent++;
                        if (pr80Dst->s.uExponent == RTFLOAT80U_EXP_MAX)
                            return fFsw;
                    }
                    fFsw |= X86_FSW_C1;
                }
            }
            fFsw |= X86_FSW_PE;
            if (!(fFcw & X86_FCW_PM))
                fFsw |= X86_FSW_ES | X86_FSW_B;
        }
        pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
    }
    else if (RTFLOAT128U_IS_ZERO(&Tmp))
    {
        pr80Dst->s.fSign     = Tmp.s64.fSign;
        pr80Dst->s.uExponent = 0;
        pr80Dst->s.uMantissa = 0;
    }
    else if (RTFLOAT128U_IS_INF(&Tmp))
    {
        pr80Dst->s.fSign     = Tmp.s64.fSign;
        pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
        pr80Dst->s.uMantissa = RT_BIT_64(63); /* an 80-bit infinity needs the integer bit set */
    }
    return fFsw;
}


#else /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */

/** Initializer for the SoftFloat state structure. */
# define IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(a_fFcw) \
    { \
        softfloat_tininess_afterRounding, \
        ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST ? (uint8_t)softfloat_round_near_even \
        : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_UP    ? (uint8_t)softfloat_round_max \
        : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_DOWN  ? (uint8_t)softfloat_round_min \
        :                                                    (uint8_t)softfloat_round_minMag, \
        0, \
        (uint8_t)((a_fFcw) & X86_FCW_XCPT_MASK), \
        ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_53 ? (uint8_t)64 \
        : ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_24 ? (uint8_t)32 : (uint8_t)80 \
    }

/** Returns updated FSW from a SoftFloat state and exception mask (FCW). */
# define IEM_SOFTFLOAT_STATE_TO_FSW(a_fFsw, a_pSoftState, a_fFcw) \
    (   (a_fFsw) \
      | (uint16_t)(((a_pSoftState)->exceptionFlags & softfloat_flag_c1) << 2) \
      | ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) \
      | (   ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) & (~(a_fFcw) & X86_FSW_XCPT_MASK) \
         ?  X86_FSW_ES | X86_FSW_B : 0) )


DECLINLINE(float128_t) iemFpuSoftF128Precision(float128_t r128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
{
    RT_NOREF(fFcw);
    Assert(cBits > 64);
# if 0 /* rounding does not seem to help */
    uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
    r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
    if (   off >= RT_BIT_64(1 + 112 - cBits - 1)
        && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
    {
        uint64_t uOld = r128.v[0];
        r128.v[0] += RT_BIT_64(1 + 112 - cBits);
        if (r128.v[0] < uOld)
            r128.v[1] += 1;
    }
# else
    r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
# endif
    return r128;
}


DECLINLINE(float128_t) iemFpuSoftF128PrecisionIprt(PCRTFLOAT128U pr128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
{
    RT_NOREF(fFcw);
    Assert(cBits > 64);
# if 0 /* rounding does not seem to help, not even on constants */
    float128_t r128 = { pr128->au64[0], pr128->au64[1] };
    uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
    r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
    if (   off >= RT_BIT_64(1 + 112 - cBits - 1)
        && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
    {
        uint64_t uOld = r128.v[0];
        r128.v[0] += RT_BIT_64(1 + 112 - cBits);
        if (r128.v[0] < uOld)
            r128.v[1] += 1;
    }
    return r128;
# else
    float128_t r128 = { { pr128->au64[0] & ~(RT_BIT_64(1 + 112 - cBits) - 1), pr128->au64[1] } };
    return r128;
# endif
}


# if 0 /* unused */
DECLINLINE(float128_t) iemFpuSoftF128FromIprt(PCRTFLOAT128U pr128)
{
    float128_t r128 = { { pr128->au64[0], pr128->au64[1] } };
    return r128;
}
# endif


/** Converts an 80-bit floating point value to SoftFloat 128-bit floating point. */
DECLINLINE(float128_t) iemFpuSoftF128FromFloat80(PCRTFLOAT80U pr80Val)
{
    extFloat80_t Tmp;
    Tmp.signExp = pr80Val->s2.uSignAndExponent;
    Tmp.signif  = pr80Val->s2.uMantissa;
    softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
    return extF80_to_f128(Tmp, &Ignored);
}


/**
 * Converts from the packed IPRT 80-bit floating point (RTFLOAT80U) format to
 * the SoftFloat extended 80-bit floating point format (extFloat80_t).
 *
 * This is only a structure format conversion, nothing else.
 */
DECLINLINE(extFloat80_t) iemFpuSoftF80FromIprt(PCRTFLOAT80U pr80Val)
{
    extFloat80_t Tmp;
    Tmp.signExp = pr80Val->s2.uSignAndExponent;
    Tmp.signif  = pr80Val->s2.uMantissa;
    return Tmp;
}


/**
 * Converts from SoftFloat extended 80-bit floating point format (extFloat80_t)
 * to the packed IPRT 80-bit floating point (RTFLOAT80U) format.
 *
 * This is only a structure format conversion, nothing else.
 */
DECLINLINE(PRTFLOAT80U) iemFpuSoftF80ToIprt(PRTFLOAT80U pr80Dst, extFloat80_t const r80XSrc)
{
    pr80Dst->s2.uSignAndExponent = r80XSrc.signExp;
    pr80Dst->s2.uMantissa        = r80XSrc.signif;
    return pr80Dst;
}
4606
4607
4608DECLINLINE(uint16_t) iemFpuSoftF128ToFloat80(PRTFLOAT80U pr80Dst, float128_t r128Src, uint16_t fFcw, uint16_t fFsw)
4609{
4610 RT_NOREF(fFcw);
4611 RTFLOAT128U Tmp;
4612 *(float128_t *)&Tmp = r128Src;
4613 ASMCompilerBarrier();
4614
4615 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4616 {
4617 pr80Dst->s.fSign = Tmp.s64.fSign;
4618 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4619 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4620 | Tmp.s64.uFractionLo >> (64 - 15);
4621
4622 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4623 unsigned const cShiftOff = 64 - 15;
4624 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4625 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4626 if (uRoundedOff)
4627 {
4628 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4629 ? RT_BIT_64(cShiftOff - 1)
4630 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4631 ? fRoundingOffMask
4632 : 0;
4633 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4634 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4635 || uRoundedOff != uRoundingAdd)
4636 {
4637 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4638 {
4639 uFraction += 1;
4640 if (!(uFraction & RT_BIT_64(63)))
4641 { /* likely */ }
4642 else
4643 {
4644 uFraction >>= 1;
4645 pr80Dst->s.uExponent++;
4646 if (pr80Dst->s.uExponent == RTFLOAT64U_EXP_MAX)
4647 return fFsw;
4648 }
4649 fFsw |= X86_FSW_C1;
4650 }
4651 }
4652 fFsw |= X86_FSW_PE;
4653 if (!(fFcw & X86_FCW_PM))
4654 fFsw |= X86_FSW_ES | X86_FSW_B;
4655 }
4656
4657 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4658 }
4659 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4660 {
4661 pr80Dst->s.fSign = Tmp.s64.fSign;
4662 pr80Dst->s.uExponent = 0;
4663 pr80Dst->s.uMantissa = 0;
4664 }
4665 else if (RTFLOAT128U_IS_INF(&Tmp))
4666 {
4667 pr80Dst->s.fSign = Tmp.s64.fSign;
 4668 pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
 4669 pr80Dst->s.uMantissa = RT_BIT_64(63);
4670 }
4671 return fFsw;
4672}
4673
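/*
 * A minimal sketch (kept out of the build) of the round-to-nearest-even step
 * performed above, reduced to plain integers: uValue holds the bits to keep
 * and, in its low cShiftOff bits, the ones being discarded.  The name
 * demoRoundToNearestEven is hypothetical and for illustration only.
 */
#if 0 /* illustrative sketch */
static uint64_t demoRoundToNearestEven(uint64_t uValue, unsigned cShiftOff)
{
    Assert(cShiftOff > 0 && cShiftOff < 64);
    uint64_t const fDiscardMask = RT_BIT_64(cShiftOff) - 1;
    uint64_t const uDiscarded   = uValue & fDiscardMask;
    uint64_t const uHalfway     = RT_BIT_64(cShiftOff - 1);
    uValue >>= cShiftOff;
    if (   uDiscarded > uHalfway                        /* clearly above the midpoint: round up */
        || (uDiscarded == uHalfway && (uValue & 1)))    /* exactly midway: round up only when odd */
        uValue += 1;                                    /* (a caller must still handle carry into the exponent) */
    return uValue;
}
#endif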
4674
4675/**
 4676 * Helper for transferring exception and C1 to FSW and setting the result value
4677 * accordingly.
4678 *
4679 * @returns Updated FSW.
4680 * @param pSoftState The SoftFloat state following the operation.
4681 * @param r80XResult The result of the SoftFloat operation.
4682 * @param pr80Result Where to store the result for IEM.
4683 * @param fFcw The FPU control word.
4684 * @param fFsw The FSW before the operation, with necessary bits
4685 * cleared and such.
 4686 * @param pr80XcptResult Alternative return value for use when an unmasked
 4687 * \#IE is raised.
4688 */
4689DECLINLINE(uint16_t) iemFpuSoftStateAndF80ToFswAndIprtResult(softfloat_state_t const *pSoftState, extFloat80_t r80XResult,
4690 PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw,
4691 PCRTFLOAT80U pr80XcptResult)
4692{
4693 fFsw |= (pSoftState->exceptionFlags & X86_FSW_XCPT_MASK)
4694 | (uint16_t)((pSoftState->exceptionFlags & softfloat_flag_c1) << 2);
4695 if (fFsw & ~fFcw & X86_FSW_XCPT_MASK)
4696 fFsw |= X86_FSW_ES | X86_FSW_B;
4697
4698 if (!(fFsw & ~fFcw & X86_FSW_IE))
4699 iemFpuSoftF80ToIprt(pr80Result, r80XResult);
4700 else
4701 *pr80Result = *pr80XcptResult;
4702 return fFsw;
4703}
4704
4705
4706/**
4707 * Helper doing polynomial evaluation using Horner's method.
4708 *
4709 * See https://en.wikipedia.org/wiki/Horner%27s_method for details.
4710 */
4711float128_t iemFpuSoftF128HornerPoly(float128_t z, PCRTFLOAT128U g_par128HornerConsts, size_t cHornerConsts,
4712 unsigned cPrecision, softfloat_state_t *pSoftState)
4713{
4714 Assert(cHornerConsts > 1);
4715 size_t i = cHornerConsts - 1;
4716 float128_t r128Result = iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision);
4717 while (i-- > 0)
4718 {
4719 r128Result = iemFpuSoftF128Precision(f128_mul(r128Result, z, pSoftState), cPrecision);
4720 r128Result = f128_add(r128Result, iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision), pSoftState);
4721 r128Result = iemFpuSoftF128Precision(r128Result, cPrecision);
4722 }
4723 return r128Result;
4724}
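

/*
 * For reference, the same evaluation scheme on plain doubles: a minimal,
 * unbuilt sketch assuming the constants are ordered from the x^0 term
 * upwards like g_par128HornerConsts above.  demoHornerPoly is a
 * hypothetical name.
 */
#if 0 /* illustrative sketch */
static double demoHornerPoly(double z, double const *paConsts, size_t cConsts)
{
    size_t i       = cConsts - 1;
    double rResult = paConsts[i];            /* start with the highest order constant */
    while (i-- > 0)
        rResult = rResult * z + paConsts[i]; /* one multiply-add pair per remaining constant */
    return rResult;
}
#endif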
4725
4726#endif /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4727
4728
4729/**
4730 * Composes a normalized and rounded RTFLOAT80U result from a 192 bit wide
4731 * mantissa, exponent and sign.
4732 *
4733 * @returns Updated FSW.
4734 * @param pr80Dst Where to return the composed value.
4735 * @param fSign The sign.
 4736 * @param puMantissa The mantissa, a 256-bit type of which the top 64 bits
 4737 * are ignored and should be zero. This will probably be
 4738 * modified during normalization and rounding.
4739 * @param iExponent Unbiased exponent.
4740 * @param fFcw The FPU control word.
4741 * @param fFsw The FPU status word.
4742 */
4743static uint16_t iemFpuFloat80RoundAndComposeFrom192(PRTFLOAT80U pr80Dst, bool fSign, PRTUINT256U puMantissa,
4744 int32_t iExponent, uint16_t fFcw, uint16_t fFsw)
4745{
4746 AssertStmt(puMantissa->QWords.qw3 == 0, puMantissa->QWords.qw3 = 0);
4747
4748 iExponent += RTFLOAT80U_EXP_BIAS;
4749
4750 /* Do normalization if necessary and possible. */
4751 unsigned cShifted = 0;
4752 if (!(puMantissa->QWords.qw2 & RT_BIT_64(63)))
4753 {
4754 int cShift = 192 - RTUInt256BitCount(puMantissa);
4755 if (iExponent > cShift)
4756 iExponent -= cShift;
4757 else
4758 {
4759 if (fFcw & X86_FCW_UM)
4760 {
4761 if (iExponent > 0)
4762 cShift = --iExponent;
4763 else
4764 cShift = 0;
4765 }
4766 iExponent -= cShift;
4767 }
4768 cShifted = cShift;
4769 RTUInt256AssignShiftLeft(puMantissa, cShift);
4770 }
4771
4772 /* Do rounding. */
4773 uint64_t uMantissa = puMantissa->QWords.qw2;
4774 if (puMantissa->QWords.qw1 || puMantissa->QWords.qw0)
4775 {
4776 bool fAdd;
4777 switch (fFcw & X86_FCW_RC_MASK)
4778 {
 4779 default: /* (for the simple-minded MSC which otherwise thinks fAdd would be used uninitialized) */
4780 case X86_FCW_RC_NEAREST:
4781 if (puMantissa->QWords.qw1 & RT_BIT_64(63))
4782 {
4783 if ( (uMantissa & 1)
4784 || puMantissa->QWords.qw0 != 0
4785 || puMantissa->QWords.qw1 != RT_BIT_64(63))
4786 {
4787 fAdd = true;
4788 break;
4789 }
4790 uMantissa &= ~(uint64_t)1;
4791 }
4792 fAdd = false;
4793 break;
4794 case X86_FCW_RC_ZERO:
4795 fAdd = false;
4796 break;
4797 case X86_FCW_RC_UP:
4798 fAdd = !fSign;
4799 break;
4800 case X86_FCW_RC_DOWN:
4801 fAdd = fSign;
4802 break;
4803 }
4804 if (fAdd)
4805 {
4806 uint64_t const uTmp = uMantissa;
4807 uMantissa = uTmp + 1;
4808 if (uMantissa < uTmp)
4809 {
4810 uMantissa >>= 1;
4811 uMantissa |= RT_BIT_64(63);
4812 iExponent++;
4813 }
4814 fFsw |= X86_FSW_C1;
4815 }
4816 fFsw |= X86_FSW_PE;
4817 if (!(fFcw & X86_FCW_PM))
4818 fFsw |= X86_FSW_ES | X86_FSW_B;
4819 }
4820
4821 /* Check for underflow (denormals). */
4822 if (iExponent <= 0)
4823 {
4824 if (fFcw & X86_FCW_UM)
4825 {
4826 if (uMantissa & RT_BIT_64(63))
4827 uMantissa >>= 1;
4828 iExponent = 0;
4829 }
4830 else
4831 {
4832 iExponent += RTFLOAT80U_EXP_BIAS_ADJUST;
4833 fFsw |= X86_FSW_ES | X86_FSW_B;
4834 }
4835 fFsw |= X86_FSW_UE;
4836 }
 4837 /* Check for overflow - not expected with the inputs we currently get here, so just assert. */
4838 else if (iExponent >= RTFLOAT80U_EXP_MAX)
4839 {
4840 Assert(iExponent < RTFLOAT80U_EXP_MAX);
4841 }
4842
4843 /* Compose the result. */
4844 pr80Dst->s.uMantissa = uMantissa;
4845 pr80Dst->s.uExponent = iExponent;
4846 pr80Dst->s.fSign = fSign;
4847 return fFsw;
4848}
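

/*
 * The normalization step above in miniature, ignoring underflow: shift a
 * non-zero 64-bit mantissa left until the integer bit (bit 63) is set,
 * decrementing the exponent accordingly.  A sketch only; the real code uses
 * RTUInt256BitCount so the whole 192-bit mantissa shifts in one go.
 */
#if 0 /* illustrative sketch */
static uint64_t demoNormalizeMantissa(uint64_t uMantissa, int32_t *piExponent)
{
    Assert(uMantissa != 0);
    while (!(uMantissa & RT_BIT_64(63)))
    {
        uMantissa <<= 1;    /* shift the leading set bit towards bit 63... */
        *piExponent -= 1;   /* ...compensating in the exponent */
    }
    return uMantissa;
}
#endif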
4849
4850
4851
4852
4853/*********************************************************************************************************************************
4854* x87 FPU Division Operations *
4855*********************************************************************************************************************************/
4856
4857IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4858 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4859{
4860 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4861 AssertReleaseFailed();
4862}
4863
4864
4865IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4866 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4867{
4868 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4869 AssertReleaseFailed();
4870}
4871
4872
4873/** Worker for iemAImpl_fdiv_r80_by_r80 & iemAImpl_fdivr_r80_by_r80. */
4874static uint16_t iemAImpl_fdiv_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
4875 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
4876{
4877 if (!RTFLOAT80U_IS_ZERO(pr80Val2) || RTFLOAT80U_IS_NAN(pr80Val1) || RTFLOAT80U_IS_INF(pr80Val1))
4878 {
4879 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
4880 extFloat80_t r80XResult = extF80_div(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
4881 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
4882 }
4883 if (!RTFLOAT80U_IS_ZERO(pr80Val1))
4884 { /* Div by zero. */
4885 if (fFcw & X86_FCW_ZM)
4886 *pr80Result = g_ar80Infinity[pr80Val1->s.fSign != pr80Val2->s.fSign];
4887 else
4888 {
4889 *pr80Result = *pr80Val1Org;
4890 fFsw |= X86_FSW_ES | X86_FSW_B;
4891 }
4892 fFsw |= X86_FSW_ZE;
4893 }
4894 else
4895 { /* Invalid operand */
4896 if (fFcw & X86_FCW_IM)
4897 *pr80Result = g_r80Indefinite;
4898 else
4899 {
4900 *pr80Result = *pr80Val1Org;
4901 fFsw |= X86_FSW_ES | X86_FSW_B;
4902 }
4903 fFsw |= X86_FSW_IE;
4904 }
4905 return fFsw;
4906}
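

/*
 * The #Z decision in the worker above, isolated: with ZM masked the result is
 * an infinity whose sign is the XOR of the operand signs, otherwise the
 * destination keeps the original value and ES+B flag the pending exception.
 * An unbuilt sketch; demoDivByZero is a hypothetical name.
 */
#if 0 /* illustrative sketch */
static uint16_t demoDivByZero(PRTFLOAT80U pr80Result, PCRTFLOAT80U pr80Dividend, PCRTFLOAT80U pr80Divisor,
                              PCRTFLOAT80U pr80Val1Org, uint16_t fFcw, uint16_t fFsw)
{
    if (fFcw & X86_FCW_ZM)
        *pr80Result = g_ar80Infinity[pr80Dividend->s.fSign != pr80Divisor->s.fSign];
    else
    {
        *pr80Result = *pr80Val1Org;     /* leave the destination untouched */
        fFsw |= X86_FSW_ES | X86_FSW_B; /* signal the unmasked exception */
    }
    return fFsw | X86_FSW_ZE;
}
#endif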
4907
4908
4909IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4910 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4911{
4912 uint16_t const fFcw = pFpuState->FCW;
4913 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
4914
 4915 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
4916 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
4917 {
4918 if (fFcw & X86_FCW_IM)
4919 pFpuRes->r80Result = g_r80Indefinite;
4920 else
4921 {
4922 pFpuRes->r80Result = *pr80Val1;
4923 fFsw |= X86_FSW_ES | X86_FSW_B;
4924 }
4925 fFsw |= X86_FSW_IE;
4926 }
 4927 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
4928 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2) && !RTFLOAT80U_IS_ZERO(pr80Val2))
4929 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
4930 {
4931 if (fFcw & X86_FCW_DM)
4932 {
4933 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
4934 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
4935 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
4936 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
4937 }
4938 else
4939 {
4940 pFpuRes->r80Result = *pr80Val1;
4941 fFsw |= X86_FSW_ES | X86_FSW_B;
4942 }
4943 fFsw |= X86_FSW_DE;
4944 }
4945 /* SoftFloat can handle the rest: */
4946 else
4947 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
4948
4949 pFpuRes->FSW = fFsw;
4950}
4951
4952
4953IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4954 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4955{
4956 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4957 AssertReleaseFailed();
4958}
4959
4960
4961IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4962 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4963{
4964 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4965 AssertReleaseFailed();
4966}
4967
4968
4969IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4970 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4971{
4972 uint16_t const fFcw = pFpuState->FCW;
4973 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
4974
 4975 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
4976 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
4977 {
4978 if (fFcw & X86_FCW_IM)
4979 pFpuRes->r80Result = g_r80Indefinite;
4980 else
4981 {
4982 pFpuRes->r80Result = *pr80Val1;
4983 fFsw |= X86_FSW_ES | X86_FSW_B;
4984 }
4985 fFsw |= X86_FSW_IE;
4986 }
 4987 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
4988 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
4989 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_ZERO(pr80Val1)) )
4990 {
4991 if (fFcw & X86_FCW_DM)
4992 {
4993 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
4994 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
4995 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
4996 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
4997 }
4998 else
4999 {
5000 pFpuRes->r80Result = *pr80Val1;
5001 fFsw |= X86_FSW_ES | X86_FSW_B;
5002 }
5003 fFsw |= X86_FSW_DE;
5004 }
5005 /* SoftFloat can handle the rest: */
5006 else
5007 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5008
5009 pFpuRes->FSW = fFsw;
5010}
5011
5012
5013IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5014 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5015{
5016 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5017 AssertReleaseFailed();
5018}
5019
5020
5021IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5022 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5023{
5024 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5025 AssertReleaseFailed();
5026}
5027
5028
5029IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5030 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5031{
5032 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5033 AssertReleaseFailed();
5034}
5035
5036
5037IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5038 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5039{
5040 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5041 AssertReleaseFailed();
5042}
5043
5044
 5045 /** Worker for iemAImpl_fprem_r80_by_r80 & iemAImpl_fprem1_r80_by_r80. */
5046static uint16_t iemAImpl_fprem_fprem1_r80_by_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5047 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org, bool fLegacyInstr)
5048{
5049 if (!RTFLOAT80U_IS_ZERO(pr80Val2) || RTFLOAT80U_IS_NAN(pr80Val1) || RTFLOAT80U_IS_INF(pr80Val1))
5050 {
5051 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5052 uint16_t fCxFlags = 0;
5053 extFloat80_t r80XResult = extF80_partialRem(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2),
5054 fLegacyInstr ? softfloat_round_minMag : softfloat_round_near_even,
5055 &fCxFlags, &SoftState);
5056 Assert(!(fCxFlags & ~X86_FSW_C_MASK));
5057 fFsw = iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5058 if ( !(fFsw & X86_FSW_IE)
5059 && !RTFLOAT80U_IS_NAN(pr80Result)
5060 && !RTFLOAT80U_IS_INDEFINITE(pr80Result))
5061 {
5062 fFsw &= ~(uint16_t)X86_FSW_C_MASK;
5063 fFsw |= fCxFlags & X86_FSW_C_MASK;
5064 }
5065 return fFsw;
5066 }
5067
5068 /* Invalid operand */
5069 if (fFcw & X86_FCW_IM)
5070 *pr80Result = g_r80Indefinite;
5071 else
5072 {
5073 *pr80Result = *pr80Val1Org;
5074 fFsw |= X86_FSW_ES | X86_FSW_B;
5075 }
5076 return fFsw | X86_FSW_IE;
5077}
5078
5079
5080static void iemAImpl_fprem_fprem1_r80_by_r80(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5081 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fLegacyInstr)
5082{
5083 uint16_t const fFcw = pFpuState->FCW;
5084 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 /*| X86_FSW_C2*/ | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5085
 5086 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals.
 5087 In addition, we'd like to handle zero ST(1) now as SoftFloat returns Inf instead
 5088 of Indefinite. (Note! There is no #Z like the footnotes to tables 3-31 and 3-32
 5089 for the FPREM & FPREM1 instructions in the Intel reference manual claim!) */
5090 if ( RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2)
5091 || (RTFLOAT80U_IS_ZERO(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_INDEFINITE(pr80Val1)))
5092 {
5093 if (fFcw & X86_FCW_IM)
5094 pFpuRes->r80Result = g_r80Indefinite;
5095 else
5096 {
5097 pFpuRes->r80Result = *pr80Val1;
5098 fFsw |= X86_FSW_ES | X86_FSW_B;
5099 }
5100 fFsw |= X86_FSW_IE;
5101 }
 5102 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
5103 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2) && !RTFLOAT80U_IS_ZERO(pr80Val2))
5104 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_INF(pr80Val1)) )
5105 {
5106 if (fFcw & X86_FCW_DM)
5107 {
5108 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5109 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5110 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5111 fFsw = iemAImpl_fprem_fprem1_r80_by_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw,
5112 pr80Val1Org, fLegacyInstr);
5113 }
5114 else
5115 {
5116 pFpuRes->r80Result = *pr80Val1;
5117 fFsw |= X86_FSW_ES | X86_FSW_B;
5118 }
5119 fFsw |= X86_FSW_DE;
5120 }
5121 /* SoftFloat can handle the rest: */
5122 else
5123 fFsw = iemAImpl_fprem_fprem1_r80_by_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw,
5124 pr80Val1, fLegacyInstr);
5125
5126 pFpuRes->FSW = fFsw;
5127}
5128
5129
5130IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5131 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5132{
5133 iemAImpl_fprem_fprem1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2, true /*fLegacyInstr*/);
5134}
5135
5136
5137IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5138 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5139{
5140 iemAImpl_fprem_fprem1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2, false /*fLegacyInstr*/);
5141}
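

/*
 * The semantic difference between the two instructions in miniature, using
 * double precision stand-ins: FPREM truncates the quotient (fmod semantics)
 * while FPREM1 rounds it to nearest-even (IEEE 754 remainder semantics),
 * mirroring the softfloat_round_minMag vs. softfloat_round_near_even choice
 * in the worker.  An unbuilt sketch that ignores the partial aspect (the
 * real instructions reduce the exponent difference stepwise, signalling C2
 * when the reduction is incomplete); not the shipped helper.
 */
#if 0 /* illustrative sketch */
# include <math.h>
static double demoPartialRemainder(double rDividend, double rDivisor, bool fLegacyInstr)
{
    return fLegacyInstr
         ? fmod(rDividend, rDivisor)        /* FPREM:  rDividend - trunc(rDividend / rDivisor) * rDivisor */
         : remainder(rDividend, rDivisor);  /* FPREM1: rDividend - roundeven(rDividend / rDivisor) * rDivisor */
}
#endif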
5142
5143
5144/*********************************************************************************************************************************
5145* x87 FPU Multiplication Operations *
5146*********************************************************************************************************************************/
5147
5148IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5149 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5150{
5151 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5152 AssertReleaseFailed();
5153}
5154
5155
5156IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5157 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5158{
5159 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5160 AssertReleaseFailed();
5161}
5162
5163
5164/** Worker for iemAImpl_fmul_r80_by_r80. */
5165static uint16_t iemAImpl_fmul_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5166 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5167{
5168 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5169 extFloat80_t r80XResult = extF80_mul(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5170 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5171}
5172
5173
5174IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5175 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5176{
5177 uint16_t const fFcw = pFpuState->FCW;
5178 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5179
 5180 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5181 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5182 {
5183 if (fFcw & X86_FCW_IM)
5184 pFpuRes->r80Result = g_r80Indefinite;
5185 else
5186 {
5187 pFpuRes->r80Result = *pr80Val1;
5188 fFsw |= X86_FSW_ES | X86_FSW_B;
5189 }
5190 fFsw |= X86_FSW_IE;
5191 }
 5192 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5193 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5194 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5195 {
5196 if (fFcw & X86_FCW_DM)
5197 {
5198 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5199 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5200 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5201 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5202 }
5203 else
5204 {
5205 pFpuRes->r80Result = *pr80Val1;
5206 fFsw |= X86_FSW_ES | X86_FSW_B;
5207 }
5208 fFsw |= X86_FSW_DE;
5209 }
5210 /* SoftFloat can handle the rest: */
5211 else
5212 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5213
5214 pFpuRes->FSW = fFsw;
5215}
5216
5217
5218IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5219 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5220{
5221 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5222 AssertReleaseFailed();
5223}
5224
5225
5226IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5227 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5228{
5229 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5230 AssertReleaseFailed();
5231}
5232
5233
5234/*********************************************************************************************************************************
5235* x87 FPU Addition and Subtraction *
5236*********************************************************************************************************************************/
5237
5238IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5239 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5240{
5241 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5242 AssertReleaseFailed();
5243}
5244
5245
5246IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5247 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5248{
5249 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5250 AssertReleaseFailed();
5251}
5252
5253
5254/** Worker for iemAImpl_fadd_r80_by_r80. */
5255static uint16_t iemAImpl_fadd_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5256 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5257{
5258 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5259 extFloat80_t r80XResult = extF80_add(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5260 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5261}
5262
5263
5264IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5265 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5266{
5267 uint16_t const fFcw = pFpuState->FCW;
5268 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5269
 5270 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5271 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5272 {
5273 if (fFcw & X86_FCW_IM)
5274 pFpuRes->r80Result = g_r80Indefinite;
5275 else
5276 {
5277 pFpuRes->r80Result = *pr80Val1;
5278 fFsw |= X86_FSW_ES | X86_FSW_B;
5279 }
5280 fFsw |= X86_FSW_IE;
5281 }
 5282 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5283 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5284 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5285 {
5286 if (fFcw & X86_FCW_DM)
5287 {
5288 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5289 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5290 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5291 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5292 }
5293 else
5294 {
5295 pFpuRes->r80Result = *pr80Val1;
5296 fFsw |= X86_FSW_ES | X86_FSW_B;
5297 }
5298 fFsw |= X86_FSW_DE;
5299 }
5300 /* SoftFloat can handle the rest: */
5301 else
5302 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5303
5304 pFpuRes->FSW = fFsw;
5305}
5306
5307
5308IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5309 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5310{
5311 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5312 AssertReleaseFailed();
5313}
5314
5315
5316IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5317 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5318{
5319 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5320 AssertReleaseFailed();
5321}
5322
5323
5324IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5325 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5326{
5327 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5328 AssertReleaseFailed();
5329}
5330
5331
5332IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5333 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5334{
5335 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5336 AssertReleaseFailed();
5337}
5338
5339
5340IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5341 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5342{
5343 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5344 AssertReleaseFailed();
5345}
5346
5347
5348IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5349 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5350{
5351 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5352 AssertReleaseFailed();
5353}
5354
5355
5356IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5357 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5358{
5359 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5360 AssertReleaseFailed();
5361}
5362
5363
5364IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5365 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5366{
5367 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5368 AssertReleaseFailed();
5369}
5370
5371
5372/** Worker for iemAImpl_fsub_r80_by_r80 and iemAImpl_fsubr_r80_by_r80. */
5373static uint16_t iemAImpl_fsub_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5374 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5375{
5376 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5377 extFloat80_t r80XResult = extF80_sub(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5378 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5379}
5380
5381
5382IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5383 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5384{
5385 uint16_t const fFcw = pFpuState->FCW;
5386 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5387
 5388 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5389 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5390 {
5391 if (fFcw & X86_FCW_IM)
5392 pFpuRes->r80Result = g_r80Indefinite;
5393 else
5394 {
5395 pFpuRes->r80Result = *pr80Val1;
5396 fFsw |= X86_FSW_ES | X86_FSW_B;
5397 }
5398 fFsw |= X86_FSW_IE;
5399 }
 5400 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5401 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5402 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5403 {
5404 if (fFcw & X86_FCW_DM)
5405 {
5406 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5407 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5408 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5409 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5410 }
5411 else
5412 {
5413 pFpuRes->r80Result = *pr80Val1;
5414 fFsw |= X86_FSW_ES | X86_FSW_B;
5415 }
5416 fFsw |= X86_FSW_DE;
5417 }
5418 /* SoftFloat can handle the rest: */
5419 else
5420 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5421
5422 pFpuRes->FSW = fFsw;
5423}
5424
5425
5426IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5427 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5428{
5429 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5430 AssertReleaseFailed();
5431}
5432
5433
5434IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5435 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5436{
5437 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5438 AssertReleaseFailed();
5439}
5440
5441
5442/* Same as iemAImpl_fsub_r80_by_r80, but with input operands switched. */
5443IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5444 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5445{
5446 uint16_t const fFcw = pFpuState->FCW;
5447 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5448
 5449 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5450 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5451 {
5452 if (fFcw & X86_FCW_IM)
5453 pFpuRes->r80Result = g_r80Indefinite;
5454 else
5455 {
5456 pFpuRes->r80Result = *pr80Val1;
5457 fFsw |= X86_FSW_ES | X86_FSW_B;
5458 }
5459 fFsw |= X86_FSW_IE;
5460 }
 5461 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5462 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5463 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5464 {
5465 if (fFcw & X86_FCW_DM)
5466 {
5467 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5468 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5469 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5470 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5471 }
5472 else
5473 {
5474 pFpuRes->r80Result = *pr80Val1;
5475 fFsw |= X86_FSW_ES | X86_FSW_B;
5476 }
5477 fFsw |= X86_FSW_DE;
5478 }
5479 /* SoftFloat can handle the rest: */
5480 else
5481 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5482
5483 pFpuRes->FSW = fFsw;
5484}
5485
5486
5487/*********************************************************************************************************************************
5488* x87 FPU Trigonometric Operations *
5489*********************************************************************************************************************************/
5490
5491
5492IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5493 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5494{
5495 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5496 AssertReleaseFailed();
5497}
5498
5499#endif /* IEM_WITHOUT_ASSEMBLY */
5500
5501IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5502 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5503{
5504 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5505}
5506
5507IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5508 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5509{
5510 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5511}
5512
5513
5514#if defined(IEM_WITHOUT_ASSEMBLY)
5515IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5516{
5517 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5518 AssertReleaseFailed();
5519}
5520#endif /* IEM_WITHOUT_ASSEMBLY */
5521
5522IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5523{
5524 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5525}
5526
5527IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5528{
5529 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5530}
5531
5532
5533#ifdef IEM_WITHOUT_ASSEMBLY
5534IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5535{
5536 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5537 AssertReleaseFailed();
5538}
5539#endif /* IEM_WITHOUT_ASSEMBLY */
5540
5541IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5542{
5543 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5544}
5545
5546IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5547{
5548 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5549}
5550
5551#ifdef IEM_WITHOUT_ASSEMBLY
5552IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5553{
5554 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5555 AssertReleaseFailed();
5556}
5557#endif /* IEM_WITHOUT_ASSEMBLY */
5558
5559IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5560{
5561 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5562}
5563
5564IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5565{
5566 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5567}
5568
5569
5570#ifdef IEM_WITHOUT_ASSEMBLY
5571IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5572{
5573 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5574 AssertReleaseFailed();
5575}
5576#endif /* IEM_WITHOUT_ASSEMBLY */
5577
5578IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5579{
5580 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5581}
5582
5583IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5584{
5585 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5586}
5587
5588#ifdef IEM_WITHOUT_ASSEMBLY
5589
5590
5591/*********************************************************************************************************************************
5592* x87 FPU Compare and Testing Operations *
5593*********************************************************************************************************************************/
5594
5595IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5596{
5597 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5598
5599 if (RTFLOAT80U_IS_ZERO(pr80Val))
5600 fFsw |= X86_FSW_C3;
5601 else if (RTFLOAT80U_IS_NORMAL(pr80Val) || RTFLOAT80U_IS_INF(pr80Val))
5602 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 : 0;
5603 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5604 {
5605 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 | X86_FSW_DE : X86_FSW_DE;
5606 if (!(pFpuState->FCW & X86_FCW_DM))
5607 fFsw |= X86_FSW_ES | X86_FSW_B;
5608 }
5609 else
5610 {
5611 fFsw |= X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
5612 if (!(pFpuState->FCW & X86_FCW_IM))
5613 fFsw |= X86_FSW_ES | X86_FSW_B;
5614 }
5615
5616 *pu16Fsw = fFsw;
5617}
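

/*
 * FTST condition code summary (C3, C2, C0) as set above:
 *      000 = ST(0) > 0.0       001 = ST(0) < 0.0
 *      100 = ST(0) = 0.0       111 = unordered (invalid encodings, with #IE)
 * Denormals additionally raise #DE before classifying by sign.
 */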
5618
5619
5620IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5621{
5623 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5624
 5625 /* C1 = sign bit (always set, even for Empty registers, according to Intel). */
5626 if (pr80Val->s.fSign)
5627 fFsw |= X86_FSW_C1;
5628
5629 /* Classify the value in C0, C2, C3. */
5630 if (!(pFpuState->FTW & RT_BIT_32(X86_FSW_TOP_GET(pFpuState->FSW))))
5631 fFsw |= X86_FSW_C0 | X86_FSW_C3; /* empty */
5632 else if (RTFLOAT80U_IS_NORMAL(pr80Val))
5633 fFsw |= X86_FSW_C2;
5634 else if (RTFLOAT80U_IS_ZERO(pr80Val))
5635 fFsw |= X86_FSW_C3;
5636 else if (RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(pr80Val))
5637 fFsw |= X86_FSW_C0;
5638 else if (RTFLOAT80U_IS_INF(pr80Val))
5639 fFsw |= X86_FSW_C0 | X86_FSW_C2;
5640 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5641 fFsw |= X86_FSW_C2 | X86_FSW_C3;
5642 /* whatever else: 0 */
5643
5644 *pu16Fsw = fFsw;
5645}
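

/*
 * FXAM condition code summary (C3, C2, C0) as set above:
 *      000 = unsupported       001 = NaN
 *      010 = normal            011 = infinity
 *      100 = zero              101 = empty
 *      110 = denormal
 * C1 carries the sign bit in all cases.
 */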
5646
5647
5648IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5649 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5650{
5651 RT_NOREF(pFpuState, pFSW, pr80Val1, pr32Val2);
5652 AssertReleaseFailed();
5653}
5654
5655
5656IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5657 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5658{
5659 RT_NOREF(pFpuState, pFSW, pr80Val1, pr64Val2);
5660 AssertReleaseFailed();
5661}
5662
5663
5664IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5665 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5666{
5667 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5668 AssertReleaseFailed();
5669}
5670
5671
5672IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5673 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5674{
5675 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5676 AssertReleaseFailed();
5677 return 0;
5678}
5679
5680
5681IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5682 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5683{
5684 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5685 AssertReleaseFailed();
5686}
5687
5688
5689IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5690 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5691{
5692 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pr80Val2);
5693 AssertReleaseFailed();
5694 return 0;
5695}
5696
5697
5698IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5699 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5700{
5701 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi16Val2);
5702 AssertReleaseFailed();
5703}
5704
5705
5706IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5707 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5708{
5709 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi32Val2);
5710 AssertReleaseFailed();
5711}
5712
5713
5714/*********************************************************************************************************************************
5715* x87 FPU Other Operations *
5716*********************************************************************************************************************************/
5717
5718/**
5719 * Helper for iemAImpl_frndint_r80, called both on normal and denormal numbers.
5720 */
5721static uint16_t iemAImpl_frndint_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5722{
5723 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5724 iemFpuSoftF80ToIprt(pr80Result, extF80_roundToInt(iemFpuSoftF80FromIprt(pr80Val), SoftState.roundingMode,
5725 true /*exact / generate #PE */, &SoftState));
5726 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5727}
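

/*
 * The FCW.RC to SoftFloat rounding mode mapping assumed to be performed by
 * IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW above; an unbuilt sketch with a
 * hypothetical name, shown for reference only.
 */
#if 0 /* illustrative sketch */
static uint8_t demoFcwRcToSoftFloatRounding(uint16_t fFcw)
{
    switch (fFcw & X86_FCW_RC_MASK)
    {
        default:
        case X86_FCW_RC_NEAREST:    return softfloat_round_near_even; /* round to nearest, ties to even */
        case X86_FCW_RC_DOWN:       return softfloat_round_min;       /* towards -infinity */
        case X86_FCW_RC_UP:         return softfloat_round_max;       /* towards +infinity */
        case X86_FCW_RC_ZERO:       return softfloat_round_minMag;    /* towards zero (truncate) */
    }
}
#endif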
5728
5729
5730IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5731{
5732 uint16_t const fFcw = pFpuState->FCW;
5733 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5734
5735 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5736 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5737 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5738 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5739 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
5740 || RTFLOAT80U_IS_INF(pr80Val))
5741 pFpuRes->r80Result = *pr80Val;
5742 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5743 {
5744 fFsw |= X86_FSW_DE;
5745 if (fFcw & X86_FCW_DM)
5746 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5747 else
5748 {
5749 pFpuRes->r80Result = *pr80Val;
5750 fFsw |= X86_FSW_ES | X86_FSW_B;
5751 }
5752 }
5753 else
5754 {
5755 if (fFcw & X86_FCW_IM)
5756 {
5757 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
5758 pFpuRes->r80Result = g_r80Indefinite;
5759 else
5760 {
5761 pFpuRes->r80Result = *pr80Val;
5762 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5763 }
5764 }
5765 else
5766 {
5767 pFpuRes->r80Result = *pr80Val;
5768 fFsw |= X86_FSW_ES | X86_FSW_B;
5769 }
5770 fFsw |= X86_FSW_IE;
5771 }
5772 pFpuRes->FSW = fFsw;
5773}
5774
5775
5776IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5777 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5778{
5779 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5780 AssertReleaseFailed();
5781}
5782
5783
5784/**
5785 * Helper for iemAImpl_fsqrt_r80, called both on normal and denormal numbers.
5786 */
5787static uint16_t iemAImpl_fsqrt_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5788{
5789 Assert(!pr80Val->s.fSign);
5790 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5791 iemFpuSoftF80ToIprt(pr80Result, extF80_sqrt(iemFpuSoftF80FromIprt(pr80Val), &SoftState));
5792 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5793}
5794
5795
5796IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5797{
5798 uint16_t const fFcw = pFpuState->FCW;
5799 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5800
5801 if (RTFLOAT80U_IS_NORMAL(pr80Val) && !pr80Val->s.fSign)
5802 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5803 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5804 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5805 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
5806 || (RTFLOAT80U_IS_INF(pr80Val) && !pr80Val->s.fSign))
5807 pFpuRes->r80Result = *pr80Val;
5808 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val) && !pr80Val->s.fSign) /* Negative denormals only generate #IE! */
5809 {
5810 fFsw |= X86_FSW_DE;
5811 if (fFcw & X86_FCW_DM)
5812 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5813 else
5814 {
5815 pFpuRes->r80Result = *pr80Val;
5816 fFsw |= X86_FSW_ES | X86_FSW_B;
5817 }
5818 }
5819 else
5820 {
5821 if (fFcw & X86_FCW_IM)
5822 {
5823 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
5824 pFpuRes->r80Result = g_r80Indefinite;
5825 else
5826 {
5827 pFpuRes->r80Result = *pr80Val;
5828 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5829 }
5830 }
5831 else
5832 {
5833 pFpuRes->r80Result = *pr80Val;
5834 fFsw |= X86_FSW_ES | X86_FSW_B;
5835 }
5836 fFsw |= X86_FSW_IE;
5837 }
5838 pFpuRes->FSW = fFsw;
5839}
5840
5841
5842/**
 5843 * @code{.unparsed}
 5844 * f(x) = 2^x - 1 = e^(x * ln2) - 1
 5845 * @endcode
5848 *
5849 * We can approximate e^x by a Taylor/Maclaurin series (see
5850 * https://en.wikipedia.org/wiki/Taylor_series#Exponential_function):
 5851 * @code{.unparsed}
 5852 * inf
 5853 * SUM x^n / n! = x^0/0! + x^1/1! + x^2/2! + x^3/3! + x^4/4! + ...
 5854 * n=0
 5855 *
 5856 * = 1 + x + x^2/2! + x^3/3! + x^4/4! + ...
 5857 * @endcode
5862 *
5863 * Given z = x * ln2, we get:
 5864 * @code{.unparsed}
 5865 * e^z - 1 = z + z^2/2! + z^3/3! + z^4/4! + ... + z^n/n!
 5866 * @endcode
5870 *
5871 * Wanting to use Horner's method, we move one z outside and get:
 5872 * @code{.unparsed}
 5873 * = z * (1 + z/2! + z^2/3! + z^3/4! + ... + z^(n-1)/n!)
 5874 * @endcode
5878 *
 5879 * The constants we need for using Horner's method are 1 and 1/n!.
5880 *
 5881 * For very tiny x values, we can get away with f(x) = x * ln 2, because
 5882 * we don't have the necessary precision to represent 1.0 + z/2! + ...
 5883 * and can approximate it to be 1.0. For a visual demonstration of this
 5884 * check out https://www.desmos.com/calculator/vidcdxizd9 (for as long
 5885 * as it is valid), plotting f(x) = 2^x - 1 and f(x) = x * ln2.
5886 *
5887 *
5888 * As constant accuracy goes, figure 0.1 "80387 Block Diagram" in the "80387
5889 * Data Sheet" (order 231920-002; Appendix E in 80387 PRM 231917-001; Military
5890 * i387SX 271166-002), indicates that constants are 67-bit (constant rom block)
5891 * and the internal mantissa size is 68-bit (mantissa adder & barrel shifter
5892 * blocks). (The one bit difference is probably an implicit one missing from
5893 * the constant ROM.) A paper on division and sqrt on the AMD-K7 by Stuart F.
 5894 * Oberman states that it internally used a 68-bit mantissa with an 18-bit
 5895 * exponent.
5896 *
 5897 * However, even when sticking to 67-bit constants / 68-bit mantissas, I have
 5898 * not yet successfully reproduced the exact results from an Intel 10980XE;
 5899 * there is always a portion of rounding differences. Not going to spend too
 5900 * much time on getting this 100% the same, at least not now.
5901 *
 5902 * P.S. If someone is really curious about the 8087 and its constants:
5903 * http://www.righto.com/2020/05/extracting-rom-constants-from-8087-math.html
5904 *
5905 *
5906 * @param pr80Val The exponent value (x), less than 1.0, greater than
5907 * -1.0 and not zero. This can be a normal, denormal
5908 * or pseudo-denormal value.
5909 * @param pr80Result Where to return the result.
5910 * @param fFcw FPU control word.
5911 * @param fFsw FPU status word.
5912 */
5913static uint16_t iemAImpl_f2xm1_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5914{
5915 /* As mentioned above, we can skip the expensive polynomial calculation
5916 as it will be close enough to 1.0 that it makes no difference.
5917
 5918 The cutoff point on an Intel 10980XE is an unbiased exponent of -69 or lower.
 5919 Intel also seems to be using a 67-bit or 68-bit constant value, and we get
 5920 a smattering of rounding differences if we go for higher precision. */
5921 if (pr80Val->s.uExponent <= RTFLOAT80U_EXP_BIAS - 69)
5922 {
5923 RTUINT256U u256;
5924 RTUInt128MulByU64Ex(&u256, &g_u128Ln2MantissaIntel, pr80Val->s.uMantissa);
5925 u256.QWords.qw0 |= 1; /* force #PE */
5926 fFsw = iemFpuFloat80RoundAndComposeFrom192(pr80Result, pr80Val->s.fSign, &u256,
5927 !RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) && !RTFLOAT80U_IS_DENORMAL(pr80Val)
5928 ? (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS
5929 : 1 - RTFLOAT80U_EXP_BIAS,
5930 fFcw, fFsw);
5931 }
5932 else
5933 {
5934#ifdef IEM_WITH_FLOAT128_FOR_FPU
 5935 /* This approach is not good enough for small values as we end up with zero. */
5936 int const fOldRounding = iemFpuF128SetRounding(fFcw);
5937 _Float128 rd128Val = iemFpuF128FromFloat80(pr80Val, fFcw);
5938 _Float128 rd128Result = powf128(2.0L, rd128Val);
5939 rd128Result -= 1.0L;
5940 fFsw = iemFpuF128ToFloat80(pr80Result, rd128Result, fFcw, fFsw);
5941 iemFpuF128RestoreRounding(fOldRounding);
5942
5943# else
5944 softfloat_state_t SoftState = SOFTFLOAT_STATE_INIT_DEFAULTS();
5945 float128_t const x = iemFpuSoftF128FromFloat80(pr80Val);
5946
5947 /* As mentioned above, enforce 68-bit internal mantissa width to better
5948 match the Intel 10980XE results. */
5949 unsigned const cPrecision = 68;
5950
5951 /* first calculate z = x * ln2 */
5952 float128_t z = iemFpuSoftF128Precision(f128_mul(x, iemFpuSoftF128PrecisionIprt(&g_r128Ln2, cPrecision), &SoftState),
5953 cPrecision);
5954
5955 /* Then do the polynomial evaluation. */
5956 float128_t r = iemFpuSoftF128HornerPoly(z, g_ar128F2xm1HornerConsts, RT_ELEMENTS(g_ar128F2xm1HornerConsts),
5957 cPrecision, &SoftState);
5958 r = f128_mul(z, r, &SoftState);
5959
5960 /* Output the result. */
5961 fFsw = iemFpuSoftF128ToFloat80(pr80Result, r, fFcw, fFsw);
5962# endif
5963 }
5964 return fFsw;
5965}
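

/*
 * The tiny-input shortcut above in double precision: once the series tail
 * rounds to 1.0 at the working precision, 2^x - 1 collapses to x * ln(2).
 * E.g. for x = 2^-70, exp2(x) - 1 and x * ln(2) agree in every double bit.
 * An unbuilt sketch with a hypothetical name.
 */
#if 0 /* illustrative sketch */
static double demoF2xm1Tiny(double x)
{
    return x * 0.69314718055994530942; /* ln(2) */
}
#endif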
5966
5967
5968IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5969{
5970 uint16_t const fFcw = pFpuState->FCW;
5971 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5972
5973 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5974 {
5975 if (pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS)
5976 fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5977 else
5978 {
5979 /* Special case:
5980 2^+1.0 - 1.0 = 1.0
5981 2^-1.0 - 1.0 = -0.5 */
5982 if ( pr80Val->s.uExponent == RTFLOAT80U_EXP_BIAS
5983 && pr80Val->s.uMantissa == RT_BIT_64(63))
5984 {
5985 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
5986 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_BIAS - pr80Val->s.fSign;
5987 pFpuRes->r80Result.s.fSign = pr80Val->s.fSign;
5988 }
5989 /* ST(0) > 1.0 || ST(0) < -1.0: undefined behavior */
5990 /** @todo 287 is documented to only accept values 0 <= ST(0) <= 0.5. */
5991 else
5992 pFpuRes->r80Result = *pr80Val;
5993 fFsw |= X86_FSW_PE;
5994 if (!(fFcw & X86_FCW_PM))
5995 fFsw |= X86_FSW_ES | X86_FSW_B;
5996 }
5997 }
5998 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5999 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
6000 || RTFLOAT80U_IS_INDEFINITE(pr80Val))
6001 pFpuRes->r80Result = *pr80Val;
6002 else if (RTFLOAT80U_IS_INF(pr80Val))
6003 pFpuRes->r80Result = pr80Val->s.fSign ? g_ar80One[1] : *pr80Val;
6004 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
6005 {
6006 fFsw |= X86_FSW_DE;
6007 if (fFcw & X86_FCW_DM)
6008 fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
6009 else
6010 {
6011 pFpuRes->r80Result = *pr80Val;
6012 fFsw |= X86_FSW_ES | X86_FSW_B;
6013 }
6014 }
6015 else
6016 {
6017 if ( ( RTFLOAT80U_IS_UNNORMAL(pr80Val)
6018 || RTFLOAT80U_IS_PSEUDO_NAN(pr80Val))
6019 && (fFcw & X86_FCW_IM))
6020 pFpuRes->r80Result = g_r80Indefinite;
6021 else
6022 {
6023 pFpuRes->r80Result = *pr80Val;
6024 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val) && (fFcw & X86_FCW_IM))
6025 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
6026 }
6027 fFsw |= X86_FSW_IE;
6028 if (!(fFcw & X86_FCW_IM))
6029 fFsw |= X86_FSW_ES | X86_FSW_B;
6030 }
6031 pFpuRes->FSW = fFsw;
6032}
6033
6034#endif /* IEM_WITHOUT_ASSEMBLY */
6035
6036IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
6037{
6038 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
6039}
6040
6041IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
6042{
6043 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
6044}
6045
6046#ifdef IEM_WITHOUT_ASSEMBLY
6047
6048IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
6049{
6050 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
6051 pFpuRes->r80Result = *pr80Val;
6052 pFpuRes->r80Result.s.fSign = 0;
6053}
6054
6055
6056IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
6057{
6058 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
6059 pFpuRes->r80Result = *pr80Val;
6060 pFpuRes->r80Result.s.fSign = !pr80Val->s.fSign;
6061}
6062
6063
6064IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
6065{
6066 uint16_t const fFcw = pFpuState->FCW;
6067 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
6068
6069 if (RTFLOAT80U_IS_NORMAL(pr80Val))
6070 {
6071 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
6072 iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80((int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS, &Ignored));
6073
6074 pFpuResTwo->r80Result2.s.fSign = pr80Val->s.fSign;
6075 pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
6076 pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
6077 }
6078 else if (RTFLOAT80U_IS_ZERO(pr80Val))
6079 {
6080 fFsw |= X86_FSW_ZE;
6081 if (fFcw & X86_FCW_ZM)
6082 {
6083 pFpuResTwo->r80Result1 = g_ar80Infinity[1];
6084 pFpuResTwo->r80Result2 = *pr80Val;
6085 }
6086 else
6087 {
6088 pFpuResTwo->r80Result2 = *pr80Val;
6089 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6090 }
6091 }
6092 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
6093 {
6094 fFsw |= X86_FSW_DE;
6095 if (fFcw & X86_FCW_DM)
6096 {
6097 pFpuResTwo->r80Result2.s.fSign = pr80Val->s.fSign;
6098 pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
6099 pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
6100 int32_t iExponent = -16382;
6101 while (!(pFpuResTwo->r80Result2.s.uMantissa & RT_BIT_64(63)))
6102 {
6103 pFpuResTwo->r80Result2.s.uMantissa <<= 1;
6104 iExponent--;
6105 }
6106
6107 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
6108 iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80(iExponent, &Ignored));
6109 }
6110 else
6111 {
6112 pFpuResTwo->r80Result2 = *pr80Val;
6113 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6114 }
6115 }
6116 else if ( RTFLOAT80U_IS_QUIET_NAN(pr80Val)
6117 || RTFLOAT80U_IS_INDEFINITE(pr80Val))
6118 {
6119 pFpuResTwo->r80Result1 = *pr80Val;
6120 pFpuResTwo->r80Result2 = *pr80Val;
6121 }
6122 else if (RTFLOAT80U_IS_INF(pr80Val))
6123 {
6124 pFpuResTwo->r80Result1 = g_ar80Infinity[0];
6125 pFpuResTwo->r80Result2 = *pr80Val;
6126 }
6127 else
6128 {
6129 if (fFcw & X86_FCW_IM)
6130 {
6131 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
6132 pFpuResTwo->r80Result1 = g_r80Indefinite;
6133 else
6134 {
6135 pFpuResTwo->r80Result1 = *pr80Val;
6136 pFpuResTwo->r80Result1.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
6137 }
6138 pFpuResTwo->r80Result2 = pFpuResTwo->r80Result1;
6139 }
6140 else
6141 {
6142 pFpuResTwo->r80Result2 = *pr80Val;
6143 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6144 }
6145 fFsw |= X86_FSW_IE;
6146 }
6147 pFpuResTwo->FSW = fFsw;
6148}
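

/*
 * FXTRACT in miniature for the normal case, using the C runtime: frexpl()
 * yields a significand in [0.5, 1.0) whereas FXTRACT wants [1.0, 2.0), so
 * the result is scaled by one binade.  An unbuilt sketch with a hypothetical
 * name; zero, infinity and NaN handling is omitted.
 */
#if 0 /* illustrative sketch */
# include <math.h>
static void demoFxtract(long double rVal, long double *prSignificand, int *piExponent)
{
    *prSignificand  = frexpl(rVal, piExponent); /* rVal = m * 2^e, m in [0.5, 1.0) */
    *prSignificand *= 2.0L;                     /* scale m into [1.0, 2.0)... */
    *piExponent    -= 1;                        /* ...and compensate in the exponent */
}
#endif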
6149
6150
6151IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6152 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6153{
6154 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6155 AssertReleaseFailed();
6156}
6157
6158#endif /* IEM_WITHOUT_ASSEMBLY */
6159
6160IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6161 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6162{
6163 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6164}
6165
6166IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6167 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6168{
6169 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6170}
6171
6172#if defined(IEM_WITHOUT_ASSEMBLY)
6173
6174IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6175 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6176{
6177 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6178 AssertReleaseFailed();
6179}
6180
6181#endif /* IEM_WITHOUT_ASSEMBLY */
6182
6183IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6184 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6185{
6186 iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6187}
6188
6189IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6190 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6191{
6192 iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6193}
6194
6195
6196/*********************************************************************************************************************************
6197* MMX, SSE & AVX *
6198*********************************************************************************************************************************/
6199
6200IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
6201{
6202 RT_NOREF(pFpuState);
6203 puDst->au32[0] = puSrc->au32[0];
6204 puDst->au32[1] = puSrc->au32[0];
6205 puDst->au32[2] = puSrc->au32[2];
6206 puDst->au32[3] = puSrc->au32[2];
6207}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
}


IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
    pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
    pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
}

#endif /* IEM_WITH_VEX */


/** MOVSHDUP: Duplicates the odd (high) dwords of the source: 1,1,3,3. */
IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
{
    RT_NOREF(pFpuState);
    puDst->au32[0] = puSrc->au32[1];
    puDst->au32[1] = puSrc->au32[1];
    puDst->au32[2] = puSrc->au32[3];
    puDst->au32[3] = puSrc->au32[3];
}


/** MOVDDUP: Duplicates the source qword into both destination qwords. */
IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
{
    RT_NOREF(pFpuState);
    puDst->au64[0] = uSrc;
    puDst->au64[1] = uSrc;
}

#ifdef IEM_WITH_VEX

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
}

IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
{
    pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
    pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
    pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
}

#endif /* IEM_WITH_VEX */

#ifdef IEM_WITHOUT_ASSEMBLY

IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

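/*
 * Disabled sketch of what the PCMPEQxx stubs above need to do: compare
 * element-wise and store all-ones on equality, all-zero otherwise.  Shown
 * for the 64-bit byte form with a made-up helper name; the word, dword and
 * 128-bit variants differ only in element width and count.
 */
#if 0
static void iemAImplSketchPcmpeqbU64(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst; /* MMX: the destination doubles as first source */
    RTUINT64U uSrc2; uSrc2.u = *pu64Src;
    for (unsigned i = 0; i < RT_ELEMENTS(uSrc1.au8); i++)
        uSrc1.au8[i] = uSrc1.au8[i] == uSrc2.au8[i] ? 0xff : 0x00;
    *pu64Dst = uSrc1.u;
}
#endif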

IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

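/*
 * Disabled sketch for the PXOR stubs above: a plain bitwise exclusive or.
 * The 128-bit form does the same on both 64-bit halves of the RTUINT128U.
 */
#if 0
static void iemAImplSketchPxorU64(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    *pu64Dst ^= *pu64Src;
}
#endif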

IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu128Src);
    AssertReleaseFailed();
}

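/*
 * Disabled sketch for the PMOVMSKB stubs above: gather the most significant
 * bit of each source byte into the low bits of the destination.  The 128-bit
 * form produces a 16-bit mask the same way.
 */
#if 0
static void iemAImplSketchPmovmskbU64(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    uint64_t const uSrc  = *pu64Src;
    uint64_t       fMask = 0;
    for (unsigned iByte = 0; iByte < 8; iByte++)
        fMask |= ((uSrc >> (iByte * 8 + 7)) & 1) << iByte;
    *pu64Dst = fMask;
}
#endif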

IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}

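/*
 * Disabled sketch for the PSHUFW stub above: bEvil packs four 2-bit source
 * word indices, one per destination word.  PSHUFD does the same on dwords;
 * PSHUFLW/PSHUFHW apply the pattern to one 64-bit half of the register and
 * copy the other half unchanged.
 */
#if 0
static void iemAImplSketchPshufw(uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil)
{
    RTUINT64U uSrc; uSrc.u = *pu64Src;
    RTUINT64U uDst;
    for (unsigned iWord = 0; iWord < RT_ELEMENTS(uDst.au16); iWord++)
        uDst.au16[iWord] = uSrc.au16[(bEvil >> (iWord * 2)) & 3];
    *pu64Dst = uDst.u;
}
#endif
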
/* PUNPCKHxxx */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}

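/*
 * Disabled sketch for the PUNPCKHBW stub above: interleave the high halves
 * of the two operands, destination elements first.  The wd/dq/qdq variants
 * follow the same pattern with wider elements.
 */
#if 0
static void iemAImplSketchPunpckhbwU64(uint64_t *pu64Dst, uint64_t const *pu64Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst; /* MMX: the destination doubles as first source */
    RTUINT64U uSrc2; uSrc2.u = *pu64Src;
    RTUINT64U uDst;
    for (unsigned i = 0; i < 4; i++)
    {
        uDst.au8[i * 2]     = uSrc1.au8[i + 4];
        uDst.au8[i * 2 + 1] = uSrc2.au8[i + 4];
    }
    *pu64Dst = uDst.u;
}
#endif
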
/* PUNPCKLxxx */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}

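/*
 * Disabled sketch for the PUNPCKLBW stub above: interleave the low halves.
 * The narrower source pointers in the stubs are deliberate: only the low
 * half of the source operand is ever read, so 32 bits suffice for the
 * 64-bit form and 64 bits for the 128-bit form.
 */
#if 0
static void iemAImplSketchPunpcklbwU64(uint64_t *pu64Dst, uint32_t const *pu32Src)
{
    RTUINT64U uSrc1; uSrc1.u = *pu64Dst;
    RTUINT32U uSrc2; uSrc2.u = *pu32Src;
    RTUINT64U uDst;
    for (unsigned i = 0; i < 4; i++)
    {
        uDst.au8[i * 2]     = uSrc1.au8[i];
        uDst.au8[i * 2 + 1] = uSrc2.au8[i];
    }
    *pu64Dst = uDst.u;
}
#endif
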
#endif /* IEM_WITHOUT_ASSEMBLY */