VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@ 94538

Last change on this file since 94538 was 94538, checked in by vboxsync, 3 years ago

VMM/IEM: Implemented f2xm1. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 212.3 KB
Line 
1/* $Id: IEMAllAImplC.cpp 94538 2022-04-10 14:16:03Z vboxsync $ */
2/** @file
3 * IEM - Instruction Implementation in Assembly, portable C variant.
4 */
5
6/*
7 * Copyright (C) 2011-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "IEMInternal.h"
23#include <VBox/vmm/vmcc.h>
24#include <iprt/errcore.h>
25#include <iprt/x86.h>
26#include <iprt/uint128.h>
27#include <iprt/uint256.h>
28
29RT_C_DECLS_BEGIN
30#include <softfloat.h>
31RT_C_DECLS_END
32
33
34/*********************************************************************************************************************************
35* Defined Constants And Macros *
36*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
/* NOTE(review): ARM hosts (and the doxygen run) force the portable C
   variants - presumably because no assembly helpers exist for ARM; confirm
   against iemAllAImpl.asm before relying on this. */
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif
/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
#ifdef IEM_WITH_ASSEMBLY
# undef IEM_WITHOUT_ASSEMBLY
#endif
49
/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.  The result's sign bit is shifted right so it lands on bit
 * X86_EFL_SF_BIT before masking (for 8-bit results the shift count is zero).
 *
 * @returns X86_EFL_SF or 0.
 * @param a_uResult Unsigned result value.
 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )
62
/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.  The
 * boolean comparison result (0 or 1) is simply shifted into the ZF position.
 *
 * @returns X86_EFL_ZF or 0.
 * @param a_uResult Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )
73
/**
 * Extracts the OF flag from a OF calculation result.
 *
 * These are typically used by concatenating with a bitcount.  The problem is
 * that 8-bit values need shifting in the other direction than the others:
 * bit 7 sits below X86_EFL_OF_BIT (11), so the 8-bit variant shifts left,
 * while the wider variants shift their sign bit right down to bit 11.
 */
#define X86_EFL_GET_OF_8(a_uValue)  (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
84
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after arithmetic op.
 *
 * Note! Statement macro: the combined flags are written back thru
 *       a_pfEFlags, nothing is returned.  Several arguments are evaluated
 *       more than once, so avoid side effects in them.
 *
 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
 * @param a_uResult Unsigned result value.
 * @param a_uDst The original destination value (for AF calc).
 * @param a_uSrc The source value (for AF calc).
 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
 * @param a_CfExpr Bool expression for the carry flag (CF).
 * @param a_uSrcOf The a_uSrc value to use for overflow calculation
 *                 (sign-bit flipped for subtraction, see below).
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        /* AF: carry out of bit 3; XORing the three values isolates it. */ \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        \
        /* Overflow during ADDition happens when both inputs have the same signed \
           bit value and the result has a different sign bit value. \
           \
           Since subtraction can be rewritten as addition: 2 - 1 == 2 + -1, it \
           follows that for SUBtraction the signed bit value must differ between \
           the two inputs and the result's signed bit diff from the first input. \
           Note! Must xor with sign bit to convert, not do (0 - a_uSrc). \
           \
           See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( ( ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
                                                     & RT_BIT_64(a_cBitsWidth - 1)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
121
/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
 * undefined. We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * Note! Statement macro: flags are written back thru a_pfEFlags, nothing is
 *       returned.  a_uResult is evaluated multiple times.
 *
 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
 * @param a_uResult Unsigned result value.
 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
 * @param a_fExtra Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
145
146
147/*********************************************************************************************************************************
148* Global Variables *
149*********************************************************************************************************************************/
/**
 * Parity calculation table.
 *
 * PF is set when the low byte of a result has an EVEN number of set bits,
 * so g_afParity[b] is X86_EFL_PF when b has even popcount and 0 otherwise.
 *
 * This is also used by iemAllAImpl.asm, so neither the layout nor the
 * values may change.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                 b       & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};
447
448
/** Zero values (indexed by fSign): +0.0 at [0], -0.0 at [1]. */
RTFLOAT80U const g_ar80Zero[] = { RTFLOAT80U_INIT_ZERO(0), RTFLOAT80U_INIT_ZERO(1) };

/** One values (indexed by fSign): +1.0 at [0], -1.0 at [1].
 * The explicit integer bit (bit 63) is set and the exponent is the bias. */
RTFLOAT80U const g_ar80One[] =
{ RTFLOAT80U_INIT(0, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS), RTFLOAT80U_INIT(1, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS) };

/** Indefinite (negative). */
RTFLOAT80U const g_r80Indefinite = RTFLOAT80U_INIT_INDEFINITE(1);

/** 128-bit floating point constant: 2.0 (zero mantissa, exponent bias + 1). */
const RTFLOAT128U g_r128Two = RTFLOAT128U_INIT_C(0, 0, 0, RTFLOAT128U_EXP_BIAS + 1);
461
462
463/* The next section is generated by tools/IEMGenFpuConstants: */
464
/** The ln2 constant as 128-bit floating point value.
 * base-10: 6.93147180559945309417232121458176575e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f30@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100110e-1
 * NOTE(review): the active initializer truncates the mantissa after 0xf35790;
 * the fully precise value is preserved in the commented-out line below --
 * presumably chosen to match observed hardware results, verify before
 * changing. */
//const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf35793c7673007e6, 0x3ffe);
const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf357900000000000, 0x3ffe);
/** High precision ln2 value.
 * base-10: 6.931471805599453094172321214581765680747e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f2f6af0@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100101111011010101111e-1 */
const RTUINT128U g_u128Ln2Mantissa = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc9e3b39803f2f6af);
/** High precision ln2 value, compatible with f2xm1 results on intel 10980XE.
 * Note the mantissa is truncated to 64 significant bits here.
 * base-10: 6.931471805599453094151379470289064954613e-1
 * base-16: b.17217f7d1cf79abc0000000000000000@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100000000000000000000000000000000000000000000000000000000000000e-1 */
const RTUINT128U g_u128Ln2MantissaIntel = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc000000000000000);
481
/** Horner constants for f2xm1.
 * NOTE(review): a0..a19 match the reciprocal factorials 1/(n+1)! (the Taylor
 * coefficients of (e^u - 1)/u), while a20/a21 deviate from 1/21! and 1/22! --
 * presumably adjusted/fitted terms; verify against tools/IEMGenFpuConstants
 * before relying on that reading. */
const RTFLOAT128U g_ar128F2xm1HornerConsts[] =
{
    /* a0
     * base-10: 1.00000000000000000000000000000000000e0
     * base-16: 1.0000000000000000000000000000@0
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e0 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3fff),
    /* a1
     * base-10: 5.00000000000000000000000000000000000e-1
     * base-16: 8.0000000000000000000000000000@-1
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-1 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3ffe),
    /* a2
     * base-10: 1.66666666666666666666666666666666658e-1
     * base-16: 2.aaaaaaaaaaaaaaaaaaaaaaaaaaaa@-1
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-3 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffc),
    /* a3
     * base-10: 4.16666666666666666666666666666666646e-2
     * base-16: a.aaaaaaaaaaaaaaaaaaaaaaaaaaa8@-2
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-5 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffa),
    /* a4
     * base-10: 8.33333333333333333333333333333333323e-3
     * base-16: 2.2222222222222222222222222222@-2
     * base-2 : 1.0001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001e-7 */
    RTFLOAT128U_INIT_C(0, 0x111111111111, 0x1111111111111111, 0x3ff8),
    /* a5
     * base-10: 1.38888888888888888888888888888888874e-3
     * base-16: 5.b05b05b05b05b05b05b05b05b058@-3
     * base-2 : 1.0110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110e-10 */
    RTFLOAT128U_INIT_C(0, 0x6c16c16c16c1, 0x6c16c16c16c16c16, 0x3ff5),
    /* a6
     * base-10: 1.98412698412698412698412698412698412e-4
     * base-16: d.00d00d00d00d00d00d00d00d00d0@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-13 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3ff2),
    /* a7
     * base-10: 2.48015873015873015873015873015873015e-5
     * base-16: 1.a01a01a01a01a01a01a01a01a01a@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-16 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3fef),
    /* a8
     * base-10: 2.75573192239858906525573192239858902e-6
     * base-16: 2.e3bc74aad8e671f5583911ca002e@-5
     * base-2 : 1.0111000111011110001110100101010101101100011100110011100011111010101011000001110010001000111001010000000000010111e-19 */
    RTFLOAT128U_INIT_C(0, 0x71de3a556c73, 0x38faac1c88e50017, 0x3fec),
    /* a9
     * base-10: 2.75573192239858906525573192239858865e-7
     * base-16: 4.9f93edde27d71cbbc05b4fa999e0@-6
     * base-2 : 1.0010011111100100111110110111011110001001111101011100011100101110111100000001011011010011111010100110011001111000e-22 */
    RTFLOAT128U_INIT_C(0, 0x27e4fb7789f5, 0xc72ef016d3ea6678, 0x3fe9),
    /* a10
     * base-10: 2.50521083854417187750521083854417184e-8
     * base-16: 6.b99159fd5138e3f9d1f92e0df71c@-7
     * base-2 : 1.1010111001100100010101100111111101010100010011100011100011111110011101000111111001001011100000110111110111000111e-26 */
    RTFLOAT128U_INIT_C(0, 0xae64567f544e, 0x38fe747e4b837dc7, 0x3fe5),
    /* a11
     * base-10: 2.08767569878680989792100903212014296e-9
     * base-16: 8.f76c77fc6c4bdaa26d4c3d67f420@-8
     * base-2 : 1.0001111011101101100011101111111110001101100010010111101101010100010011011010100110000111101011001111111010000100e-29 */
    RTFLOAT128U_INIT_C(0, 0x1eed8eff8d89, 0x7b544da987acfe84, 0x3fe2),
    /* a12
     * base-10: 1.60590438368216145993923771701549472e-10
     * base-16: b.092309d43684be51c198e91d7b40@-9
     * base-2 : 1.0110000100100100011000010011101010000110110100001001011111001010001110000011001100011101001000111010111101101000e-33 */
    RTFLOAT128U_INIT_C(0, 0x6124613a86d0, 0x97ca38331d23af68, 0x3fde),
    /* a13
     * base-10: 1.14707455977297247138516979786821043e-11
     * base-16: c.9cba54603e4e905d6f8a2efd1f20@-10
     * base-2 : 1.1001001110010111010010101000110000000111110010011101001000001011101011011111000101000101110111111010001111100100e-37 */
    RTFLOAT128U_INIT_C(0, 0x93974a8c07c9, 0xd20badf145dfa3e4, 0x3fda),
    /* a14
     * base-10: 7.64716373181981647590113198578806964e-13
     * base-16: d.73f9f399dc0f88ec32b587746578@-11
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-41 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd6),
    /* a15
     * base-10: 4.77947733238738529743820749111754352e-14
     * base-16: d.73f9f399dc0f88ec32b587746578@-12
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-45 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd2),
    /* a16
     * base-10: 2.81145725434552076319894558301031970e-15
     * base-16: c.a963b81856a53593028cbbb8d7f8@-13
     * base-2 : 1.1001010100101100011101110000001100001010110101001010011010110010011000000101000110010111011101110001101011111111e-49 */
    RTFLOAT128U_INIT_C(0, 0x952c77030ad4, 0xa6b2605197771aff, 0x3fce),
    /* a17
     * base-10: 1.56192069685862264622163643500573321e-16
     * base-16: b.413c31dcbecbbdd8024435161550@-14
     * base-2 : 1.0110100000100111100001100011101110010111110110010111011110111011000000000100100010000110101000101100001010101010e-53 */
    RTFLOAT128U_INIT_C(0, 0x6827863b97d9, 0x77bb004886a2c2aa, 0x3fca),
    /* a18
     * base-10: 8.22063524662432971695598123687227980e-18
     * base-16: 9.7a4da340a0ab92650f61dbdcb3a0@-15
     * base-2 : 1.0010111101001001101101000110100000010100000101010111001001001100101000011110110000111011011110111001011001110100e-57 */
    RTFLOAT128U_INIT_C(0, 0x2f49b4681415, 0x724ca1ec3b7b9674, 0x3fc6),
    /* a19
     * base-10: 4.11031762331216485847799061843614006e-19
     * base-16: 7.950ae900808941ea72b4afe3c2e8@-16
     * base-2 : 1.1110010101000010101110100100000000100000001000100101000001111010100111001010110100101011111110001111000010111010e-62 */
    RTFLOAT128U_INIT_C(0, 0xe542ba402022, 0x507a9cad2bf8f0ba, 0x3fc1),
    /* a20
     * base-10: 7.04351638180413298434020229233492164e-20
     * base-16: 1.4c9ee35db1d1f3c946fdcd48fd88@-16
     * base-2 : 1.0100110010011110111000110101110110110001110100011111001111001001010001101111110111001101010010001111110110001000e-64 */
    RTFLOAT128U_INIT_C(0, 0x4c9ee35db1d1, 0xf3c946fdcd48fd88, 0x3fbf),
    /* a21
     * base-10: 5.81527769640186708776361513365257702e-20
     * base-16: 1.129e64bff606a2b9c9fc624481cd@-16
     * base-2 : 1.0001001010011110011001001011111111110110000001101010001010111001110010011111110001100010010001001000000111001101e-64 */
    RTFLOAT128U_INIT_C(0, 0x129e64bff606, 0xa2b9c9fc624481cd, 0x3fbf),
};
596
597
598/*
599 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
600 * it all in C is probably safer atm., optimize what's necessary later, maybe.
601 */
602#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
603
604
605/*********************************************************************************************************************************
606* Binary Operations *
607*********************************************************************************************************************************/
608
609/*
610 * ADD
611 */
612
613IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
614{
615 uint64_t uDst = *puDst;
616 uint64_t uResult = uDst + uSrc;
617 *puDst = uResult;
618 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
619}
620
621# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
622
623IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
624{
625 uint32_t uDst = *puDst;
626 uint32_t uResult = uDst + uSrc;
627 *puDst = uResult;
628 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
629}
630
631
632IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
633{
634 uint16_t uDst = *puDst;
635 uint16_t uResult = uDst + uSrc;
636 *puDst = uResult;
637 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
638}
639
640
641IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
642{
643 uint8_t uDst = *puDst;
644 uint8_t uResult = uDst + uSrc;
645 *puDst = uResult;
646 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
647}
648
649# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
650
651/*
652 * ADC
653 */
654
655IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
656{
657 if (!(*pfEFlags & X86_EFL_CF))
658 iemAImpl_add_u64(puDst, uSrc, pfEFlags);
659 else
660 {
661 uint64_t uDst = *puDst;
662 uint64_t uResult = uDst + uSrc + 1;
663 *puDst = uResult;
664 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
665 }
666}
667
668# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
669
670IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
671{
672 if (!(*pfEFlags & X86_EFL_CF))
673 iemAImpl_add_u32(puDst, uSrc, pfEFlags);
674 else
675 {
676 uint32_t uDst = *puDst;
677 uint32_t uResult = uDst + uSrc + 1;
678 *puDst = uResult;
679 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
680 }
681}
682
683
684IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
685{
686 if (!(*pfEFlags & X86_EFL_CF))
687 iemAImpl_add_u16(puDst, uSrc, pfEFlags);
688 else
689 {
690 uint16_t uDst = *puDst;
691 uint16_t uResult = uDst + uSrc + 1;
692 *puDst = uResult;
693 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
694 }
695}
696
697
698IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
699{
700 if (!(*pfEFlags & X86_EFL_CF))
701 iemAImpl_add_u8(puDst, uSrc, pfEFlags);
702 else
703 {
704 uint8_t uDst = *puDst;
705 uint8_t uResult = uDst + uSrc + 1;
706 *puDst = uResult;
707 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
708 }
709}
710
711# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
712
713/*
714 * SUB
715 */
716
717IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
718{
719 uint64_t uDst = *puDst;
720 uint64_t uResult = uDst - uSrc;
721 *puDst = uResult;
722 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
723}
724
725# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
726
727IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
728{
729 uint32_t uDst = *puDst;
730 uint32_t uResult = uDst - uSrc;
731 *puDst = uResult;
732 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
733}
734
735
736IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
737{
738 uint16_t uDst = *puDst;
739 uint16_t uResult = uDst - uSrc;
740 *puDst = uResult;
741 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
742}
743
744
745IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
746{
747 uint8_t uDst = *puDst;
748 uint8_t uResult = uDst - uSrc;
749 *puDst = uResult;
750 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
751}
752
753# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
754
755/*
756 * SBB
757 */
758
759IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
760{
761 if (!(*pfEFlags & X86_EFL_CF))
762 iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
763 else
764 {
765 uint64_t uDst = *puDst;
766 uint64_t uResult = uDst - uSrc - 1;
767 *puDst = uResult;
768 IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
769 }
770}
771
772# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
773
IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* 32-bit SBB: plain SUB when CF is clear, otherwise subtract src plus the borrow. */
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        /* Carry-out condition is uDst <= uSrc (not <) because of the extra borrow unit. */
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* 16-bit SBB: plain SUB when CF is clear, otherwise subtract src plus the borrow. */
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    /* 8-bit SBB: plain SUB when CF is clear, otherwise subtract src plus the borrow. */
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
    }
}
814
815# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
816
817
818/*
819 * OR
820 */
821
822IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
823{
824 uint64_t uResult = *puDst | uSrc;
825 *puDst = uResult;
826 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
827}
828
829# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
830
831IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
832{
833 uint32_t uResult = *puDst | uSrc;
834 *puDst = uResult;
835 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
836}
837
838
839IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
840{
841 uint16_t uResult = *puDst | uSrc;
842 *puDst = uResult;
843 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
844}
845
846
847IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
848{
849 uint8_t uResult = *puDst | uSrc;
850 *puDst = uResult;
851 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
852}
853
854# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
855
856/*
857 * XOR
858 */
859
860IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
861{
862 uint64_t uResult = *puDst ^ uSrc;
863 *puDst = uResult;
864 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
865}
866
867# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
868
869IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
870{
871 uint32_t uResult = *puDst ^ uSrc;
872 *puDst = uResult;
873 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
874}
875
876
877IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
878{
879 uint16_t uResult = *puDst ^ uSrc;
880 *puDst = uResult;
881 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
882}
883
884
885IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
886{
887 uint8_t uResult = *puDst ^ uSrc;
888 *puDst = uResult;
889 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
890}
891
892# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
893
894/*
895 * AND
896 */
897
898IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
899{
900 uint64_t uResult = *puDst & uSrc;
901 *puDst = uResult;
902 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
903}
904
905# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
906
907IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
908{
909 uint32_t uResult = *puDst & uSrc;
910 *puDst = uResult;
911 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
912}
913
914
915IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
916{
917 uint16_t uResult = *puDst & uSrc;
918 *puDst = uResult;
919 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
920}
921
922
923IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
924{
925 uint8_t uResult = *puDst & uSrc;
926 *puDst = uResult;
927 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
928}
929
930# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
931
932/*
933 * CMP
934 */
935
936IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
937{
938 uint64_t uDstTmp = *puDst;
939 iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
940}
941
942# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
943
944IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
945{
946 uint32_t uDstTmp = *puDst;
947 iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
948}
949
950
951IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
952{
953 uint16_t uDstTmp = *puDst;
954 iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
955}
956
957
958IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
959{
960 uint8_t uDstTmp = *puDst;
961 iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
962}
963
964# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
965
966/*
967 * TEST
968 */
969
970IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
971{
972 uint64_t uResult = *puDst & uSrc;
973 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
974}
975
976# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
977
978IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
979{
980 uint32_t uResult = *puDst & uSrc;
981 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
982}
983
984
985IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
986{
987 uint16_t uResult = *puDst & uSrc;
988 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
989}
990
991
992IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
993{
994 uint8_t uResult = *puDst & uSrc;
995 IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
996}
997
998# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
999
1000
1001/*
1002 * LOCK prefixed variants of the above
1003 */
1004
/** Width-generic locked binary operand operation body.
 * Expects puDst, uSrc and pfEFlags to be in scope at the expansion site.
 * Retries the non-locked worker + compare-exchange until the destination is
 * updated atomically, then publishes the flags computed in the winning round.
 * (The old comment said "64-bit", but the width is a macro parameter.) */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
1019
1020
/** Emits the iemAImpl_<mnemonic>_u<width>_locked worker function around
 *  DO_LOCKED_BIN_OP for the given mnemonic and bit width. */
#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }
1028
/* Instantiate the LOCK-prefixed arithmetic/logic workers for every mnemonic
   and operand width handled in C on this build configuration. */
EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
1060# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1061
1062
1063/*
1064 * Bit operations (same signature as above).
1065 */
1066
1067/*
1068 * BT
1069 */
1070
1071IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1072{
1073 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1074 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1075 Assert(uSrc < 64);
1076 uint64_t uDst = *puDst;
1077 if (uDst & RT_BIT_64(uSrc))
1078 *pfEFlags |= X86_EFL_CF;
1079 else
1080 *pfEFlags &= ~X86_EFL_CF;
1081}
1082
1083# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1084
1085IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1086{
1087 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1088 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1089 Assert(uSrc < 32);
1090 uint32_t uDst = *puDst;
1091 if (uDst & RT_BIT_32(uSrc))
1092 *pfEFlags |= X86_EFL_CF;
1093 else
1094 *pfEFlags &= ~X86_EFL_CF;
1095}
1096
1097IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1098{
1099 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1100 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1101 Assert(uSrc < 16);
1102 uint16_t uDst = *puDst;
1103 if (uDst & RT_BIT_32(uSrc))
1104 *pfEFlags |= X86_EFL_CF;
1105 else
1106 *pfEFlags &= ~X86_EFL_CF;
1107}
1108
1109# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1110
1111/*
1112 * BTC
1113 */
1114
1115IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1116{
1117 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1118 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1119 Assert(uSrc < 64);
1120 uint64_t fMask = RT_BIT_64(uSrc);
1121 uint64_t uDst = *puDst;
1122 if (uDst & fMask)
1123 {
1124 uDst &= ~fMask;
1125 *puDst = uDst;
1126 *pfEFlags |= X86_EFL_CF;
1127 }
1128 else
1129 {
1130 uDst |= fMask;
1131 *puDst = uDst;
1132 *pfEFlags &= ~X86_EFL_CF;
1133 }
1134}
1135
1136# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1137
1138IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1139{
1140 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1141 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1142 Assert(uSrc < 32);
1143 uint32_t fMask = RT_BIT_32(uSrc);
1144 uint32_t uDst = *puDst;
1145 if (uDst & fMask)
1146 {
1147 uDst &= ~fMask;
1148 *puDst = uDst;
1149 *pfEFlags |= X86_EFL_CF;
1150 }
1151 else
1152 {
1153 uDst |= fMask;
1154 *puDst = uDst;
1155 *pfEFlags &= ~X86_EFL_CF;
1156 }
1157}
1158
1159
1160IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1161{
1162 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
1163 not modified by either AMD (3990x) or Intel (i9-9980HK). */
1164 Assert(uSrc < 16);
1165 uint16_t fMask = RT_BIT_32(uSrc);
1166 uint16_t uDst = *puDst;
1167 if (uDst & fMask)
1168 {
1169 uDst &= ~fMask;
1170 *puDst = uDst;
1171 *pfEFlags |= X86_EFL_CF;
1172 }
1173 else
1174 {
1175 uDst |= fMask;
1176 *puDst = uDst;
1177 *pfEFlags &= ~X86_EFL_CF;
1178 }
1179}
1180
1181# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1182
1183/*
1184 * BTR
1185 */
1186
1187IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1188{
1189 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1190 logical operation (AND/OR/whatever). */
1191 Assert(uSrc < 64);
1192 uint64_t fMask = RT_BIT_64(uSrc);
1193 uint64_t uDst = *puDst;
1194 if (uDst & fMask)
1195 {
1196 uDst &= ~fMask;
1197 *puDst = uDst;
1198 *pfEFlags |= X86_EFL_CF;
1199 }
1200 else
1201 *pfEFlags &= ~X86_EFL_CF;
1202}
1203
1204# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1205
1206IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1207{
1208 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1209 logical operation (AND/OR/whatever). */
1210 Assert(uSrc < 32);
1211 uint32_t fMask = RT_BIT_32(uSrc);
1212 uint32_t uDst = *puDst;
1213 if (uDst & fMask)
1214 {
1215 uDst &= ~fMask;
1216 *puDst = uDst;
1217 *pfEFlags |= X86_EFL_CF;
1218 }
1219 else
1220 *pfEFlags &= ~X86_EFL_CF;
1221}
1222
1223
1224IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1225{
1226 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1227 logical operation (AND/OR/whatever). */
1228 Assert(uSrc < 16);
1229 uint16_t fMask = RT_BIT_32(uSrc);
1230 uint16_t uDst = *puDst;
1231 if (uDst & fMask)
1232 {
1233 uDst &= ~fMask;
1234 *puDst = uDst;
1235 *pfEFlags |= X86_EFL_CF;
1236 }
1237 else
1238 *pfEFlags &= ~X86_EFL_CF;
1239}
1240
1241# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1242
1243/*
1244 * BTS
1245 */
1246
1247IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
1248{
1249 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1250 logical operation (AND/OR/whatever). */
1251 Assert(uSrc < 64);
1252 uint64_t fMask = RT_BIT_64(uSrc);
1253 uint64_t uDst = *puDst;
1254 if (uDst & fMask)
1255 *pfEFlags |= X86_EFL_CF;
1256 else
1257 {
1258 uDst |= fMask;
1259 *puDst = uDst;
1260 *pfEFlags &= ~X86_EFL_CF;
1261 }
1262}
1263
1264# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1265
1266IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
1267{
1268 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1269 logical operation (AND/OR/whatever). */
1270 Assert(uSrc < 32);
1271 uint32_t fMask = RT_BIT_32(uSrc);
1272 uint32_t uDst = *puDst;
1273 if (uDst & fMask)
1274 *pfEFlags |= X86_EFL_CF;
1275 else
1276 {
1277 uDst |= fMask;
1278 *puDst = uDst;
1279 *pfEFlags &= ~X86_EFL_CF;
1280 }
1281}
1282
1283
1284IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
1285{
1286 /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after an
1287 logical operation (AND/OR/whatever). */
1288 Assert(uSrc < 16);
1289 uint16_t fMask = RT_BIT_32(uSrc);
1290 uint32_t uDst = *puDst;
1291 if (uDst & fMask)
1292 *pfEFlags |= X86_EFL_CF;
1293 else
1294 {
1295 uDst |= fMask;
1296 *puDst = uDst;
1297 *pfEFlags &= ~X86_EFL_CF;
1298 }
1299}
1300
1301# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1302
1303
/* Instantiate the LOCK-prefixed bit-test-and-modify workers (same puDst/uSrc/
   pfEFlags signature as the arithmetic ops, so the same emitter is reused). */
EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1315# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1316
1317
1318/*
1319 * Helpers for BSR and BSF.
1320 *
1321 * Note! "undefined" flags: OF, SF, AF, PF, CF.
1322 * Intel behavior modelled on 10980xe, AMD on 3990X. Other marchs may
1323 * produce different result (see https://www.sandpile.org/x86/flags.htm),
1324 * but we restrict ourselves to emulating these recent marchs.
1325 */
/* Fixed macro hygiene: the flags parameter was declared as 'pfEFlag' while the
   expansion referenced 'pfEFlags' - it only worked because every caller happens
   to pass a variable with that exact name.  Parameters are now referenced
   consistently (and parenthesized at use).  a_iBit is the 1-based bit index
   from ASMBitFirst/LastSet*, 0 meaning "no bit set". */
#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, a_iBit) do { \
        unsigned iBit = (a_iBit); \
        uint32_t fEfl = *(pfEFlags) & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
        if (iBit) \
        { \
            *(puDst) = --iBit; \
            fEfl |= g_afParity[iBit]; \
        } \
        else \
            fEfl |= X86_EFL_ZF | X86_EFL_PF; \
        *(pfEFlags) = fEfl; \
    } while (0)
#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, a_iBit) do { \
        unsigned const iBit = (a_iBit); \
        if (iBit) \
        { \
            *(puDst) = iBit - 1; \
            *(pfEFlags) &= ~X86_EFL_ZF; \
        } \
        else \
            *(pfEFlags) |= X86_EFL_ZF; \
    } while (0)
1348
1349
1350/*
1351 * BSF - first (least significant) bit set
1352 */
/* The no-suffix default mirrors the Intel flag behavior (same body as _intel). */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}
1367
1368# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1369
/* 32- and 16-bit BSF; no-suffix defaults mirror the Intel flag behavior. */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}
1400
1401# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1402
1403
1404/*
1405 * BSR - last (most significant) bit set
1406 */
/* The no-suffix default mirrors the Intel flag behavior (same body as _intel). */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}
1421
1422# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1423
/* 32- and 16-bit BSR; no-suffix defaults mirror the Intel flag behavior. */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}
1454
1455# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1456
1457
1458/*
1459 * XCHG
1460 */
1461
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
{
    /* Atomic 64-bit register/memory exchange (XCHG has an implicit LOCK). */
#if ARCH_BITS >= 64
    *puReg = ASMAtomicXchgU64(puMem, *puReg);
#else
    /* 32-bit host: no native 64-bit xchg, emulate via a cmpxchg retry loop. */
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
#endif
}
1473
1474# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1475
/* Atomic register/memory exchanges for the remaining widths. */
IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
{
    *puReg = ASMAtomicXchgU32(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
{
    *puReg = ASMAtomicXchgU16(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
{
    *puReg = ASMAtomicXchgU8(puMem, *puReg);
}
1492
1493# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1494
1495
1496/* Unlocked variants for fDisregardLock mode: */
1497
1498IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
1499{
1500 uint64_t const uOld = *puMem;
1501 *puMem = *puReg;
1502 *puReg = uOld;
1503}
1504
1505# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1506
1507IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
1508{
1509 uint32_t const uOld = *puMem;
1510 *puMem = *puReg;
1511 *puReg = uOld;
1512}
1513
1514
1515IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
1516{
1517 uint16_t const uOld = *puMem;
1518 *puMem = *puReg;
1519 *puReg = uOld;
1520}
1521
1522
1523IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
1524{
1525 uint8_t const uOld = *puMem;
1526 *puMem = *puReg;
1527 *puReg = uOld;
1528}
1529
1530# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1531
1532
1533/*
1534 * XADD and LOCK XADD.
1535 */
/** EMIT_XADD - instantiates the plain iemAImpl_xadd_uN worker (exchange then
 *  add, flags from the ADD) and the _locked variant, which retries an
 *  ADD + compare-exchange until the destination is updated atomically. */
#define EMIT_XADD(a_cBitsWidth, a_Type) \
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uDst = *puDst; \
    a_Type uResult = uDst; \
    iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
    *puDst = uResult; \
    *puReg = uDst; \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
    a_Type uResult; \
    uint32_t fEflTmp; \
    do \
    { \
        uResult = uOld; \
        fEflTmp = *pfEFlags; \
        iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
    } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
    *puReg = uOld; \
    *pfEFlags = fEflTmp; \
}
EMIT_XADD(64, uint64_t)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_XADD(32, uint32_t)
EMIT_XADD(16, uint16_t)
EMIT_XADD(8, uint8_t)
1565# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1566
1567#endif
1568
1569/*
1570 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
1571 *
1572 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
1573 * instructions are emulated as locked.
1574 */
1575#if defined(IEM_WITHOUT_ASSEMBLY)
1576
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    /* Atomically store uSrcReg into *pu8Dst if it equals AL; on mismatch the
       Ex-variant of the helper writes the value actually seen into *puAl.
       The trailing CMP then derives EFLAGS from original-AL vs current *puAl:
       equal on success, so ZF gets set exactly when the exchange happened. */
    uint8_t uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
        Assert(*puAl == uOld);
    iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    /* Same scheme as the u8 variant above, 16-bit operands. */
    uint16_t uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
        Assert(*puAx == uOld);
    iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    /* Same scheme as the u8 variant above, 32-bit operands. */
    uint32_t uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
        Assert(*puEax == uOld);
    iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
}
1602
1603
# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
    /* On 32-bit hosts the 64-bit source register is passed by reference. */
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
        Assert(*puRax == uOld);
    /* CMP of original RAX vs the value now in *puRax yields the EFLAGS
       (equal on success => ZF set). */
    iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
}
1618
1619
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    /* CMPXCHG8B: atomic 64-bit compare-exchange of ECX:EBX against EDX:EAX.
       On success ZF is set; on failure the observed destination value is left
       in EDX:EAX (written by the Ex helper) and ZF is cleared.  Only ZF is
       affected. */
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
1633
1634
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                    uint32_t *pEFlags))
{
    /* CMPXCHG16B: atomic 128-bit compare-exchange of RCX:RBX against RDX:RAX.
       ZF is set on success and cleared on failure; on failure the helper
       writes the observed destination value back into pu128RaxRdx. */
# ifdef VBOX_STRICT
    RTUINT128U const uOld = *pu128RaxRdx;
# endif
# if defined(RT_ARCH_AMD64)
    /* AMD64 uses the v2 helper taking the halves separately. */
    if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
                               &pu128RaxRdx->u))
# else
    if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
# endif
    {
        Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
# endif
1656
1657#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1658
# if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
                                                      PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
{
    /* Non-atomic CMPXCHG16B fallback for hosts without a native 128-bit
       compare-exchange.  NOTE(review): this read-compare-store sequence is
       NOT atomic; presumably callers serialize access by other means -
       confirm before relying on it concurrently. */
    RTUINT128U u128Tmp = *pu128Dst;
    if (   u128Tmp.s.Lo == pu128RaxRdx->s.Lo
        && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
    {
        *pu128Dst = *pu128RbxRcx;
        *pEFlags |= X86_EFL_ZF;
    }
    else
    {
        /* Failure: report the observed value back in RDX:RAX and clear ZF. */
        *pu128RaxRdx = u128Tmp;
        *pEFlags &= ~X86_EFL_ZF;
    }
}
#endif /* !RT_ARCH_ARM64 */
1677
1678#if defined(IEM_WITHOUT_ASSEMBLY)
1679
1680/* Unlocked versions mapped to the locked ones: */
1681
/* These forward straight to the locked workers: there are no non-atomic
   cmpxchg primitives, so the unlocked forms are emulated as locked. */
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
}
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
}
# endif


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
{
    iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                             uint32_t *pEFlags))
{
    iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
}
1724
1725#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1726
1727#if (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) \
1728 && !defined(DOXYGEN_RUNNING) /* Doxygen has some groking issues here and ends up mixing up input. Not worth tracking down now. */
1729
1730/*
1731 * MUL, IMUL, DIV and IDIV helpers.
1732 *
1733 * - The U64 versions must use 128-bit intermediates, so we need to abstract the
1734 * division step so we can select between using C operators and
1735 * RTUInt128DivRem/RTUInt128MulU64ByU64.
1736 *
1737 * - The U8 versions work returns output in AL + AH instead of xDX + xAX, with the
1738 * IDIV/DIV taking all the input in AX too. This means we have to abstract some
1739 * input loads and the result storing.
1740 */
1741
/** 128-bit by 64-bit unsigned division, producing a 128-bit quotient and
 *  remainder; wraps RTUInt128DivRem with the divisor widened to 128 bits. */
DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
{
# ifdef __GNUC__ /* Pre-zeroed to pacify GCC - presumably a bogus maybe-uninitialized warning; TODO confirm. */
    pQuotient->s.Lo = 0;
    pQuotient->s.Hi = 0;
# endif
    RTUINT128U Divisor;
    Divisor.s.Lo = u64Divisor;
    Divisor.s.Hi = 0;
    RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
}
1753
/* Load/store and arithmetic-step adapters plugged into the EMIT_MUL/EMIT_DIV
   style templates below.  The _U8 variants pack/unpack AL and AH from the
   single AX word that the 8-bit forms of MUL/DIV use.
   Fixed: the remainder parameter was misspelled 'a_uReminder' (macro-local
   rename, callers pass positionally and are unaffected). */
# define DIV_LOAD(a_Dividend) \
    a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
# define DIV_LOAD_U8(a_Dividend) \
    a_Dividend.u = *puAX

# define DIV_STORE(a_Quotient, a_uRemainder)    *puA  = (a_Quotient), *puD = (a_uRemainder)
# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)

# define MUL_LOAD_F1()                          *puA
# define MUL_LOAD_F1_U8()                       ((uint8_t)*puAX)

# define MUL_STORE(a_Result)                    *puA  = (a_Result).s.Lo, *puD = (a_Result).s.Hi
# define MUL_STORE_U8(a_Result)                 *puAX = a_Result.u

# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
    (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
    RTUInt128AssignNeg(&(a_Value))

# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
    RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);

# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
    a_Remainder.u = (a_Dividend).u % (a_uDivisor)
# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
    RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
1783
1784
1785/*
1786 * MUL
1787 */
/**
 * Generates one unsigned widening MUL worker.
 *
 * Stores the double-width product via a_fnStore and always returns 0 (MUL
 * never raises \#DE).  CF and OF are architecturally defined: set iff the
 * high half of the product is non-zero.  SF/ZF/AF/PF are "undefined"; the
 * a_fIntelFlags parameter selects between the two observed behaviors:
 * Intel recalculates SF and PF from the low half and clears AF/ZF, while
 * AMD leaves everything but CF/OF untouched.
 */
# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
    a_fnStore(Result); \
    \
    /* Calc EFLAGS: */ \
    uint32_t fEfl = *pfEFlags; \
    if (a_fIntelFlags) \
    { /* Intel: 6700K and 10980XE behavior */ \
        fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
    } \
    else \
    { /* AMD: 3990X */ \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
        else \
            fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
    } \
    *pfEFlags = fEfl; \
    return 0; \
} \
1816
/* Emits the regular (Intel-flag) worker plus the explicit _intel and _amd
   flavors for one operand width. */
# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
/* The 64-bit variant is always instantiated here; 32/16/8-bit only when the
   x86 assembly implementations are unavailable (see the #if below). */
EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
         MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
         MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
1834
1835
1836/*
1837 * IMUL
1838 *
1839 * The SF, ZF, AF and PF flags are "undefined". AMD (3990x) leaves these
1840 * flags as is. Whereas Intel skylake (6700K and 10980X (Cascade Lake)) always
1841 * clear AF and ZF and calculates SF and PF as per the lower half of the result.
1842 */
/**
 * Generates one signed widening IMUL worker.
 *
 * The signed multiply is built on the unsigned a_fnMul helper: a negative
 * factor is negated up front, and the double-width product is negated again
 * afterwards when exactly one factor was negative.
 *
 * CF/OF are set when the product does not fit in the signed lower half.
 * For a non-negative product the limit is 2^(width-1)-1, hence the >= test;
 * a negative product may reach magnitude 2^(width-1), hence the > test.
 *
 * Always returns 0 (IMUL never faults).  a_fIntelFlags selects how the
 * undefined SF/ZF/AF/PF flags are treated (see section comment above).
 */
# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
                         a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
    \
    uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
    if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
    { \
        if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
        { \
            /* Positive * positive => positive product. */ \
            a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
        } \
        else \
        { \
            /* Positive * negative => negative product. */ \
            uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
            a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
            a_fnNeg(Result, a_cBitsWidth2x); \
        } \
    } \
    else \
    { \
        if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
        { \
            /* Negative * positive => negative product. */ \
            uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
            a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
            a_fnNeg(Result, a_cBitsWidth2x); \
        } \
        else \
        { \
            /* Negative * negative => positive product. */ \
            uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
            uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
            a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
            if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
                fEfl |= X86_EFL_CF | X86_EFL_OF; \
        } \
    } \
    a_fnStore(Result); \
    \
    if (a_fIntelFlags) \
    { \
        /* Intel recomputes SF/PF from the low half and clears AF/ZF. */ \
        fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
    } \
    *pfEFlags = fEfl; \
    return 0; \
}
/* Emits the regular (Intel-flag) worker plus the explicit _intel and _amd
   flavors for one operand width. */
# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
          MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
1916
1917
1918/*
1919 * IMUL with two operands are mapped onto the three operand variant, ignoring
1920 * the high part of the product.
1921 */
/**
 * Emits the two-operand IMUL wrappers (regular, _intel and _amd), each of
 * which simply forwards to the matching three-operand worker and discards
 * the high half of the product via a local dummy variable.
 */
# define EMIT_IMUL_TWO(a_cBits, a_uType) \
IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
{ \
    a_uType uIgn; \
    iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
{ \
    a_uType uIgn; \
    iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
{ \
    a_uType uIgn; \
    iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
}

/* No 8-bit variant: two-operand IMUL only exists for 16/32/64-bit operands. */
EMIT_IMUL_TWO(64, uint64_t)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_IMUL_TWO(32, uint32_t)
EMIT_IMUL_TWO(16, uint16_t)
# endif
1946
1947
1948/*
1949 * DIV
1950 */
/**
 * Generates one unsigned DIV worker.
 *
 * Divides the double-width value loaded by a_fnLoad by uDivisor.  The single
 * test Dividend.s.Hi < uDivisor rejects both division by zero and any
 * quotient that would not fit in the lower half.  Returns 0 on success and
 * -1 when the caller must raise \#DE.
 *
 * EFLAGS are undefined for DIV: Intel leaves them alone, AMD sets AF and
 * clears PF/ZF/SF (selected via a_fIntelFlags).
 */
# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
                        a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Dividend; \
    a_fnLoad(Dividend); \
    if (   uDivisor != 0 \
        && Dividend.s.Hi < uDivisor) \
    { \
        RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
        a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
        a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
        \
        /* Calc EFLAGS: Intel 6700K and 10980XE leaves them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
        if (!a_fIntelFlags) \
            *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
        return 0; \
    } \
    /* #DE */ \
    return -1; \
}
/* Emits the regular (Intel-flag) worker plus the explicit _intel and _amd
   flavors for one operand width. */
# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
         DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
         DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
         DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
         DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
1989
1990
1991/*
1992 * IDIV
1993 *
1994 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
1995 * set AF and clear PF, ZF and SF just like it does for DIV.
1996 *
1997 */
/**
 * Generates one signed IDIV worker.
 *
 * Strategy: negate a negative dividend/divisor so the unsigned a_fnDivRem
 * helper can be used, then negate quotient and/or remainder back according
 * to the sign rules (quotient negative iff the operand signs differ, the
 * remainder takes the sign of the dividend).
 *
 * Each of the four sign cases checks that the quotient fits the signed
 * destination: up to 2^(width-1)-1 for a positive quotient, up to magnitude
 * 2^(width-1) for a negative one.  Returns 0 on success, -1 for \#DE
 * (divide by zero or overflow).
 */
# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
                         a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    /* Note! Skylake leaves all flags alone. */ \
    \
    /** @todo overflow checks */ \
    if (uDivisor != 0) \
    { \
        /* \
         * Convert to unsigned division. \
         */ \
        RTUINT ## a_cBitsWidth2x ## U Dividend; \
        a_fnLoad(Dividend); \
        bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
        if (fSignedDividend) \
            a_fnNeg(Dividend, a_cBitsWidth2x); \
        \
        uint ## a_cBitsWidth ## _t uDivisorPositive; \
        if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
            uDivisorPositive = uDivisor; \
        else \
            uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
        \
        RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
        a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
        \
        /* \
         * Setup the result, checking for overflows. \
         */ \
        if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
        { \
            if (!fSignedDividend) \
            { \
                /* Positive divisor, positive dividend => result positive. */ \
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
                { \
                    a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
                    if (!a_fIntelFlags) \
                        *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
                    return 0; \
                } \
            } \
            else \
            { \
                /* Positive divisor, negative dividend => result negative. */ \
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
                { \
                    a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
                    if (!a_fIntelFlags) \
                        *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
                    return 0; \
                } \
            } \
        } \
        else \
        { \
            if (!fSignedDividend) \
            { \
                /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
                { \
                    a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
                    if (!a_fIntelFlags) \
                        *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
                    return 0; \
                } \
            } \
            else \
            { \
                /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
                if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
                { \
                    a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
                    if (!a_fIntelFlags) \
                        *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
                    return 0; \
                } \
            } \
        } \
    } \
    /* #DE */ \
    return -1; \
}
/* Emits the regular (Intel-flag) worker plus the explicit _intel and _amd
   flavors for one operand width. */
# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
    EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
    EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
    EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)

# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
          DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
          DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
          DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
          DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
# endif /* !DOXYGEN_RUNNING */
2099
2100#endif /* (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) && !defined(DOXYGEN_RUNNING) */
2101
2102
2103/*********************************************************************************************************************************
2104* Unary operations. *
2105*********************************************************************************************************************************/
2106#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2107
2108/** @def IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC
2109 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2110 *
2111 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2112 * borrowing in arithmetic loops on intel 8008).
2113 *
2114 * @returns Status bits.
2115 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2116 * @param a_uResult Unsigned result value.
2117 * @param a_uDst The original destination value (for AF calc).
2118 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2119 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2120 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; /* note the precedence: clears all status bits EXCEPT CF */ \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; /* AF = bit 4 changed, i.e. carry/borrow out of the low nibble */ \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        /* OF: INC overflows only on max-signed -> min-signed, DEC only on min-signed -> max-signed. */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
                                                  : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2133
2134/*
2135 * INC
2136 */
2137
2138IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2139{
2140 uint64_t uDst = *puDst;
2141 uint64_t uResult = uDst + 1;
2142 *puDst = uResult;
2143 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2144}
2145
2146# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2147
2148IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2149{
2150 uint32_t uDst = *puDst;
2151 uint32_t uResult = uDst + 1;
2152 *puDst = uResult;
2153 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2154}
2155
2156
2157IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2158{
2159 uint16_t uDst = *puDst;
2160 uint16_t uResult = uDst + 1;
2161 *puDst = uResult;
2162 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2163}
2164
2165IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2166{
2167 uint8_t uDst = *puDst;
2168 uint8_t uResult = uDst + 1;
2169 *puDst = uResult;
2170 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2171}
2172
2173# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2174
2175
2176/*
2177 * DEC
2178 */
2179
2180IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2181{
2182 uint64_t uDst = *puDst;
2183 uint64_t uResult = uDst - 1;
2184 *puDst = uResult;
2185 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*INC*/);
2186}
2187
2188# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2189
2190IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2191{
2192 uint32_t uDst = *puDst;
2193 uint32_t uResult = uDst - 1;
2194 *puDst = uResult;
2195 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*INC*/);
2196}
2197
2198
2199IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2200{
2201 uint16_t uDst = *puDst;
2202 uint16_t uResult = uDst - 1;
2203 *puDst = uResult;
2204 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*INC*/);
2205}
2206
2207
2208IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2209{
2210 uint8_t uDst = *puDst;
2211 uint8_t uResult = uDst - 1;
2212 *puDst = uResult;
2213 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*INC*/);
2214}
2215
2216# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2217
2218
2219/*
2220 * NOT
2221 */
2222
2223IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2224{
2225 uint64_t uDst = *puDst;
2226 uint64_t uResult = ~uDst;
2227 *puDst = uResult;
2228 /* EFLAGS are not modified. */
2229 RT_NOREF_PV(pfEFlags);
2230}
2231
2232# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2233
2234IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2235{
2236 uint32_t uDst = *puDst;
2237 uint32_t uResult = ~uDst;
2238 *puDst = uResult;
2239 /* EFLAGS are not modified. */
2240 RT_NOREF_PV(pfEFlags);
2241}
2242
2243IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2244{
2245 uint16_t uDst = *puDst;
2246 uint16_t uResult = ~uDst;
2247 *puDst = uResult;
2248 /* EFLAGS are not modified. */
2249 RT_NOREF_PV(pfEFlags);
2250}
2251
2252IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2253{
2254 uint8_t uDst = *puDst;
2255 uint8_t uResult = ~uDst;
2256 *puDst = uResult;
2257 /* EFLAGS are not modified. */
2258 RT_NOREF_PV(pfEFlags);
2259}
2260
2261# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2262
2263
2264/*
2265 * NEG
2266 */
2267
2268/**
2269 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for an NEG instruction.
2270 *
2271 * @returns Status bits.
2272 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2273 * @param a_uResult Unsigned result value.
2274 * @param a_uDst The original destination value (for AF calc).
2275 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2276 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; /* clear all status bits */ \
        fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; /* CF is set unless the operand was zero */ \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; /* AF = bit 4 changed (borrow out of the low nibble) */ \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); /* OF only when negating the minimum signed value */ \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
2289
2290IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2291{
2292 uint64_t uDst = *puDst;
2293 uint64_t uResult = (uint64_t)0 - uDst;
2294 *puDst = uResult;
2295 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2296}
2297
2298# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2299
2300IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2301{
2302 uint32_t uDst = *puDst;
2303 uint32_t uResult = (uint32_t)0 - uDst;
2304 *puDst = uResult;
2305 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2306}
2307
2308
2309IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2310{
2311 uint16_t uDst = *puDst;
2312 uint16_t uResult = (uint16_t)0 - uDst;
2313 *puDst = uResult;
2314 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2315}
2316
2317
2318IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2319{
2320 uint8_t uDst = *puDst;
2321 uint8_t uResult = (uint8_t)0 - uDst;
2322 *puDst = uResult;
2323 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2324}
2325
2326# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2327
2328/*
2329 * Locked variants.
2330 */
2331
/**
 * Emit a function for doing a locked unary operand operation.
 *
 * The LOCK-prefixed form is built on the plain C worker using a
 * compare-exchange retry loop: the worker runs on a local copy of the
 * operand and the result is only published when *puDst still holds the
 * value read at the start of the iteration (ASMAtomicCmpXchgExU* refreshes
 * uOld with the current value on failure).  EFLAGS are recomputed from the
 * caller's original *pfEFlags on every retry and written back exactly once,
 * after the exchange has succeeded.
 */
# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    }
2348
/* Locked variants of INC, DEC, NOT and NEG for each operand width. */
EMIT_LOCKED_UNARY_OP(inc, 64)
EMIT_LOCKED_UNARY_OP(dec, 64)
EMIT_LOCKED_UNARY_OP(not, 64)
EMIT_LOCKED_UNARY_OP(neg, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_UNARY_OP(inc, 32)
EMIT_LOCKED_UNARY_OP(dec, 32)
EMIT_LOCKED_UNARY_OP(not, 32)
EMIT_LOCKED_UNARY_OP(neg, 32)

EMIT_LOCKED_UNARY_OP(inc, 16)
EMIT_LOCKED_UNARY_OP(dec, 16)
EMIT_LOCKED_UNARY_OP(not, 16)
EMIT_LOCKED_UNARY_OP(neg, 16)

EMIT_LOCKED_UNARY_OP(inc, 8)
EMIT_LOCKED_UNARY_OP(dec, 8)
EMIT_LOCKED_UNARY_OP(not, 8)
EMIT_LOCKED_UNARY_OP(neg, 8)
# endif
2369
2370#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2371
2372
2373/*********************************************************************************************************************************
2374* Shifting and Rotating *
2375*********************************************************************************************************************************/
2376
2377/*
2378 * ROL
2379 */
/**
 * Generates one ROL worker.
 *
 * The count is first masked with 31 (63 for the 64-bit variant) like the
 * hardware count mask; flags are only touched when that masked count is
 * non-zero.  For sub-32-bit widths the count is then further reduced modulo
 * the operand width before rotating (so e.g. rol r16, 16 leaves the value
 * unchanged but still updates CF/OF).  CF receives the bit rotated into
 * position 0.
 */
#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (cShift) \
    { \
        if (a_cBitsWidth < 32) \
            cShift &= a_cBitsWidth - 1; \
        a_uType const uDst = *puDst; \
        a_uType const uResult = a_fnHlp(uDst, cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uResult & X86_EFL_CF); \
        fEfl |= fCarry; \
        if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
            fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
        else                /* Intel 10980XE: According to the first sub-shift: */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
        *pfEFlags = fEfl; \
    } \
}
2406
/* Plain worker only when no host assembly implementation; _intel/_amd always in C. */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
#endif
EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
#endif
EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2418
2419DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2420{
2421 return (uValue << cShift) | (uValue >> (16 - cShift));
2422}
/* 16-bit ROL flavors (plain worker only without host assembly). */
#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
#endif
EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2428
2429DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2430{
2431 return (uValue << cShift) | (uValue >> (8 - cShift));
2432}
/* 8-bit ROL flavors (plain worker only without host assembly). */
#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
#endif
EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
2438
2439
2440/*
2441 * ROR
2442 */
/**
 * Generates one ROR worker.
 *
 * Count handling mirrors EMIT_ROL: hardware-style 5/6-bit mask first, flags
 * only updated for a non-zero masked count, then a modulo-width reduction
 * for the sub-32-bit variants.  CF receives the bit rotated into the most
 * significant position.  OF is undefined for counts > 1; AMD derives it
 * from the top two bits of the result (last sub-shift), Intel from the
 * original value (first sub-shift).
 */
#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (cShift) \
    { \
        if (a_cBitsWidth < 32) \
            cShift &= a_cBitsWidth - 1; \
        a_uType const uDst = *puDst; \
        a_uType const uResult = a_fnHlp(uDst, cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS: */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
            fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
        else                /* Intel 10980XE: According to the first sub-shift: */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
        *pfEFlags = fEfl; \
    } \
}
2468
/* Plain worker only when no host assembly implementation; _intel/_amd always in C. */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
#endif
EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
#endif
EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2480
2481DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2482{
2483 return (uValue >> cShift) | (uValue << (16 - cShift));
2484}
/* 16-bit ROR flavors (plain worker only without host assembly). */
#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
#endif
EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2490
2491DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2492{
2493 return (uValue >> cShift) | (uValue << (8 - cShift));
2494}
/* 8-bit ROR flavors (plain worker only without host assembly). */
#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
#endif
EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2500
2501
2502/*
2503 * RCL
2504 */
/**
 * Generates one RCL worker (rotate through carry, left).
 *
 * The rotation is effectively a_cBitsWidth + 1 bits wide since CF takes
 * part, which is why the count is reduced modulo width+1 for the 8/16-bit
 * variants (Intel does this before the zero test, AMD after it - matching
 * observed hardware behavior for how a multiple-of-(width+1) count affects
 * the flags).
 *
 * Fix: the AMD-side modulo can reduce cShift back to zero (e.g. rcl al, 9
 * or rcl ax, 17); the incoming-carry merge must then be skipped - the old
 * code unconditionally evaluated fInCarry << (cShift - 1), i.e. a shift by
 * -1 (undefined behavior), and would have corrupted the unchanged result.
 * The fOutCarry calculation below already anticipated this case via the
 * fInCarry fallback.
 *
 * OF is undefined for counts > 1; AMD derives it from the last sub-shift,
 * Intel from the first.
 */
#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (a_cBitsWidth < 32 && a_fIntelFlags) \
        cShift %= a_cBitsWidth + 1; \
    if (cShift) \
    { \
        if (a_cBitsWidth < 32 && !a_fIntelFlags) \
            cShift %= a_cBitsWidth + 1; \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst << cShift; \
        if (cShift > 1) \
            uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
        \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        uint32_t fInCarry = fEfl & X86_EFL_CF; \
        /* The AMD modulo above may have zeroed cShift again; shifting by \
           (cShift - 1) would then be undefined and the incoming carry must \
           not be merged into the (unchanged) result. */ \
        if (cShift) \
            uResult |= (a_uType)fInCarry << (cShift - 1); \
        \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
                                 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
        fEfl |= fOutCarry; \
        if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
            fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
        else                /* Intel 10980XE: According to the first sub-shift: */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
        *pfEFlags = fEfl; \
    } \
}
2539
/* RCL flavors for each width (plain worker only without host assembly). */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
#endif
EMIT_RCL(64, uint64_t, _intel, 1)
EMIT_RCL(64, uint64_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
#endif
EMIT_RCL(32, uint32_t, _intel, 1)
EMIT_RCL(32, uint32_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
#endif
EMIT_RCL(16, uint16_t, _intel, 1)
EMIT_RCL(16, uint16_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
#endif
EMIT_RCL(8, uint8_t, _intel, 1)
EMIT_RCL(8, uint8_t, _amd, 0)
2563
2564
2565/*
2566 * RCR
2567 */
/**
 * Generates one RCR worker (rotate through carry, right).
 *
 * The rotation is effectively a_cBitsWidth + 1 bits wide since CF takes
 * part, hence the modulo width+1 count reduction for the 8/16-bit variants
 * (Intel before the zero test, AMD after it - matching observed hardware).
 * Note that the AMD reduction can bring cShift back to zero; that case is
 * harmless here: the fInCarry merge then shifts the bit past the top of
 * a_uType (at promoted-int width, so no UB) and it truncates away, and the
 * fOutCarry ternary falls back to the unchanged incoming carry.
 */
#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (a_cBitsWidth < 32 && a_fIntelFlags) \
        cShift %= a_cBitsWidth + 1; \
    if (cShift) \
    { \
        if (a_cBitsWidth < 32 && !a_fIntelFlags) \
            cShift %= a_cBitsWidth + 1; \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst >> cShift; \
        if (cShift > 1) \
            uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
        \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags; \
        uint32_t fInCarry = fEfl & X86_EFL_CF; \
        uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. The OF bit is undefined if cShift > 1, we implement \
           it the same way as for 1 bit shifts. */ \
        fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
        uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
                                 ? (uDst >> (cShift - 1)) & X86_EFL_CF : fInCarry; \
        fEfl |= fOutCarry; \
        if (!a_fIntelFlags) /* AMD 3990X: XOR two most significant bits of the result: */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
        else                /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
            fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
        *pfEFlags = fEfl; \
    } \
}
2602
2603#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2604EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2605#endif
2606EMIT_RCR(64, uint64_t, _intel, 1)
2607EMIT_RCR(64, uint64_t, _amd, 0)
2608
2609#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2610EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2611#endif
2612EMIT_RCR(32, uint32_t, _intel, 1)
2613EMIT_RCR(32, uint32_t, _amd, 0)
2614
2615#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2616EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2617#endif
2618EMIT_RCR(16, uint16_t, _intel, 1)
2619EMIT_RCR(16, uint16_t, _amd, 0)
2620
2621#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2622EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2623#endif
2624EMIT_RCR(8, uint8_t, _intel, 1)
2625EMIT_RCR(8, uint8_t, _amd, 0)
2626
2627
/*
 * SHL - shift left.
 *
 * The count is masked to 5 bits (6 for 64-bit), so the 8/16-bit variants
 * can see effective counts larger than the operand width.  A zero count
 * leaves both the destination and all flags untouched.
 */
#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (cShift) \
    { \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst << cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        /* NOTE(review): for the 8/16-bit variants a count greater than the \
           operand width makes (a_cBitsWidth - cShift) negative, which is an \
           undefined shift count in C - confirm intended CF for such counts. */ \
        uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
        fEfl |= fCarry; \
        if (!a_fIntelFlags) \
            fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
        else \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        if (!a_fIntelFlags) \
            fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
        *pfEFlags = fEfl; \
    } \
}

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
#endif
EMIT_SHL(64, uint64_t, _intel, 1)
EMIT_SHL(64, uint64_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
#endif
EMIT_SHL(32, uint32_t, _intel, 1)
EMIT_SHL(32, uint32_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
#endif
EMIT_SHL(16, uint16_t, _intel, 1)
EMIT_SHL(16, uint16_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
#endif
EMIT_SHL(8, uint8_t, _intel, 1)
EMIT_SHL(8, uint8_t, _amd, 0)
2682
2683
/*
 * SHR - logical shift right.
 *
 * CF is the last bit shifted out; OF reflects the original sign bit (per
 * the documented 1-bit behaviour; vendor differences for larger counts are
 * noted inline).  A zero count (after masking) changes nothing.
 */
#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (cShift) \
    { \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; /* CF = last bit shifted out */ \
        if (a_fIntelFlags || cShift == 1) /* AMD 3990x does what intel documents; Intel 10980XE does this for all shift counts. */ \
            fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        if (!a_fIntelFlags) \
            fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
        *pfEFlags = fEfl; \
    } \
}

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
#endif
EMIT_SHR(64, uint64_t, _intel, 1)
EMIT_SHR(64, uint64_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
#endif
EMIT_SHR(32, uint32_t, _intel, 1)
EMIT_SHR(32, uint32_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
#endif
EMIT_SHR(16, uint16_t, _intel, 1)
EMIT_SHR(16, uint16_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
#endif
EMIT_SHR(8, uint8_t, _intel, 1)
EMIT_SHR(8, uint8_t, _amd, 0)
2735
2736
/*
 * SAR - arithmetic shift right.
 *
 * NOTE(review): relies on the compiler implementing '>>' on negative
 * signed values as an arithmetic shift (implementation-defined in C, but
 * true for all compilers this code targets) - worth confirming if a new
 * toolchain is added.
 */
#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
    if (cShift) \
    { \
        a_iType const iDst = (a_iType)*puDst; \
        a_uType uResult = iDst >> cShift; \
        *puDst = uResult; \
        \
        /* Calc EFLAGS. \
           Note! The OF flag is always zero because the result never differs from the input. */ \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; /* CF = last bit shifted out */ \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        if (!a_fIntelFlags) \
            fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
        *pfEFlags = fEfl; \
    } \
}

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
#endif
EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
EMIT_SAR(64, uint64_t, int64_t, _amd, 0)

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
#endif
EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
EMIT_SAR(32, uint32_t, int32_t, _amd, 0)

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
#endif
EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
EMIT_SAR(16, uint16_t, int16_t, _amd, 0)

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
#endif
EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
2787
2788
/*
 * SHLD - double precision shift left (64/32-bit widths; 16-bit has its own
 * macro below because of the odd 5-bit count mask).
 *
 * - CF is the last bit shifted out of puDst.
 * - AF is always cleared by Intel 10980XE.
 * - AF is always set by AMD 3990X.
 * - OF is set according to the first shift on Intel 10980XE, it seems.
 * - OF is set according to the last sub-shift on AMD 3990X.
 * - ZF, SF and PF are calculated according to the result by both vendors.
 *
 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
 * pick either the source register or the destination register for input bits
 * when going beyond 16.  According to https://www.sandpile.org/x86/flags.htm
 * intel has changed behaviour here several times.  We implement what current
 * skylake based does for now, we can extend this later as needed.
 */
#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
                                                                           uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst << cShift; \
        uResult |= uSrc >> (a_cBitsWidth - cShift); /* bits from uSrc fill the vacated low positions */ \
        *puDst = uResult; \
        \
        /* CALC EFLAGS: */ \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        if (a_fIntelFlags) \
            /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
        else \
        { /* AMD 3990X: Set according to last shift. AF always set. */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
            fEfl |= X86_EFL_AF; \
        } \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
        fEfl |= g_afParity[uResult & 0xff]; \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        *pfEFlags = fEfl; \
    } \
}

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
#endif
EMIT_SHLD(64, uint64_t, _intel, 1)
EMIT_SHLD(64, uint64_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
#endif
EMIT_SHLD(32, uint32_t, _intel, 1)
EMIT_SHLD(32, uint32_t, _amd, 0)
2847
/* 16-bit SHLD: the count is masked to 5 bits, so up to 31 bits can be
   shifted.  A 48-bit composite operand (uDst:uSrc:uDst on Intel,
   uDst:uSrc:uSrc on AMD) models which register supplies bits beyond the
   first 16 positions; see the comment block above EMIT_SHLD. */
#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= 31; \
    if (cShift) \
    { \
        uint16_t const uDst    = *puDst; \
        uint64_t const uTmp    = a_fIntelFlags \
                               ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
                               : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
        uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
        *puDst = uResult; \
        \
        /* CALC EFLAGS: */ \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        if (a_fIntelFlags) \
        { \
            fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
            /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
            fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
        } \
        else \
        { \
            /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
            if (cShift < 16) \
            { \
                fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
                fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
            } \
            else \
            { \
                if (cShift == 16) \
                    fEfl |= uDst & X86_EFL_CF; \
                fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
            } \
            fEfl |= X86_EFL_AF; \
        } \
        fEfl |= g_afParity[uResult & 0xff]; \
        fEfl |= X86_EFL_CALC_SF(uResult, 16); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        *pfEFlags = fEfl; \
    } \
}

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHLD_16(RT_NOTHING, 1)
#endif
EMIT_SHLD_16(_intel, 1)
EMIT_SHLD_16(_amd, 0)
2898
2899
/*
 * SHRD - double precision shift right (64/32-bit widths; 16-bit variant below).
 *
 * EFLAGS behaviour seems to be the same as with SHLD:
 * - CF is the last bit shifted out of puDst.
 * - AF is always cleared by Intel 10980XE.
 * - AF is always set by AMD 3990X.
 * - OF is set according to the first shift on Intel 10980XE, it seems.
 * - OF is set according to the last sub-shift on AMD 3990X.
 * - ZF, SF and PF are calculated according to the result by both vendors.
 *
 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
 * pick either the source register or the destination register for input bits
 * when going beyond 16.  According to https://www.sandpile.org/x86/flags.htm
 * intel has changed behaviour here several times.  We implement what current
 * skylake based does for now, we can extend this later as needed.
 */
#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= a_cBitsWidth - 1; \
    if (cShift) \
    { \
        a_uType const uDst = *puDst; \
        a_uType uResult = uDst >> cShift; \
        uResult |= uSrc << (a_cBitsWidth - cShift); /* bits from uSrc fill the vacated high positions */ \
        *puDst = uResult; \
        \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; /* CF = last bit shifted out of uDst */ \
        if (a_fIntelFlags) \
            /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
        else \
        { /* AMD 3990X: Set according to last shift. AF always set. */ \
            if (cShift > 1) /* Set according to last shift. */ \
                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
            else \
                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
            fEfl |= X86_EFL_AF; \
        } \
        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}

#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
#endif
EMIT_SHRD(64, uint64_t, _intel, 1)
EMIT_SHRD(64, uint64_t, _amd, 0)

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
#endif
EMIT_SHRD(32, uint32_t, _intel, 1)
EMIT_SHRD(32, uint32_t, _amd, 0)
2960
/* 16-bit SHRD: like EMIT_SHLD_16, a 48-bit composite operand
   (uDst:uSrc:uDst on Intel, uSrc:uSrc:uDst on AMD, low word first) models
   where input bits come from for counts beyond 16. */
#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
{ \
    cShift &= 31; \
    if (cShift) \
    { \
        uint16_t const uDst    = *puDst; \
        uint64_t const uTmp    = a_fIntelFlags \
                               ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
                               : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
        uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
        *puDst = uResult; \
        \
        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
        AssertCompile(X86_EFL_CF_BIT == 0); \
        if (a_fIntelFlags) \
        { \
            /* Intel 10980XE: The CF is the last shifted out of the combined uTmp operand. */ \
            fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
            /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
            fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
        } \
        else \
        { \
            /* AMD 3990X: CF flag seems to be last bit shifted out of uDst, not the combined uSrc:uSrc:uDst operand. */ \
            fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
            /* AMD 3990X: Set according to last shift. AF always set. */ \
            if (cShift > 1) /* Set according to last shift. */ \
                fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
            else \
                fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
            fEfl |= X86_EFL_AF; \
        } \
        fEfl |= X86_EFL_CALC_SF(uResult, 16); \
        fEfl |= X86_EFL_CALC_ZF(uResult); \
        fEfl |= g_afParity[uResult & 0xff]; \
        *pfEFlags = fEfl; \
    } \
}

#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_SHRD_16(RT_NOTHING, 1)
#endif
EMIT_SHRD_16(_intel, 1)
EMIT_SHRD_16(_amd, 0)
3006
3007
3008#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
3009
3010# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
3011/*
3012 * BSWAP
3013 */
3014
3015IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
3016{
3017 *puDst = ASMByteSwapU64(*puDst);
3018}
3019
3020
3021IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
3022{
3023 *puDst = ASMByteSwapU32(*puDst);
3024}
3025
3026
/* Note! Undocumented encoding (BSWAP with a 16-bit operand), hence the 32-bit arg. */
IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
{
#if 0
    /* The naive interpretation - swap the two bytes of the low word: */
    *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
#else
    /* This is the behaviour AMD 3990x (64-bit mode): the low word is zeroed. */
    *(uint16_t *)puDst = 0;
#endif
}
3037
3038# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
3039
3040
3041
3042# if defined(IEM_WITHOUT_ASSEMBLY)
3043
3044/*
3045 * LFENCE, SFENCE & MFENCE.
3046 */
3047
/** LFENCE: implemented with the portable read (acquire) fence. */
IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
{
    ASMReadFence();
}
3052
3053
/** SFENCE: implemented with the portable write (release) fence. */
IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
{
    ASMWriteFence();
}
3058
3059
/** MFENCE: implemented with the portable full memory fence. */
IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
{
    ASMMemoryFence();
}
3064
3065
# ifndef RT_ARCH_ARM64
/** Alternative memory fence; plain full fence here.  ARM64 presumably gets
 *  a dedicated implementation elsewhere, hence the guard. */
IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
{
    ASMMemoryFence();
}
# endif
3072
3073# endif
3074
3075#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
3076
3077
3078IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
3079{
3080 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
3081 {
3082 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
3083 *pu16Dst |= u16Src & X86_SEL_RPL;
3084
3085 *pfEFlags |= X86_EFL_ZF;
3086 }
3087 else
3088 *pfEFlags &= ~X86_EFL_ZF;
3089}
3090
3091
3092#if defined(IEM_WITHOUT_ASSEMBLY)
3093
3094/*********************************************************************************************************************************
3095* x87 FPU Loads *
3096*********************************************************************************************************************************/
3097
/**
 * FLD: loads a 32-bit (single precision) float, widening it to 80 bits.
 *
 * Normals convert losslessly (exponent re-biased, fraction widened);
 * denormals are normalized and raise \#DE; NaNs are quieted (IE raised for
 * signalling ones); zeros and infinities map over directly.  TOP in the
 * returned FSW is hardcoded to 7 to match the assembly version.
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
{
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
    if (RTFLOAT32U_IS_NORMAL(pr32Val))
    {
        /* Lossless widening: shift the 23-bit fraction up and re-bias the exponent. */
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
        pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
        Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
    }
    else if (RTFLOAT32U_IS_ZERO(pr32Val))
    {
        pFpuRes->r80Result.s.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = 0;
        pFpuRes->r80Result.s.uMantissa = 0;
        Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
    }
    else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
    {
        /* Subnormal values gets normalized. */
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        /* cExtraShift = number of leading zero fraction bits; +1 drops the
           (implicit) leading set bit which becomes the J/integer bit. */
        unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
        pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
        pFpuRes->FSW |= X86_FSW_DE;
        if (!(pFpuState->FCW & X86_FCW_DM))
            pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
    }
    else if (RTFLOAT32U_IS_INF(pr32Val))
    {
        pFpuRes->r80Result.s.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
        Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
    }
    else
    {
        /* Signalling and quiet NaNs, both turn into quiet ones when loaded (weird). */
        Assert(RTFLOAT32U_IS_NAN(pr32Val));
        pFpuRes->r80Result.sj64.fSign     = pr32Val->s.fSign;
        pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
        if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
        {
            pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
            pFpuRes->FSW |= X86_FSW_IE;

            if (!(pFpuState->FCW & X86_FCW_IM))
            {
                /* The value is not pushed. */
                pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
                pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
                pFpuRes->r80Result.au64[0] = 0;
                pFpuRes->r80Result.au16[4] = 0;
            }
        }
        else
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
    }
}
3165
3166
/**
 * FLD: loads a 64-bit (double precision) float, widening it to 80 bits.
 *
 * Same conversion rules as iemAImpl_fld_r80_from_r32: normals are lossless,
 * denormals are normalized and raise \#DE, NaNs are quieted (IE for
 * signalling ones), zeros and infinities map over directly.
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
{
    pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
    if (RTFLOAT64U_IS_NORMAL(pr64Val))
    {
        /* Lossless widening: shift the 52-bit fraction up and re-bias the exponent. */
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
        pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
        Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
    }
    else if (RTFLOAT64U_IS_ZERO(pr64Val))
    {
        pFpuRes->r80Result.s.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = 0;
        pFpuRes->r80Result.s.uMantissa = 0;
        Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
    }
    else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
    {
        /* Subnormal values gets normalized. */
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        /* cExtraShift = number of leading zero fraction bits; +1 drops the
           (implicit) leading set bit which becomes the J/integer bit. */
        unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction
                                          << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
        pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
        pFpuRes->FSW |= X86_FSW_DE;
        if (!(pFpuState->FCW & X86_FCW_DM))
            pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
    }
    else if (RTFLOAT64U_IS_INF(pr64Val))
    {
        pFpuRes->r80Result.s.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
        Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
    }
    else
    {
        /* Signalling and quiet NaNs, both turn into quiet ones when loaded (weird). */
        Assert(RTFLOAT64U_IS_NAN(pr64Val));
        pFpuRes->r80Result.sj64.fSign     = pr64Val->s.fSign;
        pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
        pFpuRes->r80Result.sj64.fInteger  = 1;
        pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
        if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
        {
            pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
            pFpuRes->FSW |= X86_FSW_IE;

            if (!(pFpuState->FCW & X86_FCW_IM))
            {
                /* The value is not pushed. */
                pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
                pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
                pFpuRes->r80Result.au64[0] = 0;
                pFpuRes->r80Result.au16[4] = 0;
            }
        }
        else
            Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
    }
}
3232
3233
3234IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
3235{
3236 pFpuRes->r80Result.au64[0] = pr80Val->au64[0];
3237 pFpuRes->r80Result.au16[4] = pr80Val->au16[4];
3238 /* Raises no exceptions. */
3239 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3240}
3241
3242
3243IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3244{
3245 pFpuRes->r80Result.sj64.fSign = 0;
3246 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3247 pFpuRes->r80Result.sj64.fInteger = 1;
3248 pFpuRes->r80Result.sj64.uFraction = 0;
3249
3250 /*
3251 * FPU status word:
3252 * - TOP is irrelevant, but we must match x86 assembly version.
3253 * - C1 is always cleared as we don't have any stack overflows.
3254 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3255 */
3256 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3257}
3258
3259
3260IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3261{
3262 pFpuRes->r80Result.sj64.fSign = 0;
3263 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3264 pFpuRes->r80Result.sj64.fInteger = 1;
3265 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3266 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3267 ? UINT64_C(0x38aa3b295c17f0bc) : UINT64_C(0x38aa3b295c17f0bb);
3268 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3269}
3270
3271
3272IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3273{
3274 pFpuRes->r80Result.sj64.fSign = 0;
3275 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3276 pFpuRes->r80Result.sj64.fInteger = 1;
3277 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) != X86_FCW_RC_UP
3278 ? UINT64_C(0x549a784bcd1b8afe) : UINT64_C(0x549a784bcd1b8aff);
3279 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3280}
3281
3282
3283IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3284{
3285 pFpuRes->r80Result.sj64.fSign = 0;
3286 pFpuRes->r80Result.sj64.uExponent = -2 + 16383;
3287 pFpuRes->r80Result.sj64.fInteger = 1;
3288 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3289 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3290 ? UINT64_C(0x1a209a84fbcff799) : UINT64_C(0x1a209a84fbcff798);
3291 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3292}
3293
3294
3295IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3296{
3297 pFpuRes->r80Result.sj64.fSign = 0;
3298 pFpuRes->r80Result.sj64.uExponent = -1 + 16383;
3299 pFpuRes->r80Result.sj64.fInteger = 1;
3300 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3301 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3302 ? UINT64_C(0x317217f7d1cf79ac) : UINT64_C(0x317217f7d1cf79ab);
3303 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3304}
3305
3306
3307IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3308{
3309 pFpuRes->r80Result.sj64.fSign = 0;
3310 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3311 pFpuRes->r80Result.sj64.fInteger = 1;
3312 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3313 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3314 ? UINT64_C(0x490fdaa22168c235) : UINT64_C(0x490fdaa22168c234);
3315 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3316}
3317
3318
3319IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3320{
3321 pFpuRes->r80Result.s.fSign = 0;
3322 pFpuRes->r80Result.s.uExponent = 0;
3323 pFpuRes->r80Result.s.uMantissa = 0;
3324 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3325}
3326
3327#define EMIT_FILD(a_cBits) \
3328IEM_DECL_IMPL_DEF(void, iemAImpl_fild_r80_from_i ## a_cBits,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, \
3329 int ## a_cBits ## _t const *piVal)) \
3330{ \
3331 int ## a_cBits ## _t iVal = *piVal; \
3332 if (iVal == 0) \
3333 { \
3334 pFpuRes->r80Result.s.fSign = 0; \
3335 pFpuRes->r80Result.s.uExponent = 0; \
3336 pFpuRes->r80Result.s.uMantissa = 0; \
3337 } \
3338 else \
3339 { \
3340 if (iVal > 0) \
3341 pFpuRes->r80Result.s.fSign = 0; \
3342 else \
3343 { \
3344 pFpuRes->r80Result.s.fSign = 1; \
3345 iVal = -iVal; \
3346 } \
3347 unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
3348 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
3349 pFpuRes->r80Result.s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
3350 } \
3351 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */ \
3352}
3353EMIT_FILD(16)
3354EMIT_FILD(32)
3355EMIT_FILD(64)
3356
3357
3358IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_d80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTPBCD80U pd80Val))
3359{
3360 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3361 if ( pd80Val->s.abPairs[0] == 0
3362 && pd80Val->s.abPairs[1] == 0
3363 && pd80Val->s.abPairs[2] == 0
3364 && pd80Val->s.abPairs[3] == 0
3365 && pd80Val->s.abPairs[4] == 0
3366 && pd80Val->s.abPairs[5] == 0
3367 && pd80Val->s.abPairs[6] == 0
3368 && pd80Val->s.abPairs[7] == 0
3369 && pd80Val->s.abPairs[8] == 0)
3370 {
3371 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3372 pFpuRes->r80Result.s.uExponent = 0;
3373 pFpuRes->r80Result.s.uMantissa = 0;
3374 }
3375 else
3376 {
3377 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3378
3379 size_t cPairs = RT_ELEMENTS(pd80Val->s.abPairs);
3380 while (cPairs > 0 && pd80Val->s.abPairs[cPairs - 1] == 0)
3381 cPairs--;
3382
3383 uint64_t uVal = 0;
3384 uint64_t uFactor = 1;
3385 for (size_t iPair = 0; iPair < cPairs; iPair++, uFactor *= 100)
3386 uVal += RTPBCD80U_LO_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor
3387 + RTPBCD80U_HI_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor * 10;
3388
3389 unsigned const cBits = ASMBitLastSetU64(uVal);
3390 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS;
3391 pFpuRes->r80Result.s.uMantissa = uVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits);
3392 }
3393}
3394
3395
3396/*********************************************************************************************************************************
3397* x87 FPU Stores *
3398*********************************************************************************************************************************/
3399
3400/**
3401 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3402 *
3403 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3404 *
3405 * @returns Updated FPU status word value.
3406 * @param fSignIn Incoming sign indicator.
3407 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3408 * @param iExponentIn Unbiased exponent.
3409 * @param fFcw The FPU control word.
3410 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3411 * @param pr32Dst Where to return the output value, if one should be
3412 * returned.
3413 *
3414 * @note Tailored as a helper for iemAImpl_fst_r80_to_r32 right now.
3415 * @note Exact same logic as iemAImpl_StoreNormalR80AsR64.
3416 */
3417static uint16_t iemAImpl_StoreNormalR80AsR32(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3418 uint16_t fFcw, uint16_t fFsw, PRTFLOAT32U pr32Dst)
3419{
3420 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS) - 1; /* 0x7ff */
3421 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3422 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS - 1) /* 0x400 */
3423 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3424 ? fRoundingOffMask
3425 : 0;
3426 uint64_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3427
3428 /*
3429 * Deal with potential overflows/underflows first, optimizing for none.
3430 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3431 */
3432 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT32U_EXP_BIAS;
3433 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT32U_EXP_MAX - 3))
3434 { /* likely? */ }
3435 /*
3436 * Underflow if the exponent zero or negative. This is attempted mapped
3437 * to a subnormal number when possible, with some additional trickery ofc.
3438 */
3439 else if (iExponentOut <= 0)
3440 {
3441 bool const fIsTiny = iExponentOut < 0
3442 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3443 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3444 /* Note! 754-1985 sec 7.4 has something about bias adjust of 192 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3445 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3446
3447 if (iExponentOut <= 0)
3448 {
3449 uMantissaIn = iExponentOut <= -63
3450 ? uMantissaIn != 0
3451 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3452 fRoundedOff = uMantissaIn & fRoundingOffMask;
3453 if (fRoundedOff && fIsTiny)
3454 fFsw |= X86_FSW_UE;
3455 iExponentOut = 0;
3456 }
3457 }
3458 /*
3459 * Overflow if at or above max exponent value or if we will reach max
3460 * when rounding. Will return +/-zero or +/-max value depending on
3461 * whether we're rounding or not.
3462 */
3463 else if ( iExponentOut >= RTFLOAT32U_EXP_MAX
3464 || ( iExponentOut == RTFLOAT32U_EXP_MAX - 1
3465 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3466 {
3467 fFsw |= X86_FSW_OE;
3468 if (!(fFcw & X86_FCW_OM))
3469 return fFsw | X86_FSW_ES | X86_FSW_B;
3470 fFsw |= X86_FSW_PE;
3471 if (uRoundingAdd)
3472 fFsw |= X86_FSW_C1;
3473 if (!(fFcw & X86_FCW_PM))
3474 fFsw |= X86_FSW_ES | X86_FSW_B;
3475
3476 pr32Dst->s.fSign = fSignIn;
3477 if (uRoundingAdd)
3478 { /* Zero */
3479 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3480 pr32Dst->s.uFraction = 0;
3481 }
3482 else
3483 { /* Max */
3484 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX - 1;
3485 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1;
3486 }
3487 return fFsw;
3488 }
3489
3490 /*
3491 * Normal or subnormal number.
3492 */
3493 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3494 uint64_t uMantissaOut = uMantissaIn;
3495 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3496 || (uMantissaIn & RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS))
3497 || fRoundedOff != uRoundingAdd)
3498 {
3499 uMantissaOut = uMantissaIn + uRoundingAdd;
3500 if (uMantissaOut >= uMantissaIn)
3501 { /* likely */ }
3502 else
3503 {
3504 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3505 iExponentOut++;
3506 Assert(iExponentOut < RTFLOAT32U_EXP_MAX); /* checked above */
3507 fFsw |= X86_FSW_C1;
3508 }
3509 }
3510 else
3511 uMantissaOut = uMantissaIn;
3512
3513 /* Truncate the mantissa and set the return value. */
3514 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS;
3515
3516 pr32Dst->s.uFraction = (uint32_t)uMantissaOut; /* Note! too big for bitfield if normal. */
3517 pr32Dst->s.uExponent = iExponentOut;
3518 pr32Dst->s.fSign = fSignIn;
3519
3520 /* Set status flags realted to rounding. */
3521 if (fRoundedOff)
3522 {
3523 fFsw |= X86_FSW_PE;
3524 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS)))
3525 fFsw |= X86_FSW_C1;
3526 if (!(fFcw & X86_FCW_PM))
3527 fFsw |= X86_FSW_ES | X86_FSW_B;
3528 }
3529
3530 return fFsw;
3531}
3532
3533
3534/**
3535 * @note Exact same logic as iemAImpl_fst_r80_to_r64.
3536 */
3537IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3538 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
3539{
3540 uint16_t const fFcw = pFpuState->FCW;
3541 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3542 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3543 fFsw = iemAImpl_StoreNormalR80AsR32(pr80Src->s.fSign, pr80Src->s.uMantissa,
3544 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr32Dst);
3545 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3546 {
3547 pr32Dst->s.fSign = pr80Src->s.fSign;
3548 pr32Dst->s.uExponent = 0;
3549 pr32Dst->s.uFraction = 0;
3550 Assert(RTFLOAT32U_IS_ZERO(pr32Dst));
3551 }
3552 else if (RTFLOAT80U_IS_INF(pr80Src))
3553 {
3554 pr32Dst->s.fSign = pr80Src->s.fSign;
3555 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3556 pr32Dst->s.uFraction = 0;
3557 Assert(RTFLOAT32U_IS_INF(pr32Dst));
3558 }
3559 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3560 {
3561 /* Mapped to +/-QNaN */
3562 pr32Dst->s.fSign = pr80Src->s.fSign;
3563 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3564 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3565 }
3566 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3567 {
3568 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3569 if (fFcw & X86_FCW_IM)
3570 {
3571 pr32Dst->s.fSign = 1;
3572 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3573 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3574 fFsw |= X86_FSW_IE;
3575 }
3576 else
3577 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;;
3578 }
3579 else if (RTFLOAT80U_IS_NAN(pr80Src))
3580 {
3581 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3582 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3583 {
3584 pr32Dst->s.fSign = pr80Src->s.fSign;
3585 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3586 pr32Dst->s.uFraction = (uint32_t)(pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS));
3587 pr32Dst->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3588 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3589 fFsw |= X86_FSW_IE;
3590 }
3591 else
3592 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3593 }
3594 else
3595 {
3596 /* Denormal values causes both an underflow and precision exception. */
3597 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3598 if (fFcw & X86_FCW_UM)
3599 {
3600 pr32Dst->s.fSign = pr80Src->s.fSign;
3601 pr32Dst->s.uExponent = 0;
3602 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3603 {
3604 pr32Dst->s.uFraction = 1;
3605 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3606 if (!(fFcw & X86_FCW_PM))
3607 fFsw |= X86_FSW_ES | X86_FSW_B;
3608 }
3609 else
3610 {
3611 pr32Dst->s.uFraction = 0;
3612 fFsw |= X86_FSW_UE | X86_FSW_PE;
3613 if (!(fFcw & X86_FCW_PM))
3614 fFsw |= X86_FSW_ES | X86_FSW_B;
3615 }
3616 }
3617 else
3618 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3619 }
3620 *pu16FSW = fFsw;
3621}
3622
3623
3624/**
3625 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3626 *
3627 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3628 *
3629 * @returns Updated FPU status word value.
3630 * @param fSignIn Incoming sign indicator.
3631 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3632 * @param iExponentIn Unbiased exponent.
3633 * @param fFcw The FPU control word.
3634 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3635 * @param pr64Dst Where to return the output value, if one should be
3636 * returned.
3637 *
3638 * @note Tailored as a helper for iemAImpl_fst_r80_to_r64 right now.
3639 * @note Exact same logic as iemAImpl_StoreNormalR80AsR32.
3640 */
3641static uint16_t iemAImpl_StoreNormalR80AsR64(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3642 uint16_t fFcw, uint16_t fFsw, PRTFLOAT64U pr64Dst)
3643{
3644 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS) - 1; /* 0x7ff */
3645 uint32_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3646 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS - 1) /* 0x400 */
3647 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3648 ? fRoundingOffMask
3649 : 0;
3650 uint32_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3651
3652 /*
3653 * Deal with potential overflows/underflows first, optimizing for none.
3654 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3655 */
3656 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT64U_EXP_BIAS;
3657 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT64U_EXP_MAX - 3))
3658 { /* likely? */ }
3659 /*
3660 * Underflow if the exponent zero or negative. This is attempted mapped
3661 * to a subnormal number when possible, with some additional trickery ofc.
3662 */
3663 else if (iExponentOut <= 0)
3664 {
3665 bool const fIsTiny = iExponentOut < 0
3666 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3667 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3668 /* Note! 754-1985 sec 7.4 has something about bias adjust of 1536 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3669 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3670
3671 if (iExponentOut <= 0)
3672 {
3673 uMantissaIn = iExponentOut <= -63
3674 ? uMantissaIn != 0
3675 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3676 fRoundedOff = uMantissaIn & fRoundingOffMask;
3677 if (fRoundedOff && fIsTiny)
3678 fFsw |= X86_FSW_UE;
3679 iExponentOut = 0;
3680 }
3681 }
3682 /*
3683 * Overflow if at or above max exponent value or if we will reach max
3684 * when rounding. Will return +/-zero or +/-max value depending on
3685 * whether we're rounding or not.
3686 */
3687 else if ( iExponentOut >= RTFLOAT64U_EXP_MAX
3688 || ( iExponentOut == RTFLOAT64U_EXP_MAX - 1
3689 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3690 {
3691 fFsw |= X86_FSW_OE;
3692 if (!(fFcw & X86_FCW_OM))
3693 return fFsw | X86_FSW_ES | X86_FSW_B;
3694 fFsw |= X86_FSW_PE;
3695 if (uRoundingAdd)
3696 fFsw |= X86_FSW_C1;
3697 if (!(fFcw & X86_FCW_PM))
3698 fFsw |= X86_FSW_ES | X86_FSW_B;
3699
3700 pr64Dst->s64.fSign = fSignIn;
3701 if (uRoundingAdd)
3702 { /* Zero */
3703 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3704 pr64Dst->s64.uFraction = 0;
3705 }
3706 else
3707 { /* Max */
3708 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX - 1;
3709 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1;
3710 }
3711 return fFsw;
3712 }
3713
3714 /*
3715 * Normal or subnormal number.
3716 */
3717 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3718 uint64_t uMantissaOut = uMantissaIn;
3719 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3720 || (uMantissaIn & RT_BIT_32(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS))
3721 || fRoundedOff != uRoundingAdd)
3722 {
3723 uMantissaOut = uMantissaIn + uRoundingAdd;
3724 if (uMantissaOut >= uMantissaIn)
3725 { /* likely */ }
3726 else
3727 {
3728 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3729 iExponentOut++;
3730 Assert(iExponentOut < RTFLOAT64U_EXP_MAX); /* checked above */
3731 fFsw |= X86_FSW_C1;
3732 }
3733 }
3734 else
3735 uMantissaOut = uMantissaIn;
3736
3737 /* Truncate the mantissa and set the return value. */
3738 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS;
3739
3740 pr64Dst->s64.uFraction = uMantissaOut; /* Note! too big for bitfield if normal. */
3741 pr64Dst->s64.uExponent = iExponentOut;
3742 pr64Dst->s64.fSign = fSignIn;
3743
3744 /* Set status flags realted to rounding. */
3745 if (fRoundedOff)
3746 {
3747 fFsw |= X86_FSW_PE;
3748 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS)))
3749 fFsw |= X86_FSW_C1;
3750 if (!(fFcw & X86_FCW_PM))
3751 fFsw |= X86_FSW_ES | X86_FSW_B;
3752 }
3753
3754 return fFsw;
3755}
3756
3757
3758/**
3759 * @note Exact same logic as iemAImpl_fst_r80_to_r32.
3760 */
3761IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3762 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
3763{
3764 uint16_t const fFcw = pFpuState->FCW;
3765 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3766 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3767 fFsw = iemAImpl_StoreNormalR80AsR64(pr80Src->s.fSign, pr80Src->s.uMantissa,
3768 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr64Dst);
3769 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3770 {
3771 pr64Dst->s64.fSign = pr80Src->s.fSign;
3772 pr64Dst->s64.uExponent = 0;
3773 pr64Dst->s64.uFraction = 0;
3774 Assert(RTFLOAT64U_IS_ZERO(pr64Dst));
3775 }
3776 else if (RTFLOAT80U_IS_INF(pr80Src))
3777 {
3778 pr64Dst->s64.fSign = pr80Src->s.fSign;
3779 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3780 pr64Dst->s64.uFraction = 0;
3781 Assert(RTFLOAT64U_IS_INF(pr64Dst));
3782 }
3783 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3784 {
3785 /* Mapped to +/-QNaN */
3786 pr64Dst->s64.fSign = pr80Src->s.fSign;
3787 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3788 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3789 }
3790 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3791 {
3792 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3793 if (fFcw & X86_FCW_IM)
3794 {
3795 pr64Dst->s64.fSign = 1;
3796 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3797 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3798 fFsw |= X86_FSW_IE;
3799 }
3800 else
3801 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;;
3802 }
3803 else if (RTFLOAT80U_IS_NAN(pr80Src))
3804 {
3805 /* IM applies to signalled NaN input only. Everything is converted to quiet NaN. */
3806 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3807 {
3808 pr64Dst->s64.fSign = pr80Src->s.fSign;
3809 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3810 pr64Dst->s64.uFraction = pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3811 pr64Dst->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3812 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3813 fFsw |= X86_FSW_IE;
3814 }
3815 else
3816 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3817 }
3818 else
3819 {
3820 /* Denormal values causes both an underflow and precision exception. */
3821 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3822 if (fFcw & X86_FCW_UM)
3823 {
3824 pr64Dst->s64.fSign = pr80Src->s.fSign;
3825 pr64Dst->s64.uExponent = 0;
3826 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3827 {
3828 pr64Dst->s64.uFraction = 1;
3829 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3830 if (!(fFcw & X86_FCW_PM))
3831 fFsw |= X86_FSW_ES | X86_FSW_B;
3832 }
3833 else
3834 {
3835 pr64Dst->s64.uFraction = 0;
3836 fFsw |= X86_FSW_UE | X86_FSW_PE;
3837 if (!(fFcw & X86_FCW_PM))
3838 fFsw |= X86_FSW_ES | X86_FSW_B;
3839 }
3840 }
3841 else
3842 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3843 }
3844 *pu16FSW = fFsw;
3845}
3846
3847
3848IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3849 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
3850{
3851 /*
3852 * FPU status word:
3853 * - TOP is irrelevant, but we must match x86 assembly version (0).
3854 * - C1 is always cleared as we don't have any stack overflows.
3855 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3856 */
3857 *pu16FSW = pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3); /* see iemAImpl_fld1 */
3858 *pr80Dst = *pr80Src;
3859}
3860
3861
3862/*
3863 *
3864 * Mantissa:
3865 * 63 56 48 40 32 24 16 8 0
3866 * v v v v v v v v v
3867 * 1[.]111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000
3868 * \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
3869 * Exp: 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60
3870 *
3871 * int64_t has the same width, only bit 63 is the sign bit. So, the max we can map over
3872 * are bits 1 thru 63, dropping off bit 0, with an exponent of 62. The number of bits we
3873 * drop off from the mantissa increases with decreasing exponent, till an exponent of 0
3874 * where we'll drop off all but bit 63.
3875 */
/**
 * Emits an iemAImpl_fist_r80_to_iNN function body (FIST/FISTP), converting an
 * 80-bit floating point value to a signed integer using the FCW rounding mode.
 *
 * @param   a_cBits             Width of the destination integer in bits.
 * @param   a_iType             The destination integer C type.
 * @param   a_iTypeMin          Minimum value of the destination type (also used
 *                              for the rounding-overflow special case).
 * @param   a_iTypeIndefinite   Value stored for invalid inputs when the IE
 *                              exception is masked.
 */
#define EMIT_FIST(a_cBits, a_iType, a_iTypeMin, a_iTypeIndefinite) \
IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i ## a_cBits,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
                                                           a_iType *piDst, PCRTFLOAT80U pr80Val)) \
{ \
    uint16_t const fFcw    = pFpuState->FCW; \
    uint16_t       fFsw    = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
    bool const     fSignIn = pr80Val->s.fSign; \
    \
    /* \
     * Deal with normal numbers first. \
     */ \
    if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
    { \
        uint64_t       uMantissa = pr80Val->s.uMantissa; \
        int32_t        iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
        \
        if ((uint32_t)iExponent <= a_cBits - 2) \
        { \
            unsigned const cShiftOff        = 63 - iExponent; \
            uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
            uint64_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST \
                                            ? RT_BIT_64(cShiftOff - 1) \
                                            : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP) \
                                            ? fRoundingOffMask \
                                            : 0; \
            uint64_t       fRoundedOff      = uMantissa & fRoundingOffMask; \
            \
            uMantissa >>= cShiftOff; \
            uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff; \
            uMantissa += uRounding; \
            if (!(uMantissa & RT_BIT_64(a_cBits - 1))) \
            { \
                if (fRoundedOff) \
                { \
                    if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd) \
                        uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */ \
                    else if (uRounding) \
                        fFsw |= X86_FSW_C1; \
                    fFsw |= X86_FSW_PE; \
                    if (!(fFcw & X86_FCW_PM)) \
                        fFsw |= X86_FSW_ES | X86_FSW_B; \
                } \
                \
                if (!fSignIn) \
                    *piDst = (a_iType)uMantissa; \
                else \
                    *piDst = -(a_iType)uMantissa; \
            } \
            else \
            { \
                /* overflowed after rounding. */ \
                AssertMsg(iExponent == a_cBits - 2 && uMantissa == RT_BIT_64(a_cBits - 1), \
                          ("e=%d m=%#RX64 (org %#RX64) s=%d; shift=%d ro=%#RX64 rm=%#RX64 ra=%#RX64\n", iExponent, uMantissa, \
                           pr80Val->s.uMantissa, fSignIn, cShiftOff, fRoundedOff, fRoundingOffMask, uRoundingAdd)); \
                \
                /* Special case for the integer minimum value. */ \
                if (fSignIn) \
                { \
                    *piDst = a_iTypeMin; \
                    fFsw |= X86_FSW_PE | X86_FSW_C1; \
                    if (!(fFcw & X86_FCW_PM)) \
                        fFsw |= X86_FSW_ES | X86_FSW_B; \
                } \
                else \
                { \
                    fFsw |= X86_FSW_IE; \
                    if (fFcw & X86_FCW_IM) \
                        *piDst = a_iTypeMin; \
                    else \
                        fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
                } \
            } \
        } \
        /* \
         * Tiny sub-zero numbers. \
         */ \
        else if (iExponent < 0) \
        { \
            if (!fSignIn) \
            { \
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
                    || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
                { \
                    *piDst = 1; \
                    fFsw |= X86_FSW_C1; \
                } \
                else \
                    *piDst = 0; \
            } \
            else \
            { \
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
                    || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO \
                    || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
                    *piDst = 0; \
                else \
                { \
                    *piDst = -1; \
                    fFsw |= X86_FSW_C1; \
                } \
            } \
            fFsw |= X86_FSW_PE; \
            if (!(fFcw & X86_FCW_PM)) \
                fFsw |= X86_FSW_ES | X86_FSW_B; \
        } \
        /* \
         * Special MIN case. \
         */ \
        else if (   fSignIn && iExponent == a_cBits - 1 \
                 && (   a_cBits < 64 && (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_DOWN \
                     ?  uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
                     :  uMantissa == RT_BIT_64(63))) \
        { \
            *piDst = a_iTypeMin; \
            if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
            { \
                fFsw |= X86_FSW_PE; \
                if (!(fFcw & X86_FCW_PM)) \
                    fFsw |= X86_FSW_ES | X86_FSW_B; \
            } \
        } \
        /* \
         * Too large/small number outside the target integer range. \
         */ \
        else \
        { \
            fFsw |= X86_FSW_IE; \
            if (fFcw & X86_FCW_IM) \
                *piDst = a_iTypeIndefinite; \
            else \
                fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
        } \
    } \
    /* \
     * Map both +0 and -0 to integer zero (signless/+). \
     */ \
    else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
        *piDst = 0; \
    /* \
     * Denormals are just really tiny sub-zero numbers that are either rounded \
     * to zero, 1 or -1 depending on sign and rounding control. \
     */ \
    else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
    { \
        if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)) \
            *piDst = 0; \
        else \
        { \
            *piDst = fSignIn ? -1 : 1; \
            fFsw |= X86_FSW_C1; \
        } \
        fFsw |= X86_FSW_PE; \
        if (!(fFcw & X86_FCW_PM)) \
            fFsw |= X86_FSW_ES | X86_FSW_B; \
    } \
    /* \
     * All other special values are considered invalid arguments and result \
     * in an IE exception and indefinite value if masked. \
     */ \
    else \
    { \
        fFsw |= X86_FSW_IE; \
        if (fFcw & X86_FCW_IM) \
            *piDst = a_iTypeIndefinite; \
        else \
            fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
    } \
    *pu16FSW = fFsw; \
}
EMIT_FIST(64, int64_t, INT64_MIN, X86_FPU_INT64_INDEFINITE)
EMIT_FIST(32, int32_t, INT32_MIN, X86_FPU_INT32_INDEFINITE)
EMIT_FIST(16, int16_t, INT16_MIN, X86_FPU_INT16_INDEFINITE)
4048
4049#endif /*IEM_WITHOUT_ASSEMBLY */
4050
4051
4052/*
4053 * The FISTT instruction was added with SSE3 and are a lot simpler than FIST.
4054 *
4055 * The 16-bit version is a bit peculiar, though, as it seems to be raising IE
4056 * as if it was the 32-bit version (i.e. starting with exp 31 instead of 15),
4057 * thus the @a a_cBitsIn.
4058 */
4059#define EMIT_FISTT(a_cBits, a_cBitsIn, a_iType, a_iTypeMin, a_iTypeMax, a_iTypeIndefinite, a_Suffix, a_fIntelVersion) \
4060IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_fistt_r80_to_i,a_cBits,a_Suffix),(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
4061 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
4062{ \
4063 uint16_t const fFcw = pFpuState->FCW; \
4064 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
4065 bool const fSignIn = pr80Val->s.fSign; \
4066 \
4067 /* \
4068 * Deal with normal numbers first. \
4069 */ \
4070 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
4071 { \
4072 uint64_t uMantissa = pr80Val->s.uMantissa; \
4073 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
4074 \
4075 if ((uint32_t)iExponent <= a_cBitsIn - 2) \
4076 { \
4077 unsigned const cShiftOff = 63 - iExponent; \
4078 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
4079 uint64_t const fRoundedOff = uMantissa & fRoundingOffMask; \
4080 uMantissa >>= cShiftOff; \
4081 /*Assert(!(uMantissa & RT_BIT_64(a_cBits - 1)));*/ \
4082 if (!fSignIn) \
4083 *piDst = (a_iType)uMantissa; \
4084 else \
4085 *piDst = -(a_iType)uMantissa; \
4086 \
4087 if (fRoundedOff) \
4088 { \
4089 fFsw |= X86_FSW_PE; \
4090 if (!(fFcw & X86_FCW_PM)) \
4091 fFsw |= X86_FSW_ES | X86_FSW_B; \
4092 } \
4093 } \
4094 /* \
4095 * Tiny sub-zero numbers. \
4096 */ \
4097 else if (iExponent < 0) \
4098 { \
4099 *piDst = 0; \
4100 fFsw |= X86_FSW_PE; \
4101 if (!(fFcw & X86_FCW_PM)) \
4102 fFsw |= X86_FSW_ES | X86_FSW_B; \
4103 } \
4104 /* \
4105 * Special MIN case. \
4106 */ \
4107 else if ( fSignIn && iExponent == a_cBits - 1 \
4108 && (a_cBits < 64 \
4109 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
4110 : uMantissa == RT_BIT_64(63)) ) \
4111 { \
4112 *piDst = a_iTypeMin; \
4113 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4114 { \
4115 fFsw |= X86_FSW_PE; \
4116 if (!(fFcw & X86_FCW_PM)) \
4117 fFsw |= X86_FSW_ES | X86_FSW_B; \
4118 } \
4119 } \
4120 /* \
4121 * Figure this weirdness. \
4122 */ \
4123 else if (a_cBits == 16 && fSignIn && iExponent == 31 && uMantissa < UINT64_C(0x8000100000000000) ) \
4124 { \
4125 *piDst = 0; \
4126 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4127 { \
4128 fFsw |= X86_FSW_PE; \
4129 if (!(fFcw & X86_FCW_PM)) \
4130 fFsw |= X86_FSW_ES | X86_FSW_B; \
4131 } \
4132 } \
4133 /* \
4134 * Too large/small number outside the target integer range. \
4135 */ \
4136 else \
4137 { \
4138 fFsw |= X86_FSW_IE; \
4139 if (fFcw & X86_FCW_IM) \
4140 *piDst = a_iTypeIndefinite; \
4141 else \
4142 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4143 } \
4144 } \
4145 /* \
4146 * Map both +0 and -0 to integer zero (signless/+). \
4147 */ \
4148 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
4149 *piDst = 0; \
4150 /* \
4151 * Denormals are just really tiny sub-zero numbers that are trucated to zero. \
4152 */ \
4153 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4154 { \
4155 *piDst = 0; \
4156 fFsw |= X86_FSW_PE; \
4157 if (!(fFcw & X86_FCW_PM)) \
4158 fFsw |= X86_FSW_ES | X86_FSW_B; \
4159 } \
4160 /* \
4161 * All other special values are considered invalid arguments and result \
4162 * in an IE exception and indefinite value if masked. \
4163 */ \
4164 else \
4165 { \
4166 fFsw |= X86_FSW_IE; \
4167 if (fFcw & X86_FCW_IM) \
4168 *piDst = a_iTypeIndefinite; \
4169 else \
4170 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4171 } \
4172 *pu16FSW = fFsw; \
4173}
4174#if defined(IEM_WITHOUT_ASSEMBLY)
4175EMIT_FISTT(64, 64, int64_t, INT64_MIN, INT64_MAX, X86_FPU_INT64_INDEFINITE, RT_NOTHING, 1)
4176EMIT_FISTT(32, 32, int32_t, INT32_MIN, INT32_MAX, X86_FPU_INT32_INDEFINITE, RT_NOTHING, 1)
4177EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, RT_NOTHING, 1)
4178#endif
4179EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _intel, 1)
4180EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _amd, 0)
4181
4182
4183#if defined(IEM_WITHOUT_ASSEMBLY)
4184
/**
 * Stores an 80-bit floating point value as an 80-bit packed BCD value
 * (18 decimal digit pairs + sign), rounding to integer per the FCW mode.
 *
 * @param   pFpuState   The FPU state (FCW for rounding/masks, C0/C2/C3 passed
 *                      thru from FSW).
 * @param   pu16FSW     Where to return the updated FPU status word.
 * @param   pd80Dst     Where to store the packed BCD result.
 * @param   pr80Src     The 80-bit value to store.
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
                                                 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
{
    /*static RTPBCD80U const s_ad80MaxMin[2]   = { RTPBCD80U_INIT_MAX(), RTPBCD80U_INIT_MIN() };*/
    static RTPBCD80U const s_ad80Zeros[2]    = { RTPBCD80U_INIT_ZERO(0), RTPBCD80U_INIT_ZERO(1) };
    static RTPBCD80U const s_ad80One[2]      = { RTPBCD80U_INIT_C(0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1),
                                                 RTPBCD80U_INIT_C(1, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1) };
    static RTPBCD80U const s_d80Indefinite   = RTPBCD80U_INIT_INDEFINITE();

    uint16_t const fFcw    = pFpuState->FCW;
    uint16_t       fFsw    = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
    bool const     fSignIn = pr80Src->s.fSign;

    /*
     * Deal with normal numbers first.
     */
    if (RTFLOAT80U_IS_NORMAL(pr80Src))
    {
        uint64_t uMantissa = pr80Src->s.uMantissa;
        int32_t  iExponent = (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS;
        /* The constant is RTPBCD80U_MAX (999999999999999999 = 0x0de0b6b3a763ffff)
           shifted left 4 (= cShiftOff for exponent 59) with the low nibble set. */
        if (   (uint32_t)iExponent <= 58
            || ((uint32_t)iExponent == 59 && uMantissa <= UINT64_C(0xde0b6b3a763fffff)) )
        {
            unsigned const cShiftOff        = 63 - iExponent;
            uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
            uint64_t const uRoundingAdd     = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
                                            ? RT_BIT_64(cShiftOff - 1)
                                            : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
                                            ? fRoundingOffMask
                                            : 0;
            uint64_t       fRoundedOff      = uMantissa & fRoundingOffMask;

            uMantissa >>= cShiftOff;
            uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff;
            uMantissa += uRounding;
            if (uMantissa <= (uint64_t)RTPBCD80U_MAX)
            {
                if (fRoundedOff)
                {
                    if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd)
                        uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */
                    else if (uRounding)
                        fFsw |= X86_FSW_C1;
                    fFsw |= X86_FSW_PE;
                    if (!(fFcw & X86_FCW_PM))
                        fFsw |= X86_FSW_ES | X86_FSW_B;
                }

                /* Convert the integer to 18 packed BCD digits, two per byte, least significant pair first. */
                pd80Dst->s.fSign = fSignIn;
                pd80Dst->s.uPad  = 0;
                for (size_t iPair = 0; iPair < RT_ELEMENTS(pd80Dst->s.abPairs); iPair++)
                {
                    unsigned const uDigits = uMantissa % 100;
                    uMantissa /= 100;
                    uint8_t const bLo = uDigits % 10;
                    uint8_t const bHi = uDigits / 10;
                    pd80Dst->s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(bHi, bLo);
                }
            }
            else
            {
                /* overflowed after rounding. */
                fFsw |= X86_FSW_IE;
                if (fFcw & X86_FCW_IM)
                    *pd80Dst = s_d80Indefinite;
                else
                    fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
            }
        }
        /*
         * Tiny sub-zero numbers.
         */
        else if (iExponent < 0)
        {
            if (!fSignIn)
            {
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                    || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
                {
                    *pd80Dst = s_ad80One[fSignIn];
                    fFsw |= X86_FSW_C1;
                }
                else
                    *pd80Dst = s_ad80Zeros[fSignIn];
            }
            else
            {
                if (   (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
                    || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO
                    || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
                    *pd80Dst = s_ad80Zeros[fSignIn];
                else
                {
                    *pd80Dst = s_ad80One[fSignIn];
                    fFsw |= X86_FSW_C1;
                }
            }
            fFsw |= X86_FSW_PE;
            if (!(fFcw & X86_FCW_PM))
                fFsw |= X86_FSW_ES | X86_FSW_B;
        }
        /*
         * Too large/small number outside the target integer range.
         */
        else
        {
            fFsw |= X86_FSW_IE;
            if (fFcw & X86_FCW_IM)
                *pd80Dst = s_d80Indefinite;
            else
                fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
        }
    }
    /*
     * Map both +0 and -0 to integer zero (signless/+).
     */
    else if (RTFLOAT80U_IS_ZERO(pr80Src))
        *pd80Dst = s_ad80Zeros[fSignIn];
    /*
     * Denormals are just really tiny sub-zero numbers that are either rounded
     * to zero, 1 or -1 depending on sign and rounding control.
     */
    else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src) || RTFLOAT80U_IS_DENORMAL(pr80Src))
    {
        if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP))
            *pd80Dst = s_ad80Zeros[fSignIn];
        else
        {
            *pd80Dst = s_ad80One[fSignIn];
            fFsw |= X86_FSW_C1;
        }
        fFsw |= X86_FSW_PE;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }
    /*
     * All other special values are considered invalid arguments and result
     * in an IE exception and indefinite value if masked.
     */
    else
    {
        fFsw |= X86_FSW_IE;
        if (fFcw & X86_FCW_IM)
            *pd80Dst = s_d80Indefinite;
        else
            fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
    }
    *pu16FSW = fFsw;
}
4334
4335
4336/*********************************************************************************************************************************
4337* FPU Helpers *
4338*********************************************************************************************************************************/
4339#ifdef IEM_WITH_FLOAT128_FOR_FPU
4340
4341DECLINLINE(int) iemFpuF128SetRounding(uint16_t fFcw)
4342{
4343 int fNew;
4344 switch (fFcw & X86_FCW_RC_MASK)
4345 {
4346 default:
4347 case X86_FCW_RC_NEAREST: fNew = FE_TONEAREST; break;
4348 case X86_FCW_RC_ZERO: fNew = FE_TOWARDZERO; break;
4349 case X86_FCW_RC_UP: fNew = FE_UPWARD; break;
4350 case X86_FCW_RC_DOWN: fNew = FE_DOWNWARD; break;
4351 }
4352 int fOld = fegetround();
4353 fesetround(fNew);
4354 return fOld;
4355}
4356
4357
4358DECLINLINE(void) iemFpuF128RestoreRounding(int fOld)
4359{
4360 fesetround(fOld);
4361}
4362
4363DECLINLINE(_Float128) iemFpuF128FromFloat80(PCRTFLOAT80U pr80Val, uint16_t fFcw)
4364{
4365 RT_NOREF(fFcw);
4366 RTFLOAT128U Tmp;
4367 Tmp.s2.uSignAndExponent = pr80Val->s2.uSignAndExponent;
4368 Tmp.s2.uFractionHigh = (uint16_t)((pr80Val->s2.uMantissa & (RT_BIT_64(63) - 1)) >> 48);
4369 Tmp.s2.uFractionMid = (uint32_t)((pr80Val->s2.uMantissa & UINT32_MAX) >> 16);
4370 Tmp.s2.uFractionLow = pr80Val->s2.uMantissa << 48;
4371 if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
4372 {
4373 Assert(Tmp.s.uExponent == 0);
4374 Tmp.s2.uSignAndExponent++;
4375 }
4376 return *(_Float128 *)&Tmp;
4377}
4378
4379
4380DECLINLINE(uint16_t) iemFpuF128ToFloat80(PRTFLOAT80U pr80Dst, _Float128 rd128ValSrc, uint16_t fFcw, uint16_t fFsw)
4381{
4382 RT_NOREF(fFcw);
4383 RTFLOAT128U Tmp;
4384 *(_Float128 *)&Tmp = rd128ValSrc;
4385 ASMCompilerBarrier();
4386 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4387 {
4388 pr80Dst->s.fSign = Tmp.s64.fSign;
4389 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4390 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4391 | Tmp.s64.uFractionLo >> (64 - 15);
4392
4393 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4394 unsigned const cShiftOff = 64 - 15;
4395 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4396 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4397 if (uRoundedOff)
4398 {
4399 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4400 ? RT_BIT_64(cShiftOff - 1)
4401 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4402 ? fRoundingOffMask
4403 : 0;
4404 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4405 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4406 || uRoundedOff != uRoundingAdd)
4407 {
4408 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4409 {
4410 uFraction += 1;
4411 if (!(uFraction & RT_BIT_64(63)))
4412 { /* likely */ }
4413 else
4414 {
4415 uFraction >>= 1;
4416 pr80Dst->s.uExponent++;
4417 if (pr80Dst->s.uExponent == RTFLOAT64U_EXP_MAX)
4418 return fFsw;
4419 }
4420 fFsw |= X86_FSW_C1;
4421 }
4422 }
4423 fFsw |= X86_FSW_PE;
4424 if (!(fFcw & X86_FCW_PM))
4425 fFsw |= X86_FSW_ES | X86_FSW_B;
4426 }
4427 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4428 }
4429 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4430 {
4431 pr80Dst->s.fSign = Tmp.s64.fSign;
4432 pr80Dst->s.uExponent = 0;
4433 pr80Dst->s.uMantissa = 0;
4434 }
4435 else if (RTFLOAT128U_IS_INF(&Tmp))
4436 {
4437 pr80Dst->s.fSign = Tmp.s64.fSign;
4438 pr80Dst->s.uExponent = 0;
4439 pr80Dst->s.uMantissa = 0;
4440 }
4441 return fFsw;
4442}
4443
4444
4445#else /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4446
4447
4448DECLINLINE(float128_t) iemFpuSoftF128Precision(float128_t r128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4449{
4450 RT_NOREF(fFcw);
4451 Assert(cBits > 64);
4452# if 0 /* rounding does not seem to help */
4453 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4454 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4455 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4456 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4457 {
4458 uint64_t uOld = r128.v[0];
4459 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4460 if (r128.v[0] < uOld)
4461 r128.v[1] += 1;
4462 }
4463# else
4464 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4465# endif
4466 return r128;
4467}
4468
4469
4470DECLINLINE(float128_t) iemFpuSoftF128PrecisionIprt(PCRTFLOAT128U pr128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4471{
4472 RT_NOREF(fFcw);
4473 Assert(cBits > 64);
4474# if 0 /* rounding does not seem to help, not even on constants */
4475 float128_t r128 = { pr128->au64[0], pr128->au64[1] };
4476 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4477 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4478 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4479 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4480 {
4481 uint64_t uOld = r128.v[0];
4482 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4483 if (r128.v[0] < uOld)
4484 r128.v[1] += 1;
4485 }
4486 return r128;
4487# else
4488 float128_t r128 = { pr128->au64[0] & ~(RT_BIT_64(1 + 112 - cBits) - 1), pr128->au64[1] };
4489 return r128;
4490# endif
4491}
4492
4493
4494DECLINLINE(float128_t) iemFpuSoftF128FromIprt(PCRTFLOAT128U pr128)
4495{
4496 float128_t r128 = { pr128->au64[0], pr128->au64[1] };
4497 return r128;
4498}
4499
4500
4501/** Converts a 80-bit floating point value to SoftFloat 128-bit floating point. */
4502DECLINLINE(float128_t) iemFpuSoftF128FromFloat80(PCRTFLOAT80U pr80Val)
4503{
4504 extFloat80_t Tmp;
4505 Tmp.signExp = pr80Val->s2.uSignAndExponent;
4506 Tmp.signif = pr80Val->s2.uMantissa;
4507 return extF80_to_f128(Tmp);
4508}
4509
4510
4511DECLINLINE(uint16_t) iemFpuSoftF128ToFloat80(PRTFLOAT80U pr80Dst, float128_t r128Src, uint16_t fFcw, uint16_t fFsw)
4512{
4513 RT_NOREF(fFcw);
4514 RTFLOAT128U Tmp;
4515 *(float128_t *)&Tmp = r128Src;
4516 ASMCompilerBarrier();
4517
4518 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4519 {
4520 pr80Dst->s.fSign = Tmp.s64.fSign;
4521 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4522 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4523 | Tmp.s64.uFractionLo >> (64 - 15);
4524
4525 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4526 unsigned const cShiftOff = 64 - 15;
4527 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4528 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4529 if (uRoundedOff)
4530 {
4531 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4532 ? RT_BIT_64(cShiftOff - 1)
4533 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4534 ? fRoundingOffMask
4535 : 0;
4536 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4537 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4538 || uRoundedOff != uRoundingAdd)
4539 {
4540 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4541 {
4542 uFraction += 1;
4543 if (!(uFraction & RT_BIT_64(63)))
4544 { /* likely */ }
4545 else
4546 {
4547 uFraction >>= 1;
4548 pr80Dst->s.uExponent++;
4549 if (pr80Dst->s.uExponent == RTFLOAT64U_EXP_MAX)
4550 return fFsw;
4551 }
4552 fFsw |= X86_FSW_C1;
4553 }
4554 }
4555 fFsw |= X86_FSW_PE;
4556 if (!(fFcw & X86_FCW_PM))
4557 fFsw |= X86_FSW_ES | X86_FSW_B;
4558 }
4559
4560 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4561 }
4562 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4563 {
4564 pr80Dst->s.fSign = Tmp.s64.fSign;
4565 pr80Dst->s.uExponent = 0;
4566 pr80Dst->s.uMantissa = 0;
4567 }
4568 else if (RTFLOAT128U_IS_INF(&Tmp))
4569 {
4570 pr80Dst->s.fSign = Tmp.s64.fSign;
4571 pr80Dst->s.uExponent = 0;
4572 pr80Dst->s.uMantissa = 0;
4573 }
4574 return fFsw;
4575}
4576
4577
4578/**
4579 * Helper doing polynomial evaluation using Horner's method.
4580 *
4581 * See https://en.wikipedia.org/wiki/Horner%27s_method for details.
4582 */
4583float128_t iemFpuSoftF128HornerPoly(float128_t z, PCRTFLOAT128U g_par128HornerConsts, size_t cHornerConsts, unsigned cPrecision)
4584{
4585 Assert(cHornerConsts > 1);
4586 size_t i = cHornerConsts - 1;
4587 float128_t r128Result = iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision);
4588 while (i-- > 0)
4589 {
4590 r128Result = iemFpuSoftF128Precision(f128_mul(r128Result, z), cPrecision);
4591 r128Result = f128_add(r128Result, iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision));
4592 r128Result = iemFpuSoftF128Precision(r128Result, cPrecision);
4593 }
4594 return r128Result;
4595}
4596
4597#endif /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4598
4599
/**
 * Composes a normalized and rounded RTFLOAT80U result from a 192 bit wide
 * mantissa, exponent and sign.
 *
 * @returns Updated FSW.
 * @param   pr80Dst     Where to return the composed value.
 * @param   fSign       The sign.
 * @param   puMantissa  The mantissa, 256-bit type but the top 64 bits are
 *                      ignored and should be zero.  This will probably be
 *                      modified during normalization and rounding.
 * @param   iExponent   Unbiased exponent.
 * @param   fFcw        The FPU control word.
 * @param   fFsw        The FPU status word.
 */
static uint16_t iemFpuFloat80RoundAndComposeFrom192(PRTFLOAT80U pr80Dst, bool fSign, PRTUINT256U puMantissa,
                                                    int32_t iExponent, uint16_t fFcw, uint16_t fFsw)
{
    /* Only the lower 192 bits may be used; enforce that even in release builds. */
    AssertStmt(puMantissa->QWords.qw3 == 0, puMantissa->QWords.qw3 = 0);

    iExponent += RTFLOAT80U_EXP_BIAS;

    /* Do normalization if necessary and possible. */
    unsigned cShifted = 0; /* (informational; not consumed further down) */
    if (!(puMantissa->QWords.qw2 & RT_BIT_64(63)))
    {
        /* Shift the mantissa up so bit 191 becomes set, or - if the exponent
           cannot absorb that much - only as far as the exponent allows. */
        int cShift = 192 - RTUInt256BitCount(puMantissa);
        if (iExponent > cShift)
            iExponent -= cShift;
        else
        {
            if (fFcw & X86_FCW_UM)
            {
                /* Masked underflow: shift only down to biased exponent 0,
                   yielding a denormal result. */
                if (iExponent > 0)
                    cShift = --iExponent;
                else
                    cShift = 0;
            }
            iExponent -= cShift;
        }
        cShifted = cShift;
        RTUInt256AssignShiftLeft(puMantissa, cShift);
    }

    /* Do rounding.  qw2 holds the 64 mantissa bits we keep; qw1:qw0 are the
       bits rounded off, with qw1 bit 63 acting as the guard bit. */
    uint64_t uMantissa = puMantissa->QWords.qw2;
    if (puMantissa->QWords.qw1 || puMantissa->QWords.qw0)
    {
        /* Note: all four possible X86_FCW_RC_MASK values are covered below,
           so fAdd is always assigned before use. */
        bool fAdd;
        switch (fFcw & X86_FCW_RC_MASK)
        {
            case X86_FCW_RC_NEAREST:
                if (puMantissa->QWords.qw1 & RT_BIT_64(63))
                {
                    /* Guard bit set: round up unless we are exactly midway on
                       an even mantissa (round-to-nearest-even). */
                    if (   (uMantissa & 1)
                        || puMantissa->QWords.qw0 != 0
                        || puMantissa->QWords.qw1 != RT_BIT_64(63))
                    {
                        fAdd = true;
                        break;
                    }
                    uMantissa &= ~(uint64_t)1;
                }
                fAdd = false;
                break;
            case X86_FCW_RC_ZERO:
                fAdd = false;
                break;
            case X86_FCW_RC_UP:
                fAdd = !fSign;
                break;
            case X86_FCW_RC_DOWN:
                fAdd = fSign;
                break;
        }
        if (fAdd)
        {
            uint64_t const uTmp = uMantissa;
            uMantissa = uTmp + 1;
            if (uMantissa < uTmp)
            {
                /* Mantissa carry-out: renormalize and bump the exponent. */
                uMantissa >>= 1;
                uMantissa |= RT_BIT_64(63);
                iExponent++;
            }
            fFsw |= X86_FSW_C1;
        }
        /* Inexact result: raise #PE (and ES/B when unmasked). */
        fFsw |= X86_FSW_PE;
        if (!(fFcw & X86_FCW_PM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }

    /* Check for underflow (denormals). */
    if (iExponent <= 0)
    {
        if (fFcw & X86_FCW_UM)
        {
            /* Masked: denormalize (clear the integer bit) and use exponent 0. */
            if (uMantissa & RT_BIT_64(63))
                uMantissa >>= 1;
            iExponent = 0;
        }
        else
        {
            /* Unmasked: bias the exponent for the trap handler. */
            iExponent += RTFLOAT80U_EXP_BIAS_UNDERFLOW_ADJUST;
            fFsw |= X86_FSW_ES | X86_FSW_B;
        }
        fFsw |= X86_FSW_UE;
    }
    /* Check for overflow */
    else if (iExponent >= RTFLOAT80U_EXP_MAX)
    {
        /* NOTE(review): this assertion always fires if the branch is ever
           reached - overflow is treated as impossible for the current callers.
           Confirm before reusing this helper for operations that can overflow. */
        Assert(iExponent < RTFLOAT80U_EXP_MAX);
    }

    /* Compose the result. */
    pr80Dst->s.uMantissa = uMantissa;
    pr80Dst->s.uExponent = iExponent;
    pr80Dst->s.fSign     = fSign;
    return fFsw;
}
4719
4720
4721
4722
/*********************************************************************************************************************************
*   x86 FPU Division Operations                                                                                                  *
*********************************************************************************************************************************/

/* Note: The portable C implementations of the x87 division, reverse division,
   integer division and partial remainder operations below are not written yet;
   they hit AssertReleaseFailed() if ever reached (only compiled when
   IEM_WITHOUT_ASSEMBLY is active). */

IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}
4821
4822
/*********************************************************************************************************************************
*   x87 FPU Multiplication Operations                                                                                            *
*********************************************************************************************************************************/

/* Note: Portable C implementations not written yet; these stubs release-assert
   if reached (only compiled when IEM_WITHOUT_ASSEMBLY is active). */

IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}
4865
4866
/*********************************************************************************************************************************
*   x87 FPU Addition and Subtraction                                                                                             *
*********************************************************************************************************************************/

/* Note: Portable C implementations not written yet; these stubs release-assert
   if reached (only compiled when IEM_WITHOUT_ASSEMBLY is active). */

IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}
4989
4990
/*********************************************************************************************************************************
*   x87 FPU Trigonometric Operations                                                                                             *
*********************************************************************************************************************************/

/* Note: The portable C trigonometric implementations below are not written
   yet (they release-assert when reached).  The _intel/_amd variants simply
   forward to the common implementation for now, i.e. no vendor-specific
   result differences are modelled yet. */

IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}

#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                          PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                        PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}


#if defined(IEM_WITHOUT_ASSEMBLY)
IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}
#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
}


#ifdef IEM_WITHOUT_ASSEMBLY
IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}
#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
}

#ifdef IEM_WITHOUT_ASSEMBLY
IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}
#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
}


#ifdef IEM_WITHOUT_ASSEMBLY
IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}
#endif /* IEM_WITHOUT_ASSEMBLY */

IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
}

IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
}
5091
5092#ifdef IEM_WITHOUT_ASSEMBLY
5093
5094
5095/*********************************************************************************************************************************
5096* x87 FPU Compare and Testing Operations *
5097*********************************************************************************************************************************/
5098
5099IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5100{
5101 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5102
5103 if (RTFLOAT80U_IS_ZERO(pr80Val))
5104 fFsw |= X86_FSW_C3;
5105 else if (RTFLOAT80U_IS_NORMAL(pr80Val) || RTFLOAT80U_IS_INF(pr80Val))
5106 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 : 0;
5107 else if (RTFLOAT80U_IS_DENORMAL(pr80Val) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
5108 {
5109 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 | X86_FSW_DE : X86_FSW_DE;
5110 if (!(pFpuState->FCW & X86_FCW_DM))
5111 fFsw |= X86_FSW_ES | X86_FSW_B;
5112 }
5113 else
5114 {
5115 fFsw |= X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
5116 if (!(pFpuState->FCW & X86_FCW_IM))
5117 fFsw |= X86_FSW_ES | X86_FSW_B;
5118 }
5119
5120 *pu16Fsw = fFsw;
5121}
5122
5123
5124IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5125{
5126 RT_NOREF(pFpuState);
5127 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5128
5129 /* C1 = sign bit (always, even if empty Intel says). */
5130 if (pr80Val->s.fSign)
5131 fFsw |= X86_FSW_C1;
5132
5133 /* Classify the value in C0, C2, C3. */
5134 if (!(pFpuState->FTW & RT_BIT_32(X86_FSW_TOP_GET(pFpuState->FSW))))
5135 fFsw |= X86_FSW_C0 | X86_FSW_C3; /* empty */
5136 else if (RTFLOAT80U_IS_NORMAL(pr80Val))
5137 fFsw |= X86_FSW_C2;
5138 else if (RTFLOAT80U_IS_ZERO(pr80Val))
5139 fFsw |= X86_FSW_C3;
5140 else if (RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(pr80Val))
5141 fFsw |= X86_FSW_C0;
5142 else if (RTFLOAT80U_IS_INF(pr80Val))
5143 fFsw |= X86_FSW_C0 | X86_FSW_C2;
5144 else if (RTFLOAT80U_IS_DENORMAL(pr80Val) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
5145 fFsw |= X86_FSW_C2 | X86_FSW_C3;
5146 /* whatever else: 0 */
5147
5148 *pu16Fsw = fFsw;
5149}
5150
5151
/* Note: Portable C implementations of the remaining compare instructions are
   not written yet; these stubs release-assert if reached. */

IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr32Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr64Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                  PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                       PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
    AssertReleaseFailed();
    return 0; /* unreachable; keeps the compiler happy */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
                                                        PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pr80Val2);
    AssertReleaseFailed();
    return 0; /* unreachable; keeps the compiler happy */
}


IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
                                                   PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi16Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
                                                   PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
{
    RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi32Val2);
    AssertReleaseFailed();
}
5216
5217
/*********************************************************************************************************************************
*   x87 FPU Other Operations                                                                                                     *
*********************************************************************************************************************************/

/* Note: Portable C implementations not written yet; these stubs release-assert
   if reached. */

IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                    PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val);
    AssertReleaseFailed();
}


/* Sanity: the float conversion code below depends on these exact sizes. */
AssertCompileSize(RTFLOAT128U, 16);
AssertCompileSize(RTFLOAT80U, 10);
AssertCompileSize(RTFLOAT64U, 8);
AssertCompileSize(RTFLOAT32U, 4);
5249
5250
/**
 * @code
 *          x            x * ln2
 * f(x) = 2   - 1   =   e        - 1
 *
 * @endcode
 *
 * We can approximate e^x by a Taylor/Maclaurin series:
 * @code
 *          n       0     1     2     3     4
 *    inf  x       x     x     x     x     x
 *    SUM ----- = --- + --- + --- + --- + --- + ...
 *    n=0  n!      0!    1!    2!    3!    4!
 *
 *                      2     3     4
 *                     x     x     x
 *        = 1 + x  +  --- + --- + --- + ...
 *                     2!    3!    4!
 * @endcode
 *
 * Given z = x * ln2, we get:
 * @code
 *   z             2     3     4           n
 *                z     z     z           z
 *  e  - 1 = z + --- + --- + --- + ... + ---
 *                2!    3!    4!          n!
 * @endcode
 *
 * Wanting to use Horner's method, we move one z outside and get:
 * @code
 *                 2     3         (n-1)
 *                z     z         z
 *   = z ( 1  +  --- + --- + ... + ----- )
 *                2!    3!           n!
 * @endcode
 *
 * The constants we need for using Horner's methods are 1 and 1 / n!.
 *
 * For very tiny x values, we can get away with f(x) = x * ln 2, because
 * because we don't have the necessary precision to represent 1.0 + z/3 + ...
 * and can approximate it to be 1.0.  For a visual demonstration of this
 * check out https://www.desmos.com/calculator/vidcdxizd9 (for as long
 * as it is valid), plotting f(x) = 2^x - 1 and f(x) = x * ln2.
 *
 *
 * As constant accuracy goes, figure 0.1 "80387 Block Diagram" in the "80387
 * Data Sheet" (order 231920-002; Appendix E in 80387 PRM 231917-001; Military
 * i387SX 271166-002), indicates that constants are 67-bit (constant rom block)
 * and the internal mantissa size is 68-bit (mantissa adder & barrel shifter
 * blocks).  (The one bit difference is probably an implicit one missing from
 * the constant ROM.)  A paper on division and sqrt on the AMD-K7 by Stuart F.
 * Oberman states that it internally used a 68 bit mantissa with a 18-bit
 * exponent.
 *
 * However, even when sticking to 67 constants / 68 mantissas, I have not yet
 * successfully reproduced the exact results from an Intel 10980XE, there is
 * always a portion of rounding differences.  Not going to spend too much time
 * on getting this 100% the same, at least not now.
 *
 * P.S. If someone is really curious about 8087 and its constants:
 *      http://www.righto.com/2020/05/extracting-rom-constants-from-8087-math.html
 *
 *
 * @param   pr80Val     The exponent value (x), less than 1.0, greater than
 *                      -1.0 and not zero.  This can be a normal, denormal
 *                      or pseudo-denormal value.
 * @param   pr80Result  Where to return the result.
 * @param   fFcw        FPU control word.
 * @param   fFsw        FPU status word.
 */
static uint16_t iemAImpl_f2xm1_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
{
    /* As mentioned above, we can skip the expensive polynomial calculation
       as it will be close enough to 1.0 that it makes no difference.

       The cutoff point for intel 10980XE is exponents >= -69.  Intel
       also seems to be using a 67-bit or 68-bit constant value, and we get
       a smattering of rounding differences if we go for higher precision. */
    if (pr80Val->s.uExponent <= RTFLOAT80U_EXP_BIAS - 69)
    {
        /* Tiny input: f(x) ~= x * ln2, computed as a 64x128-bit mantissa
           product that is then rounded into an 80-bit result. */
        RTUINT256U u256;
        RTUInt128MulByU64Ex(&u256, &g_u128Ln2MantissaIntel, pr80Val->s.uMantissa);
        u256.QWords.qw0 |= 1; /* force #PE */
        fFsw = iemFpuFloat80RoundAndComposeFrom192(pr80Result, pr80Val->s.fSign, &u256,
                                                   !RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) && !RTFLOAT80U_IS_DENORMAL(pr80Val)
                                                   ? (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS
                                                   : 1 - RTFLOAT80U_EXP_BIAS,
                                                   fFcw, fFsw);
    }
    else
    {
#ifdef IEM_WITH_FLOAT128_FOR_FPU
        /* This approach is not good enough for small values, we end up with zero. */
        int const fOldRounding = iemFpuF128SetRounding(fFcw);
        _Float128 rd128Val = iemFpuF128FromFloat80(pr80Val, fFcw);
        _Float128 rd128Result = powf128(2.0L, rd128Val);
        rd128Result -= 1.0L;
        fFsw = iemFpuF128ToFloat80(pr80Result, rd128Result, fFcw, fFsw);
        iemFpuF128RestoreRounding(fOldRounding);

# else
        float128_t const x = iemFpuSoftF128FromFloat80(pr80Val);

        /* As mentioned above, enforce 68-bit internal mantissa width to better
           match the Intel 10980XE results. */
        unsigned const cPrecision = 68;

        /* first calculate z = x * ln2 */
        float128_t z = iemFpuSoftF128Precision(f128_mul(x, iemFpuSoftF128PrecisionIprt(&g_r128Ln2, cPrecision)), cPrecision);

        /* Then do the polynomial evaluation. */
        float128_t r = iemFpuSoftF128HornerPoly(z, g_ar128F2xm1HornerConsts, RT_ELEMENTS(g_ar128F2xm1HornerConsts), cPrecision);
        r = f128_mul(z, r);

        /* Output the result. */
        fFsw = iemFpuSoftF128ToFloat80(pr80Result, r, fFcw, fFsw);
# endif
    }
    return fFsw;
}
5371
5372
/**
 * F2XM1 - computes 2^ST(0) - 1.
 *
 * All input value classes are handled here; only normal inputs with an
 * exponent below the bias (i.e. |x| < 1.0), and masked denormals, are
 * forwarded to iemAImpl_f2xm1_r80_normal for the actual calculation.
 *
 * @param   pFpuState   The FPU state (FCW and FSW are read).
 * @param   pFpuRes     Where to return the result value and the new FSW.
 * @param   pr80Val     The input value, ST(0).
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    uint16_t const fFcw = pFpuState->FCW;
    /* Keep C0/C2/C3, clear C1, and set the TOP field to 7. */
    uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);

    if (RTFLOAT80U_IS_NORMAL(pr80Val))
    {
        /* Exponent below the bias means |x| < 1.0, the documented input range. */
        if (pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS)
            fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
        else
        {
            /* Special case:
                 2^+1.0 - 1.0 = 1.0
                 2^-1.0 - 1.0 = -0.5 */
            if (   pr80Val->s.uExponent == RTFLOAT80U_EXP_BIAS
                && pr80Val->s.uMantissa == RT_BIT_64(63))
            {
                pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
                pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_BIAS - pr80Val->s.fSign;
                pFpuRes->r80Result.s.fSign     = pr80Val->s.fSign;
            }
            /* ST(0) > 1.0 || ST(0) < -1.0: undefined behavior */
            /** @todo 287 is documented to only accept values 0 <= ST(0) <= 0.5. */
            else
                pFpuRes->r80Result = *pr80Val;
            fFsw |= X86_FSW_PE;
            if (!(fFcw & X86_FCW_PM))
                fFsw |= X86_FSW_ES | X86_FSW_B;
        }
    }
    /* Zero, quiet NaNs and the indefinite value are returned as-is. */
    else if (   RTFLOAT80U_IS_ZERO(pr80Val)
             || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
             || RTFLOAT80U_IS_INDEFINITE(pr80Val))
        pFpuRes->r80Result = *pr80Val;
    /* 2^-Inf - 1 = -1.0 (the g_ar80One[1] entry); +Inf propagates unchanged. */
    else if (RTFLOAT80U_IS_INF(pr80Val))
        pFpuRes->r80Result = pr80Val->s.fSign ? g_ar80One[1] : *pr80Val;
    /* Denormal input: flag #DE; calculate if masked, otherwise stall with ES+B set. */
    else if (RTFLOAT80U_IS_DENORMAL(pr80Val) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
    {
        fFsw |= X86_FSW_DE;
        if (fFcw & X86_FCW_DM)
            fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
        else
        {
            pFpuRes->r80Result = *pr80Val;
            fFsw |= X86_FSW_ES | X86_FSW_B;
        }
    }
    /* Remaining classes (unnormals, pseudo-NaNs, signalling NaNs) raise #IE. */
    else
    {
        if (   (   RTFLOAT80U_IS_UNNORMAL(pr80Val)
                || RTFLOAT80U_IS_PSEUDO_NAN(pr80Val))
            && (fFcw & X86_FCW_IM))
            pFpuRes->r80Result = g_r80Indefinite;
        else
        {
            pFpuRes->r80Result = *pr80Val;
            if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val) && (fFcw & X86_FCW_IM))
                pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
        }
        fFsw |= X86_FSW_IE;
        if (!(fFcw & X86_FCW_IM))
            fFsw |= X86_FSW_ES | X86_FSW_B;
    }
    pFpuRes->FSW = fFsw;
}
5438
5439#endif /* IEM_WITHOUT_ASSEMBLY */
5440
/** AMD flavour of F2XM1 - no vendor specific behaviour is modelled yet, so it
 *  simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
}
5445
/** Intel flavour of F2XM1 - no vendor specific behaviour is modelled yet, so
 *  it simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
{
    iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
}
5450
5451#ifdef IEM_WITHOUT_ASSEMBLY
5452
5453IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5454{
5455 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5456 pFpuRes->r80Result = *pr80Val;
5457 pFpuRes->r80Result.s.fSign = 0;
5458}
5459
5460
5461IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5462{
5463 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5464 pFpuRes->r80Result = *pr80Val;
5465 pFpuRes->r80Result.s.fSign = !pr80Val->s.fSign;
5466}
5467
5468
/** FXTRACT - not yet implemented in the portable C variant; hitting this
 *  release-asserts so the gap is impossible to miss. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
{
    RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
    AssertReleaseFailed();
}
5474
5475
/** FYL2X - not yet implemented in the portable C variant; hitting this
 *  release-asserts so the gap is impossible to miss. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                   PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}
5482
5483#endif /* IEM_WITHOUT_ASSEMBLY */
5484
/** Intel flavour of FYL2X - no vendor specific behaviour is modelled yet, so
 *  it simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                         PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}
5490
/** AMD flavour of FYL2X - no vendor specific behaviour is modelled yet, so it
 *  simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                       PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}
5496
5497#if defined(IEM_WITHOUT_ASSEMBLY)
5498
/** FYL2XP1 - not yet implemented in the portable C variant; hitting this
 *  release-asserts so the gap is impossible to miss. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                     PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
    AssertReleaseFailed();
}
5505
5506#endif /* IEM_WITHOUT_ASSEMBLY */
5507
/** Intel flavour of FYL2XP1 - no vendor specific behaviour is modelled yet,
 *  so it simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                           PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}
5513
/** AMD flavour of FYL2XP1 - no vendor specific behaviour is modelled yet, so
 *  it simply defers to the generic implementation. */
IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
                                                         PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
{
    iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
}
5519
5520
5521/*********************************************************************************************************************************
5522* MMX, SSE & AVX *
5523*********************************************************************************************************************************/
5524
5525IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
5526{
5527 RT_NOREF(pFpuState);
5528 puDst->au32[0] = puSrc->au32[0];
5529 puDst->au32[1] = puSrc->au32[0];
5530 puDst->au32[2] = puSrc->au32[2];
5531 puDst->au32[3] = puSrc->au32[2];
5532}
5533
5534#ifdef IEM_WITH_VEX
5535
5536IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
5537{
5538 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
5539 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
5540 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
5541 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
5542 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
5543 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
5544 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
5545 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
5546}
5547
5548
5549IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
5550{
5551 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
5552 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
5553 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
5554 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
5555 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
5556 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
5557 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
5558 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
5559}
5560
5561#endif /* IEM_WITH_VEX */
5562
5563
5564IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
5565{
5566 RT_NOREF(pFpuState);
5567 puDst->au32[0] = puSrc->au32[1];
5568 puDst->au32[1] = puSrc->au32[1];
5569 puDst->au32[2] = puSrc->au32[3];
5570 puDst->au32[3] = puSrc->au32[3];
5571}
5572
5573
5574IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
5575{
5576 RT_NOREF(pFpuState);
5577 puDst->au64[0] = uSrc;
5578 puDst->au64[1] = uSrc;
5579}
5580
5581#ifdef IEM_WITH_VEX
5582
5583IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
5584{
5585 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
5586 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
5587 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
5588 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
5589}
5590
5591IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
5592{
5593 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
5594 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
5595 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
5596 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
5597}
5598
5599#endif /* IEM_WITH_VEX */
5600
5601#ifdef IEM_WITHOUT_ASSEMBLY
5602
5603IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5604{
5605 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5606 AssertReleaseFailed();
5607}
5608
5609
5610IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5611{
5612 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5613 AssertReleaseFailed();
5614}
5615
5616
5617IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5618{
5619 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5620 AssertReleaseFailed();
5621}
5622
5623
5624IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5625{
5626 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5627 AssertReleaseFailed();
5628}
5629
5630
5631IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5632{
5633 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5634 AssertReleaseFailed();
5635}
5636
5637
5638IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5639{
5640 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5641 AssertReleaseFailed();
5642}
5643
5644
5645IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5646{
5647 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5648 AssertReleaseFailed();
5649}
5650
5651
5652IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5653{
5654 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5655 AssertReleaseFailed();
5656}
5657
5658
5659IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5660{
5661 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5662 AssertReleaseFailed();
5663
5664}
5665
5666
5667IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
5668{
5669 RT_NOREF(pFpuState, pu64Dst, pu128Src);
5670 AssertReleaseFailed();
5671}
5672
5673
5674IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
5675{
5676 RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
5677 AssertReleaseFailed();
5678}
5679
5680
5681IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
5682{
5683 RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
5684 AssertReleaseFailed();
5685}
5686
5687
5688IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
5689{
5690 RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
5691 AssertReleaseFailed();
5692}
5693
5694
5695IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
5696{
5697 RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
5698 AssertReleaseFailed();
5699}
5700
5701/* PUNPCKHxxx */
5702
5703IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5704{
5705 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5706 AssertReleaseFailed();
5707}
5708
5709
5710IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5711{
5712 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5713 AssertReleaseFailed();
5714}
5715
5716
5717IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5718{
5719 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5720 AssertReleaseFailed();
5721}
5722
5723
5724IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5725{
5726 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5727 AssertReleaseFailed();
5728}
5729
5730
5731IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
5732{
5733 RT_NOREF(pFpuState, pu64Dst, pu64Src);
5734 AssertReleaseFailed();
5735}
5736
5737
5738IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5739{
5740 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5741 AssertReleaseFailed();
5742}
5743
5744
5745IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
5746{
5747 RT_NOREF(pFpuState, pu128Dst, pu128Src);
5748 AssertReleaseFailed();
5749}
5750
5751/* PUNPCKLxxx */
5752
5753IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
5754{
5755 RT_NOREF(pFpuState, pu64Dst, pu32Src);
5756 AssertReleaseFailed();
5757}
5758
5759
5760IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
5761{
5762 RT_NOREF(pFpuState, pu128Dst, pu64Src);
5763 AssertReleaseFailed();
5764}
5765
5766
5767IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
5768{
5769 RT_NOREF(pFpuState, pu64Dst, pu32Src);
5770 AssertReleaseFailed();
5771}
5772
5773
5774IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
5775{
5776 RT_NOREF(pFpuState, pu128Dst, pu64Src);
5777 AssertReleaseFailed();
5778}
5779
5780
5781IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
5782{
5783 RT_NOREF(pFpuState, pu64Dst, pu32Src);
5784 AssertReleaseFailed();
5785}
5786
5787
5788IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
5789{
5790 RT_NOREF(pFpuState, pu128Dst, pu64Src);
5791 AssertReleaseFailed();
5792}
5793
5794
5795IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
5796{
5797 RT_NOREF(pFpuState, pu128Dst, pu64Src);
5798 AssertReleaseFailed();
5799}
5800
5801#endif /* IEM_WITHOUT_ASSEMBLY */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette