VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 96348

Last change on this file since 96348 was 96348, checked in by vboxsync, 2 years ago

VMM/testcase/tstIEMAImpl: Implement basic minss/minsd testcases, bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 254.9 KB
Line 
1/* $Id: tstIEMAImpl.cpp 96348 2022-08-19 17:00:45Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** @name Subtest table entry initializers.
 *
 * Each macro expands to a brace initializer with seven members, in the member
 * order of the structures declared via TYPEDEF_SUBTEST_TYPE: name string,
 * function under test, native function (or NULL), test data array, address of
 * the test data size variable, extra value and EFLAGS behaviour flavour.
 * @{ */

/** Plain entry carrying an extra value; no native worker, NATIVE flavour. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), \
        iemAImpl_ ## a_Name, \
        NULL, \
        g_aTests_ ## a_Name, \
        &g_cTests_ ## a_Name, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** Plain entry with the extra value set to zero. */
#define ENTRY(a_Name)                           ENTRY_EX(a_Name, 0)

/** Like ENTRY_EX, but the size variable is g_cbTests_<name> rather than
 *  g_cTests_<name> (presumably a byte count - confirm against tstIEMAImpl.h). */
#define ENTRY_EX_BIN(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), \
        iemAImpl_ ## a_Name, \
        NULL, \
        g_aTests_ ## a_Name, \
        &g_cbTests_ ## a_Name, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** ENTRY_EX_BIN with the extra value set to zero. */
#define ENTRY_BIN(a_Name)                       ENTRY_EX_BIN(a_Name, 0)

/** Intel flavoured entry: tests iemAImpl_<name>_intel and records
 *  iemAImpl_<name> as the native worker.  a_fEflUndef is accepted but not
 *  used by the expansion. */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_intel", \
        iemAImpl_ ## a_Name ## _intel, \
        iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _intel, \
        &g_cTests_ ## a_Name ## _intel, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_INTEL \
    }
/** ENTRY_INTEL_EX with the extra value set to zero. */
#define ENTRY_INTEL(a_Name, a_fEflUndef)        ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)

/** AMD flavoured entry: tests iemAImpl_<name>_amd and records
 *  iemAImpl_<name> as the native worker.  a_fEflUndef is accepted but not
 *  used by the expansion. */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_amd", \
        iemAImpl_ ## a_Name ## _amd, \
        iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _amd, \
        &g_cTests_ ## a_Name ## _amd, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_AMD \
    }
/** ENTRY_AMD_EX with the extra value set to zero. */
#define ENTRY_AMD(a_Name, a_fEflUndef)          ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)

/** @} */
66
/**
 * Declares a subtest descriptor structure type.
 *
 * @param   a_TypeName          Name of the structure type to declare.
 * @param   a_TestType          Type of one element of the test data array.
 * @param   a_FunctionPtrType   Pointer type of the functions being tested.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char             *pszName;            /* subtest name */ \
        a_FunctionPtrType       pfn;                /* function under test */ \
        a_FunctionPtrType       pfnNative;          /* native variant, may be NULL */ \
        a_TestType const       *paTests;            /* test data array */ \
        uint32_t const         *pcTests;            /* where to find the test data size */ \
        uint32_t                uExtra;             /* extra value from the entry macro */ \
        uint8_t                 idxCpuEflFlavour;   /* IEMTARGETCPU_EFL_BEHAVIOR_XXX */ \
    } a_TypeName
78
/**
 * Number of function variations for a subtest entry: 2 when the entry's
 * EFLAGS flavour equals g_idxCpuEflFlavour and it has a native worker,
 * otherwise 1.
 */
#define COUNT_VARIATIONS(a_SubTest) \
    ( (a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative ? 2 : 1 )
81
82
83/*********************************************************************************************************************************
84* Global Variables *
85*********************************************************************************************************************************/
86static RTTEST g_hTest;
87static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
88#ifdef TSTIEMAIMPL_WITH_GENERATOR
89static uint32_t g_cZeroDstTests = 2;
90static uint32_t g_cZeroSrcTests = 4;
91#endif
92static uint8_t *g_pu8, *g_pu8Two;
93static uint16_t *g_pu16, *g_pu16Two;
94static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
95static uint64_t *g_pu64, *g_pu64Two;
96static RTUINT128U *g_pu128, *g_pu128Two;
97
98static char g_aszBuf[32][256];
99static unsigned g_idxBuf = 0;
100
101static uint32_t g_cIncludeTestPatterns;
102static uint32_t g_cExcludeTestPatterns;
103static const char *g_apszIncludeTestPatterns[64];
104static const char *g_apszExcludeTestPatterns[64];
105
106static unsigned g_cVerbosity = 0;
107
108
109/*********************************************************************************************************************************
110* Internal Functions *
111*********************************************************************************************************************************/
112static const char *FormatR80(PCRTFLOAT80U pr80);
113static const char *FormatR64(PCRTFLOAT64U pr64);
114static const char *FormatR32(PCRTFLOAT32U pr32);
115
116
117/*
118 * Random helpers.
119 */
120
121static uint32_t RandEFlags(void)
122{
123 uint32_t fEfl = RTRandU32();
124 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
125}
126
127#ifdef TSTIEMAIMPL_WITH_GENERATOR
128
129static uint8_t RandU8(void)
130{
131 return RTRandU32Ex(0, 0xff);
132}
133
134
135static uint16_t RandU16(void)
136{
137 return RTRandU32Ex(0, 0xffff);
138}
139
140
141static uint32_t RandU32(void)
142{
143 return RTRandU32();
144}
145
146#endif
147
148static uint64_t RandU64(void)
149{
150 return RTRandU64();
151}
152
153
154static RTUINT128U RandU128(void)
155{
156 RTUINT128U Ret;
157 Ret.s.Hi = RTRandU64();
158 Ret.s.Lo = RTRandU64();
159 return Ret;
160}
161
162#ifdef TSTIEMAIMPL_WITH_GENERATOR
163
164static uint8_t RandU8Dst(uint32_t iTest)
165{
166 if (iTest < g_cZeroDstTests)
167 return 0;
168 return RandU8();
169}
170
171
172static uint8_t RandU8Src(uint32_t iTest)
173{
174 if (iTest < g_cZeroSrcTests)
175 return 0;
176 return RandU8();
177}
178
179
180static uint16_t RandU16Dst(uint32_t iTest)
181{
182 if (iTest < g_cZeroDstTests)
183 return 0;
184 return RandU16();
185}
186
187
188static uint16_t RandU16Src(uint32_t iTest)
189{
190 if (iTest < g_cZeroSrcTests)
191 return 0;
192 return RandU16();
193}
194
195
196static uint32_t RandU32Dst(uint32_t iTest)
197{
198 if (iTest < g_cZeroDstTests)
199 return 0;
200 return RandU32();
201}
202
203
204static uint32_t RandU32Src(uint32_t iTest)
205{
206 if (iTest < g_cZeroSrcTests)
207 return 0;
208 return RandU32();
209}
210
211
212static uint64_t RandU64Dst(uint32_t iTest)
213{
214 if (iTest < g_cZeroDstTests)
215 return 0;
216 return RandU64();
217}
218
219
220static uint64_t RandU64Src(uint32_t iTest)
221{
222 if (iTest < g_cZeroSrcTests)
223 return 0;
224 return RandU64();
225}
226
227
228/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
229static int16_t RandI16Src2(uint32_t iTest)
230{
231 if (iTest < 18 * 4)
232 switch (iTest % 4)
233 {
234 case 0: return 0;
235 case 1: return INT16_MAX;
236 case 2: return INT16_MIN;
237 case 3: break;
238 }
239 return (int16_t)RandU16();
240}
241
242
243/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
244static int32_t RandI32Src2(uint32_t iTest)
245{
246 if (iTest < 18 * 4)
247 switch (iTest % 4)
248 {
249 case 0: return 0;
250 case 1: return INT32_MAX;
251 case 2: return INT32_MIN;
252 case 3: break;
253 }
254 return (int32_t)RandU32();
255}
256
257
#if 0
/** Random signed 64-bit source operand; the test number is ignored.
 *  (Compiled out.) */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    uint64_t const uValue = RandU64();
    return (int64_t)uValue;
}
#endif
265
266
267static uint16_t RandFcw(void)
268{
269 return RandU16() & ~X86_FCW_ZERO_MASK;
270}
271
272
273static uint16_t RandFsw(void)
274{
275 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
276 return RandU16();
277}
278
279
280static uint32_t RandMxcsr(void)
281{
282 return RandU32() & ~X86_MXCSR_ZERO_MASK;
283}
284
285
286static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
287{
288 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
289 pr80->sj64.uFraction >>= cShift;
290 else
291 pr80->sj64.uFraction = (cShift % 19) + 1;
292}
293
294
295
296static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
297{
298 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
299
300 RTFLOAT80U r80;
301 r80.au64[0] = RandU64();
302 r80.au16[4] = RandU16();
303
304 /*
305 * Adjust the random stuff according to bType.
306 */
307 bType &= 0x1f;
308 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
309 {
310 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
311 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
312 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
313 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
314 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
315 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
316 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
317 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
318 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
319 }
320 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
321 {
322 /* Denormals (4,5) and Pseudo denormals (6,7) */
323 if (bType & 1)
324 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
325 else if (r80.sj64.uFraction == 0 && bType < 6)
326 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
327 r80.sj64.uExponent = 0;
328 r80.sj64.fInteger = bType >= 6;
329 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
330 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
331 }
332 else if (bType == 8 || bType == 9)
333 {
334 /* Pseudo NaN. */
335 if (bType & 1)
336 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
337 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
338 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
339 r80.sj64.uExponent = 0x7fff;
340 if (r80.sj64.fInteger)
341 r80.sj64.uFraction |= RT_BIT_64(62);
342 else
343 r80.sj64.uFraction &= ~RT_BIT_64(62);
344 r80.sj64.fInteger = 0;
345 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
346 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
347 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
348 }
349 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
350 {
351 /* Quiet and signalling NaNs. */
352 if (bType & 1)
353 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
354 else if (r80.sj64.uFraction == 0)
355 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
356 r80.sj64.uExponent = 0x7fff;
357 if (bType < 12)
358 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
359 else
360 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
361 r80.sj64.fInteger = 1;
362 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
363 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
364 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
365 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
366 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
367 }
368 else if (bType == 14 || bType == 15)
369 {
370 /* Unnormals */
371 if (bType & 1)
372 SafeR80FractionShift(&r80, RandU8() % 62);
373 r80.sj64.fInteger = 0;
374 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
375 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
376 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
377 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
378 }
379 else if (bType < 26)
380 {
381 /* Make sure we have lots of normalized values. */
382 if (!fIntTarget)
383 {
384 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
385 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
386 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
387 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
388 r80.sj64.fInteger = 1;
389 if (r80.sj64.uExponent <= uMinExp)
390 r80.sj64.uExponent = uMinExp + 1;
391 else if (r80.sj64.uExponent >= uMaxExp)
392 r80.sj64.uExponent = uMaxExp - 1;
393
394 if (bType == 16)
395 { /* All 1s is useful to testing rounding. Also try trigger special
396 behaviour by sometimes rounding out of range, while we're at it. */
397 r80.sj64.uFraction = RT_BIT_64(63) - 1;
398 uint8_t bExp = RandU8();
399 if ((bExp & 3) == 0)
400 r80.sj64.uExponent = uMaxExp - 1;
401 else if ((bExp & 3) == 1)
402 r80.sj64.uExponent = uMinExp + 1;
403 else if ((bExp & 3) == 2)
404 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
405 }
406 }
407 else
408 {
409 /* integer target: */
410 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
411 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
412 r80.sj64.fInteger = 1;
413 if (r80.sj64.uExponent < uMinExp)
414 r80.sj64.uExponent = uMinExp;
415 else if (r80.sj64.uExponent > uMaxExp)
416 r80.sj64.uExponent = uMaxExp;
417
418 if (bType == 16)
419 { /* All 1s is useful to testing rounding. Also try trigger special
420 behaviour by sometimes rounding out of range, while we're at it. */
421 r80.sj64.uFraction = RT_BIT_64(63) - 1;
422 uint8_t bExp = RandU8();
423 if ((bExp & 3) == 0)
424 r80.sj64.uExponent = uMaxExp;
425 else if ((bExp & 3) == 1)
426 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
427 }
428 }
429
430 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
431 }
432 return r80;
433}
434
435
436static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
437{
438 /*
439 * Make it more likely that we get a good selection of special values.
440 */
441 return RandR80Ex(RandU8(), cTarget, fIntTarget);
442
443}
444
445
446static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
447{
448 /* Make sure we cover all the basic types first before going for random selection: */
449 if (iTest <= 18)
450 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
451 return RandR80(cTarget, fIntTarget);
452}
453
454
455/**
456 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
457 * to a 0..17, covering all basic value types.
458 */
459static uint8_t RandR80Src12RemapType(uint8_t bType)
460{
461 switch (bType)
462 {
463 case 0: return 18; /* normal */
464 case 1: return 16; /* normal extreme rounding */
465 case 2: return 14; /* unnormal */
466 case 3: return 12; /* Signalling NaN */
467 case 4: return 10; /* Quiet NaN */
468 case 5: return 8; /* PseudoNaN */
469 case 6: return 6; /* Pseudo Denormal */
470 case 7: return 4; /* Denormal */
471 case 8: return 3; /* Indefinite */
472 case 9: return 2; /* Infinity */
473 case 10: return 1; /* Pseudo-Infinity */
474 case 11: return 0; /* Zero */
475 default: AssertFailedReturn(18);
476 }
477}
478
479
480/**
481 * This works in tandem with RandR80Src2 to make sure we cover all operand
482 * type mixes first before we venture into regular random testing.
483 *
484 * There are 11 basic variations, when we leave out the five odd ones using
485 * SafeR80FractionShift. Because of the special normalized value targetting at
486 * rounding, we make it an even 12. So 144 combinations for two operands.
487 */
488static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
489{
490 if (cPartnerBits == 80)
491 {
492 Assert(!fPartnerInt);
493 if (iTest < 12 * 12)
494 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
495 }
496 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
497 {
498 if (iTest < 12 * 10)
499 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
500 }
501 else if (iTest < 18 * 4 && fPartnerInt)
502 return RandR80Ex(iTest / 4);
503 return RandR80();
504}
505
506
507/** Partner to RandR80Src1. */
508static RTFLOAT80U RandR80Src2(uint32_t iTest)
509{
510 if (iTest < 12 * 12)
511 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
512 return RandR80();
513}
514
515
516static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
517{
518 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
519 pr64->s64.uFraction >>= cShift;
520 else
521 pr64->s64.uFraction = (cShift % 19) + 1;
522}
523
524
525static RTFLOAT64U RandR64Ex(uint8_t bType)
526{
527 RTFLOAT64U r64;
528 r64.u = RandU64();
529
530 /*
531 * Make it more likely that we get a good selection of special values.
532 * On average 6 out of 16 calls should return a special value.
533 */
534 bType &= 0xf;
535 if (bType == 0 || bType == 1)
536 {
537 /* 0 or Infinity. We only keep fSign here. */
538 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
539 r64.s.uFractionHigh = 0;
540 r64.s.uFractionLow = 0;
541 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
542 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
543 }
544 else if (bType == 2 || bType == 3)
545 {
546 /* Subnormals */
547 if (bType == 3)
548 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
549 else if (r64.s64.uFraction == 0)
550 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
551 r64.s64.uExponent = 0;
552 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
553 }
554 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
555 {
556 /* NaNs */
557 if (bType & 1)
558 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
559 else if (r64.s64.uFraction == 0)
560 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
561 r64.s64.uExponent = 0x7ff;
562 if (bType < 6)
563 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
564 else
565 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
566 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
567 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
568 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
569 }
570 else if (bType < 12)
571 {
572 /* Make sure we have lots of normalized values. */
573 if (r64.s.uExponent == 0)
574 r64.s.uExponent = 1;
575 else if (r64.s.uExponent == 0x7ff)
576 r64.s.uExponent = 0x7fe;
577 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
578 }
579 return r64;
580}
581
582
583static RTFLOAT64U RandR64Src(uint32_t iTest)
584{
585 if (iTest < 16)
586 return RandR64Ex(iTest);
587 return RandR64Ex(RandU8());
588}
589
590
591/** Pairing with a 80-bit floating point arg. */
592static RTFLOAT64U RandR64Src2(uint32_t iTest)
593{
594 if (iTest < 12 * 10)
595 return RandR64Ex(9 - iTest % 10); /* start with normal values */
596 return RandR64Ex(RandU8());
597}
598
599
600static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
601{
602 if (pr32->s.uFraction >= RT_BIT_32(cShift))
603 pr32->s.uFraction >>= cShift;
604 else
605 pr32->s.uFraction = (cShift % 19) + 1;
606}
607
608
609static RTFLOAT32U RandR32Ex(uint8_t bType)
610{
611 RTFLOAT32U r32;
612 r32.u = RandU32();
613
614 /*
615 * Make it more likely that we get a good selection of special values.
616 * On average 6 out of 16 calls should return a special value.
617 */
618 bType &= 0xf;
619 if (bType == 0 || bType == 1)
620 {
621 /* 0 or Infinity. We only keep fSign here. */
622 r32.s.uExponent = bType == 0 ? 0 : 0xff;
623 r32.s.uFraction = 0;
624 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
625 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
626 }
627 else if (bType == 2 || bType == 3)
628 {
629 /* Subnormals */
630 if (bType == 3)
631 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
632 else if (r32.s.uFraction == 0)
633 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
634 r32.s.uExponent = 0;
635 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
636 }
637 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
638 {
639 /* NaNs */
640 if (bType & 1)
641 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
642 else if (r32.s.uFraction == 0)
643 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
644 r32.s.uExponent = 0xff;
645 if (bType < 6)
646 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
647 else
648 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
649 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
650 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
651 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
652 }
653 else if (bType < 12)
654 {
655 /* Make sure we have lots of normalized values. */
656 if (r32.s.uExponent == 0)
657 r32.s.uExponent = 1;
658 else if (r32.s.uExponent == 0xff)
659 r32.s.uExponent = 0xfe;
660 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
661 }
662 return r32;
663}
664
665
666static RTFLOAT32U RandR32Src(uint32_t iTest)
667{
668 if (iTest < 16)
669 return RandR32Ex(iTest);
670 return RandR32Ex(RandU8());
671}
672
673
674/** Pairing with a 80-bit floating point arg. */
675static RTFLOAT32U RandR32Src2(uint32_t iTest)
676{
677 if (iTest < 12 * 10)
678 return RandR32Ex(9 - iTest % 10); /* start with normal values */
679 return RandR32Ex(RandU8());
680}
681
682
683static RTPBCD80U RandD80Src(uint32_t iTest)
684{
685 if (iTest < 3)
686 {
687 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
688 return d80Zero;
689 }
690 if (iTest < 5)
691 {
692 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
693 return d80Ind;
694 }
695
696 RTPBCD80U d80;
697 uint8_t b = RandU8();
698 d80.s.fSign = b & 1;
699
700 if ((iTest & 7) >= 6)
701 {
702 /* Illegal */
703 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
704 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
705 d80.s.abPairs[iPair] = RandU8();
706 }
707 else
708 {
709 /* Normal */
710 d80.s.uPad = 0;
711 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
712 {
713 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
714 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
715 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
716 }
717 }
718 return d80;
719}
720
721
722const char *GenFormatR80(PCRTFLOAT80U plrd)
723{
724 if (RTFLOAT80U_IS_ZERO(plrd))
725 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
726 if (RTFLOAT80U_IS_INF(plrd))
727 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
728 if (RTFLOAT80U_IS_INDEFINITE(plrd))
729 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
730 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
731 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
732 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
733 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
734
735 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
736 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
737 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
738 return pszBuf;
739}
740
741const char *GenFormatR64(PCRTFLOAT64U prd)
742{
743 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
744 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
745 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
746 return pszBuf;
747}
748
749
750const char *GenFormatR32(PCRTFLOAT32U pr)
751{
752 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
753 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
754 return pszBuf;
755}
756
757
758const char *GenFormatD80(PCRTPBCD80U pd80)
759{
760 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
761 size_t off;
762 if (pd80->s.uPad == 0)
763 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
764 else
765 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
766 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
767 while (iPair-- > 0)
768 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
769 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
770 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
771 pszBuf[off++] = ')';
772 pszBuf[off++] = '\0';
773 return pszBuf;
774}
775
776
777const char *GenFormatI64(int64_t i64)
778{
779 if (i64 == INT64_MIN) /* This one is problematic */
780 return "INT64_MIN";
781 if (i64 == INT64_MAX)
782 return "INT64_MAX";
783 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
784 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
785 return pszBuf;
786}
787
788
789const char *GenFormatI64(int64_t const *pi64)
790{
791 return GenFormatI64(*pi64);
792}
793
794
795const char *GenFormatI32(int32_t i32)
796{
797 if (i32 == INT32_MIN) /* This one is problematic */
798 return "INT32_MIN";
799 if (i32 == INT32_MAX)
800 return "INT32_MAX";
801 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
802 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
803 return pszBuf;
804}
805
806
807const char *GenFormatI32(int32_t const *pi32)
808{
809 return GenFormatI32(*pi32);
810}
811
812
813const char *GenFormatI16(int16_t i16)
814{
815 if (i16 == INT16_MIN) /* This one is problematic */
816 return "INT16_MIN";
817 if (i16 == INT16_MAX)
818 return "INT16_MAX";
819 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
820 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
821 return pszBuf;
822}
823
824
825const char *GenFormatI16(int16_t const *pi16)
826{
827 return GenFormatI16(*pi16);
828}
829
830
831static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
832{
833 /* We want to tag the generated source code with the revision that produced it. */
834 static char s_szRev[] = "$Revision: 96348 $";
835 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
836 size_t cchRev = 0;
837 while (RT_C_IS_DIGIT(pszRev[cchRev]))
838 cchRev++;
839
840 RTStrmPrintf(pOut,
841 "/* $Id: tstIEMAImpl.cpp 96348 2022-08-19 17:00:45Z vboxsync $ */\n"
842 "/** @file\n"
843 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
844 " */\n"
845 "\n"
846 "/*\n"
847 " * Copyright (C) 2022 Oracle Corporation\n"
848 " *\n"
849 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
850 " * available from http://www.virtualbox.org. This file is free software;\n"
851 " * you can redistribute it and/or modify it under the terms of the GNU\n"
852 " * General Public License (GPL) as published by the Free Software\n"
853 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
854 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
855 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
856 " */\n"
857 "\n"
858 "#include \"tstIEMAImpl.h\"\n"
859 "\n"
860 ,
861 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
862}
863
864
865static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
866{
867 PRTSTREAM pOut = NULL;
868 int rc = RTStrmOpen(pszFilename, "w", &pOut);
869 if (RT_SUCCESS(rc))
870 {
871 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
872 return pOut;
873 }
874 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
875 return NULL;
876}
877
878
879static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
880{
881 RTStrmPrintf(pOut,
882 "\n"
883 "/* end of file */\n");
884 int rc = RTStrmClose(pOut);
885 if (RT_SUCCESS(rc))
886 return rcExit;
887 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
888}
889
890
891static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
892{
893 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
894}
895
896
897static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
898{
899 RTStrmPrintf(pOut,
900 "};\n"
901 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
902 "\n",
903 pszName, pszName);
904}
905
906#endif /* TSTIEMAIMPL_WITH_GENERATOR */
907
908
909/*
910 * Test helpers.
911 */
912static bool IsTestEnabled(const char *pszName)
913{
914 /* Process excludes first: */
915 uint32_t i = g_cExcludeTestPatterns;
916 while (i-- > 0)
917 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
918 return false;
919
920 /* If no include patterns, everything is included: */
921 i = g_cIncludeTestPatterns;
922 if (!i)
923 return true;
924
925 /* Otherwise only tests in the include patters gets tested: */
926 while (i-- > 0)
927 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
928 return true;
929
930 return false;
931}
932
933
934static bool SubTestAndCheckIfEnabled(const char *pszName)
935{
936 RTTestSub(g_hTest, pszName);
937 if (IsTestEnabled(pszName))
938 return true;
939 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
940 return false;
941}
942
943
944static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
945{
946 if (fActual == fExpected)
947 return "";
948
949 uint32_t const fXor = fActual ^ fExpected;
950 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
951 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
952
953 static struct
954 {
955 const char *pszName;
956 uint32_t fFlag;
957 } const s_aFlags[] =
958 {
959#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
960 EFL_ENTRY(CF),
961 EFL_ENTRY(PF),
962 EFL_ENTRY(AF),
963 EFL_ENTRY(ZF),
964 EFL_ENTRY(SF),
965 EFL_ENTRY(TF),
966 EFL_ENTRY(IF),
967 EFL_ENTRY(DF),
968 EFL_ENTRY(OF),
969 EFL_ENTRY(IOPL),
970 EFL_ENTRY(NT),
971 EFL_ENTRY(RF),
972 EFL_ENTRY(VM),
973 EFL_ENTRY(AC),
974 EFL_ENTRY(VIF),
975 EFL_ENTRY(VIP),
976 EFL_ENTRY(ID),
977 };
978 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
979 if (s_aFlags[i].fFlag & fXor)
980 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
981 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
982 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
983 return pszBuf;
984}
985
986
987static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
988{
989 if (fActual == fExpected)
990 return "";
991
992 uint16_t const fXor = fActual ^ fExpected;
993 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
994 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
995
996 static struct
997 {
998 const char *pszName;
999 uint32_t fFlag;
1000 } const s_aFlags[] =
1001 {
1002#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1003 FSW_ENTRY(IE),
1004 FSW_ENTRY(DE),
1005 FSW_ENTRY(ZE),
1006 FSW_ENTRY(OE),
1007 FSW_ENTRY(UE),
1008 FSW_ENTRY(PE),
1009 FSW_ENTRY(SF),
1010 FSW_ENTRY(ES),
1011 FSW_ENTRY(C0),
1012 FSW_ENTRY(C1),
1013 FSW_ENTRY(C2),
1014 FSW_ENTRY(C3),
1015 FSW_ENTRY(B),
1016 };
1017 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1018 if (s_aFlags[i].fFlag & fXor)
1019 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1020 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1021 if (fXor & X86_FSW_TOP_MASK)
1022 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1023 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1024#if 0 /* For debugging fprem & fprem1 */
1025 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1026 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1027#endif
1028 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1029 return pszBuf;
1030}
1031
1032
1033static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1034{
1035 if (fActual == fExpected)
1036 return "";
1037
1038 uint16_t const fXor = fActual ^ fExpected;
1039 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1040 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1041
1042 static struct
1043 {
1044 const char *pszName;
1045 uint32_t fFlag;
1046 } const s_aFlags[] =
1047 {
1048#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1049 MXCSR_ENTRY(IE),
1050 MXCSR_ENTRY(DE),
1051 MXCSR_ENTRY(ZE),
1052 MXCSR_ENTRY(OE),
1053 MXCSR_ENTRY(UE),
1054 MXCSR_ENTRY(PE),
1055
1056 MXCSR_ENTRY(IM),
1057 MXCSR_ENTRY(DM),
1058 MXCSR_ENTRY(ZM),
1059 MXCSR_ENTRY(OM),
1060 MXCSR_ENTRY(UM),
1061 MXCSR_ENTRY(PM),
1062
1063 MXCSR_ENTRY(DAZ),
1064 MXCSR_ENTRY(FZ),
1065#undef MXCSR_ENTRY
1066 };
1067 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1068 if (s_aFlags[i].fFlag & fXor)
1069 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1070 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1071 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1072 return pszBuf;
1073}
1074
1075
1076static const char *FormatFcw(uint16_t fFcw)
1077{
1078 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1079
1080 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1081 switch (fFcw & X86_FCW_PC_MASK)
1082 {
1083 case X86_FCW_PC_24: pszPC = "PC24"; break;
1084 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1085 case X86_FCW_PC_53: pszPC = "PC53"; break;
1086 case X86_FCW_PC_64: pszPC = "PC64"; break;
1087 }
1088
1089 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1090 switch (fFcw & X86_FCW_RC_MASK)
1091 {
1092 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1093 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1094 case X86_FCW_RC_UP: pszRC = "UP"; break;
1095 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1096 }
1097 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1098
1099 static struct
1100 {
1101 const char *pszName;
1102 uint32_t fFlag;
1103 } const s_aFlags[] =
1104 {
1105#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1106 FCW_ENTRY(IM),
1107 FCW_ENTRY(DM),
1108 FCW_ENTRY(ZM),
1109 FCW_ENTRY(OM),
1110 FCW_ENTRY(UM),
1111 FCW_ENTRY(PM),
1112 { "6M", 64 },
1113 };
1114 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1115 if (fFcw & s_aFlags[i].fFlag)
1116 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1117
1118 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1119 return pszBuf;
1120}
1121
1122
1123static const char *FormatMxcsr(uint32_t fMxcsr)
1124{
1125 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1126
1127 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1128 switch (fMxcsr & X86_MXCSR_RC_MASK)
1129 {
1130 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1131 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1132 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1133 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1134 }
1135
1136 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1137 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1138 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1139
1140 static struct
1141 {
1142 const char *pszName;
1143 uint32_t fFlag;
1144 } const s_aFlags[] =
1145 {
1146#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1147 MXCSR_ENTRY(IE),
1148 MXCSR_ENTRY(DE),
1149 MXCSR_ENTRY(ZE),
1150 MXCSR_ENTRY(OE),
1151 MXCSR_ENTRY(UE),
1152 MXCSR_ENTRY(PE),
1153
1154 MXCSR_ENTRY(IM),
1155 MXCSR_ENTRY(DM),
1156 MXCSR_ENTRY(ZM),
1157 MXCSR_ENTRY(OM),
1158 MXCSR_ENTRY(UM),
1159 MXCSR_ENTRY(PM),
1160 { "6M", 64 },
1161 };
1162 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1163 if (fMxcsr & s_aFlags[i].fFlag)
1164 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1165
1166 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1167 return pszBuf;
1168}
1169
1170
1171static const char *FormatR80(PCRTFLOAT80U pr80)
1172{
1173 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1174 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1175 return pszBuf;
1176}
1177
1178
1179static const char *FormatR64(PCRTFLOAT64U pr64)
1180{
1181 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1182 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1183 return pszBuf;
1184}
1185
1186
1187static const char *FormatR32(PCRTFLOAT32U pr32)
1188{
1189 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1190 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1191 return pszBuf;
1192}
1193
1194
1195static const char *FormatD80(PCRTPBCD80U pd80)
1196{
1197 /* There is only one indefinite endcoding (same as for 80-bit
1198 floating point), so get it out of the way first: */
1199 if (RTPBCD80U_IS_INDEFINITE(pd80))
1200 return "Ind";
1201
1202 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1203 size_t off = 0;
1204 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1205 unsigned cBadDigits = 0;
1206 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1207 while (iPair-- > 0)
1208 {
1209 static const char s_szDigits[] = "0123456789abcdef";
1210 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1211 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1212 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1213 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1214 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1215 }
1216 if (cBadDigits || pd80->s.uPad != 0)
1217 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1218 pszBuf[off] = '\0';
1219 return pszBuf;
1220}
1221
1222
#if 0 /* Currently disabled. */
/**
 * Formats a signed 64-bit integer in base 16 into the next rotating scratch
 * buffer.
 *
 * @returns Pointer to the formatted text (a g_aszBuf entry).
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    size_t const idxSlot = g_idxBuf++ % RT_ELEMENTS(g_aszBuf);
    char * const pszDst  = g_aszBuf[idxSlot];
    RTStrFormatU64(pszDst, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszDst;
}
#endif
1231
1232
1233static const char *FormatI32(int32_t const *piVal)
1234{
1235 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1236 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1237 return pszBuf;
1238}
1239
1240
1241static const char *FormatI16(int16_t const *piVal)
1242{
1243 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1244 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1245 return pszBuf;
1246}
1247
1248
1249/*
1250 * Binary operations.
1251 */
1252TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1253TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1254TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1255TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1256
1257#ifdef TSTIEMAIMPL_WITH_GENERATOR
1258# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1259static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
1260{ \
1261 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1262 { \
1263 PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
1264 ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
1265 PRTSTREAM pOutFn = pOut; \
1266 if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
1267 { \
1268 if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1269 continue; \
1270 pOutFn = pOutCpu; \
1271 } \
1272 \
1273 GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
1274 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1275 { \
1276 a_TestType Test; \
1277 Test.fEflIn = RandEFlags(); \
1278 Test.fEflOut = Test.fEflIn; \
1279 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1280 Test.uDstOut = Test.uDstIn; \
1281 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1282 if (g_aBinU ## a_cBits[iFn].uExtra) \
1283 Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
1284 Test.uMisc = 0; \
1285 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1286 RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
1287 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
1288 } \
1289 GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
1290 } \
1291}
1292#else
1293# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
1294#endif
1295
/**
 * @def TEST_BINARY_OPS
 * Instantiates the generator (see GEN_BINARY_TESTS) and defines
 * BinU<bits>Test(), which replays the recorded tests of every enabled subtest
 * in @a a_aSubTests.
 *
 * Each test is first run on local variables.  If that matches the recorded
 * result it is run again on the global g_pu<bits> / g_pfEfl buffers.  The first
 * variation uses the pfn worker, any further variation the pfnNative worker.
 *
 * @param   a_cBits     Operand width in bits (pasted into identifiers).
 * @param   a_uType     The operand C type.
 * @param   a_Fmt       Format specifier string for operand values.
 * @param   a_TestType  The test record type.
 * @param   a_aSubTests The subtest table.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) \
            continue; \
        a_TestType const * const   paTests   = a_aSubTests[iFn].paTests; \
        uint32_t const             cTests    = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfnWorker = a_aSubTests[iFn].pfn; \
        uint32_t const             cVars     = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (cTests == 0) \
            RTTestSkipped(g_hTest, "no tests"); \
        /* After the first pass, switch to the native worker. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++, pfnWorker = a_aSubTests[iFn].pfnNative) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfnWorker(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst == paTests[iTest].uDstOut \
                    && fEfl == paTests[iTest].fEflOut) \
                { \
                    /* Matched on locals; repeat using the global buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfnWorker(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
                else \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
            } \
        } \
    } \
}
1336
1337
1338/*
1339 * 8-bit binary operations.
1340 */
1341static const BINU8_T g_aBinU8[] =
1342{
1343 ENTRY(add_u8),
1344 ENTRY(add_u8_locked),
1345 ENTRY(adc_u8),
1346 ENTRY(adc_u8_locked),
1347 ENTRY(sub_u8),
1348 ENTRY(sub_u8_locked),
1349 ENTRY(sbb_u8),
1350 ENTRY(sbb_u8_locked),
1351 ENTRY(or_u8),
1352 ENTRY(or_u8_locked),
1353 ENTRY(xor_u8),
1354 ENTRY(xor_u8_locked),
1355 ENTRY(and_u8),
1356 ENTRY(and_u8_locked),
1357 ENTRY(cmp_u8),
1358 ENTRY(test_u8),
1359};
1360TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1361
1362
1363/*
1364 * 16-bit binary operations.
1365 */
1366static const BINU16_T g_aBinU16[] =
1367{
1368 ENTRY(add_u16),
1369 ENTRY(add_u16_locked),
1370 ENTRY(adc_u16),
1371 ENTRY(adc_u16_locked),
1372 ENTRY(sub_u16),
1373 ENTRY(sub_u16_locked),
1374 ENTRY(sbb_u16),
1375 ENTRY(sbb_u16_locked),
1376 ENTRY(or_u16),
1377 ENTRY(or_u16_locked),
1378 ENTRY(xor_u16),
1379 ENTRY(xor_u16_locked),
1380 ENTRY(and_u16),
1381 ENTRY(and_u16_locked),
1382 ENTRY(cmp_u16),
1383 ENTRY(test_u16),
1384 ENTRY_EX(bt_u16, 1),
1385 ENTRY_EX(btc_u16, 1),
1386 ENTRY_EX(btc_u16_locked, 1),
1387 ENTRY_EX(btr_u16, 1),
1388 ENTRY_EX(btr_u16_locked, 1),
1389 ENTRY_EX(bts_u16, 1),
1390 ENTRY_EX(bts_u16_locked, 1),
1391 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1392 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1393 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1394 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1395 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1396 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1397 ENTRY(arpl),
1398};
1399TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1400
1401
1402/*
1403 * 32-bit binary operations.
1404 */
1405static const BINU32_T g_aBinU32[] =
1406{
1407 ENTRY(add_u32),
1408 ENTRY(add_u32_locked),
1409 ENTRY(adc_u32),
1410 ENTRY(adc_u32_locked),
1411 ENTRY(sub_u32),
1412 ENTRY(sub_u32_locked),
1413 ENTRY(sbb_u32),
1414 ENTRY(sbb_u32_locked),
1415 ENTRY(or_u32),
1416 ENTRY(or_u32_locked),
1417 ENTRY(xor_u32),
1418 ENTRY(xor_u32_locked),
1419 ENTRY(and_u32),
1420 ENTRY(and_u32_locked),
1421 ENTRY(cmp_u32),
1422 ENTRY(test_u32),
1423 ENTRY_EX(bt_u32, 1),
1424 ENTRY_EX(btc_u32, 1),
1425 ENTRY_EX(btc_u32_locked, 1),
1426 ENTRY_EX(btr_u32, 1),
1427 ENTRY_EX(btr_u32_locked, 1),
1428 ENTRY_EX(bts_u32, 1),
1429 ENTRY_EX(bts_u32_locked, 1),
1430 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1431 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1432 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1433 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1434 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1435 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1436};
1437TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1438
1439
1440/*
1441 * 64-bit binary operations.
1442 */
1443static const BINU64_T g_aBinU64[] =
1444{
1445 ENTRY(add_u64),
1446 ENTRY(add_u64_locked),
1447 ENTRY(adc_u64),
1448 ENTRY(adc_u64_locked),
1449 ENTRY(sub_u64),
1450 ENTRY(sub_u64_locked),
1451 ENTRY(sbb_u64),
1452 ENTRY(sbb_u64_locked),
1453 ENTRY(or_u64),
1454 ENTRY(or_u64_locked),
1455 ENTRY(xor_u64),
1456 ENTRY(xor_u64_locked),
1457 ENTRY(and_u64),
1458 ENTRY(and_u64_locked),
1459 ENTRY(cmp_u64),
1460 ENTRY(test_u64),
1461 ENTRY_EX(bt_u64, 1),
1462 ENTRY_EX(btc_u64, 1),
1463 ENTRY_EX(btc_u64_locked, 1),
1464 ENTRY_EX(btr_u64, 1),
1465 ENTRY_EX(btr_u64_locked, 1),
1466 ENTRY_EX(bts_u64, 1),
1467 ENTRY_EX(bts_u64_locked, 1),
1468 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1469 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1470 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1471 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1472 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1473 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1474};
1475TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1476
1477
1478/*
1479 * XCHG
1480 */
1481static void XchgTest(void)
1482{
1483 if (!SubTestAndCheckIfEnabled("xchg"))
1484 return;
1485 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1486 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1487 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1488 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1489
1490 static struct
1491 {
1492 uint8_t cb; uint64_t fMask;
1493 union
1494 {
1495 uintptr_t pfn;
1496 FNIEMAIMPLXCHGU8 *pfnU8;
1497 FNIEMAIMPLXCHGU16 *pfnU16;
1498 FNIEMAIMPLXCHGU32 *pfnU32;
1499 FNIEMAIMPLXCHGU64 *pfnU64;
1500 } u;
1501 }
1502 s_aXchgWorkers[] =
1503 {
1504 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1505 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1506 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1507 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1508 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1509 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1510 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1511 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1512 };
1513 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1514 {
1515 RTUINT64U uIn1, uIn2, uMem, uDst;
1516 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1517 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1518 if (uIn1.u == uIn2.u)
1519 uDst.u = uIn2.u = ~uIn2.u;
1520
1521 switch (s_aXchgWorkers[i].cb)
1522 {
1523 case 1:
1524 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1525 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1526 break;
1527 case 2:
1528 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1529 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1530 break;
1531 case 4:
1532 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1533 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1534 break;
1535 case 8:
1536 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1537 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1538 break;
1539 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1540 }
1541
1542 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1543 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1544 }
1545}
1546
1547
1548/*
1549 * XADD
1550 */
1551static void XaddTest(void)
1552{
1553#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1554 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1555 static struct \
1556 { \
1557 const char *pszName; \
1558 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1559 BINU ## a_cBits ## _TEST_T const *paTests; \
1560 uint32_t const *pcTests; \
1561 } const s_aFuncs[] = \
1562 { \
1563 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1564 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1565 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1566 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1567 }; \
1568 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1569 { \
1570 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1571 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1572 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1573 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1574 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1575 { \
1576 uint32_t fEfl = paTests[iTest].fEflIn; \
1577 a_Type uSrc = paTests[iTest].uSrcIn; \
1578 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1579 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1580 if ( fEfl != paTests[iTest].fEflOut \
1581 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1582 || uSrc != paTests[iTest].uDstIn) \
1583 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1584 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1585 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1586 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1587 } \
1588 } \
1589 } while(0)
1590 TEST_XADD(8, uint8_t, "%#04x");
1591 TEST_XADD(16, uint16_t, "%#06x");
1592 TEST_XADD(32, uint32_t, "%#010RX32");
1593 TEST_XADD(64, uint64_t, "%#010RX64");
1594}
1595
1596
1597/*
1598 * CMPXCHG
1599 */
1600
1601static void CmpXchgTest(void)
1602{
1603#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1604 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1605 static struct \
1606 { \
1607 const char *pszName; \
1608 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1609 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1610 BINU ## a_cBits ## _TEST_T const *paTests; \
1611 uint32_t const *pcTests; \
1612 } const s_aFuncs[] = \
1613 { \
1614 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1615 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1616 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1617 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1618 }; \
1619 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1620 { \
1621 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1622 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1623 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1624 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1625 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1626 { \
1627 /* as is (99% likely to be negative). */ \
1628 uint32_t fEfl = paTests[iTest].fEflIn; \
1629 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1630 a_Type uA = paTests[iTest].uDstIn; \
1631 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1632 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1633 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1634 if ( fEfl != paTests[iTest].fEflOut \
1635 || *g_pu ## a_cBits != uExpect \
1636 || uA != paTests[iTest].uSrcIn) \
1637 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1638 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1639 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1640 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1641 /* positive */ \
1642 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1643 uA = paTests[iTest].uDstIn; \
1644 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1645 fEfl = paTests[iTest].fEflIn; \
1646 uA = paTests[iTest].uDstIn; \
1647 *g_pu ## a_cBits = uA; \
1648 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1649 if ( fEfl != fEflExpect \
1650 || *g_pu ## a_cBits != uNew \
1651 || uA != paTests[iTest].uDstIn) \
1652 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1653 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1654 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1655 EFlagsDiff(fEfl, fEflExpect)); \
1656 } \
1657 } \
1658 } while(0)
1659 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1660 TEST_CMPXCHG(16, uint16_t, "%#06x");
1661 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1662#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1663 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1664#endif
1665}
1666
1667static void CmpXchg8bTest(void)
1668{
1669 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
1670 static struct
1671 {
1672 const char *pszName;
1673 FNIEMAIMPLCMPXCHG8B *pfn;
1674 } const s_aFuncs[] =
1675 {
1676 { "cmpxchg8b", iemAImpl_cmpxchg8b },
1677 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1678 };
1679 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1680 {
1681 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1682 continue;
1683 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1684 {
1685 uint64_t const uOldValue = RandU64();
1686 uint64_t const uNewValue = RandU64();
1687
1688 /* positive test. */
1689 RTUINT64U uA, uB;
1690 uB.u = uNewValue;
1691 uA.u = uOldValue;
1692 *g_pu64 = uOldValue;
1693 uint32_t fEflIn = RandEFlags();
1694 uint32_t fEfl = fEflIn;
1695 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1696 if ( fEfl != (fEflIn | X86_EFL_ZF)
1697 || *g_pu64 != uNewValue
1698 || uA.u != uOldValue)
1699 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1700 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1701 fEfl, *g_pu64, uA.u,
1702 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1703 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1704
1705 /* negative */
1706 uint64_t const uExpect = ~uOldValue;
1707 *g_pu64 = uExpect;
1708 uA.u = uOldValue;
1709 uB.u = uNewValue;
1710 fEfl = fEflIn = RandEFlags();
1711 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1712 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1713 || *g_pu64 != uExpect
1714 || uA.u != uExpect)
1715 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1716 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1717 fEfl, *g_pu64, uA.u,
1718 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1719 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1720 }
1721 }
1722}
1723
1724static void CmpXchg16bTest(void)
1725{
1726 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1727 static struct
1728 {
1729 const char *pszName;
1730 FNIEMAIMPLCMPXCHG16B *pfn;
1731 } const s_aFuncs[] =
1732 {
1733 { "cmpxchg16b", iemAImpl_cmpxchg16b },
1734 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1735#if !defined(RT_ARCH_ARM64)
1736 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1737#endif
1738 };
1739 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1740 {
1741 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1742 continue;
1743#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1744 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1745 {
1746 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1747 continue;
1748 }
1749#endif
1750 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1751 {
1752 RTUINT128U const uOldValue = RandU128();
1753 RTUINT128U const uNewValue = RandU128();
1754
1755 /* positive test. */
1756 RTUINT128U uA, uB;
1757 uB = uNewValue;
1758 uA = uOldValue;
1759 *g_pu128 = uOldValue;
1760 uint32_t fEflIn = RandEFlags();
1761 uint32_t fEfl = fEflIn;
1762 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1763 if ( fEfl != (fEflIn | X86_EFL_ZF)
1764 || g_pu128->s.Lo != uNewValue.s.Lo
1765 || g_pu128->s.Hi != uNewValue.s.Hi
1766 || uA.s.Lo != uOldValue.s.Lo
1767 || uA.s.Hi != uOldValue.s.Hi)
1768 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1769 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1770 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1771 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1772 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1773 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1774 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1775 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1776
1777 /* negative */
1778 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1779 *g_pu128 = uExpect;
1780 uA = uOldValue;
1781 uB = uNewValue;
1782 fEfl = fEflIn = RandEFlags();
1783 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1784 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1785 || g_pu128->s.Lo != uExpect.s.Lo
1786 || g_pu128->s.Hi != uExpect.s.Hi
1787 || uA.s.Lo != uExpect.s.Lo
1788 || uA.s.Hi != uExpect.s.Hi)
1789 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1790 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1791 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1792 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1793 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1794 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1795 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1796 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1797 }
1798 }
1799}
1800
1801
/*
 * Double shifts.
 *
 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
 */

/**
 * @def GEN_SHIFT_DBL
 * Defines ShiftDblU<bits>Generate(), which writes a test data array with random
 * inputs and the native worker's outputs for each entry in @a a_aSubTests whose
 * EFLAGS flavour is either native or the current one (g_idxCpuEflFlavour).
 * Expands to nothing unless TSTIEMAIMPL_WITH_GENERATOR is defined.
 *
 * @param   a_cBits     Operand width in bits (pasted into identifiers).
 * @param   a_Fmt       Format specifier string for operand values.
 * @param   a_TestType  The test record type.
 * @param   a_aSubTests The subtest table.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        /* Skip entries meant for another EFLAGS flavour. */ \
        bool const fWanted =    a_aSubTests[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
                             || a_aSubTests[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour; \
        if (!fWanted) \
            continue; \
        \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            a_TestType Rec; \
            Rec.fEflIn  = RandEFlags(); \
            Rec.fEflOut = Rec.fEflIn; \
            Rec.uDstIn  = RandU ## a_cBits ## Dst(iTest); \
            Rec.uDstOut = Rec.uDstIn; \
            Rec.uSrcIn  = RandU ## a_cBits ## Src(iTest); \
            Rec.uMisc   = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Rec.uDstOut, Rec.uSrcIn, Rec.uMisc, &Rec.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Rec.fEflIn, Rec.fEflOut, Rec.uDstIn, Rec.uDstOut, Rec.uSrcIn, Rec.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1836
1837#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1838TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1839\
1840static a_SubTestType const a_aSubTests[] = \
1841{ \
1842 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1843 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1844 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1845 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1846}; \
1847\
1848GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1849\
1850static void ShiftDblU ## a_cBits ## Test(void) \
1851{ \
1852 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1853 { \
1854 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
1855 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1856 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1857 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1858 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1859 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1860 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1861 { \
1862 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1863 { \
1864 uint32_t fEfl = paTests[iTest].fEflIn; \
1865 a_Type uDst = paTests[iTest].uDstIn; \
1866 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1867 if ( uDst != paTests[iTest].uDstOut \
1868 || fEfl != paTests[iTest].fEflOut) \
1869 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1870 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1871 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1872 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1873 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1874 else \
1875 { \
1876 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1877 *g_pfEfl = paTests[iTest].fEflIn; \
1878 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1879 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1880 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1881 } \
1882 } \
1883 pfn = a_aSubTests[iFn].pfnNative; \
1884 } \
1885 } \
1886}
1887TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1888TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1889TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1890
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes double shift test data for the 16, 32 and 64-bit workers, in that
 * order.
 *
 * @param   pOut    The output stream handed to the per-width generators.
 * @param   cTests  The test count handed to the per-width generators.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
#endif
1899
1900static void ShiftDblTest(void)
1901{
1902 ShiftDblU16Test();
1903 ShiftDblU32Test();
1904 ShiftDblU64Test();
1905}
1906
1907
1908/*
1909 * Unary operators.
1910 *
1911 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1912 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines UnaryU<a_cBits>Generate(), which writes one test data array per
 * entry in g_aUnaryU<a_cBits>.
 *
 * Each record holds random input EFLAGS and a random destination value plus
 * what the entry's pfn worker turned them into.  The source and misc fields
 * are written as zero.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); idxFn++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[idxFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            a_TestType TestData; \
            /* Inputs first (EFLAGS, then destination), outputs start as copies. */ \
            TestData.fEflIn  = RandEFlags(); \
            TestData.fEflOut = TestData.fEflIn; \
            TestData.uDstIn  = RandU ## a_cBits(); \
            TestData.uDstOut = TestData.uDstIn; \
            TestData.uSrcIn  = 0; \
            TestData.uMisc   = 0; \
            g_aUnaryU ## a_cBits[idxFn].pfn(&TestData.uDstOut, &TestData.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         TestData.fEflIn, TestData.fEflOut, TestData.uDstIn, TestData.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[idxFn].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1939
1940#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1941TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1942static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1943{ \
1944 ENTRY(inc_u ## a_cBits), \
1945 ENTRY(inc_u ## a_cBits ## _locked), \
1946 ENTRY(dec_u ## a_cBits), \
1947 ENTRY(dec_u ## a_cBits ## _locked), \
1948 ENTRY(not_u ## a_cBits), \
1949 ENTRY(not_u ## a_cBits ## _locked), \
1950 ENTRY(neg_u ## a_cBits), \
1951 ENTRY(neg_u ## a_cBits ## _locked), \
1952}; \
1953\
1954GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1955\
1956static void UnaryU ## a_cBits ## Test(void) \
1957{ \
1958 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1959 { \
1960 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1961 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1962 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1963 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1964 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1965 { \
1966 uint32_t fEfl = paTests[iTest].fEflIn; \
1967 a_Type uDst = paTests[iTest].uDstIn; \
1968 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1969 if ( uDst != paTests[iTest].uDstOut \
1970 || fEfl != paTests[iTest].fEflOut) \
1971 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1972 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1973 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1974 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1975 else \
1976 { \
1977 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1978 *g_pfEfl = paTests[iTest].fEflIn; \
1979 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1980 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1981 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1982 } \
1983 } \
1984 } \
1985}
1986TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1987TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1988TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1989TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1990
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes unary operator test data for the 8, 16, 32 and 64-bit workers, in
 * that order.
 *
 * @param   pOut    The output stream handed to the per-width generators.
 * @param   cTests  The test count handed to the per-width generators.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
#endif
2000
2001static void UnaryTest(void)
2002{
2003 UnaryU8Test();
2004 UnaryU16Test();
2005 UnaryU32Test();
2006 UnaryU64Test();
2007}
2008
2009
2010/*
2011 * Shifts.
2012 *
2013 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2014 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftU<a_cBits>Generate(), which writes one test data array per
 * applicable entry in @a a_aSubTests.
 *
 * Entries whose idxCpuEflFlavour is neither IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
 * nor g_idxCpuEflFlavour are skipped.  The pfnNative worker produces the
 * expected output.
 *
 * Two records are written per iteration: one with random EFLAGS, and a second
 * ("b") that reuses the same destination and shift count with the live EFLAGS
 * bits inverted.
 *
 * The shift count is cast to unsigned before it is passed to the %u format
 * specifier, the same way TEST_SHIFT_DBL does it, so the vararg matches the
 * specifier whatever width uMisc has in @a a_TestType.  The value has been
 * masked to at most (a_cBits * 4 - 1), so the cast cannot lose information.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Second record: same operands, live EFLAGS bits inverted. */ \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2051
2052#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2053TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2054static a_SubTestType const a_aSubTests[] = \
2055{ \
2056 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2057 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2058 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2059 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2060 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2061 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2062 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2063 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2064 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2065 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2066 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2067 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2068 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2069 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2070}; \
2071\
2072GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2073\
2074static void ShiftU ## a_cBits ## Test(void) \
2075{ \
2076 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2077 { \
2078 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2079 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2080 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2081 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2082 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2083 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2084 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2085 { \
2086 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2087 { \
2088 uint32_t fEfl = paTests[iTest].fEflIn; \
2089 a_Type uDst = paTests[iTest].uDstIn; \
2090 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2091 if ( uDst != paTests[iTest].uDstOut \
2092 || fEfl != paTests[iTest].fEflOut ) \
2093 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2094 iTest, iVar == 0 ? "" : "/n", \
2095 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2096 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2097 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2098 else \
2099 { \
2100 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2101 *g_pfEfl = paTests[iTest].fEflIn; \
2102 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2103 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2104 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2105 } \
2106 } \
2107 pfn = a_aSubTests[iFn].pfnNative; \
2108 } \
2109 } \
2110}
2111TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2112TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2113TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2114TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2115
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes shift/rotate test data for the 8, 16, 32 and 64-bit workers, in that
 * order.
 *
 * @param   pOut    The output stream handed to the per-width generators.
 * @param   cTests  The test count handed to the per-width generators.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif
2125
2126static void ShiftTest(void)
2127{
2128 ShiftU8Test();
2129 ShiftU16Test();
2130 ShiftU32Test();
2131 ShiftU64Test();
2132}
2133
2134
2135/*
2136 * Multiplication and division.
2137 *
2138 * Note! The 8-bit functions has a different format, so we need to duplicate things.
2139 * Note! Currently ignoring undefined bits.
2140 */
2141
2142/* U8 */
2143TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2144static INT_MULDIV_U8_T const g_aMulDivU8[] =
2145{
2146 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2147 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2148 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2149 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2150 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2151 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2152 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2153 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2154 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2155 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2156};
2157
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes one MULDIVU8_TEST_T array per applicable entry in g_aMulDivU8.
 *
 * Entries whose idxCpuEflFlavour is neither IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
 * nor g_idxCpuEflFlavour are skipped.  The pfnNative worker produces the
 * expected output and status code.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of records to write per array.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aMulDivU8); idxFn++)
    {
        if (   g_aMulDivU8[idxFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[idxFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        GenerateArrayStart(pOut, g_aMulDivU8[idxFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            MULDIVU8_TEST_T TestData;
            /* Inputs are drawn in this order: EFLAGS, 16-bit destination, 8-bit source. */
            TestData.fEflIn  = RandEFlags();
            TestData.fEflOut = TestData.fEflIn;
            TestData.uDstIn  = RandU16Dst(iTest);
            TestData.uDstOut = TestData.uDstIn;
            TestData.uSrcIn  = RandU8Src(iTest);
            TestData.rc      = g_aMulDivU8[idxFn].pfnNative(&TestData.uDstOut, TestData.uSrcIn, &TestData.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         TestData.fEflIn, TestData.fEflOut, TestData.uDstIn, TestData.uDstOut,
                         TestData.uSrcIn, TestData.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[idxFn].pszName);
    }
}
#endif
2183
2184static void MulDivU8Test(void)
2185{
2186 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2187 {
2188 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2189 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2190 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2191 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2192 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2193 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2194 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2195 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2196 {
2197 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2198 {
2199 uint32_t fEfl = paTests[iTest].fEflIn;
2200 uint16_t uDst = paTests[iTest].uDstIn;
2201 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2202 if ( uDst != paTests[iTest].uDstOut
2203 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2204 || rc != paTests[iTest].rc)
2205 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2206 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2207 "%sexpected %#08x %#06RX16 %d%s\n",
2208 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2209 iVar ? " " : "", fEfl, uDst, rc,
2210 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2211 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2212 else
2213 {
2214 *g_pu16 = paTests[iTest].uDstIn;
2215 *g_pfEfl = paTests[iTest].fEflIn;
2216 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2217 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2218 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2219 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2220 }
2221 }
2222 pfn = g_aMulDivU8[iFn].pfnNative;
2223 }
2224 }
2225}
2226
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines MulDivU<a_cBits>Generate(), which writes one test data array per
 * applicable entry in @a a_aSubTests.
 *
 * Entries whose idxCpuEflFlavour is neither IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
 * nor g_idxCpuEflFlavour are skipped.  The pfnNative worker produces the
 * expected outputs and status code for two destination operands and one
 * source operand.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (   a_aSubTests[idxFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[idxFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            a_TestType TestData; \
            /* Inputs are drawn in this order: EFLAGS, dst1, dst2, src. */ \
            TestData.fEflIn   = RandEFlags(); \
            TestData.fEflOut  = TestData.fEflIn; \
            TestData.uDst1In  = RandU ## a_cBits ## Dst(iTest); \
            TestData.uDst1Out = TestData.uDst1In; \
            TestData.uDst2In  = RandU ## a_cBits ## Dst(iTest); \
            TestData.uDst2Out = TestData.uDst2In; \
            TestData.uSrcIn   = RandU ## a_cBits ## Src(iTest); \
            TestData.rc       = a_aSubTests[idxFn].pfnNative(&TestData.uDst1Out, &TestData.uDst2Out, \
                                                             TestData.uSrcIn, &TestData.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         TestData.fEflIn, TestData.fEflOut, TestData.uDst1In, TestData.uDst1Out, \
                         TestData.uDst2In, TestData.uDst2Out, TestData.uSrcIn, \
                         TestData.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2258
2259#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2260TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2261static a_SubTestType const a_aSubTests [] = \
2262{ \
2263 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2264 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2265 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2266 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2267 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2268 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2269 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2270 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2271}; \
2272\
2273GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2274\
2275static void MulDivU ## a_cBits ## Test(void) \
2276{ \
2277 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2278 { \
2279 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2280 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2281 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2282 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2283 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2284 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2285 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2286 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2287 { \
2288 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2289 { \
2290 uint32_t fEfl = paTests[iTest].fEflIn; \
2291 a_Type uDst1 = paTests[iTest].uDst1In; \
2292 a_Type uDst2 = paTests[iTest].uDst2In; \
2293 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2294 if ( uDst1 != paTests[iTest].uDst1Out \
2295 || uDst2 != paTests[iTest].uDst2Out \
2296 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2297 || rc != paTests[iTest].rc) \
2298 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2299 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2300 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2301 iTest, iVar == 0 ? "" : "/n", \
2302 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2303 fEfl, uDst1, uDst2, rc, \
2304 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2305 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2306 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2307 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2308 else \
2309 { \
2310 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2311 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2312 *g_pfEfl = paTests[iTest].fEflIn; \
2313 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2314 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2315 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2316 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2317 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2318 } \
2319 } \
2320 pfn = a_aSubTests[iFn].pfnNative; \
2321 } \
2322 } \
2323}
2324TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2325TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2326TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2327
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes multiplication/division test data for the 8, 16, 32 and 64-bit
 * workers, in that order.
 *
 * @param   pOut    The output stream handed to the per-width generators.
 * @param   cTests  The test count handed to the per-width generators.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif
2337
2338static void MulDivTest(void)
2339{
2340 MulDivU8Test();
2341 MulDivU16Test();
2342 MulDivU32Test();
2343 MulDivU64Test();
2344}
2345
2346
2347/*
2348 * BSWAP
2349 */
2350static void BswapTest(void)
2351{
2352 if (SubTestAndCheckIfEnabled("bswap_u16"))
2353 {
2354 *g_pu32 = UINT32_C(0x12345678);
2355 iemAImpl_bswap_u16(g_pu32);
2356#if 0
2357 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2358#else
2359 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2360#endif
2361 *g_pu32 = UINT32_C(0xffff1122);
2362 iemAImpl_bswap_u16(g_pu32);
2363#if 0
2364 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2365#else
2366 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2367#endif
2368 }
2369
2370 if (SubTestAndCheckIfEnabled("bswap_u32"))
2371 {
2372 *g_pu32 = UINT32_C(0x12345678);
2373 iemAImpl_bswap_u32(g_pu32);
2374 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2375 }
2376
2377 if (SubTestAndCheckIfEnabled("bswap_u64"))
2378 {
2379 *g_pu64 = UINT64_C(0x0123456789abcdef);
2380 iemAImpl_bswap_u64(g_pu64);
2381 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2382 }
2383}
2384
2385
2386
2387/*********************************************************************************************************************************
2388* Floating point (x87 style) *
2389*********************************************************************************************************************************/
2390
2391/*
2392 * FPU constant loading.
2393 */
2394TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2395
2396static const FPU_LD_CONST_T g_aFpuLdConst[] =
2397{
2398 ENTRY(fld1),
2399 ENTRY(fldl2t),
2400 ENTRY(fldl2e),
2401 ENTRY(fldpi),
2402 ENTRY(fldlg2),
2403 ENTRY(fldln2),
2404 ENTRY(fldz),
2405};
2406
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes one FPU_LD_CONST_TEST_T array per entry in g_aFpuLdConst.
 *
 * The outer loop advances in steps of four: each step draws one random
 * FCW/FSW pair and writes four records, one for each value of the rounding
 * control field.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Upper bound for the outer loop counter.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE FpuState;
    RT_ZERO(FpuState);

    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuLdConst); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[idxFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            FpuState.FCW = RandFcw();
            FpuState.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Replace only the rounding control bits of the control word. */
                FpuState.FCW = (FpuState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[idxFn].pfn(&FpuState, &Result);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             FpuState.FCW, FpuState.FSW, Result.FSW, GenFormatR80(&Result.r80Result),
                             iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[idxFn].pszName);
    }
}
#endif
2433
2434static void FpuLoadConstTest(void)
2435{
2436 /*
2437 * Inputs:
2438 * - FSW: C0, C1, C2, C3
2439 * - FCW: Exception masks, Precision control, Rounding control.
2440 *
2441 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2442 */
2443 X86FXSTATE State;
2444 RT_ZERO(State);
2445 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2446 {
2447 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2448 continue;
2449
2450 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2451 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2452 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2453 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2454 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2455 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2456 {
2457 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2458 {
2459 State.FCW = paTests[iTest].fFcw;
2460 State.FSW = paTests[iTest].fFswIn;
2461 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2462 pfn(&State, &Res);
2463 if ( Res.FSW != paTests[iTest].fFswOut
2464 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2465 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2466 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2467 Res.FSW, FormatR80(&Res.r80Result),
2468 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2469 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2470 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2471 FormatFcw(paTests[iTest].fFcw) );
2472 }
2473 pfn = g_aFpuLdConst[iFn].pfnNative;
2474 }
2475 }
2476}
2477
2478
2479/*
2480 * Load floating point values from memory.
2481 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdR<a_cBits>Generate(), which writes one test data array per
 * entry in @a a_aSubTests.
 *
 * Each iteration draws a random FCW/FSW pair and one input value, then writes
 * four records, one for each value of the rounding control field.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            FpuState.FCW = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                /* Replace only the rounding control bits of the control word. */ \
                FpuState.FCW = (FpuState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FpuState, &Result, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             FpuState.FCW, FpuState.FSW, Result.FSW, GenFormatR80(&Result.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2513
2514#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2515typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2516typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2517TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2518\
2519static const a_SubTestType a_aSubTests[] = \
2520{ \
2521 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2522}; \
2523GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2524\
2525static void FpuLdR ## a_cBits ## Test(void) \
2526{ \
2527 X86FXSTATE State; \
2528 RT_ZERO(State); \
2529 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2530 { \
2531 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2532 \
2533 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2534 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2535 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2536 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2537 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2538 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2539 { \
2540 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2541 { \
2542 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2543 State.FCW = paTests[iTest].fFcw; \
2544 State.FSW = paTests[iTest].fFswIn; \
2545 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2546 pfn(&State, &Res, &InVal); \
2547 if ( Res.FSW != paTests[iTest].fFswOut \
2548 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2549 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2550 "%s -> fsw=%#06x %s\n" \
2551 "%s expected %#06x %s%s%s (%s)\n", \
2552 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2553 FormatR ## a_cBits(&paTests[iTest].InVal), \
2554 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2555 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2556 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2557 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2558 FormatFcw(paTests[iTest].fFcw) ); \
2559 } \
2560 pfn = a_aSubTests[iFn].pfnNative; \
2561 } \
2562 } \
2563}
2564
2565TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2566TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2567TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2568
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes floating point load test data for the 80, 64 and 32-bit input
 * formats, in that order.
 *
 * @param   pOut    The output stream handed to the per-format generators.
 * @param   cTests  The test count handed to the per-format generators.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
#endif
2577
2578static void FpuLdMemTest(void)
2579{
2580 FpuLdR80Test();
2581 FpuLdR64Test();
2582 FpuLdR32Test();
2583}
2584
2585
2586/*
2587 * Load integer values from memory.
2588 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdI<a_cBits>Generate(), which writes one test data array per
 * entry in @a a_aSubTests.
 *
 * Each iteration draws a random FCW/FSW pair and one integer input (the
 * RandU<a_cBits>Src() value cast to @a a_iTypeIn), then writes four records,
 * one for each value of the rounding control field.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            FpuState.FCW = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                /* Replace only the rounding control bits of the control word. */ \
                FpuState.FCW = (FpuState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FpuState, &Result, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             FpuState.FCW, FpuState.FSW, Result.FSW, GenFormatR80(&Result.r80Result), \
                             InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2619
/**
 * Instantiates everything needed to test loading an integer as an 80-bit
 * floating point value for one integer width: the worker function typedefs,
 * the sub-test descriptor type, a one-entry sub-test table (the
 * fild_r80_from_i<a_cBits> worker), the optional generator (expanded from
 * GEN_FPU_LOAD_INT) and the FpuLdI<a_cBits>Test function.
 *
 * The test function replays each recorded entry: it loads FCW and the input
 * FSW into a zeroed X86FXSTATE, calls the worker and reports a failure if the
 * returned FSW or the 80-bit result is not identical to the recorded values.
 * After each pass over the table the worker pointer is switched to pfnNative;
 * the number of passes is whatever COUNT_VARIATIONS yields for the entry.
 *
 * @param   a_cBits         Integer width, pasted into type and function names.
 * @param   a_iTypeIn       C type of the integer input value.
 * @param   a_szFmtIn       String literal with the format specifier for the input.
 * @param   a_SubTestType   Name of the sub-test descriptor type to define.
 * @param   a_aSubTests     Name of the sub-test table to define.
 * @param   a_TestType      Type of the recorded test data entries.
 */
2620 #define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
2621typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
2622typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
2623TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
2624\
2625static const a_SubTestType a_aSubTests[] = \
2626{ \
2627 ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
2628}; \
2629GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2630\
2631static void FpuLdI ## a_cBits ## Test(void) \
2632{ \
2633 X86FXSTATE State; \
2634 RT_ZERO(State); \
2635 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2636 { \
2637 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2638 \
2639 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2640 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2641 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2642 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2643 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2644 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2645 { \
2646 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2647 { \
2648 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
2649 State.FCW = paTests[iTest].fFcw; \
2650 State.FSW = paTests[iTest].fFswIn; \
2651 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2652 pfn(&State, &Res, &iInVal); \
2653 if ( Res.FSW != paTests[iTest].fFswOut \
2654 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2655 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
2656 "%s -> fsw=%#06x %s\n" \
2657 "%s expected %#06x %s%s%s (%s)\n", \
2658 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
2659 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2660 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2661 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2662 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2663 FormatFcw(paTests[iTest].fFcw) ); \
2664 } \
2665 pfn = a_aSubTests[iFn].pfnNative; \
2666 } \
2667 } \
2668}
2669
/* Instantiate the integer load tests for 64-, 32- and 16-bit signed inputs;
   this defines FpuLdI64Test, FpuLdI32Test and FpuLdI16Test (plus the matching
   generators when the generator is compiled in). */
2670TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2671TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2672TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2673
2674#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for all three integer load widths (64, 32, 16 bits, in
 * that order) to @a pOut.
 *
 * @param   pOut    Stream receiving the generated tables.
 * @param   cTests  Test count passed on unchanged to each per-width generator.
 */
2675static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
2676{
2677 FpuLdI64Generate(pOut, cTests);
2678 FpuLdI32Generate(pOut, cTests);
2679 FpuLdI16Generate(pOut, cTests);
2680}
2681#endif
2682
/**
 * Runs the integer load tests for all three widths (64, 32, 16 bits, in that
 * order).
 */
2683static void FpuLdIntTest(void)
2684{
2685 FpuLdI64Test();
2686 FpuLdI32Test();
2687 FpuLdI16Test();
2688}
2689
2690
2691/*
2692 * Load binary coded decimal values from memory.
2693 */
/** Worker signature: FPU state in, result (FSW + 80-bit value) out, packed BCD input. */
2694typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
/** Pointer to a FNIEMAIMPLFPULDR80FROMD80 worker. */
2695typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2696TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2697
/** Sub-tests for loading packed BCD values (single worker: fld_r80_from_d80). */
2698static const FPU_LD_D80_T g_aFpuLdD80[] =
2699{
2700 ENTRY(fld_r80_from_d80)
2701};
2702
2703#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data table for each entry in g_aFpuLdD80.
 *
 * For every test index a random FCW, a random FSW and a BCD input from
 * RandD80Src() are picked; the worker is then called once per rounding
 * control value (0..3) and one table row is printed for each call holding
 * the FCW, the input FSW, the resulting FSW, the 80-bit result and the input.
 *
 * @param   pOut    Stream receiving the generated table.
 * @param   cTests  Number of inputs to generate (four rows are emitted each).
 */
2704static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
2705{
2706 X86FXSTATE State;
2707 RT_ZERO(State);
2708 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2709 {
2710 GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
2711 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2712 {
2713 State.FCW = RandFcw();
2714 State.FSW = RandFsw();
2715 RTPBCD80U InVal = RandD80Src(iTest);
2716
2717 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2718 {
2719 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2720 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2721 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
2722 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
2723 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
2724 iTest, iRounding);
2725 }
2726 }
2727 GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
2728 }
2729}
2730#endif
2731
/**
 * Replays the recorded packed BCD load tests for each entry in g_aFpuLdD80.
 *
 * Each recorded row supplies FCW, input FSW and the BCD input; a failure is
 * reported when the resulting FSW or the 80-bit result is not identical to the
 * recorded one.  The failure message appends " - val" when the value differs.
 * After each pass over the table the worker pointer is switched to pfnNative;
 * the number of passes comes from COUNT_VARIATIONS.
 */
2732static void FpuLdD80Test(void)
2733{
2734 X86FXSTATE State;
2735 RT_ZERO(State);
2736 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2737 {
2738 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2739 continue;
2740
2741 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2742 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2743 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2744 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2745 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2746 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2747 {
2748 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2749 {
2750 RTPBCD80U const InVal = paTests[iTest].InVal;
2751 State.FCW = paTests[iTest].fFcw;
2752 State.FSW = paTests[iTest].fFswIn;
2753 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2754 pfn(&State, &Res, &InVal);
2755 if ( Res.FSW != paTests[iTest].fFswOut
2756 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2757 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2758 "%s -> fsw=%#06x %s\n"
2759 "%s expected %#06x %s%s%s (%s)\n",
2760 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2761 FormatD80(&paTests[iTest].InVal),
2762 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2763 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2764 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2765 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2766 FormatFcw(paTests[iTest].fFcw) );
2767 }
2768 pfn = g_aFpuLdD80[iFn].pfnNative;
2769 }
2770 }
2771}
2772
2773
2774/*
2775 * Store floating point values to memory.
2776 */
2777#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs that FpuStR32Generate appends after the random ones. */
2778static const RTFLOAT80U g_aFpuStR32Specials[] =
2779{
2780 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2781 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2782 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2783 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2784};
/** Hand-picked 80-bit inputs that FpuStR64Generate appends after the random ones. */
2785static const RTFLOAT80U g_aFpuStR64Specials[] =
2786{
2787 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2788 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2789 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2790 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2791 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2792};
/** Input appended by FpuStR80Generate; a single placeholder so the array referenced by GEN_FPU_STORE exists. */
2793static const RTFLOAT80U g_aFpuStR80Specials[] =
2794{
2795 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2796};
/**
 * Defines FpuStR<a_cBits>Generate, which writes the test data table for each
 * entry in @a a_aSubTests.
 *
 * The inputs are cTests values from RandR80Src() followed by the entries of
 * g_aFpuStR<a_cBits>Specials.  For each input a random FCW and FSW are picked
 * and the worker is called for every rounding control value (0..3) combined
 * with every setting of the three adjacent mask bits starting at
 * X86_FCW_OM_BIT (iMask steps by two, so (iMask >> 1) runs 0..7).  One table
 * row is printed per call.  The output buffer is filled with 0xfe bytes
 * before each call.
 *
 * @param   a_cBits         Width of the stored value, pasted into names.
 * @param   a_rdType        Type of the stored value.
 * @param   a_aSubTests     The sub-test table to iterate.
 * @param   a_TestType      Test entry type; stringified for the array header.
 */
2797# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2798static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2799{ \
2800 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
2801 X86FXSTATE State; \
2802 RT_ZERO(State); \
2803 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2804 { \
2805 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2806 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
2807 { \
2808 uint16_t const fFcw = RandFcw(); \
2809 State.FSW = RandFsw(); \
2810 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
2811 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
2812 \
2813 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2814 { \
2815 /* PC doesn't influence these, so leave as is. */ \
2816 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
2817 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
2818 { \
2819 uint16_t uFswOut = 0; \
2820 a_rdType OutVal; \
2821 RT_ZERO(OutVal); \
2822 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2823 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
2824 | (iRounding << X86_FCW_RC_SHIFT); \
2825 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
2826 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
2827 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
2828 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
2829 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
2830 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
2831 } \
2832 } \
2833 } \
2834 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2835 } \
2836}
2837#else
2838# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2839#endif
2840
/**
 * Instantiates everything needed to test storing an 80-bit floating point
 * value as an @a a_cBits wide floating point value: the worker function
 * typedefs, the sub-test descriptor type, a one-entry sub-test table (the
 * fst_r80_to_r<a_cBits> worker), the optional generator (expanded from
 * GEN_FPU_STORE) and the FpuStR<a_cBits>Test function.
 *
 * The test function replays each recorded entry with the output buffer
 * pre-filled with 0xfe bytes and reports a failure if the returned FSW or the
 * stored value is not identical to the recorded one.  After each pass over the
 * table the worker pointer is switched to pfnNative; the number of passes
 * comes from COUNT_VARIATIONS.
 *
 * @param   a_cBits         Width of the stored value, pasted into names.
 * @param   a_rdType        Type of the stored value.
 * @param   a_SubTestType   Name of the sub-test descriptor type to define.
 * @param   a_aSubTests     Name of the sub-test table to define.
 * @param   a_TestType      Type of the recorded test data entries.
 */
2841#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
2842typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
2843 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
2844typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
2845TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
2846\
2847static const a_SubTestType a_aSubTests[] = \
2848{ \
2849 ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
2850}; \
2851GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2852\
2853static void FpuStR ## a_cBits ## Test(void) \
2854{ \
2855 X86FXSTATE State; \
2856 RT_ZERO(State); \
2857 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2858 { \
2859 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2860 \
2861 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2862 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2863 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2864 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2865 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2866 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2867 { \
2868 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2869 { \
2870 RTFLOAT80U const InVal = paTests[iTest].InVal; \
2871 uint16_t uFswOut = 0; \
2872 a_rdType OutVal; \
2873 RT_ZERO(OutVal); \
2874 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2875 State.FCW = paTests[iTest].fFcw; \
2876 State.FSW = paTests[iTest].fFswIn; \
2877 pfn(&State, &uFswOut, &OutVal, &InVal); \
2878 if ( uFswOut != paTests[iTest].fFswOut \
2879 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
2880 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2881 "%s -> fsw=%#06x %s\n" \
2882 "%s expected %#06x %s%s%s (%s)\n", \
2883 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2884 FormatR80(&paTests[iTest].InVal), \
2885 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
2886 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
2887 FswDiff(uFswOut, paTests[iTest].fFswOut), \
2888 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
2889 FormatFcw(paTests[iTest].fFcw) ); \
2890 } \
2891 pfn = a_aSubTests[iFn].pfnNative; \
2892 } \
2893 } \
2894}
2895
/* Instantiate the floating point store tests for 80-, 64- and 32-bit
   destinations; this defines FpuStR80Test, FpuStR64Test and FpuStR32Test
   (plus the matching generators when the generator is compiled in). */
2896TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
2897TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
2898TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2899
2900#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for all three floating point store widths (80, 64, 32
 * bits, in that order) to @a pOut.
 *
 * @param   pOut    Stream receiving the generated tables.
 * @param   cTests  Test count passed on unchanged to each per-width generator.
 */
2901static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2902{
2903 FpuStR80Generate(pOut, cTests);
2904 FpuStR64Generate(pOut, cTests);
2905 FpuStR32Generate(pOut, cTests);
2906}
2907#endif
2908
/**
 * Runs the floating point store tests for all three widths (80, 64, 32 bits,
 * in that order).
 */
2909static void FpuStMemTest(void)
2910{
2911 FpuStR80Test();
2912 FpuStR64Test();
2913 FpuStR32Test();
2914}
2915
2916
2917/*
2918 * Store integer values to memory or register.
2919 */
/* Sub-test descriptor types for the 16-, 32- and 64-bit integer store workers. */
2920TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2921TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2922TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2923
/** 16-bit integer store sub-tests; fistt_r80_to_i16 is listed once per CPU
 *  vendor flavour (AMD and Intel) rather than as a single generic entry. */
2924static const FPU_ST_I16_T g_aFpuStI16[] =
2925{
2926 ENTRY(fist_r80_to_i16),
2927 ENTRY_AMD( fistt_r80_to_i16, 0),
2928 ENTRY_INTEL(fistt_r80_to_i16, 0),
2929};
/** 32-bit integer store sub-tests. */
2930static const FPU_ST_I32_T g_aFpuStI32[] =
2931{
2932 ENTRY(fist_r80_to_i32),
2933 ENTRY(fistt_r80_to_i32),
2934};
/** 64-bit integer store sub-tests. */
2935static const FPU_ST_I64_T g_aFpuStI64[] =
2936{
2937 ENTRY(fist_r80_to_i64),
2938 ENTRY(fistt_r80_to_i64),
2939};
2940
2941#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs that FpuStI16Generate appends after the random
 *  ones.  The unbiased exponents used here range from 13 up to 32. */
2942static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
2943{
2944 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
2945 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
2946 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2947 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2948 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2949 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2950 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2951 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2952 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2953 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2954 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2955 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2956 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2957 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2958 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
2959 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2960 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2961 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2962 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2963 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2964 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2965 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2966 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2967 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
2968 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2969 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
2970 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
2971 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
2972 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
2973 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
2974 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
2975 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
2976 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2977 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2978 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
2979 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
2980 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2981 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2982 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2983 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2984 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
2985 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
2986 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2987 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2988 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2989 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2990 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
2991};
/** Hand-picked 80-bit inputs that FpuStI32Generate appends after the random
 *  ones.  All entries use an unbiased exponent of 30 or 31. */
2992static const RTFLOAT80U g_aFpuStI32Specials[] =
2993{
2994 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2995 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2996 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2997 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
2998 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2999 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3000 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3001 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3002 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3003 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3004 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3005 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3006 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3007 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3008 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3009 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3010};
/** Hand-picked 80-bit inputs that FpuStI64Generate appends after the random
 *  ones.  All entries use an unbiased exponent of 61, 62 or 63. */
3011static const RTFLOAT80U g_aFpuStI64Specials[] =
3012{
3013 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3014 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3015 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3016 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3017 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3018 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3019 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3020 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3021 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3022 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3023 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3024 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3025 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3026 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3027 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3028 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3029 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3030};
3031
/**
 * Defines FpuStI<a_cBits>Generate, which writes the test data table for each
 * entry in @a a_aSubTests.
 *
 * The worker used is pfnNative when the entry has one, otherwise pfn.  Entries
 * whose idxCpuEflFlavour is not IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are skipped
 * unless the flavour equals g_idxCpuEflFlavour, in which case their table is
 * written to @a pOutCpu instead of @a pOut.
 *
 * The inputs are cTests values from RandR80Src() followed by the entries of
 * g_aFpuStI<a_cBits>Specials.  For each input a random FCW and FSW are picked
 * and the worker is called for every rounding control value (0..3) combined
 * with every setting of the three adjacent mask bits starting at
 * X86_FCW_OM_BIT (iMask steps by two, so (iMask >> 1) runs 0..7).  One table
 * row is printed per call.  The output integer is preset to ~2 before each
 * call.
 *
 * @param   a_cBits         Integer width, pasted into names.
 * @param   a_iType         C type of the stored integer.
 * @param   a_szFmt         Not used by the generator body.
 * @param   a_aSubTests     The sub-test table to iterate.
 * @param   a_TestType      Test entry type; stringified for the array header.
 */
3032# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3033static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
3034{ \
3035 X86FXSTATE State; \
3036 RT_ZERO(State); \
3037 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3038 { \
3039 PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
3040 ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
3041 PRTSTREAM pOutFn = pOut; \
3042 if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
3043 { \
3044 if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
3045 continue; \
3046 pOutFn = pOutCpu; \
3047 } \
3048 \
3049 GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
3050 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
3051 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3052 { \
3053 uint16_t const fFcw = RandFcw(); \
3054 State.FSW = RandFsw(); \
3055 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
3056 : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
3057 \
3058 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3059 { \
3060 /* PC doesn't influence these, so leave as is. */ \
3061 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3062 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3063 { \
3064 uint16_t uFswOut = 0; \
3065 a_iType iOutVal = ~(a_iType)2; \
3066 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3067 | (iRounding << X86_FCW_RC_SHIFT); \
3068 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3069 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3070 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3071 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
3072 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
3073 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
3074 } \
3075 } \
3076 } \
3077 GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
3078 } \
3079}
3080#else
3081# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3082#endif
3083
/**
 * Instantiates the optional generator (expanded from GEN_FPU_STORE_INT) and
 * the FpuStI<a_cBits>Test function for one integer store width.  Unlike
 * TEST_FPU_STORE, the sub-test table @a a_aSubTests must already be defined.
 *
 * The test function replays each recorded entry with the output integer
 * preset to ~2 and reports a failure if the returned FSW or the stored
 * integer differs from the recorded one.  After each pass over the table the
 * worker pointer is switched to pfnNative; the number of passes comes from
 * COUNT_VARIATIONS.
 *
 * @param   a_cBits         Integer width, pasted into names.
 * @param   a_iType         C type of the stored integer.
 * @param   a_szFmt         String literal with the format specifier for the output.
 * @param   a_SubTestType   Not used by the macro body.
 * @param   a_aSubTests     The existing sub-test table to iterate.
 * @param   a_TestType      Type of the recorded test data entries.
 */
3084#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3085GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3086\
3087static void FpuStI ## a_cBits ## Test(void) \
3088{ \
3089 X86FXSTATE State; \
3090 RT_ZERO(State); \
3091 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3092 { \
3093 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3094 \
3095 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3096 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3097 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3098 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3099 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3100 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3101 { \
3102 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3103 { \
3104 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3105 uint16_t uFswOut = 0; \
3106 a_iType iOutVal = ~(a_iType)2; \
3107 State.FCW = paTests[iTest].fFcw; \
3108 State.FSW = paTests[iTest].fFswIn; \
3109 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3110 if ( uFswOut != paTests[iTest].fFswOut \
3111 || iOutVal != paTests[iTest].iOutVal) \
3112 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3113 "%s -> fsw=%#06x " a_szFmt "\n" \
3114 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3115 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3116 FormatR80(&paTests[iTest].InVal), \
3117 iVar ? " " : "", uFswOut, iOutVal, \
3118 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3119 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3120 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3121 } \
3122 pfn = a_aSubTests[iFn].pfnNative; \
3123 } \
3124 } \
3125}
3126
3127//fistt_r80_to_i16 diffs for AMD, of course :-)
3128
/* Instantiate the integer store tests for 64-, 32- and 16-bit destinations;
   this defines FpuStI64Test, FpuStI32Test and FpuStI16Test (plus the matching
   generators when the generator is compiled in). */
3129TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3130TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3131TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3132
3133#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for all three integer store widths (64, 32, 16 bits,
 * in that order).
 *
 * @param   pOut     Stream passed on as the first output stream of each per-width generator.
 * @param   pOutCpu  Stream passed on as the second (CPU flavour specific) output stream.
 * @param   cTests   Test count passed on unchanged to each per-width generator.
 */
3134static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3135{
3136 FpuStI64Generate(pOut, pOutCpu, cTests);
3137 FpuStI32Generate(pOut, pOutCpu, cTests);
3138 FpuStI16Generate(pOut, pOutCpu, cTests);
3139}
3140#endif
3141
/**
 * Runs the integer store tests for all three widths (64, 32, 16 bits, in that
 * order).
 */
3142static void FpuStIntTest(void)
3143{
3144 FpuStI64Test();
3145 FpuStI32Test();
3146 FpuStI16Test();
3147}
3148
3149
3150/*
3151 * Store as packed BCD value (memory).
3152 */
/** Worker signature: FPU state in, FSW out, packed BCD output, 80-bit floating point input. */
3153typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
/** Pointer to a FNIEMAIMPLFPUSTR80TOD80 worker. */
3154typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3155TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3156
/** Sub-tests for storing packed BCD values (single worker: fst_r80_to_d80). */
3157static const FPU_ST_D80_T g_aFpuStD80[] =
3158{
3159 ENTRY(fst_r80_to_d80),
3160};
3161
3162#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data table for each entry in g_aFpuStD80.
 *
 * The inputs are cTests values from RandR80Src() followed by the s_aSpecials
 * entries, which sit at and around the largest/smallest value described by
 * the per-entry comments.  For each input a random FCW and FSW are picked and
 * the worker is called for every rounding control value (0..3) combined with
 * every setting of the three adjacent mask bits starting at X86_FCW_OM_BIT
 * (iMask steps by two, so (iMask >> 1) runs 0..7).  One table row is printed
 * per call.
 *
 * @param   pOut    Stream receiving the generated table.
 * @param   cTests  Number of random inputs to generate before the specials.
 */
3163static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
3164{
3165 static RTFLOAT80U const s_aSpecials[] =
3166 {
3167 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
3168 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
3169 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
3170 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
3171 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
3172 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
3173 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
3174 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
3175 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
3176 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
3177 RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
3178 RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
3179 };
3180
3181 X86FXSTATE State;
3182 RT_ZERO(State);
3183 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3184 {
3185 GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
3186 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3187 {
3188 uint16_t const fFcw = RandFcw();
3189 State.FSW = RandFsw();
3190 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
3191
3192 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3193 {
3194 /* PC doesn't influence these, so leave as is. */
3195 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
3196 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
3197 {
3198 uint16_t uFswOut = 0;
3199 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3200 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
3201 | (iRounding << X86_FCW_RC_SHIFT);
3202 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
3203 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
3204 g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
3205 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
3206 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
3207 GenFormatD80(&OutVal), iTest, iRounding, iMask);
3208 }
3209 }
3210 }
3211 GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
3212 }
3213}
3214#endif
3215
3216
/**
 * Replays the recorded packed BCD store tests for each entry in g_aFpuStD80.
 *
 * Each recorded row supplies FCW, input FSW and the 80-bit input; the output
 * buffer is initialised to BCD zero before the call.  A failure is reported
 * when the returned FSW or the stored BCD value is not identical to the
 * recorded one.  The failure message appends " - val" when (and only when) the
 * stored value differs, matching the other load/store tests in this file.
 * After each pass over the table the worker pointer is switched to pfnNative;
 * the number of passes comes from COUNT_VARIATIONS.
 */
3217static void FpuStD80Test(void)
3218{
3219 X86FXSTATE State;
3220 RT_ZERO(State);
3221 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3222 {
3223 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3224 continue;
3225
3226 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3227 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3228 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3229 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3230 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3231 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3232 {
3233 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3234 {
3235 RTFLOAT80U const InVal = paTests[iTest].InVal;
3236 uint16_t uFswOut = 0;
3237 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3238 State.FCW = paTests[iTest].fFcw;
3239 State.FSW = paTests[iTest].fFswIn;
3240 pfn(&State, &uFswOut, &OutVal, &InVal);
3241 if ( uFswOut != paTests[iTest].fFswOut
3242 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3243 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3244 "%s -> fsw=%#06x %s\n"
3245 "%s expected %#06x %s%s%s (%s)\n",
3246 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3247 FormatR80(&paTests[iTest].InVal),
3248 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3249 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3250 FswDiff(uFswOut, paTests[iTest].fFswOut),
/* Flag a value mismatch only when the BCD values actually differ (same test as the failure check above). */
3251 !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3252 FormatFcw(paTests[iTest].fFcw) );
3253 }
3254 pfn = g_aFpuStD80[iFn].pfnNative;
3255 }
3256 }
3257}
3258
3259
3260
3261/*********************************************************************************************************************************
3262* x87 FPU Binary Operations *
3263*********************************************************************************************************************************/
3264
3265/*
3266 * Binary FPU operations on two 80-bit floating point values.
3267 */
/* Sub-test descriptor type for workers taking two 80-bit operands. */
3268TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
/* Extra value attached to the fprem/fprem1 entries via ENTRY_EX below.
   NOTE(review): how the hint is consumed is not visible in this part of the file. */
3269enum { kFpuBinaryHint_fprem = 1, };
3270
/** The binary 80-bit sub-tests.  fpatan, fyl2x and fyl2xp1 are listed once
 *  per CPU vendor flavour (AMD and Intel) because their results differ. */
3271static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3272{
3273 ENTRY(fadd_r80_by_r80),
3274 ENTRY(fsub_r80_by_r80),
3275 ENTRY(fsubr_r80_by_r80),
3276 ENTRY(fmul_r80_by_r80),
3277 ENTRY(fdiv_r80_by_r80),
3278 ENTRY(fdivr_r80_by_r80),
3279 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3280 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3281 ENTRY(fscale_r80_by_r80),
3282 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3283 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3284 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3285 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3286 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3287 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3288};
3289
3290#ifdef TSTIEMAIMPL_WITH_GENERATOR
3291static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3292{
3293 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3294
3295 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3296 {
3297 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3298 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3299 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3300 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3301 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3302 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3303 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3304 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3305 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3306 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3307 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3308 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3309 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3310 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3311 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3312 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3313 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3314 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3315 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3316 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3317 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3318 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3319 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3320 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3321 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3322 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3323 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3324 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3325 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3326 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3327 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3328 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3329 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3330 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3331 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3332 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3333 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3334 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3335 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3336 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3337 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3338 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3339 /* fscale: Negative variants for the essentials of the above. */
3340 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3341 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3342 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3343 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3344 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3345 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3346 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3347 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3348 /* fscale: Some fun with denormals and pseudo-denormals. */
3349 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3350 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3351 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3352 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3353 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3354 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3355 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3356 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3357 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3358 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3359 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3360 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3361 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3362 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3363 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3364 };
3365
3366 X86FXSTATE State;
3367 RT_ZERO(State);
3368 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3369 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3370 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3371 {
3372 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3373 PRTSTREAM pOutFn = pOut;
3374 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3375 {
3376 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3377 continue;
3378 pOutFn = pOutCpu;
3379 }
3380
3381 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3382 uint32_t iTestOutput = 0;
3383 uint32_t cNormalInputPairs = 0;
3384 uint32_t cTargetRangeInputs = 0;
3385 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3386 {
3387 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3388 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3389 bool fTargetRange = false;
3390 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3391 {
3392 cNormalInputPairs++;
3393 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3394 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3395 cTargetRangeInputs += fTargetRange = true;
3396 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3397 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3398 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3399 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3400 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3401 cTargetRangeInputs += fTargetRange = true;
3402 }
3403 }
3404 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3405 {
3406 iTest -= 1;
3407 continue;
3408 }
3409
3410 uint16_t const fFcwExtra = 0;
3411 uint16_t const fFcw = RandFcw();
3412 State.FSW = RandFsw();
3413
3414 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3415 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3416 {
3417 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3418 | (iRounding << X86_FCW_RC_SHIFT)
3419 | (iPrecision << X86_FCW_PC_SHIFT)
3420 | X86_FCW_MASK_ALL;
3421 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3422 pfn(&State, &ResM, &InVal1, &InVal2);
3423 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3424 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3425 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3426
3427 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3428 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3429 pfn(&State, &ResU, &InVal1, &InVal2);
3430 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3431 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3432 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3433
3434 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3435 if (fXcpt)
3436 {
3437 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3438 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3439 pfn(&State, &Res1, &InVal1, &InVal2);
3440 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3441 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3442 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3443 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3444 {
3445 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3446 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3447 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3448 pfn(&State, &Res2, &InVal1, &InVal2);
3449 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3450 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3451 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3452 }
3453 if (!RT_IS_POWER_OF_TWO(fXcpt))
3454 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3455 if (fUnmasked & fXcpt)
3456 {
3457 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3458 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3459 pfn(&State, &Res3, &InVal1, &InVal2);
3460 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3461 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3462 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3463 }
3464 }
3465
3466 /* If the values are in range and caused no exceptions, do the whole series of
3467 partial reminders till we get the non-partial one or run into an exception. */
3468 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3469 {
3470 IEMFPURESULT ResPrev = ResM;
3471 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3472 {
3473 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3474 State.FSW = ResPrev.FSW;
3475 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3476 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3477 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3478 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3479 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3480 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3481 ResPrev = ResSeq;
3482 }
3483 }
3484 }
3485 }
3486 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3487 }
3488}
3489#endif
3490
3491
3492static void FpuBinaryR80Test(void)
3493{
3494 X86FXSTATE State;
3495 RT_ZERO(State);
3496 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3497 {
3498 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3499 continue;
3500
3501 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3502 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3503 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3504 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3505 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3506 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3507 {
3508 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3509 {
3510 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3511 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3512 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3513 State.FCW = paTests[iTest].fFcw;
3514 State.FSW = paTests[iTest].fFswIn;
3515 pfn(&State, &Res, &InVal1, &InVal2);
3516 if ( Res.FSW != paTests[iTest].fFswOut
3517 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3518 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3519 "%s -> fsw=%#06x %s\n"
3520 "%s expected %#06x %s%s%s (%s)\n",
3521 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3522 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3523 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3524 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3525 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3526 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3527 FormatFcw(paTests[iTest].fFcw) );
3528 }
3529 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3530 }
3531 }
3532}
3533
3534
3535/*
3536 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3537 */
/*
 * "Is normal" predicates for the integer operand types.
 *
 * The generator macros paste the second operand's type name onto _IS_NORMAL,
 * so the integer types need stand-ins here.  Every integer value counts as
 * normal; the argument is accepted but never evaluated.
 */
#define int64_t_IS_NORMAL(a)    (1)
#define int32_t_IS_NORMAL(a)    (1)
#define int16_t_IS_NORMAL(a)    (1)
3541
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Fixed input pairs which the generators below emit after the random inputs,
 * one table per second-operand type.
 */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS)
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS)
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        INT32_MAX
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        INT16_MAX
    }, /* whatever */
};

/*
 * Defines FpuBinary<a_UpBits>Generate(pOut, cTests), the test data generator
 * for the sub-tests in a_aSubTests.
 *
 * For each sub-test it writes one array to pOut.  The inputs are cTests random
 * pairs (at least 160) followed by the matching s_aFpuBinary<a_UpBits>Specials
 * entries.  Towards the end of the random range, pairs that are not both
 * normal are redrawn until a quarter of the inputs beyond the 144 standard
 * variations were normal pairs.  Every input pair is run through the worker
 * for all 4 x 4 rounding/precision combinations, each with the exception mask
 * bits all clear and all set.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    /* There are 144 standard input variations for r80 by r80. */ \
    cTests = RT_MAX(160, cTests); \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); \
    \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalPairsSeen = 0; \
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest += 1) \
        { \
            /* Random inputs first, the special pairs once those are exhausted. */ \
            bool const       fRandomInput = iTest < cTests; \
            RTFLOAT80U const InVal1 = fRandomInput \
                                    ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2 = fRandomInput \
                                    ? Rand ## a_UpBits ## Src2(iTest) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalPairsSeen++; \
            else if (   fRandomInput \
                     && cNormalPairsSeen < cMinNormalPairs \
                     && iTest + cMinNormalPairs >= cTests) \
            { \
                /* Short on normal pairs: redo this index with fresh random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcwRand = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            uint16_t const fFcwBase = (uint16_t)(fFcwRand & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)); \
            \
            /* Rounding is the slow index, precision the fast one. */ \
            for (uint16_t iRcPc = 0; iRcPc < 4 * 4; iRcPc++) \
            { \
                uint16_t const iRounding  = (uint16_t)(iRcPc / 4); \
                uint16_t const iPrecision = (uint16_t)(iRcPc % 4); \
                for (uint16_t fXcptMask = 0; fXcptMask <= X86_FCW_MASK_ALL; fXcptMask += X86_FCW_MASK_ALL) \
                { \
                    FpuState.FCW = fFcwBase \
                                 | (iRounding  << X86_FCW_RC_SHIFT) \
                                 | (iPrecision << X86_FCW_PC_SHIFT) \
                                 | fXcptMask; \
                    IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                    a_aSubTests[iFn].pfn(&FpuState, &Res, &InVal1, &InVal2); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                 FpuState.FCW, FpuState.FSW, Res.FSW, \
                                 GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                 GenFormatR80(&Res.r80Result), \
                                 iTest, iRounding, iPrecision, fXcptMask ? 'c' : 'u'); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* Without the generator this expands to nothing. */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3616
3617#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
3618TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
3619\
3620static const a_SubTestType a_aSubTests[] = \
3621{ \
3622 ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
3623 ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
3624 ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
3625 ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
3626 ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
3627 ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
3628}; \
3629\
3630GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3631\
3632static void FpuBinary ## a_UpBits ## Test(void) \
3633{ \
3634 X86FXSTATE State; \
3635 RT_ZERO(State); \
3636 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3637 { \
3638 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3639 \
3640 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3641 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3642 PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
3643 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3644 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3645 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3646 { \
3647 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3648 { \
3649 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
3650 a_Type2 const InVal2 = paTests[iTest].InVal2; \
3651 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3652 State.FCW = paTests[iTest].fFcw; \
3653 State.FSW = paTests[iTest].fFswIn; \
3654 pfn(&State, &Res, &InVal1, &InVal2); \
3655 if ( Res.FSW != paTests[iTest].fFswOut \
3656 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
3657 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
3658 "%s -> fsw=%#06x %s\n" \
3659 "%s expected %#06x %s%s%s (%s)\n", \
3660 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3661 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
3662 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3663 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
3664 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3665 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
3666 FormatFcw(paTests[iTest].fFcw) ); \
3667 } \
3668 pfn = a_aSubTests[iFn].pfnNative; \
3669 } \
3670 } \
3671}
3672
3673TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
3674TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
3675TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
3676TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3677
3678
3679/*
3680 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3681 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Fixed input pairs which the FSW-only generators below emit after the random
 * inputs, one table per second-operand type.
 */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS)
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS)
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS)
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        INT32_MAX
    }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        INT16_MAX
    }, /* whatever */
};

/*
 * Defines FpuBinaryFsw<a_UpBits>Generate(pOut, cTests), the test data
 * generator for the FSW-only sub-tests in a_aSubTests.
 *
 * For each sub-test it writes one array to pOut.  The inputs are cTests random
 * pairs (at least 160) followed by the s_aFpuBinaryFsw<a_UpBits>Specials
 * entries.  Towards the end of the random range, pairs that are not both
 * normal are redrawn until a quarter of the inputs beyond the 144 standard
 * variations were normal pairs.  Each input pair is run twice: with the FCW
 * exception mask bits all clear and all set.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    /* There are 144 standard input variations for r80 by r80. */ \
    cTests = RT_MAX(160, cTests); \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); \
    \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalPairsSeen = 0; \
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest += 1) \
        { \
            /* Random inputs first, the special pairs once those are exhausted. */ \
            bool const       fRandomInput = iTest < cTests; \
            RTFLOAT80U const InVal1 = fRandomInput \
                                    ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2 = fRandomInput \
                                    ? Rand ## a_UpBits ## Src2(iTest) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalPairsSeen++; \
            else if (   fRandomInput \
                     && cNormalPairsSeen < cMinNormalPairs \
                     && iTest + cMinNormalPairs >= cTests) \
            { \
                /* Short on normal pairs: redo this index with fresh random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcwRand = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t fXcptMask = 0; fXcptMask <= X86_FCW_MASK_ALL; fXcptMask += X86_FCW_MASK_ALL) \
            { \
                FpuState.FCW = (fFcwRand & ~(X86_FCW_MASK_ALL)) | fXcptMask; \
                uint16_t fFswResult = 0; \
                a_aSubTests[iFn].pfn(&FpuState, &fFswResult, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             FpuState.FCW, FpuState.FSW, fFswResult, \
                             GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, fXcptMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* Without the generator this expands to nothing. */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3753
3754#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
3755TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
3756\
3757static const a_SubTestType a_aSubTests[] = \
3758{ \
3759 __VA_ARGS__ \
3760}; \
3761\
3762GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3763\
3764static void FpuBinaryFsw ## a_UpBits ## Test(void) \
3765{ \
3766 X86FXSTATE State; \
3767 RT_ZERO(State); \
3768 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3769 { \
3770 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3771 \
3772 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3773 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3774 PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
3775 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3776 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3777 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3778 { \
3779 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3780 { \
3781 uint16_t fFswOut = 0; \
3782 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
3783 a_Type2 const InVal2 = paTests[iTest].InVal2; \
3784 State.FCW = paTests[iTest].fFcw; \
3785 State.FSW = paTests[iTest].fFswIn; \
3786 pfn(&State, &fFswOut, &InVal1, &InVal2); \
3787 if (fFswOut != paTests[iTest].fFswOut) \
3788 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
3789 "%s -> fsw=%#06x\n" \
3790 "%s expected %#06x %s (%s)\n", \
3791 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3792 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
3793 iVar ? " " : "", fFswOut, \
3794 iVar ? " " : "", paTests[iTest].fFswOut, \
3795 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
3796 } \
3797 pfn = a_aSubTests[iFn].pfnNative; \
3798 } \
3799 } \
3800}
3801
3802TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
3803TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
3804TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
3805TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
3806TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3807
3808
3809/*
3810 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3811 */
/* Sub-test descriptor type for workers with the PFNIEMAIMPLFPUR80EFL signature. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** The sub-tests driven by FpuBinaryEflR80Generate and FpuBinaryEflR80Test. */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3819
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed input pairs emitted after the random inputs by FpuBinaryEflR80Generate. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS)
    }, /* whatever */
};

/**
 * Generates the test data arrays for the sub-tests in g_aFpuBinaryEflR80.
 *
 * The inputs are cTests random pairs (at least 160) followed by the entries
 * of s_aFpuBinaryEflR80Specials.  Towards the end of the random range, pairs
 * that are not both normal are redrawn until a quarter of the inputs beyond
 * the 144 standard variations were normal pairs.  Each pair is run twice:
 * with the FCW exception mask bits all clear and all set.
 *
 * @param   pOut    The stream the arrays are written to.
 * @param   cTests  Requested number of random input pairs; raised to 160 if
 *                  lower.
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* There are 144 standard input variations. */
    cTests = RT_MAX(160, cTests);
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials);

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalPairsSeen = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest += 1)
        {
            /* Random inputs first, the special pairs once those are exhausted. */
            bool const       fRandomInput = iTest < cTests;
            RTFLOAT80U const InVal1 = fRandomInput ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = fRandomInput ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalPairsSeen++;
            else if (   fRandomInput
                     && cNormalPairsSeen < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests)
            {
                /* Short on normal pairs: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwRand = RandFcw();
            FpuState.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t fXcptMask = 0; fXcptMask <= X86_FCW_MASK_ALL; fXcptMask += X86_FCW_MASK_ALL)
            {
                FpuState.FCW = (fFcwRand & ~(X86_FCW_MASK_ALL)) | fXcptMask;
                uint16_t       fFswResult = 0;
                uint32_t const fEflResult = g_aFpuBinaryEflR80[iFn].pfn(&FpuState, &fFswResult, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             FpuState.FCW, FpuState.FSW, fFswResult,
                             GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflResult,
                             iTest, fXcptMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3868
3869static void FpuBinaryEflR80Test(void)
3870{
3871 X86FXSTATE State;
3872 RT_ZERO(State);
3873 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3874 {
3875 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3876 continue;
3877
3878 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3879 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3880 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3881 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3882 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3883 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3884 {
3885 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3886 {
3887 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3888 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3889 State.FCW = paTests[iTest].fFcw;
3890 State.FSW = paTests[iTest].fFswIn;
3891 uint16_t uFswOut = 0;
3892 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3893 if ( uFswOut != paTests[iTest].fFswOut
3894 || fEflOut != paTests[iTest].fEflOut)
3895 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3896 "%s -> fsw=%#06x efl=%#08x\n"
3897 "%s expected %#06x %#08x %s%s (%s)\n",
3898 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3899 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3900 iVar ? " " : "", uFswOut, fEflOut,
3901 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3902 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3903 FormatFcw(paTests[iTest].fFcw));
3904 }
3905 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3906 }
3907 }
3908}
3909
3910
3911/*********************************************************************************************************************************
3912* x87 FPU Unary Operations *
3913*********************************************************************************************************************************/
3914
3915/*
3916 * Unary FPU operations on one 80-bit floating point value.
3917 *
3918 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3919 * a rounding error or not.
3920 */
/* Sub-test descriptor type for workers with the PFNIEMAIMPLFPUR80UNARY signature. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Values for the extra field of the table entries below; the generator checks
   for kUnary_Rounding_F2xm1 when deciding whether a result may carry a
   rounding error. */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
/** The unary 80-bit sub-tests; f2xm1, fsin and fcos have separate AMD and Intel entries. */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX(      fabs_r80,       kUnary_Accurate),
    ENTRY_EX(      fchs_r80,       kUnary_Accurate),
    ENTRY_AMD_EX(  f2xm1_r80,   0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80,   0, kUnary_Rounding_F2xm1),
    ENTRY_EX(      fsqrt_r80,      kUnary_Accurate),
    ENTRY_EX(      frndint_r80,    kUnary_Accurate),
    ENTRY_AMD_EX(  fsin_r80,    0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80,    0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX(  fcos_r80,    0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80,    0, kUnary_Accurate_Trigonometry),
};
3937
3938#ifdef TSTIEMAIMPL_WITH_GENERATOR
3939
3940static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3941{
3942 if ( enmKind == kUnary_Rounding_F2xm1
3943 && RTFLOAT80U_IS_NORMAL(pr80Val)
3944 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3945 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3946 return true;
3947 return false;
3948}
3949
/**
 * Generates the test-data arrays for the sub-tests listed in g_aFpuUnaryR80.
 *
 * Each input value (cTests random ones followed by the s_aSpecials values) is
 * run through the worker for all 4x4 rounding/precision control combinations,
 * each with several exception mask variations.  One initializer row is printed
 * per worker invocation.
 *
 * @param   pOut        Stream receiving the arrays of entries whose
 *                      idxCpuEflFlavour is IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.
 * @param   pOutCpu     Stream receiving the arrays of entries whose flavour
 *                      equals g_idxCpuEflFlavour; entries with any other
 *                      flavour are skipped.
 * @param   cTests      Number of random input values to generate per sub-test.
 */
3950 static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3951 {
3952 static RTFLOAT80U const s_aSpecials[] =
3953 {
3954 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
3955 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
3956 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
3957 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
3958 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
3959 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
3960 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
3961 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
3962 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
3963 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
3964 };
3965 X86FXSTATE State;
3966 RT_ZERO(State);
/* At least a quarter of the random inputs are required to be normal values. */
3967 uint32_t cMinNormals = cTests / 4;
3968 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3969 {
/* Prefer the native worker when the entry has one. */
3970 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
3971 PRTSTREAM pOutFn = pOut;
3972 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3973 {
3974 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3975 continue;
3976 pOutFn = pOutCpu;
3977 }
3978
3979 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
/* Running index of the emitted rows; only used in the row comments. */
3980 uint32_t iTestOutput = 0;
3981 uint32_t cNormalInputs = 0;
3982 uint32_t cTargetRangeInputs = 0;
3983 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3984 {
3985 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
3986 if (RTFLOAT80U_IS_NORMAL(&InVal))
3987 {
/* NOTE(review): inside this 'if' the two ternaries below always take their first
   branch, so the 2^64..2^-64 alternative is currently unreachable - confirm
   whether other uExtra kinds were meant to be covered by the condition too. */
3988 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
3989 {
3990 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
3991 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
3992 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
3993 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
3994 cTargetRangeInputs++;
/* Too few inputs inside the target exponent window and the random inputs are
   running out: replace the exponent with a random one inside the window. */
3995 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
3996 {
3997 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
3998 cTargetRangeInputs++;
3999 }
4000 }
4001 cNormalInputs++;
4002 }
/* Too few normal inputs and the random inputs are running out: redo this
   iteration (the decrement cancels the loop increment) to draw a new value. */
4003 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4004 {
4005 iTest -= 1;
4006 continue;
4007 }
4008
/* 0x80 is OR'ed into the emitted FCW as the "rounding error is okay" marker;
   FpuUnaryR80Test strips it again before running the worker. */
4009 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4010 uint16_t const fFcw = RandFcw();
4011 State.FSW = RandFsw();
4012
4013 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4014 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4015 {
/* Variation 'm': all exception mask bits (X86_FCW_MASK_ALL) set. */
4016 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4017 | (iRounding << X86_FCW_RC_SHIFT)
4018 | (iPrecision << X86_FCW_PC_SHIFT)
4019 | X86_FCW_MASK_ALL;
4020 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4021 pfn(&State, &ResM, &InVal);
4022 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4023 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4024 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4025
/* Variation 'u': all exception mask bits cleared. */
4026 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4027 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4028 pfn(&State, &ResU, &InVal);
4029 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4030 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4031 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4032
/* Exception bits reported by either of the two runs above, X86_FSW_SF excluded. */
4033 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4034 if (fXcpt)
4035 {
/* Set exactly the mask bits corresponding to the reported exceptions. */
4036 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4037 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4038 pfn(&State, &Res1, &InVal);
4039 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4040 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4041 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
/* That run reported exception bits not yet in fXcpt: add them and run again. */
4042 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4043 {
4044 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4045 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4046 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4047 pfn(&State, &Res2, &InVal);
4048 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4049 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4050 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4051 }
/* With more than one bit collected in fXcpt, additionally run once per collected
   bit with just that one bit removed from the mask. */
4052 if (!RT_IS_POWER_OF_TWO(fXcpt))
4053 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4054 if (fUnmasked & fXcpt)
4055 {
4056 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4057 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4058 pfn(&State, &Res3, &InVal);
4059 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4060 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4061 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4062 }
4063 }
4064 }
4065 }
4066 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4067 }
4068 }
4069#endif
4070
4071static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4072{
4073 if (fFcw1 == fFcw2)
4074 return true;
4075 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4076 {
4077 *pfRndErr = true;
4078 return true;
4079 }
4080 return false;
4081}
4082
4083static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4084{
4085 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4086 return true;
4087 if ( fRndErrOk
4088 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4089 {
4090 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4091 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4092 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4093 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4094 ||
4095 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4096 && pr80Val1->s.uMantissa == UINT64_MAX
4097 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4098 ||
4099 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4100 && pr80Val2->s.uMantissa == UINT64_MAX
4101 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4102 {
4103 *pfRndErr = true;
4104 return true;
4105 }
4106 }
4107 return false;
4108}
4109
4110
4111static void FpuUnaryR80Test(void)
4112{
4113 X86FXSTATE State;
4114 RT_ZERO(State);
4115 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4116 {
4117 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4118 continue;
4119
4120 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4121 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4122 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4123 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4124 uint32_t cRndErrs = 0;
4125 uint32_t cPossibleRndErrs = 0;
4126 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4127 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4128 {
4129 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4130 {
4131 RTFLOAT80U const InVal = paTests[iTest].InVal;
4132 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4133 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4134 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4135 State.FSW = paTests[iTest].fFswIn;
4136 pfn(&State, &Res, &InVal);
4137 bool fRndErr = false;
4138 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4139 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4140 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4141 "%s -> fsw=%#06x %s\n"
4142 "%s expected %#06x %s%s%s%s (%s)\n",
4143 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4144 FormatR80(&paTests[iTest].InVal),
4145 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4146 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4147 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4148 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4149 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4150 cRndErrs += fRndErr;
4151 cPossibleRndErrs += fRndErrOk;
4152 }
4153 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4154 }
4155 if (cPossibleRndErrs > 0)
4156 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4157 }
4158}
4159
4160
4161/*
4162 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4163 */
4164 TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4165
/** The FSW-only unary sub-tests.  FpuUnaryFswR80Generate treats entries with
 *  uExtra == 1 as fxam, which gets a tag word (FTW) set up instead of the
 *  rounding/precision/mask variations. */
4166 static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4167 {
4168 ENTRY(ftst_r80),
/* The 1 is the uExtra value selecting the fxam handling. */
4169 ENTRY_EX(fxam_r80, 1),
4170 };
4171
4172#ifdef TSTIEMAIMPL_WITH_GENERATOR
4173static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4174{
4175 static RTFLOAT80U const s_aSpecials[] =
4176 {
4177 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4178 };
4179
4180 X86FXSTATE State;
4181 RT_ZERO(State);
4182 uint32_t cMinNormals = cTests / 4;
4183 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4184 {
4185 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4186 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4187 PRTSTREAM pOutFn = pOut;
4188 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4189 {
4190 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4191 continue;
4192 pOutFn = pOutCpu;
4193 }
4194 State.FTW = 0;
4195
4196 GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4197 uint32_t cNormalInputs = 0;
4198 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4199 {
4200 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4201 if (RTFLOAT80U_IS_NORMAL(&InVal))
4202 cNormalInputs++;
4203 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4204 {
4205 iTest -= 1;
4206 continue;
4207 }
4208
4209 uint16_t const fFcw = RandFcw();
4210 State.FSW = RandFsw();
4211 if (!fIsFxam)
4212 {
4213 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4214 {
4215 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4216 {
4217 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4218 {
4219 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4220 | (iRounding << X86_FCW_RC_SHIFT)
4221 | (iPrecision << X86_FCW_PC_SHIFT)
4222 | iMask;
4223 uint16_t fFswOut = 0;
4224 pfn(&State, &fFswOut, &InVal);
4225 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4226 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4227 iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4228 }
4229 }
4230 }
4231 }
4232 else
4233 {
4234 uint16_t fFswOut = 0;
4235 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4236 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4237 State.FCW = fFcw;
4238 pfn(&State, &fFswOut, &InVal);
4239 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4240 fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4241 }
4242 }
4243 GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4244 }
4245}
4246#endif
4247
4248
4249static void FpuUnaryFswR80Test(void)
4250{
4251 X86FXSTATE State;
4252 RT_ZERO(State);
4253 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4254 {
4255 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4256 continue;
4257
4258 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4259 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4260 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4261 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4262 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4263 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4264 {
4265 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4266 {
4267 RTFLOAT80U const InVal = paTests[iTest].InVal;
4268 uint16_t fFswOut = 0;
4269 State.FSW = paTests[iTest].fFswIn;
4270 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4271 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4272 pfn(&State, &fFswOut, &InVal);
4273 if (fFswOut != paTests[iTest].fFswOut)
4274 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4275 "%s -> fsw=%#06x\n"
4276 "%s expected %#06x %s (%s%s)\n",
4277 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4278 FormatR80(&paTests[iTest].InVal),
4279 iVar ? " " : "", fFswOut,
4280 iVar ? " " : "", paTests[iTest].fFswOut,
4281 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4282 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4283 }
4284 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4285 }
4286 }
4287}
4288
4289/*
4290 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4291 */
4292 TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4293
/** The two-output unary sub-tests.
 *  @note FpuUnaryTwoR80Generate special-cases index 0 (no exponent window
 *        steering), so the order of the entries matters to the generator. */
4294 static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4295 {
4296 ENTRY(fxtract_r80_r80),
4297 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4298 ENTRY_INTEL(fptan_r80_r80, 0),
4299 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4300 ENTRY_INTEL(fsincos_r80_r80, 0),
4301 };
4302
4303#ifdef TSTIEMAIMPL_WITH_GENERATOR
4304static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4305{
4306 static RTFLOAT80U const s_aSpecials[] =
4307 {
4308 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4309 };
4310
4311 X86FXSTATE State;
4312 RT_ZERO(State);
4313 uint32_t cMinNormals = cTests / 4;
4314 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4315 {
4316 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4317 PRTSTREAM pOutFn = pOut;
4318 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4319 {
4320 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4321 continue;
4322 pOutFn = pOutCpu;
4323 }
4324
4325 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4326 uint32_t iTestOutput = 0;
4327 uint32_t cNormalInputs = 0;
4328 uint32_t cTargetRangeInputs = 0;
4329 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4330 {
4331 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4332 if (RTFLOAT80U_IS_NORMAL(&InVal))
4333 {
4334 if (iFn != 0)
4335 {
4336 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4337 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4338 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4339 cTargetRangeInputs++;
4340 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4341 {
4342 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4343 cTargetRangeInputs++;
4344 }
4345 }
4346 cNormalInputs++;
4347 }
4348 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4349 {
4350 iTest -= 1;
4351 continue;
4352 }
4353
4354 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4355 uint16_t const fFcw = RandFcw();
4356 State.FSW = RandFsw();
4357
4358 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4359 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4360 {
4361 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4362 | (iRounding << X86_FCW_RC_SHIFT)
4363 | (iPrecision << X86_FCW_PC_SHIFT)
4364 | X86_FCW_MASK_ALL;
4365 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4366 pfn(&State, &ResM, &InVal);
4367 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4368 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4369 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4370
4371 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4372 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4373 pfn(&State, &ResU, &InVal);
4374 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4375 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4376 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4377
4378 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4379 if (fXcpt)
4380 {
4381 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4382 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4383 pfn(&State, &Res1, &InVal);
4384 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4385 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4386 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4387 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4388 {
4389 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4390 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4391 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4392 pfn(&State, &Res2, &InVal);
4393 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4394 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4395 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4396 }
4397 if (!RT_IS_POWER_OF_TWO(fXcpt))
4398 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4399 if (fUnmasked & fXcpt)
4400 {
4401 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4402 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4403 pfn(&State, &Res3, &InVal);
4404 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4405 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4406 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4407 }
4408 }
4409 }
4410 }
4411 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4412 }
4413}
4414#endif
4415
4416
4417static void FpuUnaryTwoR80Test(void)
4418{
4419 X86FXSTATE State;
4420 RT_ZERO(State);
4421 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4422 {
4423 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4424 continue;
4425
4426 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4427 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4428 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4429 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4430 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4431 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4432 {
4433 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4434 {
4435 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4436 RTFLOAT80U const InVal = paTests[iTest].InVal;
4437 State.FCW = paTests[iTest].fFcw;
4438 State.FSW = paTests[iTest].fFswIn;
4439 pfn(&State, &Res, &InVal);
4440 if ( Res.FSW != paTests[iTest].fFswOut
4441 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4442 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4443 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4444 "%s -> fsw=%#06x %s %s\n"
4445 "%s expected %#06x %s %s %s%s%s (%s)\n",
4446 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4447 FormatR80(&paTests[iTest].InVal),
4448 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4449 iVar ? " " : "", paTests[iTest].fFswOut,
4450 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4451 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4452 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4453 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4454 }
4455 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4456 }
4457 }
4458}
4459
4460
4461/*********************************************************************************************************************************
4462* SSE floating point Binary Operations *
4463*********************************************************************************************************************************/
4464
4465/*
4466 * Binary SSE operations on packed single precision floating point values.
4467 */
4468 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4469
/** The packed single precision binary sub-tests.  SseBinaryR32Generate writes
 *  the test data for each entry as raw SSE_BINARY_TEST_T records to a file
 *  named after the entry, and SseBinaryR32Test divides *pcTests by
 *  sizeof(SSE_BINARY_TEST_T) to get the record count. */
4470 static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4471 {
4472 ENTRY_BIN(addps_u128),
4473 ENTRY_BIN(mulps_u128),
4474 ENTRY_BIN(subps_u128),
4475 ENTRY_BIN(minps_u128),
4476 ENTRY_BIN(divps_u128),
4477 ENTRY_BIN(maxps_u128),
4478 };
4479
4480#ifdef TSTIEMAIMPL_WITH_GENERATOR
4481static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4482{
4483 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4484
4485 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4486 {
4487 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4488 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4489 /** @todo More specials. */
4490 };
4491
4492 X86FXSTATE State;
4493 RT_ZERO(State);
4494 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4495 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4496 {
4497 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4498
4499 PRTSTREAM pStrmOut = NULL;
4500 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4501 if (RT_FAILURE(rc))
4502 {
4503 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4504 return RTEXITCODE_FAILURE;
4505 }
4506
4507 uint32_t cNormalInputPairs = 0;
4508 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4509 {
4510 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4511
4512 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4513 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4514 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4515 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4516
4517 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4518 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4519 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4520 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4521
4522 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4523 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4524 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4525 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4526 cNormalInputPairs++;
4527 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4528 {
4529 iTest -= 1;
4530 continue;
4531 }
4532
4533 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4534 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4535 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4536 for (uint8_t iFz = 0; iFz < 2; iFz++)
4537 {
4538 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4539 | (iRounding << X86_MXCSR_RC_SHIFT)
4540 | (iDaz ? X86_MXCSR_DAZ : 0)
4541 | (iFz ? X86_MXCSR_FZ : 0)
4542 | X86_MXCSR_XCPT_MASK;
4543 IEMSSERESULT ResM; RT_ZERO(ResM);
4544 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4545 TestData.fMxcsrIn = State.MXCSR;
4546 TestData.fMxcsrOut = ResM.MXCSR;
4547 TestData.OutVal = ResM.uResult;
4548 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4549
4550 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4551 IEMSSERESULT ResU; RT_ZERO(ResU);
4552 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4553 TestData.fMxcsrIn = State.MXCSR;
4554 TestData.fMxcsrOut = ResU.MXCSR;
4555 TestData.OutVal = ResU.uResult;
4556 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4557
4558 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4559 if (fXcpt)
4560 {
4561 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4562 IEMSSERESULT Res1; RT_ZERO(Res1);
4563 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4564 TestData.fMxcsrIn = State.MXCSR;
4565 TestData.fMxcsrOut = Res1.MXCSR;
4566 TestData.OutVal = Res1.uResult;
4567 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4568
4569 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4570 {
4571 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4572 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4573 IEMSSERESULT Res2; RT_ZERO(Res2);
4574 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4575 TestData.fMxcsrIn = State.MXCSR;
4576 TestData.fMxcsrOut = Res2.MXCSR;
4577 TestData.OutVal = Res2.uResult;
4578 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4579 }
4580 if (!RT_IS_POWER_OF_TWO(fXcpt))
4581 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4582 if (fUnmasked & fXcpt)
4583 {
4584 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4585 IEMSSERESULT Res3; RT_ZERO(Res3);
4586 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4587 TestData.fMxcsrIn = State.MXCSR;
4588 TestData.fMxcsrOut = Res3.MXCSR;
4589 TestData.OutVal = Res3.uResult;
4590 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4591 }
4592 }
4593 }
4594 }
4595 rc = RTStrmClose(pStrmOut);
4596 if (RT_FAILURE(rc))
4597 {
4598 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4599 return RTEXITCODE_FAILURE;
4600 }
4601 }
4602
4603 return RTEXITCODE_SUCCESS;
4604}
4605#endif
4606
4607static void SseBinaryR32Test(void)
4608{
4609 X86FXSTATE State;
4610 RT_ZERO(State);
4611 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4612 {
4613 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4614 continue;
4615
4616 uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4617 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4618 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4619 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4620 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4621 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4622 {
4623 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4624 {
4625 IEMSSERESULT Res; RT_ZERO(Res);
4626
4627 State.MXCSR = paTests[iTest].fMxcsrIn;
4628 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4629 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4630 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4631 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4632 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4633 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4634 || !fValsIdentical)
4635 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4636 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4637 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4638 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4639 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4640 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4641 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4642 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4643 iVar ? " " : "", Res.MXCSR,
4644 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4645 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4646 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4647 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4648 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4649 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4650 !fValsIdentical ? " - val" : "",
4651 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4652 }
4653 pfn = g_aSseBinaryR32[iFn].pfnNative;
4654 }
4655 }
4656}
4657
4658
4659/*
4660 * Binary SSE operations on packed double precision floating point values.
4661 */
4662 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4663
/** The packed double precision binary sub-tests.  SseBinaryR64Generate writes
 *  the test data for each entry as raw SSE_BINARY_TEST_T records to a file
 *  named after the entry. */
4664 static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4665 {
4666 ENTRY_BIN(addpd_u128),
4667 ENTRY_BIN(mulpd_u128),
4668 ENTRY_BIN(subpd_u128),
4669 ENTRY_BIN(minpd_u128),
4670 ENTRY_BIN(divpd_u128),
4671 ENTRY_BIN(maxpd_u128),
4672 };
4673
4674#ifdef TSTIEMAIMPL_WITH_GENERATOR
4675static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
4676{
4677 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4678
4679 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
4680 {
4681 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
4682 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
4683 /** @todo More specials. */
4684 };
4685
4686 X86FXSTATE State;
4687 RT_ZERO(State);
4688 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4689 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4690 {
4691 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
4692
4693 PRTSTREAM pStrmOut = NULL;
4694 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
4695 if (RT_FAILURE(rc))
4696 {
4697 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4698 return RTEXITCODE_FAILURE;
4699 }
4700
4701 uint32_t cNormalInputPairs = 0;
4702 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4703 {
4704 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4705
4706 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4707 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4708 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4709 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4710
4711 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
4712 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
4713 cNormalInputPairs++;
4714 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4715 {
4716 iTest -= 1;
4717 continue;
4718 }
4719
4720 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4721 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4722 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4723 for (uint8_t iFz = 0; iFz < 2; iFz++)
4724 {
4725 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4726 | (iRounding << X86_MXCSR_RC_SHIFT)
4727 | (iDaz ? X86_MXCSR_DAZ : 0)
4728 | (iFz ? X86_MXCSR_FZ : 0)
4729 | X86_MXCSR_XCPT_MASK;
4730 IEMSSERESULT ResM; RT_ZERO(ResM);
4731 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4732 TestData.fMxcsrIn = State.MXCSR;
4733 TestData.fMxcsrOut = ResM.MXCSR;
4734 TestData.OutVal = ResM.uResult;
4735 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4736
4737 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4738 IEMSSERESULT ResU; RT_ZERO(ResU);
4739 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4740 TestData.fMxcsrIn = State.MXCSR;
4741 TestData.fMxcsrOut = ResU.MXCSR;
4742 TestData.OutVal = ResU.uResult;
4743 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4744
4745 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4746 if (fXcpt)
4747 {
4748 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4749 IEMSSERESULT Res1; RT_ZERO(Res1);
4750 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4751 TestData.fMxcsrIn = State.MXCSR;
4752 TestData.fMxcsrOut = Res1.MXCSR;
4753 TestData.OutVal = Res1.uResult;
4754 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4755
4756 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4757 {
4758 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4759 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4760 IEMSSERESULT Res2; RT_ZERO(Res2);
4761 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4762 TestData.fMxcsrIn = State.MXCSR;
4763 TestData.fMxcsrOut = Res2.MXCSR;
4764 TestData.OutVal = Res2.uResult;
4765 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4766 }
4767 if (!RT_IS_POWER_OF_TWO(fXcpt))
4768 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4769 if (fUnmasked & fXcpt)
4770 {
4771 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4772 IEMSSERESULT Res3; RT_ZERO(Res3);
4773 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4774 TestData.fMxcsrIn = State.MXCSR;
4775 TestData.fMxcsrOut = Res3.MXCSR;
4776 TestData.OutVal = Res3.uResult;
4777 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4778 }
4779 }
4780 }
4781 }
4782 rc = RTStrmClose(pStrmOut);
4783 if (RT_FAILURE(rc))
4784 {
4785 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4786 return RTEXITCODE_FAILURE;
4787 }
4788 }
4789
4790 return RTEXITCODE_SUCCESS;
4791}
4792#endif
4793
4794
4795static void SseBinaryR64Test(void)
4796{
4797 X86FXSTATE State;
4798 RT_ZERO(State);
4799 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4800 {
4801 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4802 continue;
4803
4804 uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4805 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4806 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4807 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4808 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4809 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4810 {
4811 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4812 {
4813 IEMSSERESULT Res; RT_ZERO(Res);
4814
4815 State.MXCSR = paTests[iTest].fMxcsrIn;
4816 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4817 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4818 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4819 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4820 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
4821 "%s -> mxcsr=%#08x %s'%s\n"
4822 "%s expected %#08x %s'%s%s%s (%s)\n",
4823 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4824 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
4825 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
4826 iVar ? " " : "", Res.MXCSR,
4827 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
4828 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4829 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
4830 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4831 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4832 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4833 ? " - val" : "",
4834 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4835 }
4836 pfn = g_aSseBinaryR64[iFn].pfnNative;
4837 }
4838 }
4839}
4840
4841
4842/*
4843 * Binary SSE operations on packed single precision floating point values.
4844 */
4845TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
4846
4847static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
4848{
4849 ENTRY_BIN(addss_u128_r32),
4850 ENTRY_BIN(mulss_u128_r32),
4851 ENTRY_BIN(subss_u128_r32),
4852 ENTRY_BIN(minss_u128_r32),
4853};
4854
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Performs one worker invocation for SseBinaryU128R32Generate and appends the
 * resulting test record to the output stream.
 *
 * The given MXCSR value is loaded into the FPU state, the worker is called
 * with the inputs already present in @a pTestData, and the MXCSR found in the
 * state after the call, the MXCSR returned by the worker and the result value
 * are stored in the record before it is written.
 *
 * @returns The MXCSR value returned by the worker.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state to use; its MXCSR member is overwritten.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   pTestData   The test record with the input values filled in.
 * @param   pStrmOut    The stream the record is appended to.
 * @param   prcWrite    Sticky write status.  Only updated on the first write
 *                      failure so the original error is the one reported.
 */
static uint32_t SseBinaryU128R32GenerateOne(PFNIEMAIMPLFPSSEF2U128R32 pfn, X86FXSTATE *pState, uint32_t fMxcsrIn,
                                            SSE_BINARY_U128_R32_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);

    pState->MXCSR = fMxcsrIn;
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r32Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return Res.MXCSR;
}


/**
 * Generates the binary test data files for the workers in g_aSseBinaryU128R32.
 *
 * One file is written per worker.  For every input set the worker is run for
 * all four rounding modes with DAZ and FZ off and on, first with all
 * exceptions masked and then with all unmasked; when exception flags were
 * raised, additional runs with other mask combinations are recorded.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as the only format argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        /* Stop producing records as soon as a write has failed; the status is reported after closing. */
        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the hand picked special inputs afterwards. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar32[0] = RandR32Src(iTest);
                TestData.InVal1.ar32[1] = RandR32Src(iTest);
                TestData.InVal1.ar32[2] = RandR32Src(iTest);
                TestData.InVal1.ar32[3] = RandR32Src(iTest);
                TestData.r32Val2        = RandR32Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar32[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar32[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal1.ar32[2] = s_aSpecials[iTest - cTests].aVal1[2];
                TestData.InVal1.ar32[3] = s_aSpecials[iTest - cTests].aVal1[3];
                TestData.r32Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            /* Count the input sets consisting of normal numbers only.  Towards the end of
               the random range, redo the iteration with new inputs for as long as fewer
               than cMinNormalPairs such sets have been produced. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First run: all exceptions masked. */
                        uint32_t const fMxcsrM = SseBinaryU128R32GenerateOne(pfn, &State,
                                                                               (fMxcsr & ~X86_MXCSR_RC_MASK)
                                                                             | (iRounding << X86_MXCSR_RC_SHIFT)
                                                                             | (iDaz ? X86_MXCSR_DAZ : 0)
                                                                             | (iFz  ? X86_MXCSR_FZ  : 0)
                                                                             | X86_MXCSR_XCPT_MASK,
                                                                             &TestData, pStrmOut, &rcWrite);

                        /* Second run: same settings with all exception mask bits cleared. */
                        uint32_t const fMxcsrU = SseBinaryU128R32GenerateOne(pfn, &State,
                                                                             State.MXCSR & ~X86_MXCSR_XCPT_MASK,
                                                                             &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Third run: mask bits clear, the exception flags seen so far OR'ed into the input. */
                            uint32_t const fXcpt1 = SseBinaryU128R32GenerateOne(pfn, &State,
                                                                                (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt,
                                                                                &TestData, pStrmOut, &rcWrite)
                                                  & X86_MXCSR_XCPT_FLAGS;

                            /* If that run raised flags not seen before, add them and run with exactly these masked. */
                            if ((fXcpt1 & fXcpt) != fXcpt1)
                            {
                                fXcpt |= fXcpt1;
                                SseBinaryU128R32GenerateOne(pfn, &State,
                                                            (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT),
                                                            &TestData, pStrmOut, &rcWrite);
                            }

                            /* With more than one flag: leave each of them unmasked in turn while masking the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                        SseBinaryU128R32GenerateOne(pfn, &State,
                                                                      (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT),
                                                                    &TestData, pStrmOut, &rcWrite);
                        }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4978
4979static void SseBinaryU128R32Test(void)
4980{
4981 X86FXSTATE State;
4982 RT_ZERO(State);
4983 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
4984 {
4985 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
4986 continue;
4987
4988 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
4989 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
4990 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
4991 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
4992 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4993 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4994 {
4995 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4996 {
4997 IEMSSERESULT Res; RT_ZERO(Res);
4998
4999 State.MXCSR = paTests[iTest].fMxcsrIn;
5000 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5001 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5002 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5003 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5004 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5005 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5006 || !fValsIdentical)
5007 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5008 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5009 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5010 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5011 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5012 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5013 FormatR32(&paTests[iTest].r32Val2),
5014 iVar ? " " : "", Res.MXCSR,
5015 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5016 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5017 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5018 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5019 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5020 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5021 !fValsIdentical ? " - val" : "",
5022 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5023 }
5024 }
5025 }
5026}
5027
5028
5029/*
5030 * Binary SSE operations on packed single precision floating point values (xxxsd xmm1, r/m64).
5031 */
5032TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
5033
5034static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
5035{
5036 ENTRY_BIN(addsd_u128_r64),
5037 ENTRY_BIN(mulsd_u128_r64),
5038 ENTRY_BIN(subsd_u128_r64),
5039 ENTRY_BIN(minsd_u128_r64),
5040};
5041
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Performs one worker invocation for SseBinaryU128R64Generate and appends the
 * resulting test record to the output stream.
 *
 * The given MXCSR value is loaded into the FPU state, the worker is called
 * with the inputs already present in @a pTestData, and the MXCSR found in the
 * state after the call, the MXCSR returned by the worker and the result value
 * are stored in the record before it is written.
 *
 * @returns The MXCSR value returned by the worker.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state to use; its MXCSR member is overwritten.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   pTestData   The test record with the input values filled in.
 * @param   pStrmOut    The stream the record is appended to.
 * @param   prcWrite    Sticky write status.  Only updated on the first write
 *                      failure so the original error is the one reported.
 */
static uint32_t SseBinaryU128R64GenerateOne(PFNIEMAIMPLFPSSEF2U128R64 pfn, X86FXSTATE *pState, uint32_t fMxcsrIn,
                                            SSE_BINARY_U128_R64_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);

    pState->MXCSR = fMxcsrIn;
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r64Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return Res.MXCSR;
}


/**
 * Generates the binary test data files for the workers in g_aSseBinaryU128R64.
 *
 * One file is written per worker.  For every input set the worker is run for
 * all four rounding modes with DAZ and FZ off and on, first with all
 * exceptions masked and then with all unmasked; when exception flags were
 * raised, additional runs with other mask combinations are recorded.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as the only format argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        /* Stop producing records as soon as a write has failed; the status is reported after closing. */
        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the hand picked special inputs afterwards. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            /* Count the input sets consisting of normal numbers only.  Towards the end of
               the random range, redo the iteration with new inputs for as long as fewer
               than cMinNormalPairs such sets have been produced. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First run: all exceptions masked. */
                        uint32_t const fMxcsrM = SseBinaryU128R64GenerateOne(pfn, &State,
                                                                               (fMxcsr & ~X86_MXCSR_RC_MASK)
                                                                             | (iRounding << X86_MXCSR_RC_SHIFT)
                                                                             | (iDaz ? X86_MXCSR_DAZ : 0)
                                                                             | (iFz  ? X86_MXCSR_FZ  : 0)
                                                                             | X86_MXCSR_XCPT_MASK,
                                                                             &TestData, pStrmOut, &rcWrite);

                        /* Second run: same settings with all exception mask bits cleared. */
                        uint32_t const fMxcsrU = SseBinaryU128R64GenerateOne(pfn, &State,
                                                                             State.MXCSR & ~X86_MXCSR_XCPT_MASK,
                                                                             &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Third run: mask bits clear, the exception flags seen so far OR'ed into the input. */
                            uint32_t const fXcpt1 = SseBinaryU128R64GenerateOne(pfn, &State,
                                                                                (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt,
                                                                                &TestData, pStrmOut, &rcWrite)
                                                  & X86_MXCSR_XCPT_FLAGS;

                            /* If that run raised flags not seen before, add them and run with exactly these masked. */
                            if ((fXcpt1 & fXcpt) != fXcpt1)
                            {
                                fXcpt |= fXcpt1;
                                SseBinaryU128R64GenerateOne(pfn, &State,
                                                            (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT),
                                                            &TestData, pStrmOut, &rcWrite);
                            }

                            /* With more than one flag: leave each of them unmasked in turn while masking the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                        SseBinaryU128R64GenerateOne(pfn, &State,
                                                                      (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT),
                                                                    &TestData, pStrmOut, &rcWrite);
                        }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5159
5160
5161static void SseBinaryU128R64Test(void)
5162{
5163 X86FXSTATE State;
5164 RT_ZERO(State);
5165 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5166 {
5167 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5168 continue;
5169
5170 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5171 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5172 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5173 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5174 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5175 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5176 {
5177 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5178 {
5179 IEMSSERESULT Res; RT_ZERO(Res);
5180
5181 State.MXCSR = paTests[iTest].fMxcsrIn;
5182 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5183 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5184 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5185 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5186 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5187 "%s -> mxcsr=%#08x %s'%s\n"
5188 "%s expected %#08x %s'%s%s%s (%s)\n",
5189 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5190 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5191 FormatR64(&paTests[iTest].r64Val2),
5192 iVar ? " " : "", Res.MXCSR,
5193 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5194 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5195 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5196 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5197 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5198 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5199 ? " - val" : "",
5200 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5201 }
5202 }
5203 }
5204}
5205
5206
5207
5208int main(int argc, char **argv)
5209{
5210 int rc = RTR3InitExe(argc, &argv, 0);
5211 if (RT_FAILURE(rc))
5212 return RTMsgInitFailure(rc);
5213
5214 /*
5215 * Determin the host CPU.
5216 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
5217 */
5218#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
5219 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
5220 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
5221 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5222#else
5223 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5224#endif
5225
5226 /*
5227 * Parse arguments.
5228 */
5229 enum { kModeNotSet, kModeTest, kModeGenerate }
5230 enmMode = kModeNotSet;
5231 bool fInt = true;
5232 bool fFpuLdSt = true;
5233 bool fFpuBinary1 = true;
5234 bool fFpuBinary2 = true;
5235 bool fFpuOther = true;
5236 bool fCpuData = true;
5237 bool fCommonData = true;
5238 bool fSseFpBinary = true;
5239 uint32_t const cDefaultTests = 96;
5240 uint32_t cTests = cDefaultTests;
5241 RTGETOPTDEF const s_aOptions[] =
5242 {
5243 // mode:
5244 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
5245 { "--test", 't', RTGETOPT_REQ_NOTHING },
5246 // test selection (both)
5247 { "--all", 'a', RTGETOPT_REQ_NOTHING },
5248 { "--none", 'z', RTGETOPT_REQ_NOTHING },
5249 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
5250 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
5251 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
5252 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
5253 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
5254 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
5255 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
5256 { "--int", 'i', RTGETOPT_REQ_NOTHING },
5257 { "--include", 'I', RTGETOPT_REQ_STRING },
5258 { "--exclude", 'X', RTGETOPT_REQ_STRING },
5259 // generation parameters
5260 { "--common", 'm', RTGETOPT_REQ_NOTHING },
5261 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
5262 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
5263 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
5264 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
5265 };
5266
5267 RTGETOPTSTATE State;
5268 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
5269 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5270
5271 RTGETOPTUNION ValueUnion;
5272 while ((rc = RTGetOpt(&State, &ValueUnion)))
5273 {
5274 switch (rc)
5275 {
5276 case 'g':
5277 enmMode = kModeGenerate;
5278 break;
5279 case 't':
5280 enmMode = kModeTest;
5281 break;
5282
5283 case 'a':
5284 fCpuData = true;
5285 fCommonData = true;
5286 fInt = true;
5287 fFpuLdSt = true;
5288 fFpuBinary1 = true;
5289 fFpuBinary2 = true;
5290 fFpuOther = true;
5291 fSseFpBinary = true;
5292 break;
5293 case 'z':
5294 fCpuData = false;
5295 fCommonData = false;
5296 fInt = false;
5297 fFpuLdSt = false;
5298 fFpuBinary1 = false;
5299 fFpuBinary2 = false;
5300 fFpuOther = false;
5301 fSseFpBinary = false;
5302 break;
5303
5304 case 'F':
5305 fFpuLdSt = true;
5306 break;
5307 case 'O':
5308 fFpuOther = true;
5309 break;
5310 case 'B':
5311 fFpuBinary1 = true;
5312 break;
5313 case 'P':
5314 fFpuBinary2 = true;
5315 break;
5316 case 'S':
5317 fSseFpBinary = true;
5318 break;
5319 case 'i':
5320 fInt = true;
5321 break;
5322
5323 case 'I':
5324 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
5325 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
5326 RT_ELEMENTS(g_apszIncludeTestPatterns));
5327 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
5328 break;
5329 case 'X':
5330 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
5331 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
5332 RT_ELEMENTS(g_apszExcludeTestPatterns));
5333 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
5334 break;
5335
5336 case 'm':
5337 fCommonData = true;
5338 break;
5339 case 'c':
5340 fCpuData = true;
5341 break;
5342 case 'n':
5343 cTests = ValueUnion.u32;
5344 break;
5345
5346 case 'q':
5347 g_cVerbosity = 0;
5348 break;
5349 case 'v':
5350 g_cVerbosity++;
5351 break;
5352
5353 case 'h':
5354 RTPrintf("usage: %s <-g|-t> [options]\n"
5355 "\n"
5356 "Mode:\n"
5357 " -g, --generate\n"
5358 " Generate test data.\n"
5359 " -t, --test\n"
5360 " Execute tests.\n"
5361 "\n"
5362 "Test selection (both modes):\n"
5363 " -a, --all\n"
5364 " Enable all tests and generated test data. (default)\n"
5365 " -z, --zap, --none\n"
5366 " Disable all tests and test data types.\n"
5367 " -i, --int\n"
5368 " Enable non-FPU tests.\n"
5369 " -F, --fpu-ld-st\n"
5370 " Enable FPU load and store tests.\n"
5371 " -B, --fpu-binary-1\n"
5372 " Enable FPU binary 80-bit FP tests.\n"
5373 " -P, --fpu-binary-2\n"
5374 " Enable FPU binary 64- and 32-bit FP tests.\n"
5375 " -O, --fpu-other\n"
5376 " Enable FPU binary 64- and 32-bit FP tests.\n"
5377 " -S, --sse-fp-binary\n"
5378 " Enable SSE binary 64- and 32-bit FP tests.\n"
5379 " -I,--include=<test-patter>\n"
5380 " Enable tests matching the given pattern.\n"
5381 " -X,--exclude=<test-patter>\n"
5382 " Skip tests matching the given pattern (overrides --include).\n"
5383 "\n"
5384 "Generation:\n"
5385 " -m, --common\n"
5386 " Enable generating common test data.\n"
5387 " -c, --only-cpu\n"
5388 " Enable generating CPU specific test data.\n"
5389 " -n, --number-of-test <count>\n"
5390 " Number of tests to generate. Default: %u\n"
5391 "\n"
5392 "Other:\n"
5393 " -v, --verbose\n"
5394 " -q, --quiet\n"
5395 " Noise level. Default: --quiet\n"
5396 , argv[0], cDefaultTests);
5397 return RTEXITCODE_SUCCESS;
5398 default:
5399 return RTGetOptPrintError(rc, &ValueUnion);
5400 }
5401 }
5402
5403 /*
5404 * Generate data?
5405 */
5406 if (enmMode == kModeGenerate)
5407 {
5408#ifdef TSTIEMAIMPL_WITH_GENERATOR
5409 char szCpuDesc[256] = {0};
5410 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
5411 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
5412# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
5413 const char * const pszBitBucket = "NUL";
5414# else
5415 const char * const pszBitBucket = "/dev/null";
5416# endif
5417
5418 if (cTests == 0)
5419 cTests = cDefaultTests;
5420 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
5421 g_cZeroSrcTests = g_cZeroDstTests * 2;
5422
5423 if (fInt)
5424 {
5425 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
5426 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5427 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5428 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
5429 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5430 if (!pStrmData || !pStrmDataCpu)
5431 return RTEXITCODE_FAILURE;
5432
5433 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
5434 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
5435 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
5436 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
5437 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
5438 UnaryGenerate(pStrmData, cTests);
5439 ShiftGenerate(pStrmDataCpu, cTests);
5440 MulDivGenerate(pStrmDataCpu, cTests);
5441
5442 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5443 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5444 if (rcExit != RTEXITCODE_SUCCESS)
5445 return rcExit;
5446 }
5447
5448 if (fFpuLdSt)
5449 {
5450 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
5451 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5452 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5453 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
5454 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5455 if (!pStrmData || !pStrmDataCpu)
5456 return RTEXITCODE_FAILURE;
5457
5458 FpuLdConstGenerate(pStrmData, cTests);
5459 FpuLdIntGenerate(pStrmData, cTests);
5460 FpuLdD80Generate(pStrmData, cTests);
5461 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
5462 FpuStD80Generate(pStrmData, cTests);
5463 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
5464 FpuLdMemGenerate(pStrmData, cTests2);
5465 FpuStMemGenerate(pStrmData, cTests2);
5466
5467 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5468 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5469 if (rcExit != RTEXITCODE_SUCCESS)
5470 return rcExit;
5471 }
5472
5473 if (fFpuBinary1)
5474 {
5475 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
5476 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5477 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5478 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
5479 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5480 if (!pStrmData || !pStrmDataCpu)
5481 return RTEXITCODE_FAILURE;
5482
5483 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5484 FpuBinaryFswR80Generate(pStrmData, cTests);
5485 FpuBinaryEflR80Generate(pStrmData, cTests);
5486
5487 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5488 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5489 if (rcExit != RTEXITCODE_SUCCESS)
5490 return rcExit;
5491 }
5492
5493 if (fFpuBinary2)
5494 {
5495 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
5496 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5497 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5498 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
5499 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5500 if (!pStrmData || !pStrmDataCpu)
5501 return RTEXITCODE_FAILURE;
5502
5503 FpuBinaryR64Generate(pStrmData, cTests);
5504 FpuBinaryR32Generate(pStrmData, cTests);
5505 FpuBinaryI32Generate(pStrmData, cTests);
5506 FpuBinaryI16Generate(pStrmData, cTests);
5507 FpuBinaryFswR64Generate(pStrmData, cTests);
5508 FpuBinaryFswR32Generate(pStrmData, cTests);
5509 FpuBinaryFswI32Generate(pStrmData, cTests);
5510 FpuBinaryFswI16Generate(pStrmData, cTests);
5511
5512 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5513 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5514 if (rcExit != RTEXITCODE_SUCCESS)
5515 return rcExit;
5516 }
5517
5518 if (fFpuOther)
5519 {
5520 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
5521 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5522 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5523 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
5524 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5525 if (!pStrmData || !pStrmDataCpu)
5526 return RTEXITCODE_FAILURE;
5527
5528 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5529 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
5530 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
5531
5532 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5533 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5534 if (rcExit != RTEXITCODE_SUCCESS)
5535 return rcExit;
5536 }
5537
5538 if (fSseFpBinary)
5539 {
5540 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
5541
5542 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
5543 if (rcExit == RTEXITCODE_SUCCESS)
5544 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
5545 if (rcExit == RTEXITCODE_SUCCESS)
5546 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
5547 if (rcExit == RTEXITCODE_SUCCESS)
5548 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
5549 if (rcExit != RTEXITCODE_SUCCESS)
5550 return rcExit;
5551 }
5552
5553 return RTEXITCODE_SUCCESS;
5554#else
5555 return RTMsgErrorExitFailure("Test data generator not compiled in!");
5556#endif
5557 }
5558
5559 /*
5560 * Do testing. Currently disabled by default as data needs to be checked
5561 * on both Intel and AMD systems first.
5562 */
5563 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
5564 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5565 if (enmMode == kModeTest)
5566 {
5567 RTTestBanner(g_hTest);
5568
5569 /* Allocate guarded memory for use in the tests. */
5570#define ALLOC_GUARDED_VAR(a_puVar) do { \
5571 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
5572 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
5573 } while (0)
5574 ALLOC_GUARDED_VAR(g_pu8);
5575 ALLOC_GUARDED_VAR(g_pu16);
5576 ALLOC_GUARDED_VAR(g_pu32);
5577 ALLOC_GUARDED_VAR(g_pu64);
5578 ALLOC_GUARDED_VAR(g_pu128);
5579 ALLOC_GUARDED_VAR(g_pu8Two);
5580 ALLOC_GUARDED_VAR(g_pu16Two);
5581 ALLOC_GUARDED_VAR(g_pu32Two);
5582 ALLOC_GUARDED_VAR(g_pu64Two);
5583 ALLOC_GUARDED_VAR(g_pu128Two);
5584 ALLOC_GUARDED_VAR(g_pfEfl);
5585 if (RTTestErrorCount(g_hTest) == 0)
5586 {
5587 if (fInt)
5588 {
5589 BinU8Test();
5590 BinU16Test();
5591 BinU32Test();
5592 BinU64Test();
5593 XchgTest();
5594 XaddTest();
5595 CmpXchgTest();
5596 CmpXchg8bTest();
5597 CmpXchg16bTest();
5598 ShiftDblTest();
5599 UnaryTest();
5600 ShiftTest();
5601 MulDivTest();
5602 BswapTest();
5603 }
5604
5605 if (fFpuLdSt)
5606 {
5607 FpuLoadConstTest();
5608 FpuLdMemTest();
5609 FpuLdIntTest();
5610 FpuLdD80Test();
5611 FpuStMemTest();
5612 FpuStIntTest();
5613 FpuStD80Test();
5614 }
5615
5616 if (fFpuBinary1)
5617 {
5618 FpuBinaryR80Test();
5619 FpuBinaryFswR80Test();
5620 FpuBinaryEflR80Test();
5621 }
5622
5623 if (fFpuBinary2)
5624 {
5625 FpuBinaryR64Test();
5626 FpuBinaryR32Test();
5627 FpuBinaryI32Test();
5628 FpuBinaryI16Test();
5629 FpuBinaryFswR64Test();
5630 FpuBinaryFswR32Test();
5631 FpuBinaryFswI32Test();
5632 FpuBinaryFswI16Test();
5633 }
5634
5635 if (fFpuOther)
5636 {
5637 FpuUnaryR80Test();
5638 FpuUnaryFswR80Test();
5639 FpuUnaryTwoR80Test();
5640 }
5641
5642 if (fSseFpBinary)
5643 {
5644 SseBinaryR32Test();
5645 SseBinaryR64Test();
5646 SseBinaryU128R32Test();
5647 SseBinaryU128R64Test();
5648 }
5649 }
5650 return RTTestSummaryAndDestroy(g_hTest);
5651 }
5652 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
5653}
5654
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette