VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 96248

Last change on this file since 96248 was 96248, checked in by vboxsync, 2 years ago

VMM/testcase/tstIEMAImpl: Start testing single precision instruction emulations, bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 225.1 KB
Line 
1/* $Id: tstIEMAImpl.cpp 96248 2022-08-17 09:09:34Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
43#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
44#define ENTRY_EX(a_Name, a_uExtra) \
45 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
46 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
47 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
48
49#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
50#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
51 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
52 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
53 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
54
55#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
56#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
57 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
58 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
59 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
60
61#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
62 typedef struct a_TypeName \
63 { \
64 const char *pszName; \
65 a_FunctionPtrType pfn; \
66 a_FunctionPtrType pfnNative; \
67 a_TestType const *paTests; \
68 uint32_t const *pcTests; \
69 uint32_t uExtra; \
70 uint8_t idxCpuEflFlavour; \
71 } a_TypeName
72
73#define COUNT_VARIATIONS(a_SubTest) \
74 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
75
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
80static RTTEST g_hTest;
81static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
82#ifdef TSTIEMAIMPL_WITH_GENERATOR
83static uint32_t g_cZeroDstTests = 2;
84static uint32_t g_cZeroSrcTests = 4;
85#endif
86static uint8_t *g_pu8, *g_pu8Two;
87static uint16_t *g_pu16, *g_pu16Two;
88static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
89static uint64_t *g_pu64, *g_pu64Two;
90static RTUINT128U *g_pu128, *g_pu128Two;
91
92static char g_aszBuf[16][256];
93static unsigned g_idxBuf = 0;
94
95static uint32_t g_cIncludeTestPatterns;
96static uint32_t g_cExcludeTestPatterns;
97static const char *g_apszIncludeTestPatterns[64];
98static const char *g_apszExcludeTestPatterns[64];
99
100static unsigned g_cVerbosity = 0;
101
102
103/*********************************************************************************************************************************
104* Internal Functions *
105*********************************************************************************************************************************/
106static const char *FormatR80(PCRTFLOAT80U pr80);
107static const char *FormatR64(PCRTFLOAT64U pr64);
108static const char *FormatR32(PCRTFLOAT32U pr32);
109
110
111/*
112 * Random helpers.
113 */
114
115static uint32_t RandEFlags(void)
116{
117 uint32_t fEfl = RTRandU32();
118 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
119}
120
121#ifdef TSTIEMAIMPL_WITH_GENERATOR
122
123static uint8_t RandU8(void)
124{
125 return RTRandU32Ex(0, 0xff);
126}
127
128
129static uint16_t RandU16(void)
130{
131 return RTRandU32Ex(0, 0xffff);
132}
133
134
135static uint32_t RandU32(void)
136{
137 return RTRandU32();
138}
139
140#endif
141
142static uint64_t RandU64(void)
143{
144 return RTRandU64();
145}
146
147
148static RTUINT128U RandU128(void)
149{
150 RTUINT128U Ret;
151 Ret.s.Hi = RTRandU64();
152 Ret.s.Lo = RTRandU64();
153 return Ret;
154}
155
156#ifdef TSTIEMAIMPL_WITH_GENERATOR
157
158static uint8_t RandU8Dst(uint32_t iTest)
159{
160 if (iTest < g_cZeroDstTests)
161 return 0;
162 return RandU8();
163}
164
165
166static uint8_t RandU8Src(uint32_t iTest)
167{
168 if (iTest < g_cZeroSrcTests)
169 return 0;
170 return RandU8();
171}
172
173
174static uint16_t RandU16Dst(uint32_t iTest)
175{
176 if (iTest < g_cZeroDstTests)
177 return 0;
178 return RandU16();
179}
180
181
182static uint16_t RandU16Src(uint32_t iTest)
183{
184 if (iTest < g_cZeroSrcTests)
185 return 0;
186 return RandU16();
187}
188
189
190static uint32_t RandU32Dst(uint32_t iTest)
191{
192 if (iTest < g_cZeroDstTests)
193 return 0;
194 return RandU32();
195}
196
197
198static uint32_t RandU32Src(uint32_t iTest)
199{
200 if (iTest < g_cZeroSrcTests)
201 return 0;
202 return RandU32();
203}
204
205
206static uint64_t RandU64Dst(uint32_t iTest)
207{
208 if (iTest < g_cZeroDstTests)
209 return 0;
210 return RandU64();
211}
212
213
214static uint64_t RandU64Src(uint32_t iTest)
215{
216 if (iTest < g_cZeroSrcTests)
217 return 0;
218 return RandU64();
219}
220
221
222/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
223static int16_t RandI16Src2(uint32_t iTest)
224{
225 if (iTest < 18 * 4)
226 switch (iTest % 4)
227 {
228 case 0: return 0;
229 case 1: return INT16_MAX;
230 case 2: return INT16_MIN;
231 case 3: break;
232 }
233 return (int16_t)RandU16();
234}
235
236
237/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
238static int32_t RandI32Src2(uint32_t iTest)
239{
240 if (iTest < 18 * 4)
241 switch (iTest % 4)
242 {
243 case 0: return 0;
244 case 1: return INT32_MAX;
245 case 2: return INT32_MIN;
246 case 3: break;
247 }
248 return (int32_t)RandU32();
249}
250
251
252#if 0
253static int64_t RandI64Src(uint32_t iTest)
254{
255 RT_NOREF(iTest);
256 return (int64_t)RandU64();
257}
258#endif
259
260
261static uint16_t RandFcw(void)
262{
263 return RandU16() & ~X86_FCW_ZERO_MASK;
264}
265
266
267static uint16_t RandFsw(void)
268{
269 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
270 return RandU16();
271}
272
273
274static uint32_t RandMxcsr(void)
275{
276 return RandU32() & ~X86_MXCSR_ZERO_MASK;
277}
278
279
/**
 * Right-shifts the 80-bit fraction by cShift bits, substituting a small
 * non-zero value when the shift would otherwise clear the fraction.
 *
 * Used by the random value generators to derive denormal/NaN encodings
 * without the fraction collapsing to zero (which would change the class
 * of the produced value).
 */
static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
{
    if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
        pr80->sj64.uFraction >>= cShift;
    else
        pr80->sj64.uFraction = (cShift % 19) + 1; /* arbitrary small non-zero fraction */
}
287
288
289
/**
 * Produces a random 80-bit floating point value of the basic class selected
 * by bType (masked to 0..31):
 *   0=zero, 1=pseudo-infinity, 2=infinity, 3=indefinite, 4/5=denormals,
 *   6/7=pseudo denormals, 8/9=pseudo NaNs, 10/11=quiet NaNs,
 *   12/13=signalling NaNs, 14/15=unnormals, 16..25=normals (16 uses an
 *   all-ones fraction for rounding tests), 26..31=raw random bits.
 * Odd selectors in the 4..15 range use SafeR80FractionShift variants.
 *
 * @param bType      Value class selector (only low 5 bits used).
 * @param cTarget    Width the value will be converted to (80/64/32, or
 *                   16/32/59/64 when fIntTarget) - constrains the exponent
 *                   of normal values so the conversion is meaningful.
 * @param fIntTarget Set if the conversion target is an integer type.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the class-specific code below only fixes
       up the fields that define the class, keeping the rest random. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert(   bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6; /* integer bit set = pseudo denormal */
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6  || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0; /* clear integer bit = pseudo NaN */
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0; /* clear integer bit with a non-special exponent = unnormal */
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value survives conversion to the
               (possibly narrower) floating point target. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* else: bType 26..31 - leave the raw random bits as-is. */
    return r80;
}
428
429
430static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
431{
432 /*
433 * Make it more likely that we get a good selection of special values.
434 */
435 return RandR80Ex(RandU8(), cTarget, fIntTarget);
436
437}
438
439
440static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
441{
442 /* Make sure we cover all the basic types first before going for random selection: */
443 if (iTest <= 18)
444 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
445 return RandR80(cTarget, fIntTarget);
446}
447
448
449/**
450 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
451 * to a 0..17, covering all basic value types.
452 */
453static uint8_t RandR80Src12RemapType(uint8_t bType)
454{
455 switch (bType)
456 {
457 case 0: return 18; /* normal */
458 case 1: return 16; /* normal extreme rounding */
459 case 2: return 14; /* unnormal */
460 case 3: return 12; /* Signalling NaN */
461 case 4: return 10; /* Quiet NaN */
462 case 5: return 8; /* PseudoNaN */
463 case 6: return 6; /* Pseudo Denormal */
464 case 7: return 4; /* Denormal */
465 case 8: return 3; /* Indefinite */
466 case 9: return 2; /* Infinity */
467 case 10: return 1; /* Pseudo-Infinity */
468 case 11: return 0; /* Zero */
469 default: AssertFailedReturn(18);
470 }
471}
472
473
/**
 * This works in tandem with RandR80Src2 to make sure we cover all operand
 * type mixes first before we venture into regular random testing.
 *
 * There are 11 basic variations, when we leave out the five odd ones using
 * SafeR80FractionShift. Because of the special normalized value targetting at
 * rounding, we make it an even 12. So 144 combinations for two operands.
 *
 * @param iTest        Zero-based test index; the initial range is deterministic.
 * @param cPartnerBits Width of the second operand (80, 64 or 32).
 * @param fPartnerInt  Set when the second operand is an integer.
 */
static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
{
    if (cPartnerBits == 80)
    {
        Assert(!fPartnerInt);
        if (iTest < 12 * 12) /* 12 own types x 12 partner types */
            return RandR80Ex(RandR80Src12RemapType(iTest / 12));
    }
    else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
    {
        if (iTest < 12 * 10) /* R64/R32 partners only have 10 basic types */
            return RandR80Ex(RandR80Src12RemapType(iTest / 10));
    }
    else if (iTest < 18 * 4 && fPartnerInt) /* integer partner: all 18 basic types, 4 variants each */
        return RandR80Ex(iTest / 4);
    /* Deterministic coverage exhausted - fall back to random selection. */
    return RandR80();
}
499
500
501/** Partner to RandR80Src1. */
502static RTFLOAT80U RandR80Src2(uint32_t iTest)
503{
504 if (iTest < 12 * 12)
505 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
506 return RandR80();
507}
508
509
510static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
511{
512 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
513 pr64->s64.uFraction >>= cShift;
514 else
515 pr64->s64.uFraction = (cShift % 19) + 1;
516}
517
518
/**
 * Produces a random 64-bit floating point value of the class selected by
 * bType (masked to 0..15): 0=zero, 1=infinity, 2/3=subnormals, 4/5=quiet
 * NaNs, 6/7=signalling NaNs, 8..11=normals, 12..15=raw random bits.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0) /* fraction must be non-zero or we'd get a zero */
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0) /* fraction must be non-zero or we'd get an infinity */
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* else: bType 12..15 - leave the raw random bits as-is. */
    return r64;
}
575
576
577static RTFLOAT64U RandR64Src(uint32_t iTest)
578{
579 if (iTest < 16)
580 return RandR64Ex(iTest);
581 return RandR64Ex(RandU8());
582}
583
584
585/** Pairing with a 80-bit floating point arg. */
586static RTFLOAT64U RandR64Src2(uint32_t iTest)
587{
588 if (iTest < 12 * 10)
589 return RandR64Ex(9 - iTest % 10); /* start with normal values */
590 return RandR64Ex(RandU8());
591}
592
593
594static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
595{
596 if (pr32->s.uFraction >= RT_BIT_32(cShift))
597 pr32->s.uFraction >>= cShift;
598 else
599 pr32->s.uFraction = (cShift % 19) + 1;
600}
601
602
/**
 * Produces a random 32-bit floating point value of the class selected by
 * bType (masked to 0..15): 0=zero, 1=infinity, 2/3=subnormals, 4/5=quiet
 * NaNs, 6/7=signalling NaNs, 8..11=normals, 12..15=raw random bits.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0) /* fraction must be non-zero or we'd get a zero */
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0) /* fraction must be non-zero or we'd get an infinity */
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* else: bType 12..15 - leave the raw random bits as-is. */
    return r32;
}
658
659
660static RTFLOAT32U RandR32Src(uint32_t iTest)
661{
662 if (iTest < 16)
663 return RandR32Ex(iTest);
664 return RandR32Ex(RandU8());
665}
666
667
668/** Pairing with a 80-bit floating point arg. */
669static RTFLOAT32U RandR32Src2(uint32_t iTest)
670{
671 if (iTest < 12 * 10)
672 return RandR32Ex(9 - iTest % 10); /* start with normal values */
673 return RandR32Ex(RandU8());
674}
675
676
/**
 * Produces a random packed BCD (80-bit) value.
 *
 * Tests 0..2 return zeros with alternating sign, tests 3..4 return the
 * indefinite encoding. After that, tests with (iTest & 7) >= 6 produce
 * illegal encodings (random pair bytes, and non-zero padding when
 * (iTest & 7) == 7), while the remainder are well-formed decimal values.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1)); /* alternating sign */
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8(); /* any byte, so digits may exceed 9 */
    }
    else
    {
        /* Normal */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
714
715
/**
 * Formats an 80-bit float as a compile-time initializer expression for the
 * generated test data, preferring symbolic RTFLOAT80U_INIT_* macros for
 * recognizable special values.
 *
 * @returns Pointer into the rotating g_aszBuf pool (or a string literal);
 *          valid until the pool wraps (16 calls).
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* Only use the QNAN/SNAN macros when the payload matches what they produce. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    /* Generic case: spell out sign, mantissa and exponent. */
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
734
735const char *GenFormatR64(PCRTFLOAT64U prd)
736{
737 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
738 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
739 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
740 return pszBuf;
741}
742
743
744const char *GenFormatR32(PCRTFLOAT32U pr)
745{
746 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
747 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
748 return pszBuf;
749}
750
751
/**
 * Formats a packed BCD value as an RTPBCD80U_INIT_C (or, when the padding
 * byte is non-zero, RTPBCD80U_INIT_EX_C) initializer expression, listing the
 * digit pairs most significant first.
 *
 * @returns Pointer into the rotating g_aszBuf pool.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0) /* iterate pairs high to low */
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* Close and terminate manually; the buffer is comfortably large enough. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
769
770
771const char *GenFormatI64(int64_t i64)
772{
773 if (i64 == INT64_MIN) /* This one is problematic */
774 return "INT64_MIN";
775 if (i64 == INT64_MAX)
776 return "INT64_MAX";
777 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
778 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
779 return pszBuf;
780}
781
782
783const char *GenFormatI64(int64_t const *pi64)
784{
785 return GenFormatI64(*pi64);
786}
787
788
789const char *GenFormatI32(int32_t i32)
790{
791 if (i32 == INT32_MIN) /* This one is problematic */
792 return "INT32_MIN";
793 if (i32 == INT32_MAX)
794 return "INT32_MAX";
795 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
796 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
797 return pszBuf;
798}
799
800
801const char *GenFormatI32(int32_t const *pi32)
802{
803 return GenFormatI32(*pi32);
804}
805
806
807const char *GenFormatI16(int16_t i16)
808{
809 if (i16 == INT16_MIN) /* This one is problematic */
810 return "INT16_MIN";
811 if (i16 == INT16_MAX)
812 return "INT16_MAX";
813 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
814 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
815 return pszBuf;
816}
817
818
819const char *GenFormatI16(int16_t const *pi16)
820{
821 return GenFormatI16(*pi16);
822}
823
824
/**
 * Writes the standard file header (Id line, doc comment, license and the
 * tstIEMAImpl.h include) to a generated test data file.
 *
 * @param pOut        The output stream.
 * @param pszCpuDesc  CPU description embedded in the file comment.
 * @param pszCpuType  Optional CPU type string; NULL for vendor-neutral data.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 96248 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev])) /* only the leading digits of the keyword expansion */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 96248 2022-08-17 09:09:34Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.virtualbox.org. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
857
858
859static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
860{
861 PRTSTREAM pOut = NULL;
862 int rc = RTStrmOpen(pszFilename, "w", &pOut);
863 if (RT_SUCCESS(rc))
864 {
865 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
866 return pOut;
867 }
868 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
869 return NULL;
870}
871
872
873static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
874{
875 RTStrmPrintf(pOut,
876 "\n"
877 "/* end of file */\n");
878 int rc = RTStrmClose(pOut);
879 if (RT_SUCCESS(rc))
880 return rcExit;
881 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
882}
883
884
/** Emits the opening of a "g_aTests_<name>" test data array definition. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
889
890
/** Closes a test data array definition and emits the matching
 *  "g_cTests_<name>" element count variable. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
899
900#endif /* TSTIEMAIMPL_WITH_GENERATOR */
901
902
903/*
904 * Test helpers.
905 */
906static bool IsTestEnabled(const char *pszName)
907{
908 /* Process excludes first: */
909 uint32_t i = g_cExcludeTestPatterns;
910 while (i-- > 0)
911 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
912 return false;
913
914 /* If no include patterns, everything is included: */
915 i = g_cIncludeTestPatterns;
916 if (!i)
917 return true;
918
919 /* Otherwise only tests in the include patters gets tested: */
920 while (i-- > 0)
921 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
922 return true;
923
924 return false;
925}
926
927
928static bool SubTestAndCheckIfEnabled(const char *pszName)
929{
930 RTTestSub(g_hTest, pszName);
931 if (IsTestEnabled(pszName))
932 return true;
933 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
934 return false;
935}
936
937
938static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
939{
940 if (fActual == fExpected)
941 return "";
942
943 uint32_t const fXor = fActual ^ fExpected;
944 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
945 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
946
947 static struct
948 {
949 const char *pszName;
950 uint32_t fFlag;
951 } const s_aFlags[] =
952 {
953#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
954 EFL_ENTRY(CF),
955 EFL_ENTRY(PF),
956 EFL_ENTRY(AF),
957 EFL_ENTRY(ZF),
958 EFL_ENTRY(SF),
959 EFL_ENTRY(TF),
960 EFL_ENTRY(IF),
961 EFL_ENTRY(DF),
962 EFL_ENTRY(OF),
963 EFL_ENTRY(IOPL),
964 EFL_ENTRY(NT),
965 EFL_ENTRY(RF),
966 EFL_ENTRY(VM),
967 EFL_ENTRY(AC),
968 EFL_ENTRY(VIF),
969 EFL_ENTRY(VIP),
970 EFL_ENTRY(ID),
971 };
972 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
973 if (s_aFlags[i].fFlag & fXor)
974 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
975 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
976 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
977 return pszBuf;
978}
979
980
981static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
982{
983 if (fActual == fExpected)
984 return "";
985
986 uint16_t const fXor = fActual ^ fExpected;
987 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
988 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
989
990 static struct
991 {
992 const char *pszName;
993 uint32_t fFlag;
994 } const s_aFlags[] =
995 {
996#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
997 FSW_ENTRY(IE),
998 FSW_ENTRY(DE),
999 FSW_ENTRY(ZE),
1000 FSW_ENTRY(OE),
1001 FSW_ENTRY(UE),
1002 FSW_ENTRY(PE),
1003 FSW_ENTRY(SF),
1004 FSW_ENTRY(ES),
1005 FSW_ENTRY(C0),
1006 FSW_ENTRY(C1),
1007 FSW_ENTRY(C2),
1008 FSW_ENTRY(C3),
1009 FSW_ENTRY(B),
1010 };
1011 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1012 if (s_aFlags[i].fFlag & fXor)
1013 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1014 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1015 if (fXor & X86_FSW_TOP_MASK)
1016 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1017 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1018#if 0 /* For debugging fprem & fprem1 */
1019 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1020 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1021#endif
1022 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1023 return pszBuf;
1024}
1025
1026
1027static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1028{
1029 if (fActual == fExpected)
1030 return "";
1031
1032 uint16_t const fXor = fActual ^ fExpected;
1033 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1034 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1035
1036 static struct
1037 {
1038 const char *pszName;
1039 uint32_t fFlag;
1040 } const s_aFlags[] =
1041 {
1042#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1043 MXCSR_ENTRY(IE),
1044 MXCSR_ENTRY(DE),
1045 MXCSR_ENTRY(ZE),
1046 MXCSR_ENTRY(OE),
1047 MXCSR_ENTRY(UE),
1048 MXCSR_ENTRY(PE),
1049
1050 MXCSR_ENTRY(IM),
1051 MXCSR_ENTRY(DM),
1052 MXCSR_ENTRY(ZM),
1053 MXCSR_ENTRY(OM),
1054 MXCSR_ENTRY(UM),
1055 MXCSR_ENTRY(PM),
1056
1057 MXCSR_ENTRY(DAZ),
1058 MXCSR_ENTRY(FZ),
1059#undef MXCSR_ENTRY
1060 };
1061 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1062 if (s_aFlags[i].fFlag & fXor)
1063 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1064 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1065 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1066 return pszBuf;
1067}
1068
1069
1070static const char *FormatFcw(uint16_t fFcw)
1071{
1072 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1073
1074 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1075 switch (fFcw & X86_FCW_PC_MASK)
1076 {
1077 case X86_FCW_PC_24: pszPC = "PC24"; break;
1078 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1079 case X86_FCW_PC_53: pszPC = "PC53"; break;
1080 case X86_FCW_PC_64: pszPC = "PC64"; break;
1081 }
1082
1083 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1084 switch (fFcw & X86_FCW_RC_MASK)
1085 {
1086 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1087 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1088 case X86_FCW_RC_UP: pszRC = "UP"; break;
1089 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1090 }
1091 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1092
1093 static struct
1094 {
1095 const char *pszName;
1096 uint32_t fFlag;
1097 } const s_aFlags[] =
1098 {
1099#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1100 FCW_ENTRY(IM),
1101 FCW_ENTRY(DM),
1102 FCW_ENTRY(ZM),
1103 FCW_ENTRY(OM),
1104 FCW_ENTRY(UM),
1105 FCW_ENTRY(PM),
1106 { "6M", 64 },
1107 };
1108 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1109 if (fFcw & s_aFlags[i].fFlag)
1110 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1111
1112 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1113 return pszBuf;
1114}
1115
1116
1117static const char *FormatMxcsr(uint32_t fMxcsr)
1118{
1119 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1120
1121 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1122 switch (fMxcsr & X86_MXCSR_RC_MASK)
1123 {
1124 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1125 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1126 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1127 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1128 }
1129
1130 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1131 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1132 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1133
1134 static struct
1135 {
1136 const char *pszName;
1137 uint32_t fFlag;
1138 } const s_aFlags[] =
1139 {
1140#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1141 MXCSR_ENTRY(IE),
1142 MXCSR_ENTRY(DE),
1143 MXCSR_ENTRY(ZE),
1144 MXCSR_ENTRY(OE),
1145 MXCSR_ENTRY(UE),
1146 MXCSR_ENTRY(PE),
1147
1148 MXCSR_ENTRY(IM),
1149 MXCSR_ENTRY(DM),
1150 MXCSR_ENTRY(ZM),
1151 MXCSR_ENTRY(OM),
1152 MXCSR_ENTRY(UM),
1153 MXCSR_ENTRY(PM),
1154 { "6M", 64 },
1155 };
1156 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1157 if (fMxcsr & s_aFlags[i].fFlag)
1158 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1159
1160 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1161 return pszBuf;
1162}
1163
1164
1165static const char *FormatR80(PCRTFLOAT80U pr80)
1166{
1167 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1168 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1169 return pszBuf;
1170}
1171
1172
1173static const char *FormatR64(PCRTFLOAT64U pr64)
1174{
1175 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1176 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1177 return pszBuf;
1178}
1179
1180
1181static const char *FormatR32(PCRTFLOAT32U pr32)
1182{
1183 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1184 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1185 return pszBuf;
1186}
1187
1188
/**
 * Formats an 80-bit packed BCD value as a signed decimal digit string.
 *
 * Non-decimal nibbles (a-f) are printed as hex and counted; if any such bad
 * digits were seen, or the padding bits are non-zero, a "[bad-count,pad]"
 * suffix is appended so encoding problems are visible in test output.
 *
 * @returns Pointer into a rotating set of static buffers ("Ind" for the
 *          indefinite encoding).
 * @param   pd80    The packed BCD value to format.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    /* Walk the digit pairs from the most significant end downwards. */
    while (iPair-- > 0)
    {
        static const char s_szDigits[] = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
1215
1216
#if 0 /* currently unused; kept for future tests needing signed 64-bit hex formatting */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1225
1226
1227static const char *FormatI32(int32_t const *piVal)
1228{
1229 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1230 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1231 return pszBuf;
1232}
1233
1234
1235static const char *FormatI16(int16_t const *piVal)
1236{
1237 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1238 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1239 return pszBuf;
1240}
1241
1242
1243/*
1244 * Binary operations.
1245 */
/* Subtest descriptor types for the 8/16/32/64-bit binary operation tables below. */
TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1250
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the BinU<cBits>Generate() function: for each entry in g_aBinU<cBits>
 * it runs the (preferably native) worker on random inputs and writes the
 * resulting test value table to @a pOut, or to @a pOutCpu for workers whose
 * EFLAGS behaviour is CPU-vendor specific.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1289
/**
 * Emits both the generator (see GEN_BINARY_TESTS) and the BinU<cBits>Test()
 * function for a binary operation table.  The test function replays each
 * recorded test against the worker (and the native variant, if present, as a
 * second variation) and, on success, re-runs it via the specially allocated
 * g_pu<cBits>/g_pfEfl pointers to also exercise the memory access paths.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1330
1331
1332/*
1333 * 8-bit binary operations.
1334 */
/* Worker table: plain and locked variants of the classic 8-bit ALU ops. */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1355
1356
1357/*
1358 * 16-bit binary operations.
1359 */
/* Worker table for 16-bit ops.  ENTRY_EX entries (uExtra=1) are bit-test
   instructions whose source the generator masks to a valid bit index;
   ENTRY_AMD/ENTRY_INTEL carry the EFLAGS bits that differ between vendors. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1394
1395
1396/*
1397 * 32-bit binary operations.
1398 */
/* Worker table for 32-bit ops; same conventions as g_aBinU16 (ENTRY_EX =
   bit-test with masked source, ENTRY_AMD/ENTRY_INTEL = vendor EFLAGS diffs). */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1432
1433
1434/*
1435 * 64-bit binary operations.
1436 */
/* Worker table for 64-bit ops; same conventions as g_aBinU16 (ENTRY_EX =
   bit-test with masked source, ENTRY_AMD/ENTRY_INTEL = vendor EFLAGS diffs). */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1470
1471
1472/*
1473 * XCHG
1474 */
/**
 * Tests the locked and unlocked XCHG workers for all four operand sizes.
 *
 * Each worker is invoked twice: once on the specially allocated global
 * buffers (g_pu8/g_pu16/...) to exercise the memory access path, and once on
 * random stack values whose swap result is then verified.
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t           pfn;    /* generic storage for the initializers below */
            FNIEMAIMPLXCHGU8   *pfnU8;
            FNIEMAIMPLXCHGU16  *pfnU16;
            FNIEMAIMPLXCHGU32  *pfnU32;
            FNIEMAIMPLXCHGU64  *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_locked  } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_unlocked  } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        if (uIn1.u == uIn2.u) /* make sure the two values differ so the swap is observable */
            uDst.u = uIn2.u = ~uIn2.u;

        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1540
1541
1542/*
1543 * XADD
1544 */
1545static void XaddTest(void)
1546{
1547#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1548 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1549 static struct \
1550 { \
1551 const char *pszName; \
1552 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1553 BINU ## a_cBits ## _TEST_T const *paTests; \
1554 uint32_t const *pcTests; \
1555 } const s_aFuncs[] = \
1556 { \
1557 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1558 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1559 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1560 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1561 }; \
1562 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1563 { \
1564 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1565 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1566 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1567 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1568 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1569 { \
1570 uint32_t fEfl = paTests[iTest].fEflIn; \
1571 a_Type uSrc = paTests[iTest].uSrcIn; \
1572 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1573 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1574 if ( fEfl != paTests[iTest].fEflOut \
1575 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1576 || uSrc != paTests[iTest].uDstIn) \
1577 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1578 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1579 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1580 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1581 } \
1582 } \
1583 } while(0)
1584 TEST_XADD(8, uint8_t, "%#04x");
1585 TEST_XADD(16, uint16_t, "%#06x");
1586 TEST_XADD(32, uint32_t, "%#010RX32");
1587 TEST_XADD(64, uint64_t, "%#010RX64");
1588}
1589
1590
1591/*
1592 * CMPXCHG
1593 */
1594
/**
 * Tests the CMPXCHG workers by replaying the CMP test value tables.
 *
 * For each recorded test it does a mismatching exchange (comparand almost
 * never equals the accumulator, so the destination must be left as-is and the
 * CMP flags reproduced), then a matching one (comparand equals destination,
 * expected flags computed on the fly via the SUB worker, destination must
 * receive the new value).
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
        typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
        static struct \
        { \
            const char *pszName; \
            FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
            PFNIEMAIMPLBINU ## a_cBits pfnSub; \
            BINU ## a_cBits ## _TEST_T const *paTests; \
            uint32_t const *pcTests; \
        } const s_aFuncs[] = \
        { \
            { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
            { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
        }; \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
        { \
            if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
            BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
            uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
            if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                /* as is (99% likely to be negative). */ \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
                a_Type uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
                a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if (   fEfl != paTests[iTest].fEflOut \
                    || *g_pu ## a_cBits != uExpect \
                    || uA != paTests[iTest].uSrcIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                /* positive */ \
                uint32_t fEflExpect = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
                fEfl = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = uA; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if (   fEfl != fEflExpect \
                    || *g_pu ## a_cBits != uNew \
                    || uA != paTests[iTest].uDstIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                                 EFlagsDiff(fEfl, fEflExpect)); \
            } \
        } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
1660
/**
 * Tests the CMPXCHG8B workers with random values.
 *
 * For each iteration: a matching exchange (comparand equals memory, so ZF
 * must be set and the new value stored) followed by a mismatching one (memory
 * holds the inverted value, so ZF must be clear, memory left untouched and
 * the comparand register updated with the memory content).
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG8B  *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b",        iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value register must never be modified */

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1717
/**
 * Tests the CMPXCHG16B workers with random values, mirroring CmpXchg8bTest:
 * one matching exchange (ZF set, new value stored) and one mismatching
 * exchange (ZF clear, memory untouched, comparand updated) per iteration.
 * Skipped on AMD64 hosts without the CX16 CPUID feature.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b",          iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked",   iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* new-value regs unmodified */

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1794
1795
1796/*
1797 * Double shifts.
1798 *
1799 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1800 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the ShiftDblU<cBits>Generate() function for the SHLD/SHRD tables.
 * The shift count goes into the uMisc field and is deliberately allowed to
 * exceed the operand width (masked to 4*cBits-1) to cover wrap-around.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1830
/**
 * Emits the subtest table (AMD and Intel EFLAGS flavours of SHLD/SHRD), the
 * generator, and the ShiftDblU<cBits>Test() replay function.  On success each
 * test is re-run via the specially allocated g_pu<cBits>/g_pfEfl pointers to
 * exercise the memory access paths as well.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1884
1885#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all SHLD/SHRD operand widths (16/32/64-bit). */
1886static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
1887{
1888    ShiftDblU16Generate(pOut, cTests);
1889    ShiftDblU32Generate(pOut, cTests);
1890    ShiftDblU64Generate(pOut, cTests);
1891}
1892#endif
1893
/** Runs the SHLD/SHRD tests for all operand widths (16/32/64-bit). */
1894static void ShiftDblTest(void)
1895{
1896    ShiftDblU16Test();
1897    ShiftDblU32Test();
1898    ShiftDblU64Test();
1899}
1900
1901
1902/*
1903 * Unary operators.
1904 *
1905 * Note!  We use BINUxx_TEST_T ignoring uSrcIn and uMisc.
1906 */
1907#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a test-data generator for the unary workers (INC/DEC/NOT/NEG) of one
 * operand width.  Each vector records random input EFLAGS + destination and
 * the resulting EFLAGS + destination; uSrcIn/uMisc are unused (written as 0)
 * since BINUxx_TEST_T is reused for unary operations.
 */
1908# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1909void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1910{ \
1911    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1912    { \
1913        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1914        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1915        { \
1916            a_TestType Test; \
1917            Test.fEflIn    = RandEFlags(); \
1918            Test.fEflOut   = Test.fEflIn; \
1919            Test.uDstIn    = RandU ## a_cBits(); \
1920            Test.uDstOut   = Test.uDstIn; \
1921            Test.uSrcIn    = 0; \
1922            Test.uMisc     = 0; \
1923            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1924            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1925                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1926        } \
1927        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1928    } \
1929}
1930#else
1931# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1932#endif
1933
/**
 * Instantiates the unary operator (INC/DEC/NOT/NEG, plain and locked)
 * sub-test table, generator and test driver for one operand width.
 *
 * Unlike the shift/mul-div macros there are no per-CPU-vendor EFLAGS
 * variations here (plain ENTRY), so no iVar loop is emitted.  Passing
 * results are re-run against the special g_pu<N>/g_pfEfl buffers.
 */
1934#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1935TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1936static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1937{ \
1938    ENTRY(inc_u ## a_cBits), \
1939    ENTRY(inc_u ## a_cBits ## _locked), \
1940    ENTRY(dec_u ## a_cBits), \
1941    ENTRY(dec_u ## a_cBits ## _locked), \
1942    ENTRY(not_u ## a_cBits), \
1943    ENTRY(not_u ## a_cBits ## _locked), \
1944    ENTRY(neg_u ## a_cBits), \
1945    ENTRY(neg_u ## a_cBits ## _locked), \
1946}; \
1947\
1948GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1949\
1950static void UnaryU ## a_cBits ## Test(void) \
1951{ \
1952    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1953    { \
1954        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1955        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1956        uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1957        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1958        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1959        { \
1960            uint32_t fEfl = paTests[iTest].fEflIn; \
1961            a_Type uDst = paTests[iTest].uDstIn; \
1962            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1963            if (   uDst != paTests[iTest].uDstOut \
1964                || fEfl != paTests[iTest].fEflOut) \
1965                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1966                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1967                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1968                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1969            else \
1970            { \
1971                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1972                *g_pfEfl = paTests[iTest].fEflIn; \
1973                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1974                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1975                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1976            } \
1977        } \
1978    } \
1979}
1980TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1981TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1982TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1983TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1984
1985#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all unary operator widths (8/16/32/64-bit). */
1986static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
1987{
1988    UnaryU8Generate(pOut, cTests);
1989    UnaryU16Generate(pOut, cTests);
1990    UnaryU32Generate(pOut, cTests);
1991    UnaryU64Generate(pOut, cTests);
1992}
1993#endif
1993#endif
1994
/** Runs the unary operator tests for all operand widths (8/16/32/64-bit). */
1995static void UnaryTest(void)
1996{
1997    UnaryU8Test();
1998    UnaryU16Test();
1999    UnaryU32Test();
2000    UnaryU64Test();
2001}
2002
2003
2004/*
2005 * Shifts.
2006 *
2007 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2008 */
2009#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a test-data generator for the shift/rotate workers of one operand
 * width.  Skips sub-tests whose EFLAGS flavour doesn't match the host CPU
 * (only the native flavour can be generated on this machine).  Each input
 * produces two vectors: one with random EFLAGS and a second ('b') with those
 * EFLAGS inverted (within the live mask), to cover both flag polarities.
 */
2010# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2011void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2012{ \
2013    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2014    { \
2015        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2016            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2017            continue; \
2018        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2019        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2020        { \
2021            a_TestType Test; \
2022            Test.fEflIn    = RandEFlags(); \
2023            Test.fEflOut   = Test.fEflIn; \
2024            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
2025            Test.uDstOut   = Test.uDstIn; \
2026            Test.uSrcIn    = 0; \
2027            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2028            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2029            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
2030                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2031            \
2032            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2033            Test.fEflOut   = Test.fEflIn; \
2034            Test.uDstOut   = Test.uDstIn; \
2035            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2036            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
2037                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2038        } \
2039        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2040    } \
2041}
2042#else
2043# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2044#endif
2045
/**
 * Instantiates the shift/rotate (ROL/ROR/RCL/RCR/SHL/SHR/SAR) sub-test table,
 * generator and test driver for one operand width.
 *
 * Each instruction gets an AMD and an Intel flavour entry; OF (and AF for the
 * plain shifts) are the flavour-specific flags.  The iVar loop replays the
 * vectors first with the C worker (.pfn) and then, if present, the native
 * assembly worker (.pfnNative).  The shift count lives in uMisc; uSrcIn is
 * unused (see the BINUxx_TEST_T note above the Shifts section).
 */
2046#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2047TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2048static a_SubTestType const a_aSubTests[] = \
2049{ \
2050    ENTRY_AMD(  rol_u ## a_cBits, X86_EFL_OF), \
2051    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2052    ENTRY_AMD(  ror_u ## a_cBits, X86_EFL_OF), \
2053    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2054    ENTRY_AMD(  rcl_u ## a_cBits, X86_EFL_OF), \
2055    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2056    ENTRY_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
2057    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2058    ENTRY_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2059    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2060    ENTRY_AMD(  shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2061    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2062    ENTRY_AMD(  sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2063    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2064}; \
2065\
2066GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2067\
2068static void ShiftU ## a_cBits ## Test(void) \
2069{ \
2070    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2071    { \
2072        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2073        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2074        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2075        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2076        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2077        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2078        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2079        { \
2080            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2081            { \
2082                uint32_t fEfl = paTests[iTest].fEflIn; \
2083                a_Type uDst = paTests[iTest].uDstIn; \
2084                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2085                if (   uDst != paTests[iTest].uDstOut \
2086                    || fEfl != paTests[iTest].fEflOut ) \
2087                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2088                                 iTest, iVar == 0 ? "" : "/n", \
2089                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2090                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2091                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2092                else \
2093                { \
2094                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2095                    *g_pfEfl = paTests[iTest].fEflIn; \
2096                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2097                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2098                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2099                } \
2100            } \
2101            pfn = a_aSubTests[iFn].pfnNative; \
2102        } \
2103    } \
2104}
2105TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2106TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2107TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2108TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2109
2110#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all shift/rotate widths (8/16/32/64-bit). */
2111static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
2112{
2113    ShiftU8Generate(pOut, cTests);
2114    ShiftU16Generate(pOut, cTests);
2115    ShiftU32Generate(pOut, cTests);
2116    ShiftU64Generate(pOut, cTests);
2117}
2118#endif
2119
/** Runs the shift/rotate tests for all operand widths (8/16/32/64-bit). */
2120static void ShiftTest(void)
2121{
2122    ShiftU8Test();
2123    ShiftU16Test();
2124    ShiftU32Test();
2125    ShiftU64Test();
2126}
2127
2128
2129/*
2130 * Multiplication and division.
2131 *
2132 * Note! The 8-bit functions has a different format, so we need to duplicate things.
2133 * Note! Currently ignoring undefined bits.
2134 */
2135
2136/* U8 */
2137TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2138static INT_MULDIV_U8_T const g_aMulDivU8[] =
2139{
2140 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2141 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2142 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2143 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2144 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2145 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2146 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2147 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2148 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2149 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2150};
2151
2152#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit mul/imul/div/idiv workers.  Only the
 * EFLAGS flavour matching the host CPU (or the shared native one) can be
 * generated.  The 8-bit form is special: AX is both input and output, so
 * uDstIn is 16-bit and there is a single destination (unlike the wider
 * forms which use a DX:AX style register pair).
 */
2153static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
2154{
2155    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2156    {
2157        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2158            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2159            continue;
2160        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T"); \
2161        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2162        {
2163            MULDIVU8_TEST_T Test;
2164            Test.fEflIn    = RandEFlags();
2165            Test.fEflOut   = Test.fEflIn;
2166            Test.uDstIn    = RandU16Dst(iTest);
2167            Test.uDstOut   = Test.uDstIn;
2168            Test.uSrcIn    = RandU8Src(iTest);
2169            Test.rc        = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2170            RTStrmPrintf(pOut, "    { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
2171                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
2172        }
2173        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
2174    }
2175}
2176#endif
2177
/**
 * Tests the 8-bit mul/imul/div/idiv workers against the pre-generated data.
 *
 * The iVar loop runs the vectors twice when a native (assembly) variation is
 * available: first via the C worker (.pfn), then via .pfnNative, matching the
 * TEST_MULDIV/TEST_SHIFT macros.  Fix: the calls now go through the local
 * 'pfn' variable - previously both call sites used g_aMulDivU8[iFn].pfn
 * directly, so the 'pfn = ...pfnNative' switch at the bottom of the loop was
 * dead and the native variation was never exercised.  Flags listed in uExtra
 * (fEflIgn) are masked out of the EFLAGS comparison (undefined flags).
 * Passing results are re-run against the special g_pu16/g_pfEfl buffers.
 */
2178static void MulDivU8Test(void)
2179{
2180    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2181    {
2182        if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue;
2183        MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2184        uint32_t const                cTests  = *g_aMulDivU8[iFn].pcTests;
2185        uint32_t const                fEflIgn = g_aMulDivU8[iFn].uExtra;
2186        PFNIEMAIMPLMULDIVU8           pfn     = g_aMulDivU8[iFn].pfn;
2187        uint32_t const                cVars   = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
2188        if (!cTests) RTTestSkipped(g_hTest, "no tests");
2189        for (uint32_t iVar = 0; iVar < cVars; iVar++)
2190        {
2191            for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2192            {
2193                uint32_t fEfl  = paTests[iTest].fEflIn;
2194                uint16_t uDst  = paTests[iTest].uDstIn;
2195                int rc = pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2196                if (   uDst != paTests[iTest].uDstOut
2197                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2198                    || rc != paTests[iTest].rc)
2199                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2200                                          "  %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2201                                          "%sexpected %#08x     %#06RX16    %d%s\n",
2202                                 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2203                                 iVar ? "  " : "", fEfl, uDst, rc,
2204                                 iVar ? "  " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2205                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2206                else
2207                {
2208                    *g_pu16  = paTests[iTest].uDstIn;
2209                    *g_pfEfl = paTests[iTest].fEflIn;
2210                    rc = pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2211                    RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2212                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2213                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2214                }
2215            }
2216            pfn = g_aMulDivU8[iFn].pfnNative;
2217        }
2218    }
2219}
2220
2221#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a test-data generator for the 16/32/64-bit mul/div workers.  These
 * take two destinations (the DX:AX style register pair) plus a source, and
 * return a status code (rc) that is recorded in the vector as well.  Only the
 * EFLAGS flavour matching the host CPU (or the native one) can be generated.
 */
2222# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2223void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2224{ \
2225    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2226    { \
2227        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2228            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2229            continue; \
2230        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2231        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2232        { \
2233            a_TestType Test; \
2234            Test.fEflIn    = RandEFlags(); \
2235            Test.fEflOut   = Test.fEflIn; \
2236            Test.uDst1In   = RandU ## a_cBits ## Dst(iTest); \
2237            Test.uDst1Out  = Test.uDst1In; \
2238            Test.uDst2In   = RandU ## a_cBits ## Dst(iTest); \
2239            Test.uDst2Out  = Test.uDst2In; \
2240            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
2241            Test.rc        = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2242            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
2243                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
2244                         Test.rc, iTest); \
2245        } \
2246        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2247    } \
2248}
2249#else
2250# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2251#endif
2252
/**
 * Instantiates the 16/32/64-bit mul/imul/div/idiv sub-test table, generator
 * and test driver.  Flags listed in uExtra (fEflIgn) are masked out of the
 * EFLAGS comparison (currently-undefined flags).  The iVar loop replays the
 * vectors with the C worker first and then, via the pfn switch at the loop
 * bottom, the native assembly worker when present.  Passing results are
 * re-run against the special g_pu<N>/g_pu<N>Two/g_pfEfl buffers.
 */
2253#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2254TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2255static a_SubTestType const a_aSubTests [] = \
2256{ \
2257    ENTRY_AMD_EX(mul_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2258    ENTRY_INTEL_EX(mul_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2259    ENTRY_AMD_EX(imul_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2260    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2261    ENTRY_AMD_EX(div_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2262    ENTRY_INTEL_EX(div_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2263    ENTRY_AMD_EX(idiv_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2264    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2265}; \
2266\
2267GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2268\
2269static void MulDivU ## a_cBits ## Test(void) \
2270{ \
2271    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2272    { \
2273        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2274        a_TestType const * const      paTests = a_aSubTests[iFn].paTests; \
2275        uint32_t const                cTests  = *a_aSubTests[iFn].pcTests; \
2276        uint32_t const                fEflIgn = a_aSubTests[iFn].uExtra; \
2277        PFNIEMAIMPLMULDIVU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
2278        uint32_t const                cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2279        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2280        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2281        { \
2282            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2283            { \
2284                uint32_t fEfl  = paTests[iTest].fEflIn; \
2285                a_Type   uDst1 = paTests[iTest].uDst1In; \
2286                a_Type   uDst2 = paTests[iTest].uDst2In; \
2287                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2288                if (   uDst1 != paTests[iTest].uDst1Out \
2289                    || uDst2 != paTests[iTest].uDst2Out \
2290                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2291                    || rc != paTests[iTest].rc) \
2292                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2293                                           " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2294                                           "expected %#08x     " a_Fmt "      " a_Fmt " %d%s -%s%s%s\n", \
2295                                 iTest, iVar == 0 ? "" : "/n", \
2296                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2297                                 fEfl, uDst1, uDst2, rc, \
2298                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2299                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2300                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2301                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2302                else \
2303                { \
2304                    *g_pu ## a_cBits        = paTests[iTest].uDst1In; \
2305                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2306                    *g_pfEfl                = paTests[iTest].fEflIn; \
2307                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2308                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits        == paTests[iTest].uDst1Out); \
2309                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2310                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn)    == (paTests[iTest].fEflOut | fEflIgn)); \
2311                    RTTEST_CHECK(g_hTest, rc                      == paTests[iTest].rc); \
2312                } \
2313            } \
2314            pfn = a_aSubTests[iFn].pfnNative; \
2315        } \
2316    } \
2317}
2318TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2319TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2320TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2321
2322#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all mul/div widths (8/16/32/64-bit). */
2323static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
2324{
2325    MulDivU8Generate(pOut, cTests);
2326    MulDivU16Generate(pOut, cTests);
2327    MulDivU32Generate(pOut, cTests);
2328    MulDivU64Generate(pOut, cTests);
2329}
2330#endif
2331
/** Runs the mul/div tests for all operand widths (8/16/32/64-bit). */
2332static void MulDivTest(void)
2333{
2334    MulDivU8Test();
2335    MulDivU16Test();
2336    MulDivU32Test();
2337    MulDivU64Test();
2338}
2339
2340
2341/*
2342 * BSWAP
2343 */
/**
 * Tests the BSWAP emulation workers with fixed input values (no generated
 * test data needed for this trivial instruction).
 *
 * The 16-bit BSWAP result is formally undefined; the #if 0 branches keep the
 * byte-swapped expectation while the active checks expect the low 16 bits to
 * be zeroed, matching what the current emulation produces.  NOTE(review):
 * presumably this mirrors observed real-hardware behaviour - confirm against
 * the iemAImpl_bswap_u16 implementation if the expectation changes.
 */
2344static void BswapTest(void)
2345{
2346    if (SubTestAndCheckIfEnabled("bswap_u16"))
2347    {
2348        *g_pu32 = UINT32_C(0x12345678);
2349        iemAImpl_bswap_u16(g_pu32);
2350#if 0
2351        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2352#else
2353        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2354#endif
2355        *g_pu32 = UINT32_C(0xffff1122);
2356        iemAImpl_bswap_u16(g_pu32);
2357#if 0
2358        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2359#else
2360        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2361#endif
2362    }
2363
2364    if (SubTestAndCheckIfEnabled("bswap_u32"))
2365    {
2366        *g_pu32 = UINT32_C(0x12345678);
2367        iemAImpl_bswap_u32(g_pu32);
2368        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2369    }
2370
2371    if (SubTestAndCheckIfEnabled("bswap_u64"))
2372    {
2373        *g_pu64 = UINT64_C(0x0123456789abcdef);
2374        iemAImpl_bswap_u64(g_pu64);
2375        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2376    }
2377}
2378
2379
2380
2381/*********************************************************************************************************************************
2382* Floating point (x87 style) *
2383*********************************************************************************************************************************/
2384
2385/*
2386 * FPU constant loading.
 *
 * Sub-tests for the x87 constant-loading workers (FLD1, FLDL2T, FLDL2E,
 * FLDPI, FLDLG2, FLDLN2, FLDZ).  No per-vendor EFLAGS variations.
2387 */
2388TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2389
2390static const FPU_LD_CONST_T g_aFpuLdConst[] =
2391{
2392    ENTRY(fld1),
2393    ENTRY(fldl2t),
2394    ENTRY(fldl2e),
2395    ENTRY(fldpi),
2396    ENTRY(fldlg2),
2397    ENTRY(fldln2),
2398    ENTRY(fldz),
2399};
2400
2401#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the x87 constant loaders.  For each random
 * FCW/FSW pair, all four rounding-control modes are emitted (hence the
 * iTest += 4 stride), since rounding affects the loaded constant value.
 */
2402static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2403{
2404    X86FXSTATE State;
2405    RT_ZERO(State);
2406    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2407    {
2408        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2409        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2410        {
2411            State.FCW = RandFcw();
2412            State.FSW = RandFsw();
2413
2414            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2415            {
2416                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2417                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2418                g_aFpuLdConst[iFn].pfn(&State, &Res);
2419                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2420                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2421            }
2422        }
2423        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2424    }
2425}
2426#endif
2427
/**
 * Tests the x87 constant loaders against the pre-generated vectors: for each
 * recorded FCW/FSW input, the produced FSW and 80-bit result must match
 * bit-for-bit (RTFLOAT80U_ARE_IDENTICAL).  The iVar loop retries with the
 * native worker when one exists.
 */
2428static void FpuLoadConstTest(void)
2429{
2430    /*
2431     * Inputs:
2432     *      - FSW: C0, C1, C2, C3
2433     *      - FCW: Exception masks, Precision control, Rounding control.
2434     *
2435     * C1 set to 1 on stack overflow, zero otherwise.  C0, C2, and C3 are "undefined".
2436     */
2437    X86FXSTATE State;
2438    RT_ZERO(State);
2439    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2440    {
2441        if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2442            continue;
2443
2444        uint32_t const              cTests  = *g_aFpuLdConst[iFn].pcTests;
2445        FPU_LD_CONST_TEST_T const  *paTests = g_aFpuLdConst[iFn].paTests;
2446        PFNIEMAIMPLFPUR80LDCONST    pfn     = g_aFpuLdConst[iFn].pfn;
2447        uint32_t const              cVars   = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2448        if (!cTests) RTTestSkipped(g_hTest, "no tests");
2449        for (uint32_t iVar = 0; iVar < cVars; iVar++)
2450        {
2451            for (uint32_t iTest = 0; iTest < cTests; iTest++)
2452            {
2453                State.FCW = paTests[iTest].fFcw;
2454                State.FSW = paTests[iTest].fFswIn;
2455                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2456                pfn(&State, &Res);
2457                if (   Res.FSW != paTests[iTest].fFswOut
2458                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2459                    RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2460                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2461                                 Res.FSW, FormatR80(&Res.r80Result),
2462                                 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2463                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2464                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2465                                 FormatFcw(paTests[iTest].fFcw) );
2466            }
2467            pfn = g_aFpuLdConst[iFn].pfnNative;
2468        }
2469    }
2470}
2471
2472
2473/*
2474 * Load floating point values from memory.
2475 */
2476#ifdef TSTIEMAIMPL_WITH_GENERATOR
2477# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2478static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2479{ \
2480 X86FXSTATE State; \
2481 RT_ZERO(State); \
2482 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2483 { \
2484 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2485 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2486 { \
2487 State.FCW = RandFcw(); \
2488 State.FSW = RandFsw(); \
2489 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
2490 \
2491 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2492 { \
2493 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2494 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2495 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2496 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
2497 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
2498 GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
2499 } \
2500 } \
2501 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2502 } \
2503}
2504#else
2505# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
2506#endif
2507
/**
 * Instantiates the FLD-from-memory (r80/r64/r32) worker typedef, sub-test
 * table, generator and test driver for one source operand width.  Replays
 * the pre-generated vectors, requiring the exact FSW and a bit-identical
 * 80-bit result; the iVar loop retries with the native worker when present.
 */
2508#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2509typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2510typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2511TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2512\
2513static const a_SubTestType a_aSubTests[] = \
2514{ \
2515    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2516}; \
2517GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2518\
2519static void FpuLdR ## a_cBits ## Test(void) \
2520{ \
2521    X86FXSTATE State; \
2522    RT_ZERO(State); \
2523    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2524    { \
2525        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2526        \
2527        uint32_t const                     cTests  = *a_aSubTests[iFn].pcTests; \
2528        a_TestType const * const           paTests = a_aSubTests[iFn].paTests; \
2529        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
2530        uint32_t const                     cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2531        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2532        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2533        { \
2534            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2535            { \
2536                a_rdTypeIn const InVal = paTests[iTest].InVal; \
2537                State.FCW = paTests[iTest].fFcw; \
2538                State.FSW = paTests[iTest].fFswIn; \
2539                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2540                pfn(&State, &Res, &InVal); \
2541                if (   Res.FSW != paTests[iTest].fFswOut \
2542                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2543                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2544                                          "%s              -> fsw=%#06x    %s\n" \
2545                                          "%s            expected %#06x    %s%s%s (%s)\n", \
2546                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2547                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
2548                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result), \
2549                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2550                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2551                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2552                                 FormatFcw(paTests[iTest].fFcw) ); \
2553            } \
2554            pfn = a_aSubTests[iFn].pfnNative; \
2555        } \
2556    } \
2557}
2558
2559TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2560TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2561TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2562
2563#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for FLD from memory (r80/r64/r32 sources). */
2564static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2565{
2566    FpuLdR80Generate(pOut, cTests);
2567    FpuLdR64Generate(pOut, cTests);
2568    FpuLdR32Generate(pOut, cTests);
2569}
2570#endif
2571
/** Runs the FLD-from-memory tests (r80/r64/r32 sources). */
2572static void FpuLdMemTest(void)
2573{
2574    FpuLdR80Test();
2575    FpuLdR64Test();
2576    FpuLdR32Test();
2577}
2578
2579
2580/*
2581 * Load integer values from memory.
2582 */
2583#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a test-data generator for FILD from an a_cBits-bit signed integer
 * memory operand.  Each random input value is run through all four
 * rounding-control modes.
 */
2584# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2585static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2586{ \
2587    X86FXSTATE State; \
2588    RT_ZERO(State); \
2589    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2590    { \
2591        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2592        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2593        { \
2594            State.FCW = RandFcw(); \
2595            State.FSW = RandFsw(); \
2596            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
2597            \
2598            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2599            { \
2600                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2601                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2602                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2603                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
2604                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
2605            } \
2606        } \
2607        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2608    } \
2609}
2610#else
2611# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
2612#endif
2613
/**
 * Instantiates the function-pointer typedefs, the subtest table and the test
 * driver for one FPU load-integer (fild) emulation of a given source width.
 *
 * @param a_cBits       Width of the integer input (16, 32 or 64).
 * @param a_iTypeIn     C type of the integer input (int16_t/int32_t/int64_t).
 * @param a_szFmtIn     Format specifier string literal for a_iTypeIn.
 * @param a_SubTestType Name for the generated subtest descriptor type.
 * @param a_aSubTests   Name for the generated subtest table.
 * @param a_TestType    Test record type from tstIEMAImpl.h.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                /* Feed the recorded FCW/FSW and input through the worker and compare. */ \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second variation (if any) runs the native/assembly implementation. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2663
/* Instantiate the fild test drivers for 64-, 32- and 16-bit integer sources. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2667
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the fild reference data arrays for all three integer source widths. */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Dispatch table; 64/32/16 order kept so the generated file layout is unchanged. */
    typedef void FNFILDGEN(PRTSTREAM, uint32_t);
    FNFILDGEN * const s_apfnGen[] = { FpuLdI64Generate, FpuLdI32Generate, FpuLdI16Generate };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGen); idx++)
        s_apfnGen[idx](pOut, cTests);
}
#endif
2676
2677static void FpuLdIntTest(void)
2678{
2679 FpuLdI64Test();
2680 FpuLdI32Test();
2681 FpuLdI16Test();
2682}
2683
2684
2685/*
2686 * Load binary coded decimal values from memory.
2687 */
2688typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2689typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2690TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2691
2692static const FPU_LD_D80_T g_aFpuLdD80[] =
2693{
2694 ENTRY(fld_r80_from_d80)
2695};
2696
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates fbld reference data: for each test a random FCW/FSW and packed
 * BCD input, run through all four rounding modes.
 *
 * NOTE(review): the RandFcw/RandFsw/RandD80Src call order must not change,
 * or regenerated data won't be comparable between runs.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Only the RC field of the FCW is varied per rounding pass. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2725
2726static void FpuLdD80Test(void)
2727{
2728 X86FXSTATE State;
2729 RT_ZERO(State);
2730 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2731 {
2732 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2733 continue;
2734
2735 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2736 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2737 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2738 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2739 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2740 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2741 {
2742 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2743 {
2744 RTPBCD80U const InVal = paTests[iTest].InVal;
2745 State.FCW = paTests[iTest].fFcw;
2746 State.FSW = paTests[iTest].fFswIn;
2747 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2748 pfn(&State, &Res, &InVal);
2749 if ( Res.FSW != paTests[iTest].fFswOut
2750 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2751 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2752 "%s -> fsw=%#06x %s\n"
2753 "%s expected %#06x %s%s%s (%s)\n",
2754 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2755 FormatD80(&paTests[iTest].InVal),
2756 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2757 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2758 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2759 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2760 FormatFcw(paTests[iTest].fFcw) );
2761 }
2762 pfn = g_aFpuLdD80[iFn].pfnNative;
2763 }
2764 }
2765}
2766
2767
2768/*
2769 * Store values floating point values to memory.
2770 */
2771#ifdef TSTIEMAIMPL_WITH_GENERATOR
2772static const RTFLOAT80U g_aFpuStR32Specials[] =
2773{
2774 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2775 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2776 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2777 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2778};
2779static const RTFLOAT80U g_aFpuStR64Specials[] =
2780{
2781 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2782 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2783 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2784 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2785 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2786};
2787static const RTFLOAT80U g_aFpuStR80Specials[] =
2788{
2789 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2790};
2791# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2792static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2793{ \
2794 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
2795 X86FXSTATE State; \
2796 RT_ZERO(State); \
2797 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2798 { \
2799 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2800 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
2801 { \
2802 uint16_t const fFcw = RandFcw(); \
2803 State.FSW = RandFsw(); \
2804 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
2805 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
2806 \
2807 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2808 { \
2809 /* PC doesn't influence these, so leave as is. */ \
2810 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
2811 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
2812 { \
2813 uint16_t uFswOut = 0; \
2814 a_rdType OutVal; \
2815 RT_ZERO(OutVal); \
2816 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2817 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
2818 | (iRounding << X86_FCW_RC_SHIFT); \
2819 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
2820 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
2821 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
2822 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
2823 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
2824 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
2825 } \
2826 } \
2827 } \
2828 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2829 } \
2830}
2831#else
2832# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2833#endif
2834
/**
 * Instantiates the typedefs, subtest table, optional generator and the test
 * driver for one fst_r80_to_rNN (store floating point) emulation.
 *
 * @param a_cBits       Destination width (32, 64 or 80).
 * @param a_rdType      Destination type (RTFLOAT32U/RTFLOAT64U/RTFLOAT80U).
 * @param a_SubTestType Name for the generated subtest descriptor type.
 * @param a_aSubTests   Name for the generated subtest table.
 * @param a_TestType    Test record type from tstIEMAImpl.h.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                /* Poison the output so a worker that writes nothing is caught. */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2889
/* Instantiate the fst test drivers for 80-, 64- and 32-bit destinations. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2893
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the fst reference data arrays for all three destination widths. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Table driven; 80/64/32 order preserved so generated output is unchanged. */
    typedef void FNFSTGEN(PRTSTREAM, uint32_t);
    FNFSTGEN * const s_apfnGen[] = { FpuStR80Generate, FpuStR64Generate, FpuStR32Generate };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGen); idx++)
        s_apfnGen[idx](pOut, cTests);
}
#endif
2902
2903static void FpuStMemTest(void)
2904{
2905 FpuStR80Test();
2906 FpuStR64Test();
2907 FpuStR32Test();
2908}
2909
2910
2911/*
2912 * Store integer values to memory or register.
2913 */
2914TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2915TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2916TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2917
2918static const FPU_ST_I16_T g_aFpuStI16[] =
2919{
2920 ENTRY(fist_r80_to_i16),
2921 ENTRY_AMD( fistt_r80_to_i16, 0),
2922 ENTRY_INTEL(fistt_r80_to_i16, 0),
2923};
2924static const FPU_ST_I32_T g_aFpuStI32[] =
2925{
2926 ENTRY(fist_r80_to_i32),
2927 ENTRY(fistt_r80_to_i32),
2928};
2929static const FPU_ST_I64_T g_aFpuStI64[] =
2930{
2931 ENTRY(fist_r80_to_i64),
2932 ENTRY(fistt_r80_to_i64),
2933};
2934
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Special inputs around the int16 overflow/rounding boundaries. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int32 overflow/rounding boundaries. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int64 overflow/rounding boundaries. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};

/**
 * Emits reference data for one fist/fistt worker.  Workers with a vendor
 * specific EFL flavour go to pOutCpu (and only when that flavour matches the
 * host CPU); all others go to pOut.  Each input is run with all four rounding
 * modes and a sweep over the OM/UM/PM exception mask bits.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        /* Prefer the native implementation as reference when available. */ \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
3077
/**
 * Instantiates the optional generator and the test driver for the fist/fistt
 * (store as integer) emulations of a given destination width.
 *
 * @param a_cBits       Destination width (16, 32 or 64).
 * @param a_iType       Destination C type (int16_t/int32_t/int64_t).
 * @param a_szFmt       Format specifier string literal for a_iType.
 * @param a_SubTestType Subtest descriptor type (declared earlier).
 * @param a_aSubTests   Subtest table (declared earlier).
 * @param a_TestType    Test record type from tstIEMAImpl.h.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                /* Poison the output so a worker that writes nothing is caught. */ \
                a_iType iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3120
/* Note: fistt_r80_to_i16 results differ between AMD and Intel, of course :-) */

/* Instantiate the fist/fistt test drivers for 64-, 32- and 16-bit destinations. */
TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3126
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the fist/fistt reference data arrays for all three widths. */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Table driven; 64/32/16 order preserved so generated output is unchanged. */
    typedef void FNFISTGEN(PRTSTREAM, PRTSTREAM, uint32_t);
    FNFISTGEN * const s_apfnGen[] = { FpuStI64Generate, FpuStI32Generate, FpuStI16Generate };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGen); idx++)
        s_apfnGen[idx](pOut, pOutCpu, cTests);
}
#endif
3135
3136static void FpuStIntTest(void)
3137{
3138 FpuStI64Test();
3139 FpuStI32Test();
3140 FpuStI16Test();
3141}
3142
3143
3144/*
3145 * Store as packed BCD value (memory).
3146 */
3147typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3148typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3149TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3150
3151static const FPU_ST_D80_T g_aFpuStD80[] =
3152{
3153 ENTRY(fst_r80_to_d80),
3154};
3155
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates fbstp reference data: random inputs (at most 59 bits of
 * magnitude) plus specials around the 18-digit packed BCD range limit, each
 * run with all four rounding modes and an OM/UM/PM exception mask sweep.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Specials probing the exact max/min representable BCD magnitude and the
       rounding behaviour just around it. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3209
3210
3211static void FpuStD80Test(void)
3212{
3213 X86FXSTATE State;
3214 RT_ZERO(State);
3215 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3216 {
3217 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3218 continue;
3219
3220 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3221 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3222 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3223 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3224 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3225 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3226 {
3227 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3228 {
3229 RTFLOAT80U const InVal = paTests[iTest].InVal;
3230 uint16_t uFswOut = 0;
3231 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3232 State.FCW = paTests[iTest].fFcw;
3233 State.FSW = paTests[iTest].fFswIn;
3234 pfn(&State, &uFswOut, &OutVal, &InVal);
3235 if ( uFswOut != paTests[iTest].fFswOut
3236 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3237 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3238 "%s -> fsw=%#06x %s\n"
3239 "%s expected %#06x %s%s%s (%s)\n",
3240 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3241 FormatR80(&paTests[iTest].InVal),
3242 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3243 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3244 FswDiff(uFswOut, paTests[iTest].fFswOut),
3245 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3246 FormatFcw(paTests[iTest].fFcw) );
3247 }
3248 pfn = g_aFpuStD80[iFn].pfnNative;
3249 }
3250 }
3251}
3252
3253
3254
3255/*********************************************************************************************************************************
3256* x87 FPU Binary Operations *
3257*********************************************************************************************************************************/
3258
3259/*
3260 * Binary FPU operations on two 80-bit floating point values.
3261 */
3262TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3263enum { kFpuBinaryHint_fprem = 1, };
3264
3265static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3266{
3267 ENTRY(fadd_r80_by_r80),
3268 ENTRY(fsub_r80_by_r80),
3269 ENTRY(fsubr_r80_by_r80),
3270 ENTRY(fmul_r80_by_r80),
3271 ENTRY(fdiv_r80_by_r80),
3272 ENTRY(fdivr_r80_by_r80),
3273 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3274 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3275 ENTRY(fscale_r80_by_r80),
3276 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3277 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3278 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3279 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3280 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3281 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3282};
3283
3284#ifdef TSTIEMAIMPL_WITH_GENERATOR
3285static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3286{
3287 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3288
3289 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3290 {
3291 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3292 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3293 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3294 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3295 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3296 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3297 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3298 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3299 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3300 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3301 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3302 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3303 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3304 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3305 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3306 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3307 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3308 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3309 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3310 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3311 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3312 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3313 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3314 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3315 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3316 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3317 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3318 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3319 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3320 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3321 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3322 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3323 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3324 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3325 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3326 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3327 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3328 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3329 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3330 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3331 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3332 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3333 /* fscale: Negative variants for the essentials of the above. */
3334 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3335 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3336 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3337 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3338 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3339 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3340 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3341 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3342 /* fscale: Some fun with denormals and pseudo-denormals. */
3343 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3344 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3345 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3346 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3347 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3348 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3349 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3350 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3351 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3352 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3353 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3354 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3355 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3356 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3357 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3358 };
3359
3360 X86FXSTATE State;
3361 RT_ZERO(State);
3362 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3363 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3364 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3365 {
3366 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3367 PRTSTREAM pOutFn = pOut;
3368 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3369 {
3370 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3371 continue;
3372 pOutFn = pOutCpu;
3373 }
3374
3375 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3376 uint32_t iTestOutput = 0;
3377 uint32_t cNormalInputPairs = 0;
3378 uint32_t cTargetRangeInputs = 0;
3379 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3380 {
3381 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3382 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3383 bool fTargetRange = false;
3384 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3385 {
3386 cNormalInputPairs++;
3387 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3388 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3389 cTargetRangeInputs += fTargetRange = true;
3390 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3391 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3392 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3393 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3394 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3395 cTargetRangeInputs += fTargetRange = true;
3396 }
3397 }
3398 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3399 {
3400 iTest -= 1;
3401 continue;
3402 }
3403
3404 uint16_t const fFcwExtra = 0;
3405 uint16_t const fFcw = RandFcw();
3406 State.FSW = RandFsw();
3407
3408 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3409 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3410 {
3411 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3412 | (iRounding << X86_FCW_RC_SHIFT)
3413 | (iPrecision << X86_FCW_PC_SHIFT)
3414 | X86_FCW_MASK_ALL;
3415 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3416 pfn(&State, &ResM, &InVal1, &InVal2);
3417 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3418 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3419 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3420
3421 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3422 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3423 pfn(&State, &ResU, &InVal1, &InVal2);
3424 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3425 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3426 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3427
3428 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3429 if (fXcpt)
3430 {
3431 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3432 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3433 pfn(&State, &Res1, &InVal1, &InVal2);
3434 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3435 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3436 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3437 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3438 {
3439 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3440 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3441 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3442 pfn(&State, &Res2, &InVal1, &InVal2);
3443 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3444 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3445 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3446 }
3447 if (!RT_IS_POWER_OF_TWO(fXcpt))
3448 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3449 if (fUnmasked & fXcpt)
3450 {
3451 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3452 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3453 pfn(&State, &Res3, &InVal1, &InVal2);
3454 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3455 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3456 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3457 }
3458 }
3459
3460 /* If the values are in range and caused no exceptions, do the whole series of
3461 partial reminders till we get the non-partial one or run into an exception. */
3462 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3463 {
3464 IEMFPURESULT ResPrev = ResM;
3465 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3466 {
3467 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3468 State.FSW = ResPrev.FSW;
3469 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3470 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3471 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3472 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3473 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3474 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3475 ResPrev = ResSeq;
3476 }
3477 }
3478 }
3479 }
3480 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3481 }
3482}
3483#endif
3484
3485
3486static void FpuBinaryR80Test(void)
3487{
3488 X86FXSTATE State;
3489 RT_ZERO(State);
3490 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3491 {
3492 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3493 continue;
3494
3495 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3496 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3497 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3498 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3499 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3500 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3501 {
3502 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3503 {
3504 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3505 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3506 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3507 State.FCW = paTests[iTest].fFcw;
3508 State.FSW = paTests[iTest].fFswIn;
3509 pfn(&State, &Res, &InVal1, &InVal2);
3510 if ( Res.FSW != paTests[iTest].fFswOut
3511 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3512 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3513 "%s -> fsw=%#06x %s\n"
3514 "%s expected %#06x %s%s%s (%s)\n",
3515 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3516 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3517 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3518 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3519 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3520 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3521 FormatFcw(paTests[iTest].fFcw) );
3522 }
3523 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3524 }
3525 }
3526}
3527
3528
3529/*
3530 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3531 */
3532#define int64_t_IS_NORMAL(a) 1
3533#define int32_t_IS_NORMAL(a) 1
3534#define int16_t_IS_NORMAL(a) 1
3535
3536#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs appended after the random inputs by the
   FpuBinary<R64|R32|I32|I16>Generate() template instances below.  Currently
   just one arbitrary placeholder pair per operand type. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3555
/**
 * Generator template: emits FpuBinary<a_UpBits>Generate(), which writes test
 * data tables for the r80-by-<r64|r32|i32|i16> binary FPU workers listed in
 * @a a_aSubTests.  Random inputs come first, then the s_aFpuBinary*Specials
 * entries; each input pair is run through all 4 rounding modes, all 4
 * precision settings, and with exceptions both masked and unmasked.
 * A minimum share of normal/normal input pairs is enforced by redoing the
 * last slots with fresh random values when needed.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* too few normal pairs left; retry this slot with new randoms */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3610
/**
 * Template instantiating the subtest type, the subtest table (the six
 * f[i]add/mul/sub/subr/div/divr r80-by-<a_LoBits> workers), the optional
 * generator (via GEN_FPU_BINARY_SMALL) and the FpuBinary<a_UpBits>Test()
 * replay function for one second-operand type.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3666
/* Instantiate generator + test for each of the four small second-operand types:
   r64/r32 memory floats and i32/i16 memory integers (fi* instruction forms). */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i,          int32_t,    FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i,          int16_t,    FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3671
3672
3673/*
3674 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3675 */
3676#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs appended after the random inputs by the
   FpuBinaryFsw<R80|R64|R32|I32|I16>Generate() template instances below
   (fcom/fucom/ficom).  Currently one arbitrary placeholder pair each. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3700
/**
 * Generator template: emits FpuBinaryFsw<a_UpBits>Generate(), producing test
 * data for compare-style workers that only output FSW (fcom/fucom/ficom).
 * Unlike GEN_FPU_BINARY_SMALL there is no rounding/precision sweep - only the
 * exception-mask bits are flipped (masked and unmasked pass per input pair).
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* too few normal pairs left; retry this slot with new randoms */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3747
/**
 * Template instantiating the subtest type, the subtest table (entries given
 * via __VA_ARGS__), the optional generator (GEN_FPU_BINARY_FSW) and the
 * FpuBinaryFsw<a_UpBits>Test() replay function for one FSW-only compare
 * worker family.  Only the output FSW is checked - these workers produce no
 * value result.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3795
/* Instantiate the FSW-only compare tests: fcom/fucom on r80, fcom on r64/r32
   memory operands, and ficom on i32/i16 memory integers. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t,    FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t,    FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3801
3802
3803/*
3804 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3805 */
/** Subtest descriptor type for r80-by-r80 compares that set EFLAGS (and FSW). */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/* The two EFLAGS-producing compare workers: fcomi and fucomi. */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3813
3814#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs appended after the random inputs by
   FpuBinaryEflR80Generate().  Currently one arbitrary placeholder pair. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3820
3821static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
3822{
3823 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
3824
3825 X86FXSTATE State;
3826 RT_ZERO(State);
3827 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3828 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3829 {
3830 GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
3831 uint32_t cNormalInputPairs = 0;
3832 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
3833 {
3834 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
3835 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
3836 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3837 cNormalInputPairs++;
3838 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3839 {
3840 iTest -= 1;
3841 continue;
3842 }
3843
3844 uint16_t const fFcw = RandFcw();
3845 State.FSW = RandFsw();
3846
3847 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
3848 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3849 {
3850 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
3851 uint16_t uFswOut = 0;
3852 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
3853 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
3854 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
3855 iTest, iMask ? 'c' : 'u');
3856 }
3857 }
3858 GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
3859 }
3860}
3861#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3862
3863static void FpuBinaryEflR80Test(void)
3864{
3865 X86FXSTATE State;
3866 RT_ZERO(State);
3867 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3868 {
3869 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3870 continue;
3871
3872 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3873 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3874 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3875 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3876 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3877 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3878 {
3879 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3880 {
3881 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3882 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3883 State.FCW = paTests[iTest].fFcw;
3884 State.FSW = paTests[iTest].fFswIn;
3885 uint16_t uFswOut = 0;
3886 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3887 if ( uFswOut != paTests[iTest].fFswOut
3888 || fEflOut != paTests[iTest].fEflOut)
3889 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3890 "%s -> fsw=%#06x efl=%#08x\n"
3891 "%s expected %#06x %#08x %s%s (%s)\n",
3892 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3893 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3894 iVar ? " " : "", uFswOut, fEflOut,
3895 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3896 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3897 FormatFcw(paTests[iTest].fFcw));
3898 }
3899 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3900 }
3901 }
3902}
3903
3904
3905/*********************************************************************************************************************************
3906* x87 FPU Unary Operations *
3907*********************************************************************************************************************************/
3908
3909/*
3910 * Unary FPU operations on one 80-bit floating point value.
3911 *
3912 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3913 * a rounding error or not.
3914 */
/** Subtest descriptor type for the unary r80 FPU workers. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Accuracy classification carried in the uExtra field of each entry; used by
   the generator to pick target exponent ranges and by
   FpuUnaryR80MayHaveRoundingError() to flag possible rounding deltas. */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX(      fabs_r80, kUnary_Accurate),
    ENTRY_EX(      fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX(  f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX(      fsqrt_r80, kUnary_Accurate),
    ENTRY_EX(      frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX(  fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX(  fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
3931
3932#ifdef TSTIEMAIMPL_WITH_GENERATOR
3933
3934static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3935{
3936 if ( enmKind == kUnary_Rounding_F2xm1
3937 && RTFLOAT80U_IS_NORMAL(pr80Val)
3938 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3939 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3940 return true;
3941 return false;
3942}
3943
/**
 * Generates test data for the unary FPU operations in g_aFpuUnaryR80.
 *
 * @param   pOut    Output stream for test data common to all CPU flavours.
 * @param   pOutCpu Output stream for test data specific to the host CPU flavour.
 * @param   cTests  Number of random input values to generate per function.
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs processed after the cTests random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* aim for at least a quarter normal inputs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native (assembly) worker as reference when available. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific entries only get data when running on that flavour. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput = 0;
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* NOTE(review): the two ternaries below always take their first
                       branch since the enclosing if already checks for
                       kUnary_Rounding_F2xm1; the second branch (2^64..2^-64) looks
                       intended for other function kinds -- confirm intent. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        /* Force the exponent into the interesting range when running low on iterations. */
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet and we are running out of iterations: redo this one. */
                iTest -= 1;
                continue;
            }

            /* Bit 7 (reserved in the FCW) marks tests that may have rounding errors (see FpuUnaryR80Test). */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* 1st run: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* 2nd run: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Further runs: mask exactly the exceptions that occurred above. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* New exceptions showed up: extend the mask and rerun. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* Multiple exceptions: also run with each one unmasked individually. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
4063#endif
4064
4065static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4066{
4067 if (fFcw1 == fFcw2)
4068 return true;
4069 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4070 {
4071 *pfRndErr = true;
4072 return true;
4073 }
4074 return false;
4075}
4076
4077static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4078{
4079 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4080 return true;
4081 if ( fRndErrOk
4082 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4083 {
4084 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4085 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4086 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4087 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4088 ||
4089 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4090 && pr80Val1->s.uMantissa == UINT64_MAX
4091 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4092 ||
4093 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4094 && pr80Val2->s.uMantissa == UINT64_MAX
4095 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4096 {
4097 *pfRndErr = true;
4098 return true;
4099 }
4100 }
4101 return false;
4102}
4103
4104
/**
 * Verifies the unary FPU R80 workers against the pre-generated test data,
 * tolerating off-by-one rounding differences for tests flagged with bit 7 in
 * the stored FCW (see FpuUnaryR80Generate / FpuUnaryR80MayHaveRoundingError).
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Bit 7 of the stored FCW flags tests where rounding errors are acceptable;
                   it must be stripped before loading the FCW. */
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            /* Second variation (if present) runs the native worker. */
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
4153
4154
4155/*
4156 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4157 */
/** Subtest descriptor type for unary FPU ops on one 80-bit value that only
 *  modify the FSW (no value result). */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1), /* uExtra=1 => fxam: gets FTW/empty-register treatment in the generator */
};
4165
4166#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary FPU operations (ftst, fxam).
 *
 * @param   pOut    Output stream for test data common to all CPU flavours.
 * @param   pOutCpu Output stream for test data specific to the host CPU flavour.
 * @param   cTests  Number of random input values to generate per function.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs processed after the cTests random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* aim for at least a quarter normal inputs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        /* Prefer the native (assembly) worker as reference when available. */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific entries only get data when running on that flavour. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet and we are running out of iterations: redo this one. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                /* ftst: exercise all rounding/precision modes, masked and unmasked. */
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                /* fxam: rounding/precision is irrelevant, but the ST(0) register tag matters. */
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4240#endif
4241
4242
/**
 * Verifies the FSW-only unary FPU workers (ftst, fxam) against the
 * pre-generated test data.
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                /* Bit 7 in the stored FCW indicates an empty ST(0) tag (fxam tests). */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", fFswOut,
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            /* Second variation (if present) runs the native worker. */
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
        }
    }
}
4282
4283/*
4284 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4285 */
/** Subtest descriptor type for unary FPU ops on one 80-bit value that
 *  produce two 80-bit results (fxtract, fptan, fsincos). */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
4296
4297#ifdef TSTIEMAIMPL_WITH_GENERATOR
4298static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4299{
4300 static RTFLOAT80U const s_aSpecials[] =
4301 {
4302 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4303 };
4304
4305 X86FXSTATE State;
4306 RT_ZERO(State);
4307 uint32_t cMinNormals = cTests / 4;
4308 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4309 {
4310 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4311 PRTSTREAM pOutFn = pOut;
4312 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4313 {
4314 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4315 continue;
4316 pOutFn = pOutCpu;
4317 }
4318
4319 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4320 uint32_t iTestOutput = 0;
4321 uint32_t cNormalInputs = 0;
4322 uint32_t cTargetRangeInputs = 0;
4323 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4324 {
4325 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4326 if (RTFLOAT80U_IS_NORMAL(&InVal))
4327 {
4328 if (iFn != 0)
4329 {
4330 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4331 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4332 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4333 cTargetRangeInputs++;
4334 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4335 {
4336 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4337 cTargetRangeInputs++;
4338 }
4339 }
4340 cNormalInputs++;
4341 }
4342 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4343 {
4344 iTest -= 1;
4345 continue;
4346 }
4347
4348 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4349 uint16_t const fFcw = RandFcw();
4350 State.FSW = RandFsw();
4351
4352 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4353 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4354 {
4355 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4356 | (iRounding << X86_FCW_RC_SHIFT)
4357 | (iPrecision << X86_FCW_PC_SHIFT)
4358 | X86_FCW_MASK_ALL;
4359 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4360 pfn(&State, &ResM, &InVal);
4361 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4362 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4363 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4364
4365 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4366 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4367 pfn(&State, &ResU, &InVal);
4368 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4369 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4370 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4371
4372 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4373 if (fXcpt)
4374 {
4375 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4376 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4377 pfn(&State, &Res1, &InVal);
4378 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4379 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4380 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4381 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4382 {
4383 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4384 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4385 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4386 pfn(&State, &Res2, &InVal);
4387 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4388 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4389 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4390 }
4391 if (!RT_IS_POWER_OF_TWO(fXcpt))
4392 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4393 if (fUnmasked & fXcpt)
4394 {
4395 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4396 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4397 pfn(&State, &Res3, &InVal);
4398 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4399 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4400 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4401 }
4402 }
4403 }
4404 }
4405 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4406 }
4407}
4408#endif
4409
4410
/**
 * Verifies the two-result unary FPU workers (fxtract, fptan, fsincos)
 * against the pre-generated test data.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                /* Both result values and the FSW must match exactly. */
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (if present) runs the native worker. */
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
        }
    }
}
4453
4454
4455/*********************************************************************************************************************************
4456* SSE floating point Binary Operations *
4457*********************************************************************************************************************************/
4458
4459/*
4460 * Binary SSE operations on packed single precision floating point values.
4461 */
/** Subtest descriptor type for binary SSE ops on packed single precision
 *  floating point values. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128);

static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
{
    ENTRY(addps_u128),
};
4468
4469#ifdef TSTIEMAIMPL_WITH_GENERATOR
4470static void SseBinaryR32Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4471{
4472 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4473
4474 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
4475 {
4476 { RTFLOAT32U_INIT_ZERO(0),
4477 RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
4478 /** @todo More specials. */
4479 };
4480
4481 X86FXSTATE State;
4482 RT_ZERO(State);
4483 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4484 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4485 {
4486 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4487 PRTSTREAM pOutFn = pOut;
4488 if (g_aSseBinaryR32[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4489 {
4490 if (g_aSseBinaryR32[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4491 continue;
4492 pOutFn = pOutCpu;
4493 }
4494
4495 GenerateArrayStart(pOutFn, g_aSseBinaryR32[iFn].pszName, "SSE_BINARY_R32_TEST_T");
4496 uint32_t iTestOutput = 0;
4497 uint32_t cNormalInputPairs = 0;
4498 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4499 {
4500 RTFLOAT32U InVal1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
4501 RTFLOAT32U InVal2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
4502 if (RTFLOAT32U_IS_NORMAL(&InVal1) && RTFLOAT32U_IS_NORMAL(&InVal2))
4503 cNormalInputPairs++;
4504 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4505 {
4506 iTest -= 1;
4507 continue;
4508 }
4509
4510 X86XMMREG XmmVal1 = { 0 };
4511 X86XMMREG XmmVal2 = { 0 };
4512
4513 XmmVal1.ar32[0] = InVal1;
4514 XmmVal2.ar32[0] = InVal2;
4515
4516 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4517 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4518 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4519 for (uint8_t iFz = 0; iFz < 2; iFz++)
4520 {
4521 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4522 | (iRounding << X86_MXCSR_RC_SHIFT)
4523 | (iDaz ? X86_MXCSR_DAZ : 0)
4524 | (iFz ? X86_MXCSR_FZ : 0)
4525 | X86_MXCSR_XCPT_MASK;
4526 IEMSSERESULT ResM = { 0, 0 };
4527 pfn(&State, &ResM, &XmmVal1, &XmmVal2);
4528 RTStrmPrintf(pOutFn, " { %#08x, %#08x, %s, %s, %s }, /* #%u/%u/%c/%c/m = #%u */\n",
4529 State.MXCSR, ResM.MXCSR, GenFormatR32(&XmmVal1.ar32[0]), GenFormatR32(&XmmVal2.ar32[0]),
4530 GenFormatR32(&ResM.uResult.ar32[0]), iTest, iRounding,
4531 iDaz ? 'd' : '0', iFz ? 'f' : '0',
4532 iTestOutput++);
4533
4534 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4535 IEMSSERESULT ResU = { 0, 0 };
4536 pfn(&State, &ResU, &XmmVal1, &XmmVal2);
4537 RTStrmPrintf(pOutFn, " { %#08x, %#08x, %s, %s, %s }, /* #%u/%u/%c/%c/u = #%u */\n",
4538 State.MXCSR, ResU.MXCSR, GenFormatR32(&XmmVal1.ar32[0]), GenFormatR32(&XmmVal2.ar32[0]),
4539 GenFormatR32(&ResU.uResult.ar32[0]), iTest, iRounding,
4540 iDaz ? 'd' : '0', iFz ? 'f' : '0',
4541 iTestOutput++);
4542
4543 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4544 if (fXcpt)
4545 {
4546 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4547 IEMSSERESULT Res1 = { 0, 0 };
4548 pfn(&State, &Res1, &XmmVal1, &XmmVal2);
4549 RTStrmPrintf(pOutFn, " { %#08x, %#08x, %s, %s, %s }, /* #%u/%u/%c/%c/u = #%u */\n",
4550 State.MXCSR, Res1.MXCSR, GenFormatR32(&XmmVal1.ar32[0]), GenFormatR32(&XmmVal2.ar32[0]),
4551 GenFormatR32(&Res1.uResult.ar32[0]), iTest, iRounding,
4552 iDaz ? 'd' : '0', iFz ? 'f' : '0',
4553 iTestOutput++);
4554 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4555 {
4556 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4557 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4558 IEMSSERESULT Res2 = { 0, 0 };
4559 pfn(&State, &Res2, &XmmVal1, &XmmVal2);
4560 RTStrmPrintf(pOutFn, " { %#08x, %#08x, %s, %s, %s }, /* #%u/%u/%c/%c/%#x[!] = #%u */\n",
4561 State.MXCSR, Res2.MXCSR, GenFormatR32(&XmmVal1.ar32[0]), GenFormatR32(&XmmVal2.ar32[0]),
4562 GenFormatR32(&Res2.uResult.ar32[0]), iTest, iRounding,
4563 iDaz ? 'd' : '0', iFz ? 'f' : '0', fXcpt,
4564 iTestOutput++);
4565 }
4566 if (!RT_IS_POWER_OF_TWO(fXcpt))
4567 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4568 if (fUnmasked & fXcpt)
4569 {
4570 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4571 IEMSSERESULT Res3 = { 0, 0 };
4572 pfn(&State, &Res3, &XmmVal1, &XmmVal2);
4573 RTStrmPrintf(pOutFn, " { %#08x, %#08x, %s, %s, %s }, /* #%u/%u/%c/%c/u%#x = #%u */\n",
4574 State.MXCSR, Res3.MXCSR, GenFormatR32(&XmmVal1.ar32[0]), GenFormatR32(&XmmVal2.ar32[0]),
4575 GenFormatR32(&Res3.uResult.ar32[0]), iTest, iRounding,
4576 iDaz ? 'd' : '0', iFz ? 'f' : '0', fUnmasked,
4577 iTestOutput++);
4578 }
4579 }
4580 }
4581 }
4582 GenerateArrayEnd(pOutFn, g_aSseBinaryR32[iFn].pszName);
4583 }
4584}
4585#endif
4586
/**
 * Verifies the binary SSE packed-single workers against the pre-generated
 * test data.  Only element 0 of each XMM operand carries test data.
 */
static void SseBinaryR32Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
        SSE_BINARY_R32_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG InVal1 = { 0 };
                X86XMMREG InVal2 = { 0 };
                IEMSSERESULT Res = { 0, 0 };

                InVal1.ar32[0] = paTests[iTest].InVal1;
                InVal2.ar32[0] = paTests[iTest].InVal2;
                State.MXCSR = paTests[iTest].fMxcsrIn;
                pfn(&State, &Res, &InVal1, &InVal2);
                /* Both the result MXCSR and element 0 of the result must match. */
                if (   Res.MXCSR != paTests[iTest].fMxcsrOut
                    || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s in2=%s\n"
                                          "%s -> mxcsr=%#08x %s\n"
                                          "%s expected %#08x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal1), FormatR32(&paTests[iTest].InVal2),
                                 iVar ? " " : "", Res.MXCSR, FormatR32(&Res.uResult.ar32[0]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].OutVal),
                                 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
                                 !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal) ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            /* Second variation (if present) runs the native worker. */
            pfn = g_aSseBinaryR32[iFn].pfnNative;
        }
    }
}
4630
4631
4632
4633int main(int argc, char **argv)
4634{
4635 int rc = RTR3InitExe(argc, &argv, 0);
4636 if (RT_FAILURE(rc))
4637 return RTMsgInitFailure(rc);
4638
4639 /*
4640 * Determin the host CPU.
4641 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4642 */
4643#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4644 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4645 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4646 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4647#else
4648 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4649#endif
4650
4651 /*
4652 * Parse arguments.
4653 */
4654 enum { kModeNotSet, kModeTest, kModeGenerate }
4655 enmMode = kModeNotSet;
4656 bool fInt = true;
4657 bool fFpuLdSt = true;
4658 bool fFpuBinary1 = true;
4659 bool fFpuBinary2 = true;
4660 bool fFpuOther = true;
4661 bool fCpuData = true;
4662 bool fCommonData = true;
4663 bool fSseFpBinary = true;
4664 uint32_t const cDefaultTests = 96;
4665 uint32_t cTests = cDefaultTests;
4666 RTGETOPTDEF const s_aOptions[] =
4667 {
4668 // mode:
4669 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4670 { "--test", 't', RTGETOPT_REQ_NOTHING },
4671 // test selection (both)
4672 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4673 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4674 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4675 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4676 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4677 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4678 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4679 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4680 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
4681 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4682 { "--include", 'I', RTGETOPT_REQ_STRING },
4683 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4684 // generation parameters
4685 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4686 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4687 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4688 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
4689 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
4690 };
4691
4692 RTGETOPTSTATE State;
4693 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4694 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4695
4696 RTGETOPTUNION ValueUnion;
4697 while ((rc = RTGetOpt(&State, &ValueUnion)))
4698 {
4699 switch (rc)
4700 {
4701 case 'g':
4702 enmMode = kModeGenerate;
4703 break;
4704 case 't':
4705 enmMode = kModeTest;
4706 break;
4707
4708 case 'a':
4709 fCpuData = true;
4710 fCommonData = true;
4711 fInt = true;
4712 fFpuLdSt = true;
4713 fFpuBinary1 = true;
4714 fFpuBinary2 = true;
4715 fFpuOther = true;
4716 fSseFpBinary = true;
4717 break;
4718 case 'z':
4719 fCpuData = false;
4720 fCommonData = false;
4721 fInt = false;
4722 fFpuLdSt = false;
4723 fFpuBinary1 = false;
4724 fFpuBinary2 = false;
4725 fFpuOther = false;
4726 fSseFpBinary = false;
4727 break;
4728
4729 case 'F':
4730 fFpuLdSt = true;
4731 break;
4732 case 'O':
4733 fFpuOther = true;
4734 break;
4735 case 'B':
4736 fFpuBinary1 = true;
4737 break;
4738 case 'P':
4739 fFpuBinary2 = true;
4740 break;
4741 case 'S':
4742 fSseFpBinary = true;
4743 break;
4744 case 'i':
4745 fInt = true;
4746 break;
4747
4748 case 'I':
4749 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4750 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4751 RT_ELEMENTS(g_apszIncludeTestPatterns));
4752 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4753 break;
4754 case 'X':
4755 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4756 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4757 RT_ELEMENTS(g_apszExcludeTestPatterns));
4758 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4759 break;
4760
4761 case 'm':
4762 fCommonData = true;
4763 break;
4764 case 'c':
4765 fCpuData = true;
4766 break;
4767 case 'n':
4768 cTests = ValueUnion.u32;
4769 break;
4770
4771 case 'q':
4772 g_cVerbosity = 0;
4773 break;
4774 case 'v':
4775 g_cVerbosity++;
4776 break;
4777
4778 case 'h':
4779 RTPrintf("usage: %s <-g|-t> [options]\n"
4780 "\n"
4781 "Mode:\n"
4782 " -g, --generate\n"
4783 " Generate test data.\n"
4784 " -t, --test\n"
4785 " Execute tests.\n"
4786 "\n"
4787 "Test selection (both modes):\n"
4788 " -a, --all\n"
4789 " Enable all tests and generated test data. (default)\n"
4790 " -z, --zap, --none\n"
4791 " Disable all tests and test data types.\n"
4792 " -i, --int\n"
4793 " Enable non-FPU tests.\n"
4794 " -F, --fpu-ld-st\n"
4795 " Enable FPU load and store tests.\n"
4796 " -B, --fpu-binary-1\n"
4797 " Enable FPU binary 80-bit FP tests.\n"
4798 " -P, --fpu-binary-2\n"
4799 " Enable FPU binary 64- and 32-bit FP tests.\n"
4800 " -O, --fpu-other\n"
4801                     "    Enable other FPU tests.\n"
4802 " -S, --sse-fp-binary\n"
4803 " Enable SSE binary 64- and 32-bit FP tests.\n"
4804                     "  -I,--include=<test-pattern>\n"
4805 " Enable tests matching the given pattern.\n"
4806                     "  -X,--exclude=<test-pattern>\n"
4807 " Skip tests matching the given pattern (overrides --include).\n"
4808 "\n"
4809 "Generation:\n"
4810 " -m, --common\n"
4811 " Enable generating common test data.\n"
4812 " -c, --only-cpu\n"
4813 " Enable generating CPU specific test data.\n"
4814 " -n, --number-of-test <count>\n"
4815 " Number of tests to generate. Default: %u\n"
4816 "\n"
4817 "Other:\n"
4818 " -v, --verbose\n"
4819 " -q, --quiet\n"
4820 " Noise level. Default: --quiet\n"
4821 , argv[0], cDefaultTests);
4822 return RTEXITCODE_SUCCESS;
4823 default:
4824 return RTGetOptPrintError(rc, &ValueUnion);
4825 }
4826 }
4827
4828 /*
4829 * Generate data?
4830 */
4831 if (enmMode == kModeGenerate)
4832 {
4833#ifdef TSTIEMAIMPL_WITH_GENERATOR
4834 char szCpuDesc[256] = {0};
4835 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
4836 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
4837# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
4838 const char * const pszBitBucket = "NUL";
4839# else
4840 const char * const pszBitBucket = "/dev/null";
4841# endif
4842
4843 if (cTests == 0)
4844 cTests = cDefaultTests;
4845 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
4846 g_cZeroSrcTests = g_cZeroDstTests * 2;
4847
4848 if (fInt)
4849 {
4850 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
4851 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4852 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4853 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
4854 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4855 if (!pStrmData || !pStrmDataCpu)
4856 return RTEXITCODE_FAILURE;
4857
4858 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
4859 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
4860 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
4861 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
4862 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
4863 UnaryGenerate(pStrmData, cTests);
4864 ShiftGenerate(pStrmDataCpu, cTests);
4865 MulDivGenerate(pStrmDataCpu, cTests);
4866
4867 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4868 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4869 if (rcExit != RTEXITCODE_SUCCESS)
4870 return rcExit;
4871 }
4872
4873 if (fFpuLdSt)
4874 {
4875 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4876 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4877 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4878 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4879 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4880 if (!pStrmData || !pStrmDataCpu)
4881 return RTEXITCODE_FAILURE;
4882
4883 FpuLdConstGenerate(pStrmData, cTests);
4884 FpuLdIntGenerate(pStrmData, cTests);
4885 FpuLdD80Generate(pStrmData, cTests);
4886 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4887 FpuStD80Generate(pStrmData, cTests);
4888 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4889 FpuLdMemGenerate(pStrmData, cTests2);
4890 FpuStMemGenerate(pStrmData, cTests2);
4891
4892 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4893 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4894 if (rcExit != RTEXITCODE_SUCCESS)
4895 return rcExit;
4896 }
4897
4898 if (fFpuBinary1)
4899 {
4900 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4901 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4902 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4903 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4904 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4905 if (!pStrmData || !pStrmDataCpu)
4906 return RTEXITCODE_FAILURE;
4907
4908 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4909 FpuBinaryFswR80Generate(pStrmData, cTests);
4910 FpuBinaryEflR80Generate(pStrmData, cTests);
4911
4912 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4913 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4914 if (rcExit != RTEXITCODE_SUCCESS)
4915 return rcExit;
4916 }
4917
4918 if (fFpuBinary2)
4919 {
4920 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4921 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4922 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4923 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4924 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4925 if (!pStrmData || !pStrmDataCpu)
4926 return RTEXITCODE_FAILURE;
4927
4928 FpuBinaryR64Generate(pStrmData, cTests);
4929 FpuBinaryR32Generate(pStrmData, cTests);
4930 FpuBinaryI32Generate(pStrmData, cTests);
4931 FpuBinaryI16Generate(pStrmData, cTests);
4932 FpuBinaryFswR64Generate(pStrmData, cTests);
4933 FpuBinaryFswR32Generate(pStrmData, cTests);
4934 FpuBinaryFswI32Generate(pStrmData, cTests);
4935 FpuBinaryFswI16Generate(pStrmData, cTests);
4936
4937 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4938 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4939 if (rcExit != RTEXITCODE_SUCCESS)
4940 return rcExit;
4941 }
4942
4943 if (fFpuOther)
4944 {
4945 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4946 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4947 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4948 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4949 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4950 if (!pStrmData || !pStrmDataCpu)
4951 return RTEXITCODE_FAILURE;
4952
4953 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4954 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4955 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4956
4957 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4958 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4959 if (rcExit != RTEXITCODE_SUCCESS)
4960 return rcExit;
4961 }
4962
4963 if (fSseFpBinary)
4964 {
4965 const char *pszDataFile = fCommonData ? "tstIEMAImplDataSseBinary.cpp" : pszBitBucket;
4966 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4967 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4968 ? "tstIEMAImplDataSimdBinary-Amd.cpp" : "tstIEMAImplDataSimdBinary-Intel.cpp"; */
4969 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4970 if (!pStrmData || !pStrmDataCpu)
4971 return RTEXITCODE_FAILURE;
4972
4973 SseBinaryR32Generate(pStrmData, pStrmDataCpu, cTests);
4974 //SseBinaryR64Generate(pStrmData, cTests);
4975
4976 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4977 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4978 if (rcExit != RTEXITCODE_SUCCESS)
4979 return rcExit;
4980 }
4981
4982 return RTEXITCODE_SUCCESS;
4983#else
4984 return RTMsgErrorExitFailure("Test data generator not compiled in!");
4985#endif
4986 }
4987
4988 /*
4989     * Do testing. Currently disabled by default as data needs to be checked
4990     * on both Intel and AMD systems first.
4991 */
4992 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4993 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4994 if (enmMode == kModeTest)
4995 {
4996 RTTestBanner(g_hTest);
4997
4998 /* Allocate guarded memory for use in the tests. */
4999#define ALLOC_GUARDED_VAR(a_puVar) do { \
5000 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
5001 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
5002 } while (0)
5003 ALLOC_GUARDED_VAR(g_pu8);
5004 ALLOC_GUARDED_VAR(g_pu16);
5005 ALLOC_GUARDED_VAR(g_pu32);
5006 ALLOC_GUARDED_VAR(g_pu64);
5007 ALLOC_GUARDED_VAR(g_pu128);
5008 ALLOC_GUARDED_VAR(g_pu8Two);
5009 ALLOC_GUARDED_VAR(g_pu16Two);
5010 ALLOC_GUARDED_VAR(g_pu32Two);
5011 ALLOC_GUARDED_VAR(g_pu64Two);
5012 ALLOC_GUARDED_VAR(g_pu128Two);
5013 ALLOC_GUARDED_VAR(g_pfEfl);
5014 if (RTTestErrorCount(g_hTest) == 0)
5015 {
5016 if (fInt)
5017 {
5018 BinU8Test();
5019 BinU16Test();
5020 BinU32Test();
5021 BinU64Test();
5022 XchgTest();
5023 XaddTest();
5024 CmpXchgTest();
5025 CmpXchg8bTest();
5026 CmpXchg16bTest();
5027 ShiftDblTest();
5028 UnaryTest();
5029 ShiftTest();
5030 MulDivTest();
5031 BswapTest();
5032 }
5033
5034 if (fFpuLdSt)
5035 {
5036 FpuLoadConstTest();
5037 FpuLdMemTest();
5038 FpuLdIntTest();
5039 FpuLdD80Test();
5040 FpuStMemTest();
5041 FpuStIntTest();
5042 FpuStD80Test();
5043 }
5044
5045 if (fFpuBinary1)
5046 {
5047 FpuBinaryR80Test();
5048 FpuBinaryFswR80Test();
5049 FpuBinaryEflR80Test();
5050 }
5051
5052 if (fFpuBinary2)
5053 {
5054 FpuBinaryR64Test();
5055 FpuBinaryR32Test();
5056 FpuBinaryI32Test();
5057 FpuBinaryI16Test();
5058 FpuBinaryFswR64Test();
5059 FpuBinaryFswR32Test();
5060 FpuBinaryFswI32Test();
5061 FpuBinaryFswI16Test();
5062 }
5063
5064 if (fFpuOther)
5065 {
5066 FpuUnaryR80Test();
5067 FpuUnaryFswR80Test();
5068 FpuUnaryTwoR80Test();
5069 }
5070
5071 if (fSseFpBinary)
5072 {
5073 SseBinaryR32Test();
5074 }
5075 }
5076 return RTTestSummaryAndDestroy(g_hTest);
5077 }
5078 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
5079}
5080
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette