1/* $Id: tstIEMAImpl.cpp 103050 2024-01-25 00:42:30Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/err.h>
39#include <iprt/getopt.h>
40#include <iprt/initterm.h>
41#include <iprt/file.h>
42#include <iprt/mem.h>
43#include <iprt/message.h>
44#include <iprt/mp.h>
45#include <iprt/rand.h>
46#include <iprt/stream.h>
47#include <iprt/string.h>
48#include <iprt/test.h>
49#include <iprt/time.h>
50#include <iprt/thread.h>
51#include <iprt/vfs.h>
52#include <iprt/zip.h>
53#include <VBox/version.h>
54
55#include "tstIEMAImpl.h"
56
57
58/*********************************************************************************************************************************
59* Defined Constants And Macros *
60*********************************************************************************************************************************/
61#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
62#define ENTRY_EX(a_Name, a_uExtra) \
63 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
64 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
65 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
66
67#define ENTRY_FIX(a_Name) ENTRY_FIX_EX(a_Name, 0)
68#ifdef TSTIEMAIMPL_WITH_GENERATOR
69# define ENTRY_FIX_EX(a_Name, a_uExtra) \
70 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
71 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
72 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
73 false, false, RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
74#else
75# define ENTRY_FIX_EX(a_Name, a_uExtra) ENTRY_EX(a_Name, a_uExtra)
76#endif
77
78#define ENTRY_PFN_CAST(a_Name, a_pfnType) ENTRY_PFN_CAST_EX(a_Name, a_pfnType, 0)
79#define ENTRY_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
80 { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
81 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
82 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
83
84#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
85#define ENTRY_EX_BIN(a_Name, a_uExtra) \
86 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
87 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
88 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, true /*fBinary*/, true /*fCompressed*/ }
89
90#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
91#ifndef IEM_WITHOUT_ASSEMBLY
92# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
93 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
94 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
95 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, true /*fBinary*/, true /*fCompressed*/ }
96#else
97# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
98 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
99 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
100 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, true /*fBinary*/, true /*fCompressed*/ }
101#endif
102
103#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
104#ifndef IEM_WITHOUT_ASSEMBLY
105# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
106 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
107 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
108 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, true /*fBinary*/, true /*fCompressed*/ }
109#else
110# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
111 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
112 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
113 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, true /*fBinary*/, true /*fCompressed*/ }
114#endif
115
116
117#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
118#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
119 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
120 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
121 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
122
123#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
124#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
125 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
126 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
127 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
128
129#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
130 typedef struct a_TypeName \
131 { \
132 const char *pszName; \
133 const a_FunctionPtrType pfn; \
134 const a_FunctionPtrType pfnNative; \
135 a_TestType const *paTests; /**< These are updated when compressed tests are decompressed. */ \
136 uint32_t const *pcTests; /**< These are updated when compressed tests are decompressed. */ \
137 uint32_t const uExtra; \
138 uint8_t const idxCpuEflFlavour; \
139 bool const fBinary; \
140 bool fCompressed; /**< This is cleared after decompressing the tests. */ \
141 uint16_t const cFixedTests; \
142 a_TestType const * const paFixedTests; \
143 } a_TypeName
144
145#define COUNT_VARIATIONS(a_SubTest) \
146 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
147
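/* For illustration only (not compiled): a plain ENTRY(add_u8) line in one of the
   subtest tables below expands to roughly this initializer, using the usual
   iemAImpl_xxx worker naming and the g_aTests_xxx / g_cTests_xxx data symbols:

       { "add_u8", iemAImpl_add_u8, NULL,
         g_aTests_add_u8, &g_cTests_add_u8,
         0, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE }

   COUNT_VARIATIONS() then evaluates to 2 for entries that carry a pfnNative worker
   and whose EFLAGS flavour matches the host (ENTRY_INTEL / ENTRY_AMD), 1 otherwise. */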
148
149/*********************************************************************************************************************************
150* Structures and Typedefs *
151*********************************************************************************************************************************/
152typedef struct IEMBINARYOUTPUT
153{
154 /** The output file. */
155 RTVFSFILE hVfsFile;
156 /** The stream we write uncompressed binary test data to. */
157 RTVFSIOSTREAM hVfsUncompressed;
158 /** Write status. */
159 int rcWrite;
160 /** Set if the output is discarded (NULL filename format). */
161 bool fNull;
162 /** Filename. */
163 char szFilename[79];
164} IEMBINARYOUTPUT;
165typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
166
167
168/*********************************************************************************************************************************
169* Global Variables *
170*********************************************************************************************************************************/
171static RTTEST g_hTest;
172static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
173#ifdef TSTIEMAIMPL_WITH_GENERATOR
174static uint32_t g_cZeroDstTests = 2;
175static uint32_t g_cZeroSrcTests = 4;
176#endif
177static uint8_t *g_pu8, *g_pu8Two;
178static uint16_t *g_pu16, *g_pu16Two;
179static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
180static uint64_t *g_pu64, *g_pu64Two;
181static RTUINT128U *g_pu128, *g_pu128Two;
182
183static char g_aszBuf[32][256];
184static unsigned g_idxBuf = 0;
185
186static uint32_t g_cIncludeTestPatterns;
187static uint32_t g_cExcludeTestPatterns;
188static const char *g_apszIncludeTestPatterns[64];
189static const char *g_apszExcludeTestPatterns[64];
190
191/** Higher value means longer benchmarking. */
192static uint64_t g_cPicoSecBenchmark = 0;
193
194static unsigned g_cVerbosity = 0;
195
196
197/*********************************************************************************************************************************
198* Internal Functions *
199*********************************************************************************************************************************/
200static const char *FormatR80(PCRTFLOAT80U pr80);
201static const char *FormatR64(PCRTFLOAT64U pr64);
202static const char *FormatR32(PCRTFLOAT32U pr32);
203
204
205/*
206 * Random helpers.
207 */
208
209static uint32_t RandEFlags(void)
210{
211 uint32_t fEfl = RTRandU32();
212 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
213}
214
215#ifdef TSTIEMAIMPL_WITH_GENERATOR
216
217static uint8_t RandU8(void)
218{
219 return RTRandU32Ex(0, 0xff);
220}
221
222
223static uint16_t RandU16(void)
224{
225 return RTRandU32Ex(0, 0xffff);
226}
227
228
229static uint32_t RandU32(void)
230{
231 return RTRandU32();
232}
233
234#endif
235
236static uint64_t RandU64(void)
237{
238 return RTRandU64();
239}
240
241
242static RTUINT128U RandU128(void)
243{
244 RTUINT128U Ret;
245 Ret.s.Hi = RTRandU64();
246 Ret.s.Lo = RTRandU64();
247 return Ret;
248}
249
250#ifdef TSTIEMAIMPL_WITH_GENERATOR
251
252static uint8_t RandU8Dst(uint32_t iTest)
253{
254 if (iTest < g_cZeroDstTests)
255 return 0;
256 return RandU8();
257}
258
259
260static uint8_t RandU8Src(uint32_t iTest)
261{
262 if (iTest < g_cZeroSrcTests)
263 return 0;
264 return RandU8();
265}
266
267
268static uint16_t RandU16Dst(uint32_t iTest)
269{
270 if (iTest < g_cZeroDstTests)
271 return 0;
272 return RandU16();
273}
274
275
276static uint16_t RandU16Src(uint32_t iTest)
277{
278 if (iTest < g_cZeroSrcTests)
279 return 0;
280 return RandU16();
281}
282
283
284static uint32_t RandU32Dst(uint32_t iTest)
285{
286 if (iTest < g_cZeroDstTests)
287 return 0;
288 return RandU32();
289}
290
291
292static uint32_t RandU32Src(uint32_t iTest)
293{
294 if (iTest < g_cZeroSrcTests)
295 return 0;
296 return RandU32();
297}
298
299
300static uint64_t RandU64Dst(uint32_t iTest)
301{
302 if (iTest < g_cZeroDstTests)
303 return 0;
304 return RandU64();
305}
306
307
308static uint64_t RandU64Src(uint32_t iTest)
309{
310 if (iTest < g_cZeroSrcTests)
311 return 0;
312 return RandU64();
313}
314
315
316/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
317static int16_t RandI16Src2(uint32_t iTest)
318{
319 if (iTest < 18 * 4)
320 switch (iTest % 4)
321 {
322 case 0: return 0;
323 case 1: return INT16_MAX;
324 case 2: return INT16_MIN;
325 case 3: break;
326 }
327 return (int16_t)RandU16();
328}
329
330
331/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
332static int32_t RandI32Src2(uint32_t iTest)
333{
334 if (iTest < 18 * 4)
335 switch (iTest % 4)
336 {
337 case 0: return 0;
338 case 1: return INT32_MAX;
339 case 2: return INT32_MIN;
340 case 3: break;
341 }
342 return (int32_t)RandU32();
343}
344
345
346static int64_t RandI64Src(uint32_t iTest)
347{
348 RT_NOREF(iTest);
349 return (int64_t)RandU64();
350}
351
352
353static uint16_t RandFcw(void)
354{
355 return RandU16() & ~X86_FCW_ZERO_MASK;
356}
357
358
359static uint16_t RandFsw(void)
360{
361 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
362 return RandU16();
363}
364
365
366static uint32_t RandMxcsr(void)
367{
368 return RandU32() & ~X86_MXCSR_ZERO_MASK;
369}
370
371
372static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
373{
374 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
375 pr80->sj64.uFraction >>= cShift;
376 else
377 pr80->sj64.uFraction = (cShift % 19) + 1;
378}
379
380
381
382static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
383{
384 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
385
386 RTFLOAT80U r80;
387 r80.au64[0] = RandU64();
388 r80.au16[4] = RandU16();
389
390 /*
391 * Adjust the random stuff according to bType.
392 */
393 bType &= 0x1f;
394 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
395 {
396 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
397 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
398 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
399 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
400 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
401 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
402 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
403 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
404 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
405 }
406 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
407 {
408 /* Denormals (4,5) and Pseudo denormals (6,7) */
409 if (bType & 1)
410 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
411 else if (r80.sj64.uFraction == 0 && bType < 6)
412 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
413 r80.sj64.uExponent = 0;
414 r80.sj64.fInteger = bType >= 6;
415 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
416 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
417 }
418 else if (bType == 8 || bType == 9)
419 {
420 /* Pseudo NaN. */
421 if (bType & 1)
422 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
423 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
424 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
425 r80.sj64.uExponent = 0x7fff;
426 if (r80.sj64.fInteger)
427 r80.sj64.uFraction |= RT_BIT_64(62);
428 else
429 r80.sj64.uFraction &= ~RT_BIT_64(62);
430 r80.sj64.fInteger = 0;
431 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
432 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
433 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
434 }
435 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
436 {
437 /* Quiet and signalling NaNs. */
438 if (bType & 1)
439 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
440 else if (r80.sj64.uFraction == 0)
441 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
442 r80.sj64.uExponent = 0x7fff;
443 if (bType < 12)
444 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
445 else
446 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
447 r80.sj64.fInteger = 1;
448 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
449 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
450 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
451 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
452 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
453 }
454 else if (bType == 14 || bType == 15)
455 {
456 /* Unnormals */
457 if (bType & 1)
458 SafeR80FractionShift(&r80, RandU8() % 62);
459 r80.sj64.fInteger = 0;
460 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
461 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
462 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
463 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
464 }
465 else if (bType < 26)
466 {
467 /* Make sure we have lots of normalized values. */
468 if (!fIntTarget)
469 {
470 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
471 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
472 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
473 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
474 r80.sj64.fInteger = 1;
475 if (r80.sj64.uExponent <= uMinExp)
476 r80.sj64.uExponent = uMinExp + 1;
477 else if (r80.sj64.uExponent >= uMaxExp)
478 r80.sj64.uExponent = uMaxExp - 1;
479
480 if (bType == 16)
481 { /* All 1s is useful for testing rounding. Also try to trigger special
482 behaviour by sometimes rounding out of range, while we're at it. */
483 r80.sj64.uFraction = RT_BIT_64(63) - 1;
484 uint8_t bExp = RandU8();
485 if ((bExp & 3) == 0)
486 r80.sj64.uExponent = uMaxExp - 1;
487 else if ((bExp & 3) == 1)
488 r80.sj64.uExponent = uMinExp + 1;
489 else if ((bExp & 3) == 2)
490 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
491 }
492 }
493 else
494 {
495 /* integer target: */
496 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
497 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
498 r80.sj64.fInteger = 1;
499 if (r80.sj64.uExponent < uMinExp)
500 r80.sj64.uExponent = uMinExp;
501 else if (r80.sj64.uExponent > uMaxExp)
502 r80.sj64.uExponent = uMaxExp;
503
504 if (bType == 16)
505 { /* All 1s is useful for testing rounding. Also try to trigger special
506 behaviour by sometimes rounding out of range, while we're at it. */
507 r80.sj64.uFraction = RT_BIT_64(63) - 1;
508 uint8_t bExp = RandU8();
509 if ((bExp & 3) == 0)
510 r80.sj64.uExponent = uMaxExp;
511 else if ((bExp & 3) == 1)
512 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
513 }
514 }
515
516 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
517 }
518 return r80;
519}
520
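/* Quick reference for the bType encoding handled above (derived from the code,
   kept here for readability):
       0      zero                   10/11  quiet NaN
       1      pseudo-infinity        12/13  signalling NaN
       2      infinity               14/15  unnormal
       3      indefinite             16     normal, all-ones fraction (rounding)
       4/5    denormal               17..25 normal
       6/7    pseudo-denormal        26..31 raw random bits (any class)
       8/9    pseudo NaN
   The odd value in each special pair shifts the random fraction with
   SafeR80FractionShift instead of forcing a non-zero fraction. */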
521
522static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
523{
524 /*
525 * Make it more likely that we get a good selection of special values.
526 */
527 return RandR80Ex(RandU8(), cTarget, fIntTarget);
528
529}
530
531
532static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
533{
534 /* Make sure we cover all the basic types first before going for random selection: */
535 if (iTest <= 18)
536 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
537 return RandR80(cTarget, fIntTarget);
538}
539
540
541/**
542 * Helper for RandR80Src1 and RandR80Src2 that remaps bType from a 0..11 range
543 * onto the 0..18 values used by RandR80Ex, covering all basic value types.
544 */
545static uint8_t RandR80Src12RemapType(uint8_t bType)
546{
547 switch (bType)
548 {
549 case 0: return 18; /* normal */
550 case 1: return 16; /* normal extreme rounding */
551 case 2: return 14; /* unnormal */
552 case 3: return 12; /* Signalling NaN */
553 case 4: return 10; /* Quiet NaN */
554 case 5: return 8; /* PseudoNaN */
555 case 6: return 6; /* Pseudo Denormal */
556 case 7: return 4; /* Denormal */
557 case 8: return 3; /* Indefinite */
558 case 9: return 2; /* Infinity */
559 case 10: return 1; /* Pseudo-Infinity */
560 case 11: return 0; /* Zero */
561 default: AssertFailedReturn(18);
562 }
563}
564
565
566/**
567 * This works in tandem with RandR80Src2 to make sure we cover all operand
568 * type mixes first before we venture into regular random testing.
569 *
570 * There are 11 basic variations when we leave out the five odd ones using
571 * SafeR80FractionShift. Because of the special normalized value targeting
572 * rounding, we make it an even 12. So 144 combinations for two operands.
573 */
574static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
575{
576 if (cPartnerBits == 80)
577 {
578 Assert(!fPartnerInt);
579 if (iTest < 12 * 12)
580 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
581 }
582 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
583 {
584 if (iTest < 12 * 10)
585 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
586 }
587 else if (iTest < 18 * 4 && fPartnerInt)
588 return RandR80Ex(iTest / 4);
589 return RandR80();
590}
591
592
593/** Partner to RandR80Src1. */
594static RTFLOAT80U RandR80Src2(uint32_t iTest)
595{
596 if (iTest < 12 * 12)
597 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
598 return RandR80();
599}
600
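/* Worked example of the Src1/Src2 pairing for two 80-bit operands (illustrative):
   for iTest = 0..143, Src1 uses RandR80Src12RemapType(iTest / 12) and Src2 uses
   RandR80Src12RemapType(iTest % 12); e.g. iTest = 38 gives 38 / 12 = 3 (signalling
   NaN) paired with 38 % 12 = 2 (unnormal).  Each of the 12 x 12 type combinations
   is thus produced exactly once before both helpers fall back to plain RandR80(). */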
601
602static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
603{
604 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
605 pr64->s64.uFraction >>= cShift;
606 else
607 pr64->s64.uFraction = (cShift % 19) + 1;
608}
609
610
611static RTFLOAT64U RandR64Ex(uint8_t bType)
612{
613 RTFLOAT64U r64;
614 r64.u = RandU64();
615
616 /*
617 * Make it more likely that we get a good selection of special values.
618 * On average 6 out of 16 calls should return a special value.
619 */
620 bType &= 0xf;
621 if (bType == 0 || bType == 1)
622 {
623 /* 0 or Infinity. We only keep fSign here. */
624 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
625 r64.s.uFractionHigh = 0;
626 r64.s.uFractionLow = 0;
627 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
628 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
629 }
630 else if (bType == 2 || bType == 3)
631 {
632 /* Subnormals */
633 if (bType == 3)
634 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
635 else if (r64.s64.uFraction == 0)
636 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
637 r64.s64.uExponent = 0;
638 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
639 }
640 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
641 {
642 /* NaNs */
643 if (bType & 1)
644 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
645 else if (r64.s64.uFraction == 0)
646 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
647 r64.s64.uExponent = 0x7ff;
648 if (bType < 6)
649 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
650 else
651 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
652 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
653 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
654 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
655 }
656 else if (bType < 12)
657 {
658 /* Make sure we have lots of normalized values. */
659 if (r64.s.uExponent == 0)
660 r64.s.uExponent = 1;
661 else if (r64.s.uExponent == 0x7ff)
662 r64.s.uExponent = 0x7fe;
663 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
664 }
665 return r64;
666}
667
668
669static RTFLOAT64U RandR64Src(uint32_t iTest)
670{
671 if (iTest < 16)
672 return RandR64Ex(iTest);
673 return RandR64Ex(RandU8());
674}
675
676
677/** Pairing with an 80-bit floating point arg. */
678static RTFLOAT64U RandR64Src2(uint32_t iTest)
679{
680 if (iTest < 12 * 10)
681 return RandR64Ex(9 - iTest % 10); /* start with normal values */
682 return RandR64Ex(RandU8());
683}
684
685
686static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
687{
688 if (pr32->s.uFraction >= RT_BIT_32(cShift))
689 pr32->s.uFraction >>= cShift;
690 else
691 pr32->s.uFraction = (cShift % 19) + 1;
692}
693
694
695static RTFLOAT32U RandR32Ex(uint8_t bType)
696{
697 RTFLOAT32U r32;
698 r32.u = RandU32();
699
700 /*
701 * Make it more likely that we get a good selection of special values.
702 * On average 6 out of 16 calls should return a special value.
703 */
704 bType &= 0xf;
705 if (bType == 0 || bType == 1)
706 {
707 /* 0 or Infinity. We only keep fSign here. */
708 r32.s.uExponent = bType == 0 ? 0 : 0xff;
709 r32.s.uFraction = 0;
710 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
711 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
712 }
713 else if (bType == 2 || bType == 3)
714 {
715 /* Subnormals */
716 if (bType == 3)
717 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
718 else if (r32.s.uFraction == 0)
719 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
720 r32.s.uExponent = 0;
721 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
722 }
723 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
724 {
725 /* NaNs */
726 if (bType & 1)
727 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
728 else if (r32.s.uFraction == 0)
729 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
730 r32.s.uExponent = 0xff;
731 if (bType < 6)
732 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
733 else
734 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
735 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
736 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
737 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
738 }
739 else if (bType < 12)
740 {
741 /* Make sure we have lots of normalized values. */
742 if (r32.s.uExponent == 0)
743 r32.s.uExponent = 1;
744 else if (r32.s.uExponent == 0xff)
745 r32.s.uExponent = 0xfe;
746 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
747 }
748 return r32;
749}
750
751
752static RTFLOAT32U RandR32Src(uint32_t iTest)
753{
754 if (iTest < 16)
755 return RandR32Ex(iTest);
756 return RandR32Ex(RandU8());
757}
758
759
760/** Pairing with an 80-bit floating point arg. */
761static RTFLOAT32U RandR32Src2(uint32_t iTest)
762{
763 if (iTest < 12 * 10)
764 return RandR32Ex(9 - iTest % 10); /* start with normal values */
765 return RandR32Ex(RandU8());
766}
767
768
769static RTPBCD80U RandD80Src(uint32_t iTest)
770{
771 if (iTest < 3)
772 {
773 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
774 return d80Zero;
775 }
776 if (iTest < 5)
777 {
778 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
779 return d80Ind;
780 }
781
782 RTPBCD80U d80;
783 uint8_t b = RandU8();
784 d80.s.fSign = b & 1;
785
786 if ((iTest & 7) >= 6)
787 {
788 /* Illegal */
789 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
790 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
791 d80.s.abPairs[iPair] = RandU8();
792 }
793 else
794 {
795 /* Normal */
796 d80.s.uPad = 0;
797 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
798 {
799 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
800 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
801 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
802 }
803 }
804 return d80;
805}
806
807
808static const char *GenFormatR80(PCRTFLOAT80U plrd)
809{
810 if (RTFLOAT80U_IS_ZERO(plrd))
811 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
812 if (RTFLOAT80U_IS_INF(plrd))
813 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
814 if (RTFLOAT80U_IS_INDEFINITE(plrd))
815 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
816 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
817 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
818 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
819 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
820
821 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
822 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
823 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
824 return pszBuf;
825}
826
827static const char *GenFormatR64(PCRTFLOAT64U prd)
828{
829 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
830 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
831 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
832 return pszBuf;
833}
834
835
836static const char *GenFormatR32(PCRTFLOAT32U pr)
837{
838 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
839 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
840 return pszBuf;
841}
842
843
844static const char *GenFormatD80(PCRTPBCD80U pd80)
845{
846 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
847 size_t off;
848 if (pd80->s.uPad == 0)
849 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
850 else
851 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
852 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
853 while (iPair-- > 0)
854 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
855 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
856 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
857 pszBuf[off++] = ')';
858 pszBuf[off++] = '\0';
859 return pszBuf;
860}
861
862
863static const char *GenFormatI64(int64_t i64)
864{
865 if (i64 == INT64_MIN) /* This one is problematic */
866 return "INT64_MIN";
867 if (i64 == INT64_MAX)
868 return "INT64_MAX";
869 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
870 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
871 return pszBuf;
872}
873
874#if 0 /* unused */
875static const char *GenFormatI64(int64_t const *pi64)
876{
877 return GenFormatI64(*pi64);
878}
879#endif
880
881static const char *GenFormatI32(int32_t i32)
882{
883 if (i32 == INT32_MIN) /* This one is problematic */
884 return "INT32_MIN";
885 if (i32 == INT32_MAX)
886 return "INT32_MAX";
887 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
888 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
889 return pszBuf;
890}
891
892
893const char *GenFormatI32(int32_t const *pi32)
894{
895 return GenFormatI32(*pi32);
896}
897
898
899const char *GenFormatI16(int16_t i16)
900{
901 if (i16 == INT16_MIN) /* This one is problematic */
902 return "INT16_MIN";
903 if (i16 == INT16_MAX)
904 return "INT16_MAX";
905 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
906 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
907 return pszBuf;
908}
909
910
911const char *GenFormatI16(int16_t const *pi16)
912{
913 return GenFormatI16(*pi16);
914}
915
916
917static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
918{
919 /* We want to tag the generated source code with the revision that produced it. */
920 static char s_szRev[] = "$Revision: 103050 $";
921 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
922 size_t cchRev = 0;
923 while (RT_C_IS_DIGIT(pszRev[cchRev]))
924 cchRev++;
925
926 RTStrmPrintf(pOut,
927 "/* $Id: tstIEMAImpl.cpp 103050 2024-01-25 00:42:30Z vboxsync $ */\n"
928 "/** @file\n"
929 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
930 " */\n"
931 "\n"
932 "/*\n"
933 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
934 " *\n"
935 " * This file is part of VirtualBox base platform packages, as\n"
936 " * available from https://www.virtualbox.org.\n"
937 " *\n"
938 " * This program is free software; you can redistribute it and/or\n"
939 " * modify it under the terms of the GNU General Public License\n"
940 " * as published by the Free Software Foundation, in version 3 of the\n"
941 " * License.\n"
942 " *\n"
943 " * This program is distributed in the hope that it will be useful, but\n"
944 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
945 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
946 " * General Public License for more details.\n"
947 " *\n"
948 " * You should have received a copy of the GNU General Public License\n"
949 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
950 " *\n"
951 " * SPDX-License-Identifier: GPL-3.0-only\n"
952 " */\n"
953 "\n"
954 "#include \"tstIEMAImpl.h\"\n"
955 "\n"
956 ,
957 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
958}
959
960
961static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
962{
963 PRTSTREAM pOut = NULL;
964 int rc = RTStrmOpen(pszFilename, "w", &pOut);
965 if (RT_SUCCESS(rc))
966 {
967 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
968 return pOut;
969 }
970 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
971 return NULL;
972}
973
974
975static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
976{
977 RTStrmPrintf(pOut,
978 "\n"
979 "/* end of file */\n");
980 int rc = RTStrmClose(pOut);
981 if (RT_SUCCESS(rc))
982 return rcExit;
983 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
984}
985
986
987static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
988{
989 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
990}
991
992
993static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
994{
995 RTStrmPrintf(pOut,
996 "};\n"
997 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
998 "\n",
999 pszName, pszName);
1000}
1001
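/* A minimal sketch of how the C++-source output helpers above are meant to be
   strung together (illustrative only; the file name and array/type names are
   made up for this example): */
#if 0
{
    PRTSTREAM pOut = GenerateOpenWithHdr("tstIEMAImplDataExample.cpp", "host cpu", NULL);
    if (!pOut)
        return RTEXITCODE_FAILURE;
    GenerateArrayStart(pOut, "example_u16", "BINU16_TEST_T");
    /* ... one "{ ... }," initializer line per generated test goes here ... */
    GenerateArrayEnd(pOut, "example_u16");
    return GenerateFooterAndClose(pOut, "tstIEMAImplDataExample.cpp", RTEXITCODE_SUCCESS);
}
#endif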
1002
1003static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char *pszFilenameFmt, const char *pszName)
1004{
1005 pBinOut->hVfsFile = NIL_RTVFSFILE;
1006 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1007 if (pszFilenameFmt)
1008 {
1009 pBinOut->fNull = false;
1010 if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
1011 {
1012 RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
1013 pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
1014 RTFILE_O_CREATE_REPLACE | RTFILE_O_WRITE | RTFILE_O_DENY_READWRITE,
1015 &pBinOut->hVfsFile);
1016 if (RT_SUCCESS(pBinOut->rcWrite))
1017 {
1018 RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
1019 if (hVfsIoFile != NIL_RTVFSIOSTREAM)
1020 {
1021 pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /*fFlags*/, 9, &pBinOut->hVfsUncompressed);
1022 RTVfsIoStrmRelease(hVfsIoFile);
1023 if (RT_SUCCESS(pBinOut->rcWrite))
1024 {
1025 pBinOut->rcWrite = VINF_SUCCESS;
1026 return true;
1027 }
1028
1029 RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
1030 }
1031 else
1032 {
1033 RTMsgError("RTVfsFileToIoStream failed!");
1034 pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
1035 }
1036 RTVfsFileRelease(pBinOut->hVfsFile);
1037 RTFileDelete(pBinOut->szFilename);
1038 }
1039 else
1040 RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1041 }
1042 else
1043 {
1044 RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
1045 pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
1046 }
1047 return false;
1048 }
1049 RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
1050 pBinOut->rcWrite = VERR_IGNORED;
1051 pBinOut->fNull = true;
1052 pBinOut->szFilename[0] = '\0';
1053 return true;
1054}
1055
1056
1057static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1058{
1059 if (RT_SUCCESS_NP(pBinOut->rcWrite))
1060 {
1061 pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /*fBlocking*/, NULL);
1062 if (RT_SUCCESS(pBinOut->rcWrite))
1063 return;
1064 RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1065 }
1066}
1067
1068
1069static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
1070{
1071 if (!pBinOut->fNull)
1072 {
1073 /* This is rather jovial about rcWrite. */
1074 int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
1075 RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
1076 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1077 if (RT_FAILURE(rc1))
1078 RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);
1079
1080 int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
1081 RTVfsFileRelease(pBinOut->hVfsFile);
1082 pBinOut->hVfsFile = NIL_RTVFSFILE;
1083 if (RT_FAILURE(rc2))
1084 RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);
1085
1086 return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
1087 }
1088 return true;
1089}
1090
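/* The matching sketch for the compressed binary output path, mirroring what the
   GEN_BINARY_TESTS macro further down does (illustrative only; the format string
   and test record are made up for this example): */
#if 0
{
    IEMBINARYOUTPUT BinOut;
    if (!GenerateBinaryOpen(&BinOut, "tstIEMAImplDataExample-%s.bin.gz", "add_u16"))
        return RTEXITCODE_FAILURE;
    BINU16_TEST_T Test = { 0 }; /* fill in a real test record here */
    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
    if (!GenerateBinaryClose(&BinOut))
        return RTEXITCODE_FAILURE;
}
#endif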
1091
1092#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1093
1094
1095/*
1096 * Test helpers.
1097 */
1098static bool IsTestEnabled(const char *pszName)
1099{
1100 /* Process excludes first: */
1101 uint32_t i = g_cExcludeTestPatterns;
1102 while (i-- > 0)
1103 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1104 return false;
1105
1106 /* If no include patterns, everything is included: */
1107 i = g_cIncludeTestPatterns;
1108 if (!i)
1109 return true;
1110
1111 /* Otherwise only tests matching the include patterns get tested: */
1112 while (i-- > 0)
1113 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1114 return true;
1115
1116 return false;
1117}
1118
1119
1120static bool SubTestAndCheckIfEnabled(const char *pszName)
1121{
1122 RTTestSub(g_hTest, pszName);
1123 if (IsTestEnabled(pszName))
1124 return true;
1125 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1126 return false;
1127}
1128
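/* Illustrative pattern semantics (based on RTStrSimplePatternMultiMatch): each
   include/exclude entry is a simple '*'/'?' pattern, and several alternatives can
   be packed into one entry separated by '|'.  So an exclude pattern of "*_locked"
   drops all the locked variants, while an include pattern of "add_u8|sub_u8"
   limits the run to just those two subtests. */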
1129
1130/** Decompresses test data before use as required. */
1131static int DecompressBinaryTest(bool *pfCompressed, void **ppvTests, uint32_t const **ppcTests, size_t cbEntry, bool fBinary)
1132{
1133 if (!*pfCompressed)
1134 return VINF_SUCCESS;
1135
1136 /* Open a memory stream for the compressed binary data. */
1137 uint32_t const cbCompressed = **ppcTests;
1138 RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
1139 int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, *ppvTests, cbCompressed, &hVfsIos);
1140 RTTESTI_CHECK_RC_OK_RET(rc, rc);
1141
1142 /* Open a decompressed stream for it. */
1143 RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
1144 rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
1145 RTTESTI_CHECK_RC_OK(rc);
1146 if (RT_SUCCESS(rc))
1147 {
1148 /* Initial output buffer allocation. */
1149 size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
1150 uint8_t *pbDecompressed = (uint8_t *)RTMemAllocZ(cbDecompressedAlloc);
1151 if (pbDecompressed)
1152 {
1153 size_t off = 0;
1154 for (;;)
1155 {
1156 size_t cbRead = 0;
1157 rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /*fBlocking*/, &cbRead);
1158 if (RT_FAILURE(rc))
1159 break;
1160 if (rc == VINF_EOF && cbRead == 0)
1161 break;
1162 off += cbRead;
1163
1164 if (cbDecompressedAlloc < off + 256)
1165 {
1166 size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
1167 void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
1168 AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
1169 cbDecompressedAlloc = cbNew;
1170 pbDecompressed = (uint8_t *)pvNew;
1171 }
1172 }
1173 if (RT_SUCCESS(rc))
1174 {
1175 if ((off % cbEntry) == 0)
1176 {
1177 if (cbDecompressedAlloc - off > _512K)
1178 {
1179 void * const pvNew = RTMemRealloc(pbDecompressed, off);
1180 if (pvNew)
1181 pbDecompressed = (uint8_t *)pvNew;
1182 }
1183 uint32_t *pcTests = (uint32_t *)RTMemAlloc(sizeof(uint32_t));
1184 if (pcTests)
1185 {
1186 /* Done! */
1187 *pcTests = (uint32_t)(fBinary ? off : off / cbEntry);
1188 *ppvTests = pbDecompressed;
1189 *ppcTests = pcTests;
1190 *pfCompressed = false;
1191
1192 pbDecompressed = NULL;
1193 rc = VINF_SUCCESS;
1194 }
1195 else
1196 {
1197 RTTestIFailed("Out of memory decompressing test data (uint32_t)");
1198 rc = VERR_NO_MEMORY;
1199 }
1200 }
1201 else
1202 {
1203 RTTestIFailed("Uneven decompressed data size: %#zx vs entry size %#zx -> %#zx", off, cbEntry, off % cbEntry);
1204 rc = VERR_IO_BAD_LENGTH;
1205 }
1206 }
1207 else
1208 RTTestIFailed("Failed to decompress binary stream: %Rrc (off=%#zx, cbCompressed=%#x)", rc, off, cbCompressed);
1209 RTMemFree(pbDecompressed);
1210 }
1211 else
1212 {
1213 RTTestIFailed("Out of memory decompressing test data");
1214 rc = VERR_NO_MEMORY;
1215 }
1216 RTVfsIoStrmRelease(hVfsIosDecomp);
1217 }
1218 RTVfsIoStrmRelease(hVfsIos);
1219 return rc;
1220}
1221
1222#define DECOMPRESS_TESTS(a_Entry) \
1223 RT_SUCCESS(DecompressBinaryTest(&(a_Entry).fCompressed, (void **)&(a_Entry).paTests, &(a_Entry).pcTests, \
1224 sizeof((a_Entry).paTests[0]), (a_Entry).fBinary))
1225
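/* Note on the bookkeeping above (see DecompressBinaryTest): while an entry is
   still compressed, paTests points at the gzipped blob and *pcTests holds its
   size in bytes; after decompression, paTests points at the expanded data and
   *pcTests holds either the byte count (fBinary) or the number of fixed-size
   test records (!fBinary). */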
1226
1227/** Checks whether the subtest is enabled and decompresses its test data as required. */
1228static bool SubTestAndCheckIfEnabledAndDecompress(const char *pszName, size_t cbEntry, bool fBinary,
1229 bool *pfCompressed, void **ppvTests, uint32_t const **ppcTests)
1230{
1231 if (SubTestAndCheckIfEnabled(pszName))
1232 {
1233 int const rc = DecompressBinaryTest(pfCompressed, ppvTests, ppcTests, cbEntry, fBinary);
1234 if (RT_SUCCESS(rc))
1235 return true;
1236 }
1237 return false;
1238}
1239
1240#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
1241 SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, sizeof((a_Entry).paTests[0]), (a_Entry).fBinary, \
1242 &(a_Entry).fCompressed, (void **)&(a_Entry).paTests, &(a_Entry).pcTests)
1243
1244
1245static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1246{
1247 if (fActual == fExpected)
1248 return "";
1249
1250 uint32_t const fXor = fActual ^ fExpected;
1251 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1252 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1253
1254 static struct
1255 {
1256 const char *pszName;
1257 uint32_t fFlag;
1258 } const s_aFlags[] =
1259 {
1260#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1261 EFL_ENTRY(CF),
1262 EFL_ENTRY(PF),
1263 EFL_ENTRY(AF),
1264 EFL_ENTRY(ZF),
1265 EFL_ENTRY(SF),
1266 EFL_ENTRY(TF),
1267 EFL_ENTRY(IF),
1268 EFL_ENTRY(DF),
1269 EFL_ENTRY(OF),
1270 EFL_ENTRY(IOPL),
1271 EFL_ENTRY(NT),
1272 EFL_ENTRY(RF),
1273 EFL_ENTRY(VM),
1274 EFL_ENTRY(AC),
1275 EFL_ENTRY(VIF),
1276 EFL_ENTRY(VIP),
1277 EFL_ENTRY(ID),
1278 };
1279 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1280 if (s_aFlags[i].fFlag & fXor)
1281 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1282 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1283 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1284 return pszBuf;
1285}
1286
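/* Example of the diff format (illustrative): EFlagsDiff(0x203, 0x202) returns
   " - 0x1/CF", i.e. the XOR of the two values followed by one /NAME item per
   differing flag, where /NAME means the flag is set in the actual value and
   /!NAME means it is clear.  FswDiff and MxcsrDiff below use the same scheme. */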
1287
1288static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1289{
1290 if (fActual == fExpected)
1291 return "";
1292
1293 uint16_t const fXor = fActual ^ fExpected;
1294 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1295 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1296
1297 static struct
1298 {
1299 const char *pszName;
1300 uint32_t fFlag;
1301 } const s_aFlags[] =
1302 {
1303#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1304 FSW_ENTRY(IE),
1305 FSW_ENTRY(DE),
1306 FSW_ENTRY(ZE),
1307 FSW_ENTRY(OE),
1308 FSW_ENTRY(UE),
1309 FSW_ENTRY(PE),
1310 FSW_ENTRY(SF),
1311 FSW_ENTRY(ES),
1312 FSW_ENTRY(C0),
1313 FSW_ENTRY(C1),
1314 FSW_ENTRY(C2),
1315 FSW_ENTRY(C3),
1316 FSW_ENTRY(B),
1317 };
1318 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1319 if (s_aFlags[i].fFlag & fXor)
1320 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1321 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1322 if (fXor & X86_FSW_TOP_MASK)
1323 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1324 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1325#if 0 /* For debugging fprem & fprem1 */
1326 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1327 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1328#endif
1329 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1330 return pszBuf;
1331}
1332
1333
1334static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1335{
1336 if (fActual == fExpected)
1337 return "";
1338
1339 uint16_t const fXor = fActual ^ fExpected;
1340 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1341 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1342
1343 static struct
1344 {
1345 const char *pszName;
1346 uint32_t fFlag;
1347 } const s_aFlags[] =
1348 {
1349#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1350 MXCSR_ENTRY(IE),
1351 MXCSR_ENTRY(DE),
1352 MXCSR_ENTRY(ZE),
1353 MXCSR_ENTRY(OE),
1354 MXCSR_ENTRY(UE),
1355 MXCSR_ENTRY(PE),
1356
1357 MXCSR_ENTRY(IM),
1358 MXCSR_ENTRY(DM),
1359 MXCSR_ENTRY(ZM),
1360 MXCSR_ENTRY(OM),
1361 MXCSR_ENTRY(UM),
1362 MXCSR_ENTRY(PM),
1363
1364 MXCSR_ENTRY(DAZ),
1365 MXCSR_ENTRY(FZ),
1366#undef MXCSR_ENTRY
1367 };
1368 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1369 if (s_aFlags[i].fFlag & fXor)
1370 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1371 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1372 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1373 return pszBuf;
1374}
1375
1376
1377static const char *FormatFcw(uint16_t fFcw)
1378{
1379 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1380
1381 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1382 switch (fFcw & X86_FCW_PC_MASK)
1383 {
1384 case X86_FCW_PC_24: pszPC = "PC24"; break;
1385 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1386 case X86_FCW_PC_53: pszPC = "PC53"; break;
1387 case X86_FCW_PC_64: pszPC = "PC64"; break;
1388 }
1389
1390 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1391 switch (fFcw & X86_FCW_RC_MASK)
1392 {
1393 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1394 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1395 case X86_FCW_RC_UP: pszRC = "UP"; break;
1396 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1397 }
1398 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1399
1400 static struct
1401 {
1402 const char *pszName;
1403 uint32_t fFlag;
1404 } const s_aFlags[] =
1405 {
1406#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1407 FCW_ENTRY(IM),
1408 FCW_ENTRY(DM),
1409 FCW_ENTRY(ZM),
1410 FCW_ENTRY(OM),
1411 FCW_ENTRY(UM),
1412 FCW_ENTRY(PM),
1413 { "6M", 64 },
1414 };
1415 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1416 if (fFcw & s_aFlags[i].fFlag)
1417 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1418
1419 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1420 return pszBuf;
1421}
1422
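/* Example (assuming the standard x87 FCW bit layout): FormatFcw(0x037f), the
   FNINIT default control word, comes out as "PC64 NEAR IM DM ZM OM UM PM 6M",
   where "6M" marks the reserved bit 6 that happens to be set in that value. */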
1423
1424static const char *FormatMxcsr(uint32_t fMxcsr)
1425{
1426 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1427
1428 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1429 switch (fMxcsr & X86_MXCSR_RC_MASK)
1430 {
1431 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1432 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1433 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1434 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1435 }
1436
1437 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1438 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1439 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1440
1441 static struct
1442 {
1443 const char *pszName;
1444 uint32_t fFlag;
1445 } const s_aFlags[] =
1446 {
1447#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1448 MXCSR_ENTRY(IE),
1449 MXCSR_ENTRY(DE),
1450 MXCSR_ENTRY(ZE),
1451 MXCSR_ENTRY(OE),
1452 MXCSR_ENTRY(UE),
1453 MXCSR_ENTRY(PE),
1454
1455 MXCSR_ENTRY(IM),
1456 MXCSR_ENTRY(DM),
1457 MXCSR_ENTRY(ZM),
1458 MXCSR_ENTRY(OM),
1459 MXCSR_ENTRY(UM),
1460 MXCSR_ENTRY(PM),
1461 { "6M", 64 },
1462 };
1463 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1464 if (fMxcsr & s_aFlags[i].fFlag)
1465 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1466
1467 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1468 return pszBuf;
1469}
1470
1471
1472static const char *FormatR80(PCRTFLOAT80U pr80)
1473{
1474 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1475 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1476 return pszBuf;
1477}
1478
1479
1480static const char *FormatR64(PCRTFLOAT64U pr64)
1481{
1482 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1483 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1484 return pszBuf;
1485}
1486
1487
1488static const char *FormatR32(PCRTFLOAT32U pr32)
1489{
1490 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1491 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1492 return pszBuf;
1493}
1494
1495
1496static const char *FormatD80(PCRTPBCD80U pd80)
1497{
1498 /* There is only one indefinite encoding (same as for 80-bit
1499 floating point), so get it out of the way first: */
1500 if (RTPBCD80U_IS_INDEFINITE(pd80))
1501 return "Ind";
1502
1503 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1504 size_t off = 0;
1505 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1506 unsigned cBadDigits = 0;
1507 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1508 while (iPair-- > 0)
1509 {
1510 static const char s_szDigits[] = "0123456789abcdef";
1511 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1512 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1513 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1514 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1515 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1516 }
1517 if (cBadDigits || pd80->s.uPad != 0)
1518 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1519 pszBuf[off] = '\0';
1520 return pszBuf;
1521}
1522
1523
1524#if 0
1525static const char *FormatI64(int64_t const *piVal)
1526{
1527 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1528 RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1529 return pszBuf;
1530}
1531#endif
1532
1533
1534static const char *FormatI32(int32_t const *piVal)
1535{
1536 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1537 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1538 return pszBuf;
1539}
1540
1541
1542static const char *FormatI16(int16_t const *piVal)
1543{
1544 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1545 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1546 return pszBuf;
1547}
1548
1549
1550static const char *FormatU128(PCRTUINT128U puVal)
1551{
1552 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1553 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1554 return pszBuf;
1555}
1556
1557
1558/*
1559 * Binary operations.
1560 */
1561TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1562TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1563TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1564TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1565
1566#ifdef TSTIEMAIMPL_WITH_GENERATOR
1567# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1568static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
1569{ \
1570 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1571 { \
1572 PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
1573 ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
1574 IEMBINARYOUTPUT BinOut; \
1575 if ( g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1576 && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1577 continue; \
1578 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[g_aBinU ## a_cBits[iFn].idxCpuEflFlavour], \
1579 g_aBinU ## a_cBits[iFn].pszName), RTEXITCODE_FAILURE); \
1580 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1581 { \
1582 a_TestType Test; \
1583 Test.fEflIn = RandEFlags(); \
1584 Test.fEflOut = Test.fEflIn; \
1585 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1586 Test.uDstOut = Test.uDstIn; \
1587 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1588 if (g_aBinU ## a_cBits[iFn].uExtra) \
1589 Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
1590 Test.uMisc = 0; \
1591 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1592 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1593 } \
1594 for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
1595 { \
1596 a_TestType Test; \
1597 Test.fEflIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
1598 : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
1599 Test.fEflOut = Test.fEflIn; \
1600 Test.uDstIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
1601 Test.uDstOut = Test.uDstIn; \
1602 Test.uSrcIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
1603 Test.uMisc = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
1604 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1605 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1606 } \
1607 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1608 } \
1609 return RTEXITCODE_SUCCESS; \
1610} \
1611/* Temp for conversion. */ \
1612static RTEXITCODE BinU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
1613{ \
1614 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1615 { \
1616 AssertReturn(DECOMPRESS_TESTS(g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
1617 IEMBINARYOUTPUT BinOut; \
1618 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[g_aBinU ## a_cBits[iFn].idxCpuEflFlavour], \
1619 g_aBinU ## a_cBits[iFn].pszName), RTEXITCODE_FAILURE); \
1620 size_t cbTests = g_aBinU ## a_cBits[iFn].pcTests[0]; \
1621 if (!g_aBinU ## a_cBits[iFn].fBinary) \
1622 cbTests *= sizeof(g_aBinU ## a_cBits[iFn].paTests[0]); \
1623 GenerateBinaryWrite(&BinOut, g_aBinU ## a_cBits[iFn].paTests, cbTests); \
1624 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1625 } \
1626 return RTEXITCODE_SUCCESS; \
1627}
1628
1629#else
1630# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
1631#endif
1632
1633
1634/** Based on a quick probe run, guess how long to run the benchmark. */
1635static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1636{
1637 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1638 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1639 if (cIterations > _2G)
1640 return _2G;
1641 if (cIterations < _4K)
1642 return _4K;
1643 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1644}
1645
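/* Worked example with made-up numbers: if the 64K-iteration probe takes 1ms,
   that is 1000000 ns * 1000 / 65536, i.e. roughly 15258 ps per call.  With a
   benchmark budget of 3 seconds (g_cPicoSecBenchmark = 3 * 10^12 ps) this yields
   about 196.6 million iterations, which is then rounded up to a 4K multiple and
   clamped to the [4K, 2G] range. */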
1646
1647#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
1648GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1649\
1650static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
1651{ \
1652 uint32_t const fEflIn = pEntry->fEflIn; \
1653 a_uType const uDstIn = pEntry->uDstIn; \
1654 a_uType const uSrcIn = pEntry->uSrcIn; \
1655 cIterations /= 4; \
1656 RTThreadYield(); \
1657 uint64_t const nsStart = RTTimeNanoTS(); \
1658 for (uint32_t i = 0; i < cIterations; i++) \
1659 { \
1660 uint32_t fBenchEfl = fEflIn; \
1661 a_uType uBenchDst = uDstIn; \
1662 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1663 \
1664 fBenchEfl = fEflIn; \
1665 uBenchDst = uDstIn; \
1666 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1667 \
1668 fBenchEfl = fEflIn; \
1669 uBenchDst = uDstIn; \
1670 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1671 \
1672 fBenchEfl = fEflIn; \
1673 uBenchDst = uDstIn; \
1674 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1675 } \
1676 return RTTimeNanoTS() - nsStart; \
1677} \
1678\
1679static void BinU ## a_cBits ## Test(void) \
1680{ \
1681 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1682 { \
1683 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
1684 continue; \
1685 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1686 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1687 PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1688 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1689 if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
1690 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1691 { \
1692 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1693 { \
1694 uint32_t fEfl = paTests[iTest].fEflIn; \
1695 a_uType uDst = paTests[iTest].uDstIn; \
1696 pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
1697 if ( uDst != paTests[iTest].uDstOut \
1698 || fEfl != paTests[iTest].fEflOut ) \
1699 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
1700 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1701 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1702 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
1703 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
1704 else \
1705 { \
1706 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1707 *g_pfEfl = paTests[iTest].fEflIn; \
1708 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
1709 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1710 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1711 } \
1712 } \
1713 \
1714 /* Benchmark if all succeeded. */ \
1715 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
1716 { \
1717 uint32_t const iTest = cTests / 2; \
1718 uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
1719 uint64_t const cNsRealRun = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
1720 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
1721 "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
1722 } \
1723 \
1724 /* Next variation is native. */ \
1725 pfn = a_aSubTests[iFn].pfnNative; \
1726 } \
1727 } \
1728}
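

/*
 * How the macro above is meant to be read: each TEST_BINARY_OPS instantiation
 * further down (e.g. the 8-bit one right below) emits a BinU<N>Bench() and
 * BinU<N>Test() pair, and every worker in the corresponding table is driven
 * through the same in/out shape.  A minimal sketch with arbitrarily chosen
 * values (not taken from the test data):
 *
 *      uint8_t  uDst = 0x7f;
 *      uint32_t fEfl = 0;
 *      iemAImpl_add_u8(&uDst, 1, &fEfl);   // uDst == 0x80; OF and SF get set
 */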
1729
1730
1731/*
1732 * 8-bit binary operations.
1733 */
1734static BINU8_T g_aBinU8[] =
1735{
1736 ENTRY(add_u8),
1737 ENTRY(add_u8_locked),
1738 ENTRY(adc_u8),
1739 ENTRY(adc_u8_locked),
1740 ENTRY(sub_u8),
1741 ENTRY(sub_u8_locked),
1742 ENTRY(sbb_u8),
1743 ENTRY(sbb_u8_locked),
1744 ENTRY(or_u8),
1745 ENTRY(or_u8_locked),
1746 ENTRY(xor_u8),
1747 ENTRY(xor_u8_locked),
1748 ENTRY(and_u8),
1749 ENTRY(and_u8_locked),
1750 ENTRY_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
1751 ENTRY_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
1752};
1753TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1754
1755
1756/*
1757 * 16-bit binary operations.
1758 */
1759#ifdef TSTIEMAIMPL_WITH_GENERATOR
1760static const BINU16_TEST_T g_aFixedTests_add_u16[] =
1761{
1762 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1763 { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
1764};
1765#endif
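/* The single fixed entry above pins the 1 + 0xffff wrap-around case into the
 * generated data regardless of what the random inputs happen to cover.  Only
 * the input columns matter; the expected-output columns are recomputed by the
 * generator (see the fixed-test loop further up).  The 32-bit and 64-bit
 * tables below do the same for their widths. */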
1766static BINU16_T g_aBinU16[] =
1767{
1768 ENTRY_FIX(add_u16),
1769 ENTRY(add_u16_locked),
1770 ENTRY(adc_u16),
1771 ENTRY(adc_u16_locked),
1772 ENTRY(sub_u16),
1773 ENTRY(sub_u16_locked),
1774 ENTRY(sbb_u16),
1775 ENTRY(sbb_u16_locked),
1776 ENTRY(or_u16),
1777 ENTRY(or_u16_locked),
1778 ENTRY(xor_u16),
1779 ENTRY(xor_u16_locked),
1780 ENTRY(and_u16),
1781 ENTRY(and_u16_locked),
1782 ENTRY_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
1783 ENTRY_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
1784 ENTRY_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
1785 ENTRY_EX(btc_u16, 1),
1786 ENTRY_EX(btc_u16_locked, 1),
1787 ENTRY_EX(btr_u16, 1),
1788 ENTRY_EX(btr_u16_locked, 1),
1789 ENTRY_EX(bts_u16, 1),
1790 ENTRY_EX(bts_u16_locked, 1),
1791 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1792 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1793 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1794 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1795 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1796 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1797 ENTRY(arpl),
1798};
1799TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1800
1801
1802/*
1803 * 32-bit binary operations.
1804 */
1805#ifdef TSTIEMAIMPL_WITH_GENERATOR
1806static const BINU32_TEST_T g_aFixedTests_add_u32[] =
1807{
1808 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1809 { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
1810};
1811#endif
1812static BINU32_T g_aBinU32[] =
1813{
1814 ENTRY_FIX(add_u32),
1815 ENTRY(add_u32_locked),
1816 ENTRY(adc_u32),
1817 ENTRY(adc_u32_locked),
1818 ENTRY(sub_u32),
1819 ENTRY(sub_u32_locked),
1820 ENTRY(sbb_u32),
1821 ENTRY(sbb_u32_locked),
1822 ENTRY(or_u32),
1823 ENTRY(or_u32_locked),
1824 ENTRY(xor_u32),
1825 ENTRY(xor_u32_locked),
1826 ENTRY(and_u32),
1827 ENTRY(and_u32_locked),
1828 ENTRY_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
1829 ENTRY_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
1830 ENTRY_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
1831 ENTRY_EX(btc_u32, 1),
1832 ENTRY_EX(btc_u32_locked, 1),
1833 ENTRY_EX(btr_u32, 1),
1834 ENTRY_EX(btr_u32_locked, 1),
1835 ENTRY_EX(bts_u32, 1),
1836 ENTRY_EX(bts_u32_locked, 1),
1837 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1838 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1839 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1840 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1841 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1842 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1843 ENTRY(adcx_u32),
1844 ENTRY(adox_u32),
1845};
1846TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1847
1848
1849/*
1850 * 64-bit binary operations.
1851 */
1852#ifdef TSTIEMAIMPL_WITH_GENERATOR
1853static const BINU64_TEST_T g_aFixedTests_add_u64[] =
1854{
1855 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1856 { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
1857};
1858#endif
1859static BINU64_T g_aBinU64[] =
1860{
1861 ENTRY_FIX(add_u64),
1862 ENTRY(add_u64_locked),
1863 ENTRY(adc_u64),
1864 ENTRY(adc_u64_locked),
1865 ENTRY(sub_u64),
1866 ENTRY(sub_u64_locked),
1867 ENTRY(sbb_u64),
1868 ENTRY(sbb_u64_locked),
1869 ENTRY(or_u64),
1870 ENTRY(or_u64_locked),
1871 ENTRY(xor_u64),
1872 ENTRY(xor_u64_locked),
1873 ENTRY(and_u64),
1874 ENTRY(and_u64_locked),
1875 ENTRY_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
1876 ENTRY_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
1877 ENTRY_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
1878 ENTRY_EX(btc_u64, 1),
1879 ENTRY_EX(btc_u64_locked, 1),
1880 ENTRY_EX(btr_u64, 1),
1881 ENTRY_EX(btr_u64_locked, 1),
1882 ENTRY_EX(bts_u64, 1),
1883 ENTRY_EX(bts_u64_locked, 1),
1884 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1885 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1886 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1887 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1888 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1889 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1890 ENTRY(adcx_u64),
1891 ENTRY(adox_u64),
1892};
1893TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1894
1895
1896/*
1897 * XCHG
1898 */
1899static void XchgTest(void)
1900{
1901 if (!SubTestAndCheckIfEnabled("xchg"))
1902 return;
1903 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1904 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1905 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1906 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1907
1908 static struct
1909 {
1910 uint8_t cb; uint64_t fMask;
1911 union
1912 {
1913 uintptr_t pfn;
1914 FNIEMAIMPLXCHGU8 *pfnU8;
1915 FNIEMAIMPLXCHGU16 *pfnU16;
1916 FNIEMAIMPLXCHGU32 *pfnU32;
1917 FNIEMAIMPLXCHGU64 *pfnU64;
1918 } u;
1919 }
1920 s_aXchgWorkers[] =
1921 {
1922 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1923 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1924 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1925 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1926 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1927 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1928 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1929 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1930 };
1931 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1932 {
1933 RTUINT64U uIn1, uIn2, uMem, uDst;
1934 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1935 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1936 if (uIn1.u == uIn2.u)
1937 uDst.u = uIn2.u = ~uIn2.u;
1938
1939 switch (s_aXchgWorkers[i].cb)
1940 {
1941 case 1:
1942 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1943 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1944 break;
1945 case 2:
1946 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1947 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1948 break;
1949 case 4:
1950 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1951 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1952 break;
1953 case 8:
1954 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1955 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1956 break;
1957 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1958 }
1959
1960 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1961 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1962 }
1963}
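

/*
 * For reference, the xchg workers are a plain two-operand swap; a minimal
 * standalone use of one of them (values picked arbitrarily) looks like:
 *
 *      uint8_t uMem = 0x12, uReg = 0x34;
 *      iemAImpl_xchg_u8_locked(&uMem, &uReg);
 *      // uMem == 0x34, uReg == 0x12
 *
 * As far as this test is concerned the locked and unlocked variants must
 * produce identical results; only the memory access discipline differs.
 */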
1964
1965
1966/*
1967 * XADD
1968 */
1969static void XaddTest(void)
1970{
1971#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1972 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1973 static struct \
1974 { \
1975 const char *pszName; \
1976 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1977 BINU ## a_cBits ## _TEST_T const *paTests; \
1978 uint32_t const *pcTests; \
1979 } const s_aFuncs[] = \
1980 { \
1981 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1982 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1983        { "xadd_u" # a_cBits "_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1984 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1985 }; \
1986 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1987 { \
1988 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1989 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1990 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1991 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1992 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1993 { \
1994 uint32_t fEfl = paTests[iTest].fEflIn; \
1995 a_Type uSrc = paTests[iTest].uSrcIn; \
1996 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1997 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1998 if ( fEfl != paTests[iTest].fEflOut \
1999 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
2000 || uSrc != paTests[iTest].uDstIn) \
2001 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2002 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
2003 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
2004 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2005 } \
2006 } \
2007 } while(0)
2008 TEST_XADD(8, uint8_t, "%#04x");
2009 TEST_XADD(16, uint16_t, "%#06x");
2010 TEST_XADD(32, uint32_t, "%#010RX32");
2011 TEST_XADD(64, uint64_t, "%#010RX64");
2012}
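

/*
 * The checks above encode the xadd contract: the destination ends up holding
 * the sum (which is why the add_u* test data can be reused here) while the
 * source register receives the previous destination value.  Rough sketch,
 * values picked arbitrarily:
 *
 *      uint32_t fEfl = 0;
 *      uint16_t uDst = 100, uSrc = 42;
 *      iemAImpl_xadd_u16(&uDst, &uSrc, &fEfl);
 *      // uDst == 142, uSrc == 100
 */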
2013
2014
2015/*
2016 * CMPXCHG
2017 */
2018
2019static void CmpXchgTest(void)
2020{
2021#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
2022 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
2023 static struct \
2024 { \
2025 const char *pszName; \
2026 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
2027 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
2028 BINU ## a_cBits ## _TEST_T const *paTests; \
2029 uint32_t const *pcTests; \
2030 } const s_aFuncs[] = \
2031 { \
2032 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
2033 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
2034 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
2035 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
2036 }; \
2037 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2038 { \
2039 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
2040 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2041 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
2042 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2043 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2044 { \
2045            /* As-is: with random data the compare is almost certain to fail (negative test). */ \
2046 uint32_t fEfl = paTests[iTest].fEflIn; \
2047 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
2048 a_Type uA = paTests[iTest].uDstIn; \
2049 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
2050 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
2051 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2052 if ( fEfl != paTests[iTest].fEflOut \
2053 || *g_pu ## a_cBits != uExpect \
2054 || uA != paTests[iTest].uSrcIn) \
2055 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2056 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
2057 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
2058 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2059 /* positive */ \
2060 uint32_t fEflExpect = paTests[iTest].fEflIn; \
2061 uA = paTests[iTest].uDstIn; \
2062 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
2063 fEfl = paTests[iTest].fEflIn; \
2064 uA = paTests[iTest].uDstIn; \
2065 *g_pu ## a_cBits = uA; \
2066 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2067 if ( fEfl != fEflExpect \
2068 || *g_pu ## a_cBits != uNew \
2069 || uA != paTests[iTest].uDstIn) \
2070 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2071 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
2072 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
2073 EFlagsDiff(fEfl, fEflExpect)); \
2074 } \
2075 } \
2076 } while(0)
2077 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
2078 TEST_CMPXCHG(16, uint16_t, "%#06x");
2079 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
2080#if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
2081 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
2082#endif
2083}
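
/*
 * Both passes above follow the cmpxchg contract: when the accumulator equals
 * the destination, the destination is replaced by the new value and ZF is set
 * (the "positive" pass, which borrows the sub_u* worker to compute the
 * expected flags); otherwise the accumulator is loaded from the destination
 * and ZF is cleared.  Rough sketch of the positive case, arbitrary values:
 *
 *      uint32_t fEfl = 0;
 *      uint16_t uDst = 7, uAccu = 7;
 *      iemAImpl_cmpxchg_u16(&uDst, &uAccu, 9, &fEfl);
 *      // uDst == 9, uAccu == 7, ZF set in fEfl
 */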
2084
2085static void CmpXchg8bTest(void)
2086{
2087 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2088 static struct
2089 {
2090 const char *pszName;
2091 FNIEMAIMPLCMPXCHG8B *pfn;
2092 } const s_aFuncs[] =
2093 {
2094 { "cmpxchg8b", iemAImpl_cmpxchg8b },
2095 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
2096 };
2097 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2098 {
2099 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2100 continue;
2101 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2102 {
2103 uint64_t const uOldValue = RandU64();
2104 uint64_t const uNewValue = RandU64();
2105
2106 /* positive test. */
2107 RTUINT64U uA, uB;
2108 uB.u = uNewValue;
2109 uA.u = uOldValue;
2110 *g_pu64 = uOldValue;
2111 uint32_t fEflIn = RandEFlags();
2112 uint32_t fEfl = fEflIn;
2113 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2114 if ( fEfl != (fEflIn | X86_EFL_ZF)
2115 || *g_pu64 != uNewValue
2116 || uA.u != uOldValue)
2117 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2118 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
2119 fEfl, *g_pu64, uA.u,
2120 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2121 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2122
2123 /* negative */
2124 uint64_t const uExpect = ~uOldValue;
2125 *g_pu64 = uExpect;
2126 uA.u = uOldValue;
2127 uB.u = uNewValue;
2128 fEfl = fEflIn = RandEFlags();
2129 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2130 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2131 || *g_pu64 != uExpect
2132 || uA.u != uExpect)
2133 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2134 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
2135 fEfl, *g_pu64, uA.u,
2136 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2137 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2138 }
2139 }
2140}
2141
2142static void CmpXchg16bTest(void)
2143{
2144 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
2145 static struct
2146 {
2147 const char *pszName;
2148 FNIEMAIMPLCMPXCHG16B *pfn;
2149 } const s_aFuncs[] =
2150 {
2151 { "cmpxchg16b", iemAImpl_cmpxchg16b },
2152 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
2153#if !defined(RT_ARCH_ARM64)
2154 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
2155#endif
2156 };
2157 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2158 {
2159 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2160 continue;
2161#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
2162 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
2163 {
2164 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
2165 continue;
2166 }
2167#endif
2168 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2169 {
2170 RTUINT128U const uOldValue = RandU128();
2171 RTUINT128U const uNewValue = RandU128();
2172
2173 /* positive test. */
2174 RTUINT128U uA, uB;
2175 uB = uNewValue;
2176 uA = uOldValue;
2177 *g_pu128 = uOldValue;
2178 uint32_t fEflIn = RandEFlags();
2179 uint32_t fEfl = fEflIn;
2180 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2181 if ( fEfl != (fEflIn | X86_EFL_ZF)
2182 || g_pu128->s.Lo != uNewValue.s.Lo
2183 || g_pu128->s.Hi != uNewValue.s.Hi
2184 || uA.s.Lo != uOldValue.s.Lo
2185 || uA.s.Hi != uOldValue.s.Hi)
2186 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2187 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2188 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2189 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2190 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2191 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
2192 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2193 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2194
2195 /* negative */
2196 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
2197 *g_pu128 = uExpect;
2198 uA = uOldValue;
2199 uB = uNewValue;
2200 fEfl = fEflIn = RandEFlags();
2201 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2202 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2203 || g_pu128->s.Lo != uExpect.s.Lo
2204 || g_pu128->s.Hi != uExpect.s.Hi
2205 || uA.s.Lo != uExpect.s.Lo
2206 || uA.s.Hi != uExpect.s.Hi)
2207 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2208 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2209 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2210 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2211 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2212 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
2213 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2214 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2215 }
2216 }
2217}
2218
2219
2220/*
2221 * Double shifts.
2222 *
2223 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2224 */
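/*
 * Rough illustration of the call shape exercised below (arbitrary values;
 * the AMD and Intel flavours differ only in how some flags come out):
 * driving one of the shld_u16 workers from the table below as
 *
 *      uint32_t fEfl = 0;
 *      uint16_t uDst = 0x1234;
 *      pfn(&uDst, 0xabcd, 4, &fEfl);   // 4 = shift count (the uMisc column)
 *
 * leaves uDst == 0x234a: four bits go out at the top and the gap is refilled
 * from the high bits of the source operand.
 */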
2225#ifdef TSTIEMAIMPL_WITH_GENERATOR
2226# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2227static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2228{ \
2229 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2230 { \
2231 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2232 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2233 continue; \
2234 IEMBINARYOUTPUT BinOut; \
2235 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2236 RTEXITCODE_FAILURE); \
2237 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2238 { \
2239 a_TestType Test; \
2240 Test.fEflIn = RandEFlags(); \
2241 Test.fEflOut = Test.fEflIn; \
2242 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2243 Test.uDstOut = Test.uDstIn; \
2244 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2245 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2246 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
2247 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2248 } \
2249 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2250 } \
2251 return RTEXITCODE_SUCCESS; \
2252} \
2253static RTEXITCODE ShiftDblU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2254{ \
2255 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2256 { \
2257 AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2258 IEMBINARYOUTPUT BinOut; \
2259 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2260 RTEXITCODE_FAILURE); \
2261 size_t cbTests = a_aSubTests[iFn].pcTests[0]; \
2262 if (!a_aSubTests[iFn].fBinary) \
2263 cbTests *= sizeof(a_aSubTests[iFn].paTests[0]); \
2264 GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, cbTests); \
2265 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2266 } \
2267 return RTEXITCODE_SUCCESS; \
2268}
2269
2270#else
2271# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2272#endif
2273
2274#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2275TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2276\
2277static a_SubTestType a_aSubTests[] = \
2278{ \
2279 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2280 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2281 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2282 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2283}; \
2284\
2285GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2286\
2287static void ShiftDblU ## a_cBits ## Test(void) \
2288{ \
2289 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2290 { \
2291 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2292 continue; \
2293 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2294 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2295 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2296 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2297 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2298 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2299 { \
2300 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2301 { \
2302 uint32_t fEfl = paTests[iTest].fEflIn; \
2303 a_Type uDst = paTests[iTest].uDstIn; \
2304 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2305 if ( uDst != paTests[iTest].uDstOut \
2306 || fEfl != paTests[iTest].fEflOut) \
2307 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2308 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2309 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2310 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2311 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2312 else \
2313 { \
2314 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2315 *g_pfEfl = paTests[iTest].fEflIn; \
2316 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2317 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2318 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2319 } \
2320 } \
2321 pfn = a_aSubTests[iFn].pfnNative; \
2322 } \
2323 } \
2324}
2325TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2326TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2327TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2328
2329#ifdef TSTIEMAIMPL_WITH_GENERATOR
2330static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
2331{
2332 RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
2333 if (rcExit == RTEXITCODE_SUCCESS)
2334 rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
2335 if (rcExit == RTEXITCODE_SUCCESS)
2336 rcExit = ShiftDblU64Generate(cTests, papszNameFmts);
2337 return rcExit;
2338}
2339
2340static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
2341{
2342 RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
2343 if (rcExit == RTEXITCODE_SUCCESS)
2344 rcExit = ShiftDblU32DumpAll(papszNameFmts);
2345 if (rcExit == RTEXITCODE_SUCCESS)
2346 rcExit = ShiftDblU64DumpAll(papszNameFmts);
2347 return rcExit;
2348}
2349#endif
2350
2351static void ShiftDblTest(void)
2352{
2353 ShiftDblU16Test();
2354 ShiftDblU32Test();
2355 ShiftDblU64Test();
2356}
2357
2358
2359/*
2360 * Unary operators.
2361 *
2362 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
2363 */
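/*
 * The unary workers only take the destination and the eflags, e.g. (values
 * chosen purely for illustration):
 *
 *      uint32_t fEfl = 0;
 *      uint8_t  uDst = 0xff;
 *      iemAImpl_neg_u8(&uDst, &fEfl);  // uDst == 0x01, CF set (operand was non-zero)
 */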
2364#ifdef TSTIEMAIMPL_WITH_GENERATOR
2365# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2366static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2367{ \
2368 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2369 { \
2370 IEMBINARYOUTPUT BinOut; \
2371 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[0], g_aUnaryU ## a_cBits[iFn].pszName), RTEXITCODE_FAILURE); \
2372 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2373 { \
2374 a_TestType Test; \
2375 Test.fEflIn = RandEFlags(); \
2376 Test.fEflOut = Test.fEflIn; \
2377 Test.uDstIn = RandU ## a_cBits(); \
2378 Test.uDstOut = Test.uDstIn; \
2379 Test.uSrcIn = 0; \
2380 Test.uMisc = 0; \
2381 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
2382 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2383 } \
2384 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2385 } \
2386 return RTEXITCODE_SUCCESS; \
2387} \
2388static RTEXITCODE UnaryU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2389{ \
2390 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2391 { \
2392 AssertReturn(DECOMPRESS_TESTS(g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
2393 IEMBINARYOUTPUT BinOut; \
2394 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[0], g_aUnaryU ## a_cBits[iFn].pszName), RTEXITCODE_FAILURE); \
2395 uint32_t cbTests = g_aUnaryU ## a_cBits[iFn].pcTests[0]; \
2396 if (!g_aUnaryU ## a_cBits[iFn].fBinary) \
2397 cbTests *= sizeof(g_aUnaryU ## a_cBits[iFn].paTests[0]); \
2398 GenerateBinaryWrite(&BinOut, g_aUnaryU ## a_cBits[iFn].paTests, cbTests); \
2399 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2400 } \
2401 return RTEXITCODE_SUCCESS; \
2402}
2403#else
2404# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
2405#endif
2406
2407#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2408TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2409static a_SubTestType g_aUnaryU ## a_cBits [] = \
2410{ \
2411 ENTRY(inc_u ## a_cBits), \
2412 ENTRY(inc_u ## a_cBits ## _locked), \
2413 ENTRY(dec_u ## a_cBits), \
2414 ENTRY(dec_u ## a_cBits ## _locked), \
2415 ENTRY(not_u ## a_cBits), \
2416 ENTRY(not_u ## a_cBits ## _locked), \
2417 ENTRY(neg_u ## a_cBits), \
2418 ENTRY(neg_u ## a_cBits ## _locked), \
2419}; \
2420\
2421GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2422\
2423static void UnaryU ## a_cBits ## Test(void) \
2424{ \
2425 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2426 { \
2427 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aUnaryU ## a_cBits[iFn])) \
2428 continue; \
2429 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2430 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
2431 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2432 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2433 { \
2434 uint32_t fEfl = paTests[iTest].fEflIn; \
2435 a_Type uDst = paTests[iTest].uDstIn; \
2436 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2437 if ( uDst != paTests[iTest].uDstOut \
2438 || fEfl != paTests[iTest].fEflOut) \
2439 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2440 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2441 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2442 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2443 else \
2444 { \
2445 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2446 *g_pfEfl = paTests[iTest].fEflIn; \
2447 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2448 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2449 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2450 } \
2451 } \
2452 } \
2453}
2454TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2455TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2456TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2457TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2458
2459#ifdef TSTIEMAIMPL_WITH_GENERATOR
2460static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
2461{
2462 RTEXITCODE rcExit = UnaryU8Generate(cTests, papszNameFmts);
2463 if (rcExit == RTEXITCODE_SUCCESS)
2464 rcExit = UnaryU16Generate(cTests, papszNameFmts);
2465 if (rcExit == RTEXITCODE_SUCCESS)
2466 rcExit = UnaryU32Generate(cTests, papszNameFmts);
2467 if (rcExit == RTEXITCODE_SUCCESS)
2468 rcExit = UnaryU64Generate(cTests, papszNameFmts);
2469 return rcExit;
2470}
2471
2472static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
2473{
2474 RTEXITCODE rcExit = UnaryU8DumpAll(papszNameFmts);
2475 if (rcExit == RTEXITCODE_SUCCESS)
2476 rcExit = UnaryU16DumpAll(papszNameFmts);
2477 if (rcExit == RTEXITCODE_SUCCESS)
2478 rcExit = UnaryU32DumpAll(papszNameFmts);
2479 if (rcExit == RTEXITCODE_SUCCESS)
2480 rcExit = UnaryU64DumpAll(papszNameFmts);
2481 return rcExit;
2482}
2483#endif
2484
2485static void UnaryTest(void)
2486{
2487 UnaryU8Test();
2488 UnaryU16Test();
2489 UnaryU32Test();
2490 UnaryU64Test();
2491}
2492
2493
2494/*
2495 * Shifts.
2496 *
2497 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2498 */
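/*
 * The shift and rotate workers take the count the same way (uMisc), e.g.
 * driving the rol_u8 entry from the table below with arbitrary values:
 *
 *      uint32_t fEfl = 0;
 *      uint8_t  uDst = 0x81;
 *      pfn(&uDst, 1, &fEfl);           // uDst == 0x03, CF set (top bit rotated out)
 *
 * The generator deliberately masks the random count with (a_cBits * 4 - 1)
 * so that counts beyond the operand width are covered as well.
 */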
2499#ifdef TSTIEMAIMPL_WITH_GENERATOR
2500# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2501static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2502{ \
2503 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2504 { \
2505 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2506 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2507 continue; \
2508 IEMBINARYOUTPUT BinOut; \
2509 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2510 RTEXITCODE_FAILURE); \
2511 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2512 { \
2513 a_TestType Test; \
2514 Test.fEflIn = RandEFlags(); \
2515 Test.fEflOut = Test.fEflIn; \
2516 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2517 Test.uDstOut = Test.uDstIn; \
2518 Test.uSrcIn = 0; \
2519 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2520 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2521 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2522 \
2523 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2524 Test.fEflOut = Test.fEflIn; \
2525 Test.uDstOut = Test.uDstIn; \
2526 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2527 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2528 } \
2529 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2530 } \
2531 return RTEXITCODE_SUCCESS; \
2532} \
2533static RTEXITCODE ShiftU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2534{ \
2535 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2536 { \
2537 AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2538 IEMBINARYOUTPUT BinOut; \
2539 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2540 RTEXITCODE_FAILURE); \
2541 uint32_t cbTests = a_aSubTests[iFn].pcTests[0]; \
2542 if (!a_aSubTests[iFn].fBinary) \
2543 cbTests *= sizeof(a_aSubTests[iFn].paTests[0]); \
2544 GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, cbTests); \
2545 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2546 } \
2547 return RTEXITCODE_SUCCESS; \
2548}
2549#else
2550# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2551#endif
2552
2553#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2554TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2555static a_SubTestType a_aSubTests[] = \
2556{ \
2557 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2558 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2559 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2560 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2561 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2562 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2563 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2564 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2565 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2566 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2567 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2568 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2569 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2570 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2571}; \
2572\
2573GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2574\
2575static void ShiftU ## a_cBits ## Test(void) \
2576{ \
2577 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2578 { \
2579 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2580 continue; \
2581 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2582 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2583 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2584 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2585 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2586 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2587 { \
2588 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2589 { \
2590 uint32_t fEfl = paTests[iTest].fEflIn; \
2591 a_Type uDst = paTests[iTest].uDstIn; \
2592 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2593 if ( uDst != paTests[iTest].uDstOut \
2594 || fEfl != paTests[iTest].fEflOut ) \
2595 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2596 iTest, iVar == 0 ? "" : "/n", \
2597 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2598 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2599 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2600 else \
2601 { \
2602 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2603 *g_pfEfl = paTests[iTest].fEflIn; \
2604 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2605 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2606 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2607 } \
2608 } \
2609 pfn = a_aSubTests[iFn].pfnNative; \
2610 } \
2611 } \
2612}
2613TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2614TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2615TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2616TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2617
2618#ifdef TSTIEMAIMPL_WITH_GENERATOR
2619static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
2620{
2621 RTEXITCODE rcExit = ShiftU8Generate(cTests, papszNameFmts);
2622 if (rcExit == RTEXITCODE_SUCCESS)
2623 rcExit = ShiftU16Generate(cTests, papszNameFmts);
2624 if (rcExit == RTEXITCODE_SUCCESS)
2625 rcExit = ShiftU32Generate(cTests, papszNameFmts);
2626 if (rcExit == RTEXITCODE_SUCCESS)
2627 rcExit = ShiftU64Generate(cTests, papszNameFmts);
2628 return rcExit;
2629}
2630
2631static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
2632{
2633 RTEXITCODE rcExit = ShiftU8DumpAll(papszNameFmts);
2634 if (rcExit == RTEXITCODE_SUCCESS)
2635 rcExit = ShiftU16DumpAll(papszNameFmts);
2636 if (rcExit == RTEXITCODE_SUCCESS)
2637 rcExit = ShiftU32DumpAll(papszNameFmts);
2638 if (rcExit == RTEXITCODE_SUCCESS)
2639 rcExit = ShiftU64DumpAll(papszNameFmts);
2640 return rcExit;
2641}
2642#endif
2643
2644static void ShiftTest(void)
2645{
2646 ShiftU8Test();
2647 ShiftU16Test();
2648 ShiftU32Test();
2649 ShiftU64Test();
2650}
2651
2652
2653/*
2654 * Multiplication and division.
2655 *
2656 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2657 * Note! Currently ignoring undefined bits.
2658 */
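
/*
 * Shape of the calls, for orientation: the 16/32/64-bit workers take two
 * destination pointers (the AX/DX style halves of the implicit register
 * pair; which pointer maps to which half is not spelled out here, so treat
 * the sketch below as shape only), the explicit operand and the eflags, and
 * return a status code that the tests compare against the recorded rc; that
 * status is how the divide workers can signal a #DE style failure.  The
 * 8-bit variants instead use a single 16-bit destination holding the whole
 * AX value, hence the duplicated code right below.
 *
 *      uint32_t fEfl = 0;
 *      uint16_t uDst1 = 100, uDst2 = 0;
 *      int rc = pfn(&uDst1, &uDst2, 7, &fEfl);
 */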
2659
2660/* U8 */
2661TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2662static INT_MULDIV_U8_T g_aMulDivU8[] =
2663{
2664 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2665 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2666 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2667 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2668 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2669 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2670 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2671 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2672 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2673 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2674};
2675
2676#ifdef TSTIEMAIMPL_WITH_GENERATOR
2677static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
2678{
2679 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2680 {
2681 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2682 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2683 continue;
2684        IEMBINARYOUTPUT BinOut;
2685 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[g_aMulDivU8[iFn].idxCpuEflFlavour], g_aMulDivU8[iFn].pszName),
2686 RTEXITCODE_FAILURE);
2687 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2688 {
2689 MULDIVU8_TEST_T Test;
2690 Test.fEflIn = RandEFlags();
2691 Test.fEflOut = Test.fEflIn;
2692 Test.uDstIn = RandU16Dst(iTest);
2693 Test.uDstOut = Test.uDstIn;
2694 Test.uSrcIn = RandU8Src(iTest);
2695 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2696 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2697 }
2698 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
2699 }
2700 return RTEXITCODE_SUCCESS;
2701}
2702static RTEXITCODE MulDivU8DumpAll(const char * const * papszNameFmts)
2703{
2704 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2705 {
2706 AssertReturn(DECOMPRESS_TESTS(g_aMulDivU8[iFn]), RTEXITCODE_FAILURE);
2707 IEMBINARYOUTPUT BinOut;
2708 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[g_aMulDivU8[iFn].idxCpuEflFlavour], g_aMulDivU8[iFn].pszName),
2709 RTEXITCODE_FAILURE);
2710 uint32_t cbTests = g_aMulDivU8[iFn].pcTests[0];
2711 if (!g_aMulDivU8[iFn].fBinary)
2712 cbTests *= sizeof(g_aMulDivU8[iFn].paTests[0]);
2713 GenerateBinaryWrite(&BinOut, g_aMulDivU8[iFn].paTests, cbTests);
2714 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
2715 }
2716 return RTEXITCODE_SUCCESS;
2717}
2718#endif
2719
2720static void MulDivU8Test(void)
2721{
2722 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2723 {
2724        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
2725            continue;
2726 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2727 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2728 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2729 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2730        uint32_t const               cVars    = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
2731 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2732 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2733 {
2734 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2735 {
2736 uint32_t fEfl = paTests[iTest].fEflIn;
2737 uint16_t uDst = paTests[iTest].uDstIn;
2738                int rc = pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2739 if ( uDst != paTests[iTest].uDstOut
2740 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2741 || rc != paTests[iTest].rc)
2742 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2743 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2744 "%sexpected %#08x %#06RX16 %d%s\n",
2745 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2746 iVar ? " " : "", fEfl, uDst, rc,
2747 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2748 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2749 else
2750 {
2751 *g_pu16 = paTests[iTest].uDstIn;
2752 *g_pfEfl = paTests[iTest].fEflIn;
2753                    rc = pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2754 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2755 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2756 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2757 }
2758 }
2759 pfn = g_aMulDivU8[iFn].pfnNative;
2760 }
2761 }
2762}
2763
2764#ifdef TSTIEMAIMPL_WITH_GENERATOR
2765# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2766static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2767{ \
2768 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2769 { \
2770 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2771 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2772 continue; \
2773 IEMBINARYOUTPUT BinOut; \
2774 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2775 RTEXITCODE_FAILURE); \
2776 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2777 { \
2778 a_TestType Test; \
2779 Test.fEflIn = RandEFlags(); \
2780 Test.fEflOut = Test.fEflIn; \
2781 Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2782 Test.uDst1Out = Test.uDst1In; \
2783 Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2784 Test.uDst2Out = Test.uDst2In; \
2785 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2786 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2787 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2788 } \
2789 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2790 } \
2791 return RTEXITCODE_SUCCESS; \
2792} \
2793static RTEXITCODE MulDivU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2794{ \
2795 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2796 { \
2797 AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2798 IEMBINARYOUTPUT BinOut; \
2799 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], a_aSubTests[iFn].pszName), \
2800 RTEXITCODE_FAILURE); \
2801 uint32_t cbTests = a_aSubTests[iFn].pcTests[0]; \
2802 if (!a_aSubTests[iFn].fBinary) \
2803 cbTests *= sizeof(a_aSubTests[iFn].paTests[0]); \
2804 GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, cbTests); \
2805 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2806 } \
2807 return RTEXITCODE_SUCCESS; \
2808}
2809#else
2810# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2811#endif
2812
2813#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2814TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2815static a_SubTestType a_aSubTests [] = \
2816{ \
2817 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2818 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2819 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2820 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2821 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2822 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2823 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2824 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2825}; \
2826\
2827GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2828\
2829static void MulDivU ## a_cBits ## Test(void) \
2830{ \
2831 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2832 { \
2833 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2834 continue; \
2835 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2836 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2837 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2838 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2839 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2840 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2841 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2842 { \
2843 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2844 { \
2845 uint32_t fEfl = paTests[iTest].fEflIn; \
2846 a_Type uDst1 = paTests[iTest].uDst1In; \
2847 a_Type uDst2 = paTests[iTest].uDst2In; \
2848 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2849 if ( uDst1 != paTests[iTest].uDst1Out \
2850 || uDst2 != paTests[iTest].uDst2Out \
2851 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2852 || rc != paTests[iTest].rc) \
2853 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2854 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2855 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2856 iTest, iVar == 0 ? "" : "/n", \
2857 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2858 fEfl, uDst1, uDst2, rc, \
2859 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2860 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2861 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2862 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2863 else \
2864 { \
2865 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2866 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2867 *g_pfEfl = paTests[iTest].fEflIn; \
2868 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2869 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2870 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2871 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2872 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2873 } \
2874 } \
2875 pfn = a_aSubTests[iFn].pfnNative; \
2876 } \
2877 } \
2878}
2879TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2880TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2881TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2882
2883#ifdef TSTIEMAIMPL_WITH_GENERATOR
2884static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
2885{
2886 RTEXITCODE rcExit = MulDivU8Generate(cTests, papszNameFmts);
2887 if (rcExit == RTEXITCODE_SUCCESS)
2888 rcExit = MulDivU16Generate(cTests, papszNameFmts);
2889 if (rcExit == RTEXITCODE_SUCCESS)
2890 rcExit = MulDivU32Generate(cTests, papszNameFmts);
2891 if (rcExit == RTEXITCODE_SUCCESS)
2892 rcExit = MulDivU64Generate(cTests, papszNameFmts);
2893 return rcExit;
2894}
2895
2896static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
2897{
2898 RTEXITCODE rcExit = MulDivU8DumpAll(papszNameFmts);
2899 if (rcExit == RTEXITCODE_SUCCESS)
2900 rcExit = MulDivU16DumpAll(papszNameFmts);
2901 if (rcExit == RTEXITCODE_SUCCESS)
2902 rcExit = MulDivU32DumpAll(papszNameFmts);
2903 if (rcExit == RTEXITCODE_SUCCESS)
2904 rcExit = MulDivU64DumpAll(papszNameFmts);
2905 return rcExit;
2906}
2907#endif
2908
2909static void MulDivTest(void)
2910{
2911 MulDivU8Test();
2912 MulDivU16Test();
2913 MulDivU32Test();
2914 MulDivU64Test();
2915}
2916
2917
2918/*
2919 * BSWAP
2920 */
2921static void BswapTest(void)
2922{
2923 if (SubTestAndCheckIfEnabled("bswap_u16"))
2924 {
2925 *g_pu32 = UINT32_C(0x12345678);
2926 iemAImpl_bswap_u16(g_pu32);
2927#if 0
2928 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2929#else
2930 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2931#endif
2932 *g_pu32 = UINT32_C(0xffff1122);
2933 iemAImpl_bswap_u16(g_pu32);
2934#if 0
2935 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2936#else
2937 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2938#endif
2939 }
2940
2941 if (SubTestAndCheckIfEnabled("bswap_u32"))
2942 {
2943 *g_pu32 = UINT32_C(0x12345678);
2944 iemAImpl_bswap_u32(g_pu32);
2945 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2946 }
2947
2948 if (SubTestAndCheckIfEnabled("bswap_u64"))
2949 {
2950 *g_pu64 = UINT64_C(0x0123456789abcdef);
2951 iemAImpl_bswap_u64(g_pu64);
2952 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2953 }
2954}
2955
2956
2957
2958/*********************************************************************************************************************************
2959* Floating point (x87 style) *
2960*********************************************************************************************************************************/
2961
2962/*
2963 * FPU constant loading.
2964 */
2965TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2966
2967static FPU_LD_CONST_T g_aFpuLdConst[] =
2968{
2969 ENTRY(fld1),
2970 ENTRY(fldl2t),
2971 ENTRY(fldl2e),
2972 ENTRY(fldpi),
2973 ENTRY(fldlg2),
2974 ENTRY(fldln2),
2975 ENTRY(fldz),
2976};
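
/* For reference, the constants these workers push: fld1 = +1.0, fldl2t = log2(10),
 * fldl2e = log2(e), fldpi = pi, fldlg2 = log10(2), fldln2 = ln(2) and fldz = +0.0;
 * each is rounded according to FCW.RC where that matters, which is exactly what
 * the generator below varies. */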
2977
2978#ifdef TSTIEMAIMPL_WITH_GENERATOR
2979static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2980{
2981 X86FXSTATE State;
2982 RT_ZERO(State);
2983 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2984 {
2985 GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2986 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2987 {
2988 State.FCW = RandFcw();
2989 State.FSW = RandFsw();
2990
2991 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2992 {
2993 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2994 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2995 g_aFpuLdConst[iFn].pfn(&State, &Res);
2996 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2997 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2998 }
2999 }
3000 GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
3001 }
3002}
3003#endif
3004
3005static void FpuLoadConstTest(void)
3006{
3007 /*
3008 * Inputs:
3009 * - FSW: C0, C1, C2, C3
3010 * - FCW: Exception masks, Precision control, Rounding control.
3011 *
3012 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
3013 */
3014 X86FXSTATE State;
3015 RT_ZERO(State);
3016 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3017 {
3018 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
3019 continue;
3020
3021 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
3022 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
3023 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
3024        uint32_t const           cVars   = COUNT_VARIATIONS(g_aFpuLdConst[iFn]);
3025 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3026 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3027 {
3028 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3029 {
3030 State.FCW = paTests[iTest].fFcw;
3031 State.FSW = paTests[iTest].fFswIn;
3032 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3033 pfn(&State, &Res);
3034 if ( Res.FSW != paTests[iTest].fFswOut
3035 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3036 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
3037 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3038 Res.FSW, FormatR80(&Res.r80Result),
3039 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3040 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3041 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3042 FormatFcw(paTests[iTest].fFcw) );
3043 }
3044 pfn = g_aFpuLdConst[iFn].pfnNative;
3045 }
3046 }
3047}
3048
3049
3050/*
3051 * Load floating point values from memory.
3052 */
3053#ifdef TSTIEMAIMPL_WITH_GENERATOR
3054# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
3055static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3056{ \
3057 X86FXSTATE State; \
3058 RT_ZERO(State); \
3059 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3060 { \
3061 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3062 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3063 { \
3064 State.FCW = RandFcw(); \
3065 State.FSW = RandFsw(); \
3066 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
3067 \
3068 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3069 { \
3070 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3071 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3072 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3073 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
3074 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
3075 GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
3076 } \
3077 } \
3078 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3079 } \
3080}
3081#else
3082# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
3083#endif
3084
3085#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
3086typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
3087typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
3088TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
3089\
3090static a_SubTestType a_aSubTests[] = \
3091{ \
3092 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
3093}; \
3094GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
3095\
3096static void FpuLdR ## a_cBits ## Test(void) \
3097{ \
3098 X86FXSTATE State; \
3099 RT_ZERO(State); \
3100 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3101 { \
3102 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3103 continue; \
3104 \
3105 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3106 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3107 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3108 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3109 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3110 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3111 { \
3112 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3113 { \
3114 a_rdTypeIn const InVal = paTests[iTest].InVal; \
3115 State.FCW = paTests[iTest].fFcw; \
3116 State.FSW = paTests[iTest].fFswIn; \
3117 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3118 pfn(&State, &Res, &InVal); \
3119 if ( Res.FSW != paTests[iTest].fFswOut \
3120 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3121 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3122 "%s -> fsw=%#06x %s\n" \
3123 "%s expected %#06x %s%s%s (%s)\n", \
3124 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3125 FormatR ## a_cBits(&paTests[iTest].InVal), \
3126 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3127 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3128 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3129 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3130 FormatFcw(paTests[iTest].fFcw) ); \
3131 } \
3132 pfn = a_aSubTests[iFn].pfnNative; \
3133 } \
3134 } \
3135}
3136
3137TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
3138TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
3139TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
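/*
 * Each TEST_FPU_LOAD() instantiation above produces a one-entry subtest table
 * plus a FpuLdR<bits>Test() driver.  The 64-bit line expands roughly along
 * these lines (sketch only, the real code comes from the macro above):
 */
#if 0
static FPU_LD_R64_T g_aFpuLdR64[] =
{
    ENTRY(fld_r80_from_r64)         /* -> iemAImpl_fld_r80_from_r64 + its test data */
};
static void FpuLdR64Test(void)
{
    /* For each FPU_R64_IN_TEST_T record: load FCW/FSW from the record, call the
       worker with the RTFLOAT64U input, and compare FSW and the 80-bit result. */
}
#endif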
3140
3141#ifdef TSTIEMAIMPL_WITH_GENERATOR
3142static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
3143{
3144 FpuLdR80Generate(pOut, cTests);
3145 FpuLdR64Generate(pOut, cTests);
3146 FpuLdR32Generate(pOut, cTests);
3147}
3148#endif
3149
3150static void FpuLdMemTest(void)
3151{
3152 FpuLdR80Test();
3153 FpuLdR64Test();
3154 FpuLdR32Test();
3155}
3156
3157
3158/*
3159 * Load integer values from memory.
3160 */
3161#ifdef TSTIEMAIMPL_WITH_GENERATOR
3162# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3163static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3164{ \
3165 X86FXSTATE State; \
3166 RT_ZERO(State); \
3167 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3168 { \
3169 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3170 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3171 { \
3172 State.FCW = RandFcw(); \
3173 State.FSW = RandFsw(); \
3174 a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
3175 \
3176 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3177 { \
3178 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3179 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3180 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3181 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
3182 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
3183 } \
3184 } \
3185 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3186 } \
3187}
3188#else
3189# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
3190#endif
3191
3192#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
3193typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
3194typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
3195TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
3196\
3197static a_SubTestType a_aSubTests[] = \
3198{ \
3199 ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
3200}; \
3201GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3202\
3203static void FpuLdI ## a_cBits ## Test(void) \
3204{ \
3205 X86FXSTATE State; \
3206 RT_ZERO(State); \
3207 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3208 { \
3209 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3210 continue; \
3211 \
3212 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3213 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3214 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3215 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3216 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3217 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3218 { \
3219 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3220 { \
3221 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
3222 State.FCW = paTests[iTest].fFcw; \
3223 State.FSW = paTests[iTest].fFswIn; \
3224 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3225 pfn(&State, &Res, &iInVal); \
3226 if ( Res.FSW != paTests[iTest].fFswOut \
3227 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3228 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
3229 "%s -> fsw=%#06x %s\n" \
3230 "%s expected %#06x %s%s%s (%s)\n", \
3231 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
3232 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3233 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3234 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3235 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3236 FormatFcw(paTests[iTest].fFcw) ); \
3237 } \
3238 pfn = a_aSubTests[iFn].pfnNative; \
3239 } \
3240 } \
3241}
3242
3243TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
3244TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
3245TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
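/*
 * Worked example for the fild tests above: every i16/i32/i64 value fits into
 * the 64-bit significand of the 80-bit format, so the conversion itself is
 * always exact and rounding control cannot change the value.  E.g.
 * (int16_t)-3 = -1.1b * 2^1 loads as the following (sketch, not used by the
 * tests):
 */
#if 0
static RTFLOAT80U const s_r80MinusThree = RTFLOAT80U_INIT_C(1, 0xc000000000000000, 1 + RTFLOAT80U_EXP_BIAS);
#endif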
3246
3247#ifdef TSTIEMAIMPL_WITH_GENERATOR
3248static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
3249{
3250 FpuLdI64Generate(pOut, cTests);
3251 FpuLdI32Generate(pOut, cTests);
3252 FpuLdI16Generate(pOut, cTests);
3253}
3254#endif
3255
3256static void FpuLdIntTest(void)
3257{
3258 FpuLdI64Test();
3259 FpuLdI32Test();
3260 FpuLdI16Test();
3261}
3262
3263
3264/*
3265 * Load binary coded decimal values from memory.
3266 */
3267typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
3268typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
3269TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
3270
3271static FPU_LD_D80_T g_aFpuLdD80[] =
3272{
3273 ENTRY(fld_r80_from_d80)
3274};
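/*
 * Memory format reminder for fbld (and fbst further down): 18 packed BCD digits
 * in bytes 0..8, two digits per byte with the least significant pair first, and
 * the sign in bit 7 of byte 9 (the remaining bits of that byte are ignored on
 * load).  E.g. decimal -45 as raw bytes (illustration only, the tests use
 * RTPBCD80U rather than a byte array):
 */
#if 0
static uint8_t const s_abPackedMinus45[10] = { 0x45, 0, 0, 0, 0, 0, 0, 0, 0, 0x80 };
#endif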
3275
3276#ifdef TSTIEMAIMPL_WITH_GENERATOR
3277static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
3278{
3279 X86FXSTATE State;
3280 RT_ZERO(State);
3281 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3282 {
3283 GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
3284 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3285 {
3286 State.FCW = RandFcw();
3287 State.FSW = RandFsw();
3288 RTPBCD80U InVal = RandD80Src(iTest);
3289
3290 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3291 {
3292 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3293 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3294 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
3295 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
3296 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
3297 iTest, iRounding);
3298 }
3299 }
3300 GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
3301 }
3302}
3303#endif
3304
3305static void FpuLdD80Test(void)
3306{
3307 X86FXSTATE State;
3308 RT_ZERO(State);
3309 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3310 {
3311 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
3312 continue;
3313
3314 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
3315 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
3316 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
3317 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
3318 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3319 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3320 {
3321 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3322 {
3323 RTPBCD80U const InVal = paTests[iTest].InVal;
3324 State.FCW = paTests[iTest].fFcw;
3325 State.FSW = paTests[iTest].fFswIn;
3326 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3327 pfn(&State, &Res, &InVal);
3328 if ( Res.FSW != paTests[iTest].fFswOut
3329 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3330 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
3331 "%s -> fsw=%#06x %s\n"
3332 "%s expected %#06x %s%s%s (%s)\n",
3333 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3334 FormatD80(&paTests[iTest].InVal),
3335 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3336 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3337 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3338 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3339 FormatFcw(paTests[iTest].fFcw) );
3340 }
3341 pfn = g_aFpuLdD80[iFn].pfnNative;
3342 }
3343 }
3344}
3345
3346
3347/*
3348 * Store floating point values to memory.
3349 */
3350#ifdef TSTIEMAIMPL_WITH_GENERATOR
3351static const RTFLOAT80U g_aFpuStR32Specials[] =
3352{
3353 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3354 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3355 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3356 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3357};
3358static const RTFLOAT80U g_aFpuStR64Specials[] =
3359{
3360 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3361 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3362 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3363 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3364 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
3365};
3366static const RTFLOAT80U g_aFpuStR80Specials[] =
3367{
3368 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
3369};
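/*
 * Why the r32 specials above are interesting: storing to single precision keeps
 * the top 24 significand bits.  For 0xffffff8000000000 those 24 bits are all
 * ones and the first discarded bit is set with nothing below it, so the
 * round-to-nearest-even tie rounds up, the carry ripples through the 24 ones
 * and bumps the exponent ("near rounding with carry").  For 0xfffffe8000000000
 * the kept LSB is zero, so the same tie rounds to even without a carry ("near
 * rounding").  The r64 specials play the same game with the 53-bit double
 * precision significand.
 */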
3370# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3371static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3372{ \
3373 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
3374 X86FXSTATE State; \
3375 RT_ZERO(State); \
3376 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3377 { \
3378 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3379 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3380 { \
3381 uint16_t const fFcw = RandFcw(); \
3382 State.FSW = RandFsw(); \
3383 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
3384 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
3385 \
3386 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3387 { \
3388 /* PC doesn't influence these, so leave as is. */ \
3389 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3390 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3391 { \
3392 uint16_t uFswOut = 0; \
3393 a_rdType OutVal; \
3394 RT_ZERO(OutVal); \
3395 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3396 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3397 | (iRounding << X86_FCW_RC_SHIFT); \
3398 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3399 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3400 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
3401 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
3402 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
3403 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
3404 } \
3405 } \
3406 } \
3407 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3408 } \
3409}
3410#else
3411# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
3412#endif
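/*
 * The iMask loop in the generator above leans on X86_FCW_OM/UM/PM being three
 * adjacent FCW bits (see the AssertCompile): iMask runs over the even values
 * 0,2,..,14, so (iMask >> 1) << X86_FCW_OM_BIT selects every combination of the
 * overflow/underflow/precision exception masks, while the invalid/denormal/
 * zero-divide masks keep whatever RandFcw() handed out.
 */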
3413
3414#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
3415typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
3416 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
3417typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
3418TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
3419\
3420static a_SubTestType a_aSubTests[] = \
3421{ \
3422 ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
3423}; \
3424GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3425\
3426static void FpuStR ## a_cBits ## Test(void) \
3427{ \
3428 X86FXSTATE State; \
3429 RT_ZERO(State); \
3430 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3431 { \
3432 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3433 continue; \
3434 \
3435 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3436 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3437 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3438 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3439 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3440 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3441 { \
3442 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3443 { \
3444 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3445 uint16_t uFswOut = 0; \
3446 a_rdType OutVal; \
3447 RT_ZERO(OutVal); \
3448 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3449 State.FCW = paTests[iTest].fFcw; \
3450 State.FSW = paTests[iTest].fFswIn; \
3451 pfn(&State, &uFswOut, &OutVal, &InVal); \
3452 if ( uFswOut != paTests[iTest].fFswOut \
3453 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
3454 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3455 "%s -> fsw=%#06x %s\n" \
3456 "%s expected %#06x %s%s%s (%s)\n", \
3457 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3458 FormatR80(&paTests[iTest].InVal), \
3459 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
3460 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
3461 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3462 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
3463 FormatFcw(paTests[iTest].fFcw) ); \
3464 } \
3465 pfn = a_aSubTests[iFn].pfnNative; \
3466 } \
3467 } \
3468}
3469
3470TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3471TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3472TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3473
3474#ifdef TSTIEMAIMPL_WITH_GENERATOR
3475static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
3476{
3477 FpuStR80Generate(pOut, cTests);
3478 FpuStR64Generate(pOut, cTests);
3479 FpuStR32Generate(pOut, cTests);
3480}
3481#endif
3482
3483static void FpuStMemTest(void)
3484{
3485 FpuStR80Test();
3486 FpuStR64Test();
3487 FpuStR32Test();
3488}
3489
3490
3491/*
3492 * Store integer values to memory or register.
3493 */
3494TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3495TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3496TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3497
3498static FPU_ST_I16_T g_aFpuStI16[] =
3499{
3500 ENTRY(fist_r80_to_i16),
3501 ENTRY_AMD( fistt_r80_to_i16, 0),
3502 ENTRY_INTEL(fistt_r80_to_i16, 0),
3503};
3504static FPU_ST_I32_T g_aFpuStI32[] =
3505{
3506 ENTRY(fist_r80_to_i32),
3507 ENTRY(fistt_r80_to_i32),
3508};
3509static FPU_ST_I64_T g_aFpuStI64[] =
3510{
3511 ENTRY(fist_r80_to_i64),
3512 ENTRY(fistt_r80_to_i64),
3513};
3514
3515#ifdef TSTIEMAIMPL_WITH_GENERATOR
3516static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
3517{
3518 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
3519 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
3520 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3521 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3522 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3523 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3524 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3525 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3526 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3527 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3528 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3529 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3530 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3531 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3532 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
3533 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3534 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3535 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3536 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3537 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3538 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3539 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3540 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3541 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3542 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3543 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3544 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3545 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3546 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3547 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3548 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3549 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3550 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3551 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3552 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3553 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3554 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3555 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3556 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3557 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3558 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3559 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3560 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3561 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3562 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3563 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3564 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3565};
3566static const RTFLOAT80U g_aFpuStI32Specials[] =
3567{
3568 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3569 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3570 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3571 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3572 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3573 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3574 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3575 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3576 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3577 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3578 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3579 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3580 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3581 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3582 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3583 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3584};
3585static const RTFLOAT80U g_aFpuStI64Specials[] =
3586{
3587 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3588 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3589 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3590 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3591 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3592 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3593 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3594 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3595 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3596 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3597 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3598 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3599 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3600 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3601 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3602 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3603 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3604};
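/*
 * About the "overflow to min/nan" remarks in the tables above: when the rounded
 * value does not fit the destination integer, fist/fistt raise #IA, and with
 * the invalid exception masked they store the "integer indefinite" value -
 * 0x8000, 0x80000000 or 0x8000000000000000 for i16/i32/i64 - which doubles as
 * the most negative representable value, hence the min/NaN-ish wording.
 */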
3605
3606# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3607static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
3608{ \
3609 X86FXSTATE State; \
3610 RT_ZERO(State); \
3611 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3612 { \
3613 PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
3614 ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
3615 PRTSTREAM pOutFn = pOut; \
3616 if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
3617 { \
3618 if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
3619 continue; \
3620 pOutFn = pOutCpu; \
3621 } \
3622 \
3623 GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
3624 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
3625 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3626 { \
3627 uint16_t const fFcw = RandFcw(); \
3628 State.FSW = RandFsw(); \
3629 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
3630 : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
3631 \
3632 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3633 { \
3634 /* PC doesn't influence these, so leave as is. */ \
3635 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3636 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3637 { \
3638 uint16_t uFswOut = 0; \
3639 a_iType iOutVal = ~(a_iType)2; \
3640 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3641 | (iRounding << X86_FCW_RC_SHIFT); \
3642 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3643 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3644 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3645 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
3646 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
3647 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
3648 } \
3649 } \
3650 } \
3651 GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
3652 } \
3653}
3654#else
3655# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3656#endif
3657
3658#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3659GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3660\
3661static void FpuStI ## a_cBits ## Test(void) \
3662{ \
3663 X86FXSTATE State; \
3664 RT_ZERO(State); \
3665 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3666 { \
3667 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3668 continue; \
3669 \
3670 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3671 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3672 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3673 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3674 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3675 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3676 { \
3677 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3678 { \
3679 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3680 uint16_t uFswOut = 0; \
3681 a_iType iOutVal = ~(a_iType)2; \
3682 State.FCW = paTests[iTest].fFcw; \
3683 State.FSW = paTests[iTest].fFswIn; \
3684 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3685 if ( uFswOut != paTests[iTest].fFswOut \
3686 || iOutVal != paTests[iTest].iOutVal) \
3687 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3688 "%s -> fsw=%#06x " a_szFmt "\n" \
3689 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3690 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3691 FormatR80(&paTests[iTest].InVal), \
3692 iVar ? " " : "", uFswOut, iOutVal, \
3693 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3694 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3695 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3696 } \
3697 pfn = a_aSubTests[iFn].pfnNative; \
3698 } \
3699 } \
3700}
3701
3702//fistt_r80_to_i16 diffs for AMD, of course :-)
3703
3704TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3705TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3706TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
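/*
 * fist honours the FCW rounding control when converting, whereas fistt (the
 * SSE3 fisttp worker) always truncates towards zero regardless of RC.  That is
 * why both get their own rows in the tables above, and why the i16 variant has
 * separate AMD/Intel entries (see the note right above).
 */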
3707
3708#ifdef TSTIEMAIMPL_WITH_GENERATOR
3709static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3710{
3711 FpuStI64Generate(pOut, pOutCpu, cTests);
3712 FpuStI32Generate(pOut, pOutCpu, cTests);
3713 FpuStI16Generate(pOut, pOutCpu, cTests);
3714}
3715#endif
3716
3717static void FpuStIntTest(void)
3718{
3719 FpuStI64Test();
3720 FpuStI32Test();
3721 FpuStI16Test();
3722}
3723
3724
3725/*
3726 * Store as packed BCD value (memory).
3727 */
3728typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3729typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3730TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3731
3732static FPU_ST_D80_T g_aFpuStD80[] =
3733{
3734 ENTRY(fst_r80_to_d80),
3735};
3736
3737#ifdef TSTIEMAIMPL_WITH_GENERATOR
3738static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
3739{
3740 static RTFLOAT80U const s_aSpecials[] =
3741 {
3742 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
3743 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
3744 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
3745 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
3746 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
3747 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
3748 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
3749 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
3750 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
3751 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
3752 RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
3753 RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
3754 };
3755
3756 X86FXSTATE State;
3757 RT_ZERO(State);
3758 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3759 {
3760 GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
3761 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3762 {
3763 uint16_t const fFcw = RandFcw();
3764 State.FSW = RandFsw();
3765 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
3766
3767 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3768 {
3769 /* PC doesn't influence these, so leave as is. */
3770 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
3771 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
3772 {
3773 uint16_t uFswOut = 0;
3774 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3775 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
3776 | (iRounding << X86_FCW_RC_SHIFT);
3777 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
3778 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
3779 g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
3780 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
3781 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
3782 GenFormatD80(&OutVal), iTest, iRounding, iMask);
3783 }
3784 }
3785 }
3786 GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
3787 }
3788}
3789#endif
3790
3791
3792static void FpuStD80Test(void)
3793{
3794 X86FXSTATE State;
3795 RT_ZERO(State);
3796 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3797 {
3798 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
3799 continue;
3800
3801 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3802 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3803 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3804 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3805 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3806 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3807 {
3808 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3809 {
3810 RTFLOAT80U const InVal = paTests[iTest].InVal;
3811 uint16_t uFswOut = 0;
3812 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3813 State.FCW = paTests[iTest].fFcw;
3814 State.FSW = paTests[iTest].fFswIn;
3815 pfn(&State, &uFswOut, &OutVal, &InVal);
3816 if ( uFswOut != paTests[iTest].fFswOut
3817 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3818 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3819 "%s -> fsw=%#06x %s\n"
3820 "%s expected %#06x %s%s%s (%s)\n",
3821 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3822 FormatR80(&paTests[iTest].InVal),
3823 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3824 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3825 FswDiff(uFswOut, paTests[iTest].fFswOut),
3826 !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3827 FormatFcw(paTests[iTest].fFcw) );
3828 }
3829 pfn = g_aFpuStD80[iFn].pfnNative;
3830 }
3831 }
3832}
3833
3834
3835
3836/*********************************************************************************************************************************
3837* x87 FPU Binary Operations *
3838*********************************************************************************************************************************/
3839
3840/*
3841 * Binary FPU operations on two 80-bit floating point values.
3842 */
3843TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3844enum { kFpuBinaryHint_fprem = 1, };
3845
3846static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3847{
3848 ENTRY(fadd_r80_by_r80),
3849 ENTRY(fsub_r80_by_r80),
3850 ENTRY(fsubr_r80_by_r80),
3851 ENTRY(fmul_r80_by_r80),
3852 ENTRY(fdiv_r80_by_r80),
3853 ENTRY(fdivr_r80_by_r80),
3854 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3855 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3856 ENTRY(fscale_r80_by_r80),
3857 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3858 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3859 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3860 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3861 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3862 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3863};
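/*
 * kFpuBinaryHint_fprem flags the partial remainder workers: fprem/fprem1 leave
 * C2 set in the FSW while the reduction is still partial (exponent difference
 * of 64 or more), so the generator below biases its random inputs towards
 * pairs whose exponents differ by at least 64 and, when such a pair goes
 * through without exceptions, replays the worker on its own result until C2
 * clears - mirroring how guest code loops on fprem.
 */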
3864
3865#ifdef TSTIEMAIMPL_WITH_GENERATOR
3866static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3867{
3868 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3869
3870 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3871 {
3872 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3873 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3874 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3875 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3876 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3877 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3878 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3879 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3880 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3881 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3882 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3883 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3884 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3885 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3886 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3887 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3888 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3889 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3890 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3891 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3892 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3893 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3894 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3895 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3896 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3897 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3898 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3899 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3900 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3901 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3902 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3903 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3904 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3905 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3906 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3907 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3908 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3909 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3910 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3911 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3912 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3913 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3914 /* fscale: Negative variants for the essentials of the above. */
3915 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3916 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3917 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3918 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3919 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3920 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3921 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3922 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3923 /* fscale: Some fun with denormals and pseudo-denormals. */
3924 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3925 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3926 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3927 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3928 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3929 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3930 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3931 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3932 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3933 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3934 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3935 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3936 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3937 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3938 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3939 };
3940
3941 X86FXSTATE State;
3942 RT_ZERO(State);
3943 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3944 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3945 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3946 {
3947 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3948 PRTSTREAM pOutFn = pOut;
3949 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3950 {
3951 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3952 continue;
3953 pOutFn = pOutCpu;
3954 }
3955
3956 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3957 uint32_t iTestOutput = 0;
3958 uint32_t cNormalInputPairs = 0;
3959 uint32_t cTargetRangeInputs = 0;
3960 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3961 {
3962 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3963 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3964 bool fTargetRange = false;
3965 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3966 {
3967 cNormalInputPairs++;
3968 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3969 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3970 cTargetRangeInputs += fTargetRange = true;
3971 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3972 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3973 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3974 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3975 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3976 cTargetRangeInputs += fTargetRange = true;
3977 }
3978 }
3979 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3980 {
3981 iTest -= 1;
3982 continue;
3983 }
3984
3985 uint16_t const fFcwExtra = 0;
3986 uint16_t const fFcw = RandFcw();
3987 State.FSW = RandFsw();
3988
3989 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3990 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3991 {
3992 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3993 | (iRounding << X86_FCW_RC_SHIFT)
3994 | (iPrecision << X86_FCW_PC_SHIFT)
3995 | X86_FCW_MASK_ALL;
3996 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3997 pfn(&State, &ResM, &InVal1, &InVal2);
3998 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3999 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
4000 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4001
4002 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4003 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4004 pfn(&State, &ResU, &InVal1, &InVal2);
4005 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4006 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
4007 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4008
4009 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4010 if (fXcpt)
4011 {
4012 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4013 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4014 pfn(&State, &Res1, &InVal1, &InVal2);
4015 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4016 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
4017 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4018 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4019 {
4020 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4021 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4022 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4023 pfn(&State, &Res2, &InVal1, &InVal2);
4024 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4025 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
4026 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4027 }
4028 if (!RT_IS_POWER_OF_TWO(fXcpt))
4029 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4030 if (fUnmasked & fXcpt)
4031 {
4032 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4033 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4034 pfn(&State, &Res3, &InVal1, &InVal2);
4035 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4036 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
4037 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4038 }
4039 }
4040
4041 /* If the values are in range and caused no exceptions, do the whole series of
4042 partial remainders until we get the non-partial one or run into an exception. */
4043 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4044 {
4045 IEMFPURESULT ResPrev = ResM;
4046 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
4047 {
4048 State.FCW = State.FCW | X86_FCW_MASK_ALL;
4049 State.FSW = ResPrev.FSW;
4050 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4051 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
4052 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
4053 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
4054 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
4055 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
4056 ResPrev = ResSeq;
4057 }
4058 }
4059 }
4060 }
4061 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
4062 }
4063}
4064#endif
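/*
 * FCW coverage strategy of the generator above, per input pair: one run with
 * every exception masked (the /m row), one with everything unmasked (/u), and,
 * if anything was raised, a run with just the raised exceptions masked plus
 * runs that unmask the raised exceptions one at a time.  This gives test data
 * for both the quiet (masked) results and the unmasked reporting paths.
 */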
4065
4066
4067static void FpuBinaryR80Test(void)
4068{
4069 X86FXSTATE State;
4070 RT_ZERO(State);
4071 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4072 {
4073 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
4074 continue;
4075
4076 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
4077 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
4078 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
4079 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
4080 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4081 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4082 {
4083 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4084 {
4085 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4086 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4087 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4088 State.FCW = paTests[iTest].fFcw;
4089 State.FSW = paTests[iTest].fFswIn;
4090 pfn(&State, &Res, &InVal1, &InVal2);
4091 if ( Res.FSW != paTests[iTest].fFswOut
4092 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
4093 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4094 "%s -> fsw=%#06x %s\n"
4095 "%s expected %#06x %s%s%s (%s)\n",
4096 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4097 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4098 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4099 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4100 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4101 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4102 FormatFcw(paTests[iTest].fFcw) );
4103 }
4104 pfn = g_aFpuBinaryR80[iFn].pfnNative;
4105 }
4106 }
4107}
4108
4109
4110/*
4111 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
4112 * Binary FPU operations on one 80-bit floating point value and one 64-bit/32-bit floating point or 32-bit/16-bit integer value.
4113#define int64_t_IS_NORMAL(a) 1
4114#define int32_t_IS_NORMAL(a) 1
4115#define int16_t_IS_NORMAL(a) 1
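/*
 * The three defines above let the shared GEN_/TEST_FPU_BINARY_* macros apply
 * a_Type2##_IS_NORMAL() uniformly: for integer second operands every value
 * counts as "normal", so only the r80 side decides whether an input pair is
 * kept as a normal/normal pair.
 */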
4116
4117#ifdef TSTIEMAIMPL_WITH_GENERATOR
4118static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
4119{
4120 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4121 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4122};
4123static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
4124{
4125 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4126 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4127};
4128static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
4129{
4130 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4131};
4132static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
4133{
4134 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4135};
4136
4137# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4138static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
4139{ \
4140 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4141 \
4142 X86FXSTATE State; \
4143 RT_ZERO(State); \
4144 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4145 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4146 { \
4147 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
4148 uint32_t cNormalInputPairs = 0; \
4149 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
4150 { \
4151 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4152 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
4153 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4154 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
4155 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4156 cNormalInputPairs++; \
4157 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4158 { \
4159 iTest -= 1; \
4160 continue; \
4161 } \
4162 \
4163 uint16_t const fFcw = RandFcw(); \
4164 State.FSW = RandFsw(); \
4165 \
4166 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
4167 { \
4168 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
4169 { \
4170 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4171 { \
4172 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
4173 | (iRounding << X86_FCW_RC_SHIFT) \
4174 | (iPrecision << X86_FCW_PC_SHIFT) \
4175 | iMask; \
4176 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4177 a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
4178 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
4179 State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
4180 GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
4181 } \
4182 } \
4183 } \
4184 } \
4185 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
4186 } \
4187}
4188#else
4189# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4190#endif
4191
4192#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
4193TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
4194\
4195static a_SubTestType a_aSubTests[] = \
4196{ \
4197 ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
4198 ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
4199 ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
4200 ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
4201 ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
4202 ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
4203}; \
4204\
4205GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4206\
4207static void FpuBinary ## a_UpBits ## Test(void) \
4208{ \
4209 X86FXSTATE State; \
4210 RT_ZERO(State); \
4211 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4212 { \
4213 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4214 continue; \
4215 \
4216 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
4217 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4218 PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
4219 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4220 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4221 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4222 { \
4223 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4224 { \
4225 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4226 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4227 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4228 State.FCW = paTests[iTest].fFcw; \
4229 State.FSW = paTests[iTest].fFswIn; \
4230 pfn(&State, &Res, &InVal1, &InVal2); \
4231 if ( Res.FSW != paTests[iTest].fFswOut \
4232 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
4233 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4234 "%s -> fsw=%#06x %s\n" \
4235 "%s expected %#06x %s%s%s (%s)\n", \
4236 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4237 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4238 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
4239 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
4240 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
4241 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
4242 FormatFcw(paTests[iTest].fFcw) ); \
4243 } \
4244 pfn = a_aSubTests[iFn].pfnNative; \
4245 } \
4246 } \
4247}
4248
4249TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
4250TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
4251TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
4252TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
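/*
 * Naming note: the a_I parameter above only supplies the 'i' infix for the
 * integer forms, so RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits) yields e.g.
 * fadd_r80_by_r64 for the first instantiation and fiadd_r80_by_i32 for the
 * third one.
 */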
4253
4254
4255/*
4256 * Binary operations on an 80-bit value and an 80-/64-/32-bit floating point or 32-/16-bit integer value, only affecting FSW.
4257 */
4258#ifdef TSTIEMAIMPL_WITH_GENERATOR
4259static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
4260{
4261 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4262 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4263};
4264static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
4265{
4266 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4267 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4268};
4269static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
4270{
4271 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4272 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4273};
4274static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
4275{
4276 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4277};
4278static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
4279{
4280 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4281};
4282
4283# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4284static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
4285{ \
4286 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4287 \
4288 X86FXSTATE State; \
4289 RT_ZERO(State); \
4290 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4291 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4292 { \
4293 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
4294 uint32_t cNormalInputPairs = 0; \
4295 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
4296 { \
4297 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4298 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
4299 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4300 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
4301 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4302 cNormalInputPairs++; \
4303 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4304 { \
4305 iTest -= 1; \
4306 continue; \
4307 } \
4308 \
4309 uint16_t const fFcw = RandFcw(); \
4310 State.FSW = RandFsw(); \
4311 \
4312 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
4313 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4314 { \
4315 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
4316 uint16_t fFswOut = 0; \
4317 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
4318 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
4319 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
4320 iTest, iMask ? 'c' : 'u'); \
4321 } \
4322 } \
4323 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
4324 } \
4325}
4326#else
4327# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4328#endif
4329
4330#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
4331TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
4332\
4333static a_SubTestType a_aSubTests[] = \
4334{ \
4335 __VA_ARGS__ \
4336}; \
4337\
4338GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4339\
4340static void FpuBinaryFsw ## a_UpBits ## Test(void) \
4341{ \
4342 X86FXSTATE State; \
4343 RT_ZERO(State); \
4344 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4345 { \
4346 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4347 continue; \
4348 \
4349 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
4350 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4351 PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
4352 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4353 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4354 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4355 { \
4356 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4357 { \
4358 uint16_t fFswOut = 0; \
4359 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4360 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4361 State.FCW = paTests[iTest].fFcw; \
4362 State.FSW = paTests[iTest].fFswIn; \
4363 pfn(&State, &fFswOut, &InVal1, &InVal2); \
4364 if (fFswOut != paTests[iTest].fFswOut) \
4365 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4366 "%s -> fsw=%#06x\n" \
4367 "%s expected %#06x %s (%s)\n", \
4368 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4369 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4370 iVar ? " " : "", fFswOut, \
4371 iVar ? " " : "", paTests[iTest].fFswOut, \
4372 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
4373 } \
4374 pfn = a_aSubTests[iFn].pfnNative; \
4375 } \
4376 } \
4377}
4378
4379TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
4380TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
4381TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
4382TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
4383TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
4384
4385
4386/*
4387 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
4388 */
4389TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
4390
4391static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
4392{
4393 ENTRY(fcomi_r80_by_r80),
4394 ENTRY(fucomi_r80_by_r80),
4395};
4396
4397#ifdef TSTIEMAIMPL_WITH_GENERATOR
4398static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
4399{
4400 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4401 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4402};
4403
4404static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
4405{
4406 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
4407
4408 X86FXSTATE State;
4409 RT_ZERO(State);
4410 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4411 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4412 {
4413 GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
4414 uint32_t cNormalInputPairs = 0;
4415 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
4416 {
4417 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
4418 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
4419 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4420 cNormalInputPairs++;
4421 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4422 {
4423 iTest -= 1;
4424 continue;
4425 }
4426
4427 uint16_t const fFcw = RandFcw();
4428 State.FSW = RandFsw();
4429
4430 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
4431 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4432 {
4433 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
4434 uint16_t uFswOut = 0;
4435 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
4436 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
4437 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
4438 iTest, iMask ? 'c' : 'u');
4439 }
4440 }
4441 GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
4442 }
4443}
4444#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4445
4446static void FpuBinaryEflR80Test(void)
4447{
4448 X86FXSTATE State;
4449 RT_ZERO(State);
4450 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4451 {
4452 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
4453 continue;
4454
4455 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
4456 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4457 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4458 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4459 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4460 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4461 {
4462 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4463 {
4464 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4465 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4466 State.FCW = paTests[iTest].fFcw;
4467 State.FSW = paTests[iTest].fFswIn;
4468 uint16_t uFswOut = 0;
4469 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4470 if ( uFswOut != paTests[iTest].fFswOut
4471 || fEflOut != paTests[iTest].fEflOut)
4472 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4473 "%s -> fsw=%#06x efl=%#08x\n"
4474 "%s expected %#06x %#08x %s%s (%s)\n",
4475 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4476 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4477 iVar ? " " : "", uFswOut, fEflOut,
4478 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4479 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4480 FormatFcw(paTests[iTest].fFcw));
4481 }
4482 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4483 }
4484 }
4485}
4486
4487
4488/*********************************************************************************************************************************
4489* x87 FPU Unary Operations *
4490*********************************************************************************************************************************/
4491
4492/*
4493 * Unary FPU operations on one 80-bit floating point value.
4494 *
4495 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
4496 * a rounding error or not.
4497 */
4498TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
4499
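/** Rough classification of each unary worker, used by the generator to pick
 * suitable input ranges and to flag tests that may legitimately differ by a
 * rounding error (see FpuUnaryR80MayHaveRoundingError below). */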
4500enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
4501static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
4502{
4503 ENTRY_EX( fabs_r80, kUnary_Accurate),
4504 ENTRY_EX( fchs_r80, kUnary_Accurate),
4505 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
4506 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
4507 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
4508 ENTRY_EX( frndint_r80, kUnary_Accurate),
4509 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
4510 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
4511 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
4512 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
4513};
4514
4515#ifdef TSTIEMAIMPL_WITH_GENERATOR
4516
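/**
 * Checks whether a generated input may produce a result that differs from the
 * reference by a rounding error.
 *
 * Only f2xm1 (kUnary_Rounding_F2xm1) is flagged, and only for normal inputs
 * with a magnitude in [2^-69, 1).  The generator records the verdict in the
 * otherwise reserved FCW bit 7 (see the FCW bit 7 note above).
 */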
4517static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
4518{
4519 if ( enmKind == kUnary_Rounding_F2xm1
4520 && RTFLOAT80U_IS_NORMAL(pr80Val)
4521 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
4522 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4523 return true;
4524 return false;
4525}
4526
4527static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4528{
4529 static RTFLOAT80U const s_aSpecials[] =
4530 {
4531 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4532 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4533 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4534 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4535 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4536 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4537 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4538 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4539 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4540 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4541 };
4542 X86FXSTATE State;
4543 RT_ZERO(State);
4544 uint32_t cMinNormals = cTests / 4;
4545 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4546 {
4547 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4548 PRTSTREAM pOutFn = pOut;
4549 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4550 {
4551 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4552 continue;
4553 pOutFn = pOutCpu;
4554 }
4555
4556 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4557 uint32_t iTestOutput = 0;
4558 uint32_t cNormalInputs = 0;
4559 uint32_t cTargetRangeInputs = 0;
4560 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4561 {
4562 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4563 if (RTFLOAT80U_IS_NORMAL(&InVal))
4564 {
4565 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4566 {
4567 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4568 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4569 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4570 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4571 cTargetRangeInputs++;
4572 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4573 {
4574 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4575 cTargetRangeInputs++;
4576 }
4577 }
4578 cNormalInputs++;
4579 }
4580 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4581 {
4582 iTest -= 1;
4583 continue;
4584 }
4585
4586 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4587 uint16_t const fFcw = RandFcw();
4588 State.FSW = RandFsw();
4589
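 /* Each input is run for all 4x4 rounding/precision control combinations:
    first with all exceptions masked (ResM), then with all unmasked (ResU).
    Exception flags raised by those two runs trigger extra variations with
    just the raised exceptions masked and, if more than one was raised,
    with each of them unmasked individually. */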
4590 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4591 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4592 {
4593 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4594 | (iRounding << X86_FCW_RC_SHIFT)
4595 | (iPrecision << X86_FCW_PC_SHIFT)
4596 | X86_FCW_MASK_ALL;
4597 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4598 pfn(&State, &ResM, &InVal);
4599 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4600 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4601 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4602
4603 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4604 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4605 pfn(&State, &ResU, &InVal);
4606 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4607 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4608 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4609
4610 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4611 if (fXcpt)
4612 {
4613 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4614 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4615 pfn(&State, &Res1, &InVal);
4616 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4617 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4618 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4619 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4620 {
4621 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4622 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4623 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4624 pfn(&State, &Res2, &InVal);
4625 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4626 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4627 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4628 }
4629 if (!RT_IS_POWER_OF_TWO(fXcpt))
4630 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4631 if (fUnmasked & fXcpt)
4632 {
4633 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4634 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4635 pfn(&State, &Res3, &InVal);
4636 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4637 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4638 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4639 }
4640 }
4641 }
4642 }
4643 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4644 }
4645}
4646#endif
4647
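/**
 * Compares two FPU status words, optionally accepting a difference in C1
 * alone as a rounding error.
 *
 * @returns true if considered equal, false otherwise.
 * @param   fFcw1       The first status word (the actual result).
 * @param   fFcw2       The second status word (the expected result).
 * @param   fRndErrOk   Whether rounding errors are acceptable for this test.
 * @param   pfRndErr    Set to true when the difference was accepted as a
 *                      rounding error; never cleared.
 */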
4648static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4649{
4650 if (fFcw1 == fFcw2)
4651 return true;
4652 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4653 {
4654 *pfRndErr = true;
4655 return true;
4656 }
4657 return false;
4658}
4659
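/**
 * Compares two 80-bit floating point values, optionally accepting a one ULP
 * difference as a rounding error.
 *
 * The signs must match.  Tolerated differences are mantissas one apart at the
 * same exponent, and the wrap-around case where one value has an all-ones
 * mantissa and the other the next higher exponent with only the integer bit
 * set.
 *
 * @returns true if considered identical, false otherwise.
 */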
4660static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4661{
4662 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4663 return true;
4664 if ( fRndErrOk
4665 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4666 {
4667 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4668 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4669 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4670 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4671 ||
4672 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4673 && pr80Val1->s.uMantissa == UINT64_MAX
4674 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4675 ||
4676 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4677 && pr80Val2->s.uMantissa == UINT64_MAX
4678 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4679 {
4680 *pfRndErr = true;
4681 return true;
4682 }
4683 }
4684 return false;
4685}
4686
4687
4688static void FpuUnaryR80Test(void)
4689{
4690 X86FXSTATE State;
4691 RT_ZERO(State);
4692 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4693 {
4694 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
4695 continue;
4696
4697 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4698 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4699 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4700 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4701 uint32_t cRndErrs = 0;
4702 uint32_t cPossibleRndErrs = 0;
4703 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4704 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4705 {
4706 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4707 {
4708 RTFLOAT80U const InVal = paTests[iTest].InVal;
4709 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4710 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4711 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4712 State.FSW = paTests[iTest].fFswIn;
4713 pfn(&State, &Res, &InVal);
4714 bool fRndErr = false;
4715 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4716 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4717 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4718 "%s -> fsw=%#06x %s\n"
4719 "%s expected %#06x %s%s%s%s (%s)\n",
4720 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4721 FormatR80(&paTests[iTest].InVal),
4722 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4723 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4724 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4725 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4726 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4727 cRndErrs += fRndErr;
4728 cPossibleRndErrs += fRndErrOk;
4729 }
4730 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4731 }
4732 if (cPossibleRndErrs > 0)
4733 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4734 }
4735}
4736
4737
4738/*
4739 * Unary FPU operations on one 80-bit floating point value that only affect the FSW.
4740 */
4741TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4742
4743static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4744{
4745 ENTRY(ftst_r80),
4746 ENTRY_EX(fxam_r80, 1),
4747};
4748
4749#ifdef TSTIEMAIMPL_WITH_GENERATOR
4750static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4751{
4752 static RTFLOAT80U const s_aSpecials[] =
4753 {
4754 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4755 };
4756
4757 X86FXSTATE State;
4758 RT_ZERO(State);
4759 uint32_t cMinNormals = cTests / 4;
4760 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4761 {
4762 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4763 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4764 PRTSTREAM pOutFn = pOut;
4765 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4766 {
4767 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4768 continue;
4769 pOutFn = pOutCpu;
4770 }
4771 State.FTW = 0;
4772
4773 GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4774 uint32_t cNormalInputs = 0;
4775 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4776 {
4777 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4778 if (RTFLOAT80U_IS_NORMAL(&InVal))
4779 cNormalInputs++;
4780 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4781 {
4782 iTest -= 1;
4783 continue;
4784 }
4785
4786 uint16_t const fFcw = RandFcw();
4787 State.FSW = RandFsw();
4788 if (!fIsFxam)
4789 {
4790 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4791 {
4792 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4793 {
4794 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4795 {
4796 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4797 | (iRounding << X86_FCW_RC_SHIFT)
4798 | (iPrecision << X86_FCW_PC_SHIFT)
4799 | iMask;
4800 uint16_t fFswOut = 0;
4801 pfn(&State, &fFswOut, &InVal);
4802 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4803 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4804 iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4805 }
4806 }
4807 }
4808 }
4809 else
4810 {
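 /* FXAM reports class Empty based on the tag word, so randomly mark ST(0)
    as empty and record that choice in the otherwise unused FCW bit 7,
    allowing FpuUnaryFswR80Test to reconstruct FTW on replay. */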
4811 uint16_t fFswOut = 0;
4812 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4813 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4814 State.FCW = fFcw;
4815 pfn(&State, &fFswOut, &InVal);
4816 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4817 fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4818 }
4819 }
4820 GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4821 }
4822}
4823#endif
4824
4825
4826static void FpuUnaryFswR80Test(void)
4827{
4828 X86FXSTATE State;
4829 RT_ZERO(State);
4830 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4831 {
4832 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
4833 continue;
4834
4835 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4836 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4837 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4838 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4839 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4840 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4841 {
4842 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4843 {
4844 RTFLOAT80U const InVal = paTests[iTest].InVal;
4845 uint16_t fFswOut = 0;
4846 State.FSW = paTests[iTest].fFswIn;
4847 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4848 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4849 pfn(&State, &fFswOut, &InVal);
4850 if (fFswOut != paTests[iTest].fFswOut)
4851 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4852 "%s -> fsw=%#06x\n"
4853 "%s expected %#06x %s (%s%s)\n",
4854 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4855 FormatR80(&paTests[iTest].InVal),
4856 iVar ? " " : "", fFswOut,
4857 iVar ? " " : "", paTests[iTest].fFswOut,
4858 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4859 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4860 }
4861 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4862 }
4863 }
4864}
4865
4866/*
4867 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4868 */
4869TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4870
4871static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4872{
4873 ENTRY(fxtract_r80_r80),
4874 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4875 ENTRY_INTEL(fptan_r80_r80, 0),
4876 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4877 ENTRY_INTEL(fsincos_r80_r80, 0),
4878};
4879
4880#ifdef TSTIEMAIMPL_WITH_GENERATOR
4881static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4882{
4883 static RTFLOAT80U const s_aSpecials[] =
4884 {
4885 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4886 };
4887
4888 X86FXSTATE State;
4889 RT_ZERO(State);
4890 uint32_t cMinNormals = cTests / 4;
4891 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4892 {
4893 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4894 PRTSTREAM pOutFn = pOut;
4895 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4896 {
4897 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4898 continue;
4899 pOutFn = pOutCpu;
4900 }
4901
4902 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4903 uint32_t iTestOutput = 0;
4904 uint32_t cNormalInputs = 0;
4905 uint32_t cTargetRangeInputs = 0;
4906 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4907 {
4908 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4909 if (RTFLOAT80U_IS_NORMAL(&InVal))
4910 {
4911 if (iFn != 0)
4912 {
4913 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4914 unsigned cTargetExp = 63*2 + 2; /* 2^64..2^-64, matching uTargetExp above */
4915 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4916 cTargetRangeInputs++;
4917 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4918 {
4919 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4920 cTargetRangeInputs++;
4921 }
4922 }
4923 cNormalInputs++;
4924 }
4925 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4926 {
4927 iTest -= 1;
4928 continue;
4929 }
4930
4931 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4932 uint16_t const fFcw = RandFcw();
4933 State.FSW = RandFsw();
4934
4935 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4936 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4937 {
4938 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4939 | (iRounding << X86_FCW_RC_SHIFT)
4940 | (iPrecision << X86_FCW_PC_SHIFT)
4941 | X86_FCW_MASK_ALL;
4942 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4943 pfn(&State, &ResM, &InVal);
4944 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4945 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4946 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4947
4948 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4949 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4950 pfn(&State, &ResU, &InVal);
4951 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4952 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4953 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4954
4955 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4956 if (fXcpt)
4957 {
4958 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4959 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4960 pfn(&State, &Res1, &InVal);
4961 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4962 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4963 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4964 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4965 {
4966 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4967 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4968 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4969 pfn(&State, &Res2, &InVal);
4970 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4971 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4972 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4973 }
4974 if (!RT_IS_POWER_OF_TWO(fXcpt))
4975 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4976 if (fUnmasked & fXcpt)
4977 {
4978 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4979 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4980 pfn(&State, &Res3, &InVal);
4981 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4982 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4983 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4984 }
4985 }
4986 }
4987 }
4988 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4989 }
4990}
4991#endif
4992
4993
4994static void FpuUnaryTwoR80Test(void)
4995{
4996 X86FXSTATE State;
4997 RT_ZERO(State);
4998 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4999 {
5000 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
5001 continue;
5002
5003 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
5004 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
5005 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
5006 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
5007 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5008 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5009 {
5010 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5011 {
5012 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5013 RTFLOAT80U const InVal = paTests[iTest].InVal;
5014 State.FCW = paTests[iTest].fFcw;
5015 State.FSW = paTests[iTest].fFswIn;
5016 pfn(&State, &Res, &InVal);
5017 if ( Res.FSW != paTests[iTest].fFswOut
5018 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
5019 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
5020 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5021 "%s -> fsw=%#06x %s %s\n"
5022 "%s expected %#06x %s %s %s%s%s (%s)\n",
5023 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5024 FormatR80(&paTests[iTest].InVal),
5025 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
5026 iVar ? " " : "", paTests[iTest].fFswOut,
5027 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
5028 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
5029 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
5030 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
5031 }
5032 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
5033 }
5034 }
5035}
5036
5037
5038/*********************************************************************************************************************************
5039* SSE floating point Binary Operations *
5040*********************************************************************************************************************************/
5041
5042/*
5043 * Binary SSE operations on packed single precision floating point values.
5044 */
5045TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5046
5047static SSE_BINARY_R32_T g_aSseBinaryR32[] =
5048{
5049 ENTRY_BIN(addps_u128),
5050 ENTRY_BIN(mulps_u128),
5051 ENTRY_BIN(subps_u128),
5052 ENTRY_BIN(minps_u128),
5053 ENTRY_BIN(divps_u128),
5054 ENTRY_BIN(maxps_u128),
5055 ENTRY_BIN(haddps_u128),
5056 ENTRY_BIN(hsubps_u128),
5057 ENTRY_BIN(sqrtps_u128),
5058 ENTRY_BIN(addsubps_u128),
5059 ENTRY_BIN(cvtps2pd_u128),
5060};
5061
5062#ifdef TSTIEMAIMPL_WITH_GENERATOR
5063static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
5064{
5065 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5066
5067 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
5068 {
5069 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
5070 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
5071 /** @todo More specials. */
5072 };
5073
5074 X86FXSTATE State;
5075 RT_ZERO(State);
5076 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5077 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5078 {
5079 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
5080
5081 IEMBINARYOUTPUT BinOut;
5082 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName), RTEXITCODE_FAILURE);
5083
5084 uint32_t cNormalInputPairs = 0;
5085 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5086 {
5087 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5088
5089 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5090 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5091 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5092 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5093
5094 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5095 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5096 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
5097 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
5098
5099 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
5100 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
5101 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
5102 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
5103 cNormalInputPairs++;
5104 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5105 {
5106 iTest -= 1;
5107 continue;
5108 }
5109
5110 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
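 /* For each rounding/DAZ/FZ combination, record one result with all MXCSR
    exceptions masked and one with all of them unmasked; if any exception
    flags get raised, add further variations derived from those flags so
    both masked and unmasked exception handling gets covered.  The other
    SSE binary generators below use the same scheme. */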
5111 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5112 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5113 for (uint8_t iFz = 0; iFz < 2; iFz++)
5114 {
5115 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5116 | (iRounding << X86_MXCSR_RC_SHIFT)
5117 | (iDaz ? X86_MXCSR_DAZ : 0)
5118 | (iFz ? X86_MXCSR_FZ : 0)
5119 | X86_MXCSR_XCPT_MASK;
5120 IEMSSERESULT ResM; RT_ZERO(ResM);
5121 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5122 TestData.fMxcsrIn = State.MXCSR;
5123 TestData.fMxcsrOut = ResM.MXCSR;
5124 TestData.OutVal = ResM.uResult;
5125 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5126
5127 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5128 IEMSSERESULT ResU; RT_ZERO(ResU);
5129 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5130 TestData.fMxcsrIn = State.MXCSR;
5131 TestData.fMxcsrOut = ResU.MXCSR;
5132 TestData.OutVal = ResU.uResult;
5133 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5134
5135 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5136 if (fXcpt)
5137 {
5138 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5139 IEMSSERESULT Res1; RT_ZERO(Res1);
5140 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5141 TestData.fMxcsrIn = State.MXCSR;
5142 TestData.fMxcsrOut = Res1.MXCSR;
5143 TestData.OutVal = Res1.uResult;
5144 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5145
5146 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5147 {
5148 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5149 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5150 IEMSSERESULT Res2; RT_ZERO(Res2);
5151 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5152 TestData.fMxcsrIn = State.MXCSR;
5153 TestData.fMxcsrOut = Res2.MXCSR;
5154 TestData.OutVal = Res2.uResult;
5155 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5156 }
5157 if (!RT_IS_POWER_OF_TWO(fXcpt))
5158 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5159 if (fUnmasked & fXcpt)
5160 {
5161 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5162 IEMSSERESULT Res3; RT_ZERO(Res3);
5163 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5164 TestData.fMxcsrIn = State.MXCSR;
5165 TestData.fMxcsrOut = Res3.MXCSR;
5166 TestData.OutVal = Res3.uResult;
5167 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5168 }
5169 }
5170 }
5171 }
5172 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5173 }
5174
5175 return RTEXITCODE_SUCCESS;
5176}
5177#endif
5178
5179static void SseBinaryR32Test(void)
5180{
5181 X86FXSTATE State;
5182 RT_ZERO(State);
5183 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5184 {
5185 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
5186 continue;
5187
5188 uint32_t const cbTests = *g_aSseBinaryR32[iFn].pcTests;
5189 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
5190 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
5191 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
5192 if (!cbTests) RTTestSkipped(g_hTest, "no tests");
5193 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5194 {
5195 for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
5196 {
5197 IEMSSERESULT Res; RT_ZERO(Res);
5198
5199 State.MXCSR = paTests[iTest].fMxcsrIn;
5200 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5201 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5202 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5203 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5204 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5205 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5206 || !fValsIdentical)
5207 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
5208 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5209 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5210 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5211 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5212 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5213 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
5214 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
5215 iVar ? " " : "", Res.MXCSR,
5216 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5217 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5218 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5219 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5220 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5221 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5222 !fValsIdentical ? " - val" : "",
5223 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5224 }
5225 pfn = g_aSseBinaryR32[iFn].pfnNative;
5226 }
5227 }
5228}
5229
5230
5231/*
5232 * Binary SSE operations on packed double precision floating point values.
5233 */
5234TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5235
5236static SSE_BINARY_R64_T g_aSseBinaryR64[] =
5237{
5238 ENTRY_BIN(addpd_u128),
5239 ENTRY_BIN(mulpd_u128),
5240 ENTRY_BIN(subpd_u128),
5241 ENTRY_BIN(minpd_u128),
5242 ENTRY_BIN(divpd_u128),
5243 ENTRY_BIN(maxpd_u128),
5244 ENTRY_BIN(haddpd_u128),
5245 ENTRY_BIN(hsubpd_u128),
5246 ENTRY_BIN(sqrtpd_u128),
5247 ENTRY_BIN(addsubpd_u128),
5248 ENTRY_BIN(cvtpd2ps_u128),
5249};
5250
5251#ifdef TSTIEMAIMPL_WITH_GENERATOR
5252static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
5253{
5254 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5255
5256 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
5257 {
5258 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
5259 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
5260 /** @todo More specials. */
5261 };
5262
5263 X86FXSTATE State;
5264 RT_ZERO(State);
5265 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5266 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5267 {
5268 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
5269
5270 IEMBINARYOUTPUT BinOut;
5271 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName), RTEXITCODE_FAILURE);
5272
5273 uint32_t cNormalInputPairs = 0;
5274 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5275 {
5276 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5277
5278 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5279 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5280 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5281 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5282
5283 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5284 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
5285 cNormalInputPairs++;
5286 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5287 {
5288 iTest -= 1;
5289 continue;
5290 }
5291
5292 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5293 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5294 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5295 for (uint8_t iFz = 0; iFz < 2; iFz++)
5296 {
5297 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5298 | (iRounding << X86_MXCSR_RC_SHIFT)
5299 | (iDaz ? X86_MXCSR_DAZ : 0)
5300 | (iFz ? X86_MXCSR_FZ : 0)
5301 | X86_MXCSR_XCPT_MASK;
5302 IEMSSERESULT ResM; RT_ZERO(ResM);
5303 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5304 TestData.fMxcsrIn = State.MXCSR;
5305 TestData.fMxcsrOut = ResM.MXCSR;
5306 TestData.OutVal = ResM.uResult;
5307 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5308
5309 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5310 IEMSSERESULT ResU; RT_ZERO(ResU);
5311 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5312 TestData.fMxcsrIn = State.MXCSR;
5313 TestData.fMxcsrOut = ResU.MXCSR;
5314 TestData.OutVal = ResU.uResult;
5315 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5316
5317 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5318 if (fXcpt)
5319 {
5320 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5321 IEMSSERESULT Res1; RT_ZERO(Res1);
5322 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5323 TestData.fMxcsrIn = State.MXCSR;
5324 TestData.fMxcsrOut = Res1.MXCSR;
5325 TestData.OutVal = Res1.uResult;
5326 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5327
5328 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5329 {
5330 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5331 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5332 IEMSSERESULT Res2; RT_ZERO(Res2);
5333 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5334 TestData.fMxcsrIn = State.MXCSR;
5335 TestData.fMxcsrOut = Res2.MXCSR;
5336 TestData.OutVal = Res2.uResult;
5337 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5338 }
5339 if (!RT_IS_POWER_OF_TWO(fXcpt))
5340 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5341 if (fUnmasked & fXcpt)
5342 {
5343 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5344 IEMSSERESULT Res3; RT_ZERO(Res3);
5345 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5346 TestData.fMxcsrIn = State.MXCSR;
5347 TestData.fMxcsrOut = Res3.MXCSR;
5348 TestData.OutVal = Res3.uResult;
5349 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5350 }
5351 }
5352 }
5353 }
5354 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5355 }
5356
5357 return RTEXITCODE_SUCCESS;
5358}
5359#endif
5360
5361
5362static void SseBinaryR64Test(void)
5363{
5364 X86FXSTATE State;
5365 RT_ZERO(State);
5366 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5367 {
5368 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
5369 continue;
5370
5371 uint32_t const cbTests = *g_aSseBinaryR64[iFn].pcTests;
5372 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
5373 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
5374 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
5375 if (!cbTests) RTTestSkipped(g_hTest, "no tests");
5376 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5377 {
5378 for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
5379 {
5380 IEMSSERESULT Res; RT_ZERO(Res);
5381
5382 State.MXCSR = paTests[iTest].fMxcsrIn;
5383 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5384 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5385 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5386 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5387 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5388 "%s -> mxcsr=%#08x %s'%s\n"
5389 "%s expected %#08x %s'%s%s%s (%s)\n",
5390 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5391 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5392 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5393 iVar ? " " : "", Res.MXCSR,
5394 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5395 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5396 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5397 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5398 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5399 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5400 ? " - val" : "",
5401 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5402 }
5403 pfn = g_aSseBinaryR64[iFn].pfnNative;
5404 }
5405 }
5406}
5407
5408
5409/*
5410 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
5411 */
5412TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5413
5414static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5415{
5416 ENTRY_BIN(addss_u128_r32),
5417 ENTRY_BIN(mulss_u128_r32),
5418 ENTRY_BIN(subss_u128_r32),
5419 ENTRY_BIN(minss_u128_r32),
5420 ENTRY_BIN(divss_u128_r32),
5421 ENTRY_BIN(maxss_u128_r32),
5422 ENTRY_BIN(cvtss2sd_u128_r32),
5423 ENTRY_BIN(sqrtss_u128_r32),
5424};
5425
5426#ifdef TSTIEMAIMPL_WITH_GENERATOR
5427static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
5428{
5429 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5430
5431 static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
5432 {
5433 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5434 /** @todo More specials. */
5435 };
5436
5437 X86FXSTATE State;
5438 RT_ZERO(State);
5439 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5440 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5441 {
5442 PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;
5443
5444 IEMBINARYOUTPUT BinOut;
5445 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName), RTEXITCODE_FAILURE);
5446
5447 uint32_t cNormalInputPairs = 0;
5448 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5449 {
5450 SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);
5451
5452 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5453 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5454 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5455 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5456
5457 TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5458
5459 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
5460 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
5461 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
5462 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
5463 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
5464 cNormalInputPairs++;
5465 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5466 {
5467 iTest -= 1;
5468 continue;
5469 }
5470
5471 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5472 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5473 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5474 for (uint8_t iFz = 0; iFz < 2; iFz++)
5475 {
5476 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5477 | (iRounding << X86_MXCSR_RC_SHIFT)
5478 | (iDaz ? X86_MXCSR_DAZ : 0)
5479 | (iFz ? X86_MXCSR_FZ : 0)
5480 | X86_MXCSR_XCPT_MASK;
5481 IEMSSERESULT ResM; RT_ZERO(ResM);
5482 pfn(&State, &ResM, &TestData.InVal1, &TestData.r32Val2);
5483 TestData.fMxcsrIn = State.MXCSR;
5484 TestData.fMxcsrOut = ResM.MXCSR;
5485 TestData.OutVal = ResM.uResult;
5486 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5487
5488 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5489 IEMSSERESULT ResU; RT_ZERO(ResU);
5490 pfn(&State, &ResU, &TestData.InVal1, &TestData.r32Val2);
5491 TestData.fMxcsrIn = State.MXCSR;
5492 TestData.fMxcsrOut = ResU.MXCSR;
5493 TestData.OutVal = ResU.uResult;
5494 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5495
5496 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5497 if (fXcpt)
5498 {
5499 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5500 IEMSSERESULT Res1; RT_ZERO(Res1);
5501 pfn(&State, &Res1, &TestData.InVal1, &TestData.r32Val2);
5502 TestData.fMxcsrIn = State.MXCSR;
5503 TestData.fMxcsrOut = Res1.MXCSR;
5504 TestData.OutVal = Res1.uResult;
5505 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5506
5507 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5508 {
5509 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5510 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5511 IEMSSERESULT Res2; RT_ZERO(Res2);
5512 pfn(&State, &Res2, &TestData.InVal1, &TestData.r32Val2);
5513 TestData.fMxcsrIn = State.MXCSR;
5514 TestData.fMxcsrOut = Res2.MXCSR;
5515 TestData.OutVal = Res2.uResult;
5516 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5517 }
5518 if (!RT_IS_POWER_OF_TWO(fXcpt))
5519 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5520 if (fUnmasked & fXcpt)
5521 {
5522 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5523 IEMSSERESULT Res3; RT_ZERO(Res3);
5524 pfn(&State, &Res3, &TestData.InVal1, &TestData.r32Val2);
5525 TestData.fMxcsrIn = State.MXCSR;
5526 TestData.fMxcsrOut = Res3.MXCSR;
5527 TestData.OutVal = Res3.uResult;
5528 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5529 }
5530 }
5531 }
5532 }
5533 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5534 }
5535
5536 return RTEXITCODE_SUCCESS;
5537}
5538#endif
5539
5540static void SseBinaryU128R32Test(void)
5541{
5542 X86FXSTATE State;
5543 RT_ZERO(State);
5544 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5545 {
5546 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
5547 continue;
5548
5549 uint32_t const cbTests = *g_aSseBinaryU128R32[iFn].pcTests;
5550 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5551 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5552 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5553 if (!cbTests) RTTestSkipped(g_hTest, "no tests");
5554 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5555 {
5556 for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
5557 {
5558 IEMSSERESULT Res; RT_ZERO(Res);
5559
5560 State.MXCSR = paTests[iTest].fMxcsrIn;
5561 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5562 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5563 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5564 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5565 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5566 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5567 || !fValsIdentical)
5568 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5569 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5570 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5571 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5572 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5573 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5574 FormatR32(&paTests[iTest].r32Val2),
5575 iVar ? " " : "", Res.MXCSR,
5576 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5577 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5578 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5579 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5580 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5581 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5582 !fValsIdentical ? " - val" : "",
5583 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5584 }
 pfn = g_aSseBinaryU128R32[iFn].pfnNative;
5585 }
5586 }
5587}
5588
5589
5590/*
5591 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
5592 */
5593TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
5594
5595static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
5596{
5597 ENTRY_BIN(addsd_u128_r64),
5598 ENTRY_BIN(mulsd_u128_r64),
5599 ENTRY_BIN(subsd_u128_r64),
5600 ENTRY_BIN(minsd_u128_r64),
5601 ENTRY_BIN(divsd_u128_r64),
5602 ENTRY_BIN(maxsd_u128_r64),
5603 ENTRY_BIN(cvtsd2ss_u128_r64),
5604 ENTRY_BIN(sqrtsd_u128_r64),
5605};
5606
5607#ifdef TSTIEMAIMPL_WITH_GENERATOR
5608static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
5609{
5610 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5611
5612 static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
5613 {
5614 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5615 /** @todo More specials. */
5616 };
5617
5618 X86FXSTATE State;
5619 RT_ZERO(State);
5620 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5621 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5622 {
5623 PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;
5624
5625 IEMBINARYOUTPUT BinOut;
5626 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName), RTEXITCODE_FAILURE);
5627
5628 uint32_t cNormalInputPairs = 0;
5629 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5630 {
5631 SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);
5632
5633 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5634 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5635 TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5636
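            /* Ensure a minimum number of normal (non-special) input pairs: if we are running
               short towards the end of the run, redo this iteration with fresh random inputs. */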
5637 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5638 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
5639 cNormalInputPairs++;
5640 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5641 {
5642 iTest -= 1;
5643 continue;
5644 }
5645
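            /*
             * Sweep all four rounding modes and the DAZ/FZ combinations for this input (DAZ
             * treats denormal inputs as zero, FZ flushes denormal results to zero).  Each
             * combination is recorded twice: once with all exceptions masked and once with
             * them all unmasked.  If exception flags are raised, further variations are
             * recorded with those status flags pre-set on input and with the raised
             * exceptions masked or unmasked in different combinations, so the flag
             * accumulation and unmasking behaviour is covered as well.
             */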
5646 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5647 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5648 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5649 for (uint8_t iFz = 0; iFz < 2; iFz++)
5650 {
5651 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5652 | (iRounding << X86_MXCSR_RC_SHIFT)
5653 | (iDaz ? X86_MXCSR_DAZ : 0)
5654 | (iFz ? X86_MXCSR_FZ : 0)
5655 | X86_MXCSR_XCPT_MASK;
5656 IEMSSERESULT ResM; RT_ZERO(ResM);
5657 pfn(&State, &ResM, &TestData.InVal1, &TestData.r64Val2);
5658 TestData.fMxcsrIn = State.MXCSR;
5659 TestData.fMxcsrOut = ResM.MXCSR;
5660 TestData.OutVal = ResM.uResult;
5661 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5662
5663 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5664 IEMSSERESULT ResU; RT_ZERO(ResU);
5665 pfn(&State, &ResU, &TestData.InVal1, &TestData.r64Val2);
5666 TestData.fMxcsrIn = State.MXCSR;
5667 TestData.fMxcsrOut = ResU.MXCSR;
5668 TestData.OutVal = ResU.uResult;
5669 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5670
5671 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5672 if (fXcpt)
5673 {
5674 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5675 IEMSSERESULT Res1; RT_ZERO(Res1);
5676 pfn(&State, &Res1, &TestData.InVal1, &TestData.r64Val2);
5677 TestData.fMxcsrIn = State.MXCSR;
5678 TestData.fMxcsrOut = Res1.MXCSR;
5679 TestData.OutVal = Res1.uResult;
5680 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5681
5682 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5683 {
5684 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5685 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5686 IEMSSERESULT Res2; RT_ZERO(Res2);
5687 pfn(&State, &Res2, &TestData.InVal1, &TestData.r64Val2);
5688 TestData.fMxcsrIn = State.MXCSR;
5689 TestData.fMxcsrOut = Res2.MXCSR;
5690 TestData.OutVal = Res2.uResult;
5691 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5692 }
5693 if (!RT_IS_POWER_OF_TWO(fXcpt))
5694 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5695 if (fUnmasked & fXcpt)
5696 {
5697 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5698 IEMSSERESULT Res3; RT_ZERO(Res3);
5699 pfn(&State, &Res3, &TestData.InVal1, &TestData.r64Val2);
5700 TestData.fMxcsrIn = State.MXCSR;
5701 TestData.fMxcsrOut = Res3.MXCSR;
5702 TestData.OutVal = Res3.uResult;
5703 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5704 }
5705 }
5706 }
5707 }
5708 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5709 }
5710
5711 return RTEXITCODE_SUCCESS;
5712}
5713#endif
5714
5715
5716static void SseBinaryU128R64Test(void)
5717{
5718 X86FXSTATE State;
5719 RT_ZERO(State);
5720 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5721 {
5722 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
5723 continue;
5724
5725 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5726 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5727 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5728 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5729 if (!cTests) RTTestSkipped(g_hTest, "no tests");
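        /* Note: cTests is the size of the decompressed test data in bytes, hence the
           division by the entry size in the loop below. */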
5730 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5731 {
5732 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5733 {
5734 IEMSSERESULT Res; RT_ZERO(Res);
5735
5736 State.MXCSR = paTests[iTest].fMxcsrIn;
5737 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5738 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5739 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5740 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5741 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5742 "%s -> mxcsr=%#08x %s'%s\n"
5743 "%s expected %#08x %s'%s%s%s (%s)\n",
5744 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5745 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5746 FormatR64(&paTests[iTest].r64Val2),
5747 iVar ? " " : "", Res.MXCSR,
5748 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5749 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5750 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5751 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5752 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5753 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5754 ? " - val" : "",
5755 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5756 }
5757 }
5758 }
5759}
5760
5761
5762/*
5763 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
5764 */
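/* Note: cvttsd2si always truncates (rounds towards zero), whereas cvtsd2si converts using
   the rounding mode currently selected in MXCSR.RC. */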
5765TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
5766
5767static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
5768{
5769 ENTRY_BIN(cvttsd2si_i32_r64),
5770 ENTRY_BIN(cvtsd2si_i32_r64),
5771};
5772
5773#ifdef TSTIEMAIMPL_WITH_GENERATOR
5774static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
5775{
5776 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5777
5778 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
5779 {
5780 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5781 /** @todo More specials. */
5782 };
5783
5784 X86FXSTATE State;
5785 RT_ZERO(State);
5786 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5787 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5788 {
5789 PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;
5790
5791 IEMBINARYOUTPUT BinOut;
5792 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName), RTEXITCODE_FAILURE);
5793
5794 uint32_t cNormalInputPairs = 0;
5795 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5796 {
5797 SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);
5798
5799 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
5800
5801 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
5802 cNormalInputPairs++;
5803 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5804 {
5805 iTest -= 1;
5806 continue;
5807 }
5808
5809 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5810 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5811 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5812 for (uint8_t iFz = 0; iFz < 2; iFz++)
5813 {
5814 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5815 | (iRounding << X86_MXCSR_RC_SHIFT)
5816 | (iDaz ? X86_MXCSR_DAZ : 0)
5817 | (iFz ? X86_MXCSR_FZ : 0)
5818 | X86_MXCSR_XCPT_MASK;
5819 uint32_t fMxcsrM; int32_t i32OutM;
5820 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r64ValIn.u);
5821 TestData.fMxcsrIn = State.MXCSR;
5822 TestData.fMxcsrOut = fMxcsrM;
5823 TestData.i32ValOut = i32OutM;
5824 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5825
5826 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5827 uint32_t fMxcsrU; int32_t i32OutU;
5828 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r64ValIn.u);
5829 TestData.fMxcsrIn = State.MXCSR;
5830 TestData.fMxcsrOut = fMxcsrU;
5831 TestData.i32ValOut = i32OutU;
5832 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5833
5834 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
5835 if (fXcpt)
5836 {
5837 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5838 uint32_t fMxcsr1; int32_t i32Out1;
5839 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r64ValIn.u);
5840 TestData.fMxcsrIn = State.MXCSR;
5841 TestData.fMxcsrOut = fMxcsr1;
5842 TestData.i32ValOut = i32Out1;
5843 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5844
5845 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
5846 {
5847 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
5848 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5849 uint32_t fMxcsr2; int32_t i32Out2;
5850 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r64ValIn.u);
5851 TestData.fMxcsrIn = State.MXCSR;
5852 TestData.fMxcsrOut = fMxcsr2;
5853 TestData.i32ValOut = i32Out2;
5854 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5855 }
5856 if (!RT_IS_POWER_OF_TWO(fXcpt))
5857 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5858 if (fUnmasked & fXcpt)
5859 {
5860 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5861 uint32_t fMxcsr3; int32_t i32Out3;
5862 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r64ValIn.u);
5863 TestData.fMxcsrIn = State.MXCSR;
5864 TestData.fMxcsrOut = fMxcsr3;
5865 TestData.i32ValOut = i32Out3;
5866 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5867 }
5868 }
5869 }
5870 }
5871 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5872 }
5873
5874 return RTEXITCODE_SUCCESS;
5875}
5876#endif
5877
5878
5879static void SseBinaryI32R64Test(void)
5880{
5881 X86FXSTATE State;
5882 RT_ZERO(State);
5883 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5884 {
5885 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
5886 continue;
5887
5888 uint32_t const cTests = *g_aSseBinaryI32R64[iFn].pcTests;
5889 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5890 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5891 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5892 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5893 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5894 {
5895 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R64_TEST_T); iTest++)
5896 {
5897 uint32_t fMxcsr = 0;
5898 int32_t i32Dst = 0;
5899
5900 State.MXCSR = paTests[iTest].fMxcsrIn;
5901 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5902 if ( fMxcsr != paTests[iTest].fMxcsrOut
5903 || i32Dst != paTests[iTest].i32ValOut)
5904 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5905 "%s -> mxcsr=%#08x %RI32\n"
5906 "%s expected %#08x %RI32%s%s (%s)\n",
5907 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5908 FormatR64(&paTests[iTest].r64ValIn),
5909 iVar ? " " : "", fMxcsr, i32Dst,
5910 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5911 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5912 i32Dst != paTests[iTest].i32ValOut
5913 ? " - val" : "",
5914 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5915 }
5916 }
5917 }
5918}
5919
5920
5921/*
5922 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
5923 */
5924TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);
5925
5926static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
5927{
5928 ENTRY_BIN(cvttsd2si_i64_r64),
5929 ENTRY_BIN(cvtsd2si_i64_r64),
5930};
5931
5932#ifdef TSTIEMAIMPL_WITH_GENERATOR
5933static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
5934{
5935 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5936
5937 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
5938 {
5939 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5940 /** @todo More specials. */
5941 };
5942
5943 X86FXSTATE State;
5944 RT_ZERO(State);
5945 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5946 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
5947 {
5948 PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;
5949
5950 IEMBINARYOUTPUT BinOut;
5951 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName), RTEXITCODE_FAILURE);
5952
5953 uint32_t cNormalInputPairs = 0;
5954 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5955 {
5956 SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);
5957
5958 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
5959
5960 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
5961 cNormalInputPairs++;
5962 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5963 {
5964 iTest -= 1;
5965 continue;
5966 }
5967
5968 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5969 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5970 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5971 for (uint8_t iFz = 0; iFz < 2; iFz++)
5972 {
5973 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5974 | (iRounding << X86_MXCSR_RC_SHIFT)
5975 | (iDaz ? X86_MXCSR_DAZ : 0)
5976 | (iFz ? X86_MXCSR_FZ : 0)
5977 | X86_MXCSR_XCPT_MASK;
5978 uint32_t fMxcsrM; int64_t i64OutM;
5979 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r64ValIn.u);
5980 TestData.fMxcsrIn = State.MXCSR;
5981 TestData.fMxcsrOut = fMxcsrM;
5982 TestData.i64ValOut = i64OutM;
5983 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5984
5985 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5986 uint32_t fMxcsrU; int64_t i64OutU;
5987 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r64ValIn.u);
5988 TestData.fMxcsrIn = State.MXCSR;
5989 TestData.fMxcsrOut = fMxcsrU;
5990 TestData.i64ValOut = i64OutU;
5991 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5992
5993 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
5994 if (fXcpt)
5995 {
5996 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5997 uint32_t fMxcsr1; int64_t i64Out1;
5998 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r64ValIn.u);
5999 TestData.fMxcsrIn = State.MXCSR;
6000 TestData.fMxcsrOut = fMxcsr1;
6001 TestData.i64ValOut = i64Out1;
6002 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6003
6004 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6005 {
6006 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6007 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6008 uint32_t fMxcsr2; int64_t i64Out2;
6009 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r64ValIn.u);
6010 TestData.fMxcsrIn = State.MXCSR;
6011 TestData.fMxcsrOut = fMxcsr2;
6012 TestData.i64ValOut = i64Out2;
6013 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6014 }
6015 if (!RT_IS_POWER_OF_TWO(fXcpt))
6016 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6017 if (fUnmasked & fXcpt)
6018 {
6019 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6020 uint32_t fMxcsr3; int64_t i64Out3;
6021 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r64ValIn.u);
6022 TestData.fMxcsrIn = State.MXCSR;
6023 TestData.fMxcsrOut = fMxcsr3;
6024 TestData.i64ValOut = i64Out3;
6025 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6026 }
6027 }
6028 }
6029 }
6030 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6031 }
6032
6033 return RTEXITCODE_SUCCESS;
6034}
6035#endif
6036
6037
6038static void SseBinaryI64R64Test(void)
6039{
6040 X86FXSTATE State;
6041 RT_ZERO(State);
6042 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6043 {
6044 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6045 continue;
6046
6047 uint32_t const cTests = *g_aSseBinaryI64R64[iFn].pcTests;
6048 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6049 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6050        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R64[iFn]);
6051 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6052 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6053 {
6054 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R64_TEST_T); iTest++)
6055 {
6056 uint32_t fMxcsr = 0;
6057 int64_t i64Dst = 0;
6058
6059 State.MXCSR = paTests[iTest].fMxcsrIn;
6060 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
6061 if ( fMxcsr != paTests[iTest].fMxcsrOut
6062 || i64Dst != paTests[iTest].i64ValOut)
6063 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6064 "%s -> mxcsr=%#08x %RI64\n"
6065 "%s expected %#08x %RI64%s%s (%s)\n",
6066 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6067 FormatR64(&paTests[iTest].r64ValIn),
6068 iVar ? " " : "", fMxcsr, i64Dst,
6069 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6070 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6071 i64Dst != paTests[iTest].i64ValOut
6072 ? " - val" : "",
6073 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6074 }
6075 }
6076 }
6077}
6078
6079
6080/*
6081 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
6082 */
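/* Note: when the source cannot be converted (NaN, infinity or an out-of-range result) and the
   invalid-operation exception is masked, these instructions return the integer indefinite
   value (INT32_MIN for a 32-bit destination). */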
6083TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
6084
6085static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
6086{
6087 ENTRY_BIN(cvttss2si_i32_r32),
6088 ENTRY_BIN(cvtss2si_i32_r32),
6089};
6090
6091#ifdef TSTIEMAIMPL_WITH_GENERATOR
6092static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
6093{
6094 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6095
6096 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6097 {
6098 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6099 /** @todo More specials. */
6100 };
6101
6102 X86FXSTATE State;
6103 RT_ZERO(State);
6104 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6105 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6106 {
6107 PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;
6108
6109 IEMBINARYOUTPUT BinOut;
6110 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName), RTEXITCODE_FAILURE);
6111
6112 uint32_t cNormalInputPairs = 0;
6113 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6114 {
6115 SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);
6116
6117 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6118
6119 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6120 cNormalInputPairs++;
6121 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6122 {
6123 iTest -= 1;
6124 continue;
6125 }
6126
6127 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6128 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6129 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6130 for (uint8_t iFz = 0; iFz < 2; iFz++)
6131 {
6132 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6133 | (iRounding << X86_MXCSR_RC_SHIFT)
6134 | (iDaz ? X86_MXCSR_DAZ : 0)
6135 | (iFz ? X86_MXCSR_FZ : 0)
6136 | X86_MXCSR_XCPT_MASK;
6137 uint32_t fMxcsrM; int32_t i32OutM;
6138 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r32ValIn.u);
6139 TestData.fMxcsrIn = State.MXCSR;
6140 TestData.fMxcsrOut = fMxcsrM;
6141 TestData.i32ValOut = i32OutM;
6142 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6143
6144 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6145 uint32_t fMxcsrU; int32_t i32OutU;
6146 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r32ValIn.u);
6147 TestData.fMxcsrIn = State.MXCSR;
6148 TestData.fMxcsrOut = fMxcsrU;
6149 TestData.i32ValOut = i32OutU;
6150 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6151
6152 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6153 if (fXcpt)
6154 {
6155 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6156 uint32_t fMxcsr1; int32_t i32Out1;
6157 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r32ValIn.u);
6158 TestData.fMxcsrIn = State.MXCSR;
6159 TestData.fMxcsrOut = fMxcsr1;
6160 TestData.i32ValOut = i32Out1;
6161 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6162
6163 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6164 {
6165 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6166 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6167 uint32_t fMxcsr2; int32_t i32Out2;
6168 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r32ValIn.u);
6169 TestData.fMxcsrIn = State.MXCSR;
6170 TestData.fMxcsrOut = fMxcsr2;
6171 TestData.i32ValOut = i32Out2;
6172 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6173 }
6174 if (!RT_IS_POWER_OF_TWO(fXcpt))
6175 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6176 if (fUnmasked & fXcpt)
6177 {
6178 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6179 uint32_t fMxcsr3; int32_t i32Out3;
6180 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r32ValIn.u);
6181 TestData.fMxcsrIn = State.MXCSR;
6182 TestData.fMxcsrOut = fMxcsr3;
6183 TestData.i32ValOut = i32Out3;
6184 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6185 }
6186 }
6187 }
6188 }
6189 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6190 }
6191
6192 return RTEXITCODE_SUCCESS;
6193}
6194#endif
6195
6196
6197static void SseBinaryI32R32Test(void)
6198{
6199 X86FXSTATE State;
6200 RT_ZERO(State);
6201 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6202 {
6203 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6204 continue;
6205
6206 uint32_t const cTests = *g_aSseBinaryI32R32[iFn].pcTests;
6207 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6208 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6209 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6210 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6211 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6212 {
6213 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R32_TEST_T); iTest++)
6214 {
6215 uint32_t fMxcsr = 0;
6216 int32_t i32Dst = 0;
6217
6218 State.MXCSR = paTests[iTest].fMxcsrIn;
6219 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
6220 if ( fMxcsr != paTests[iTest].fMxcsrOut
6221 || i32Dst != paTests[iTest].i32ValOut)
6222 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6223 "%s -> mxcsr=%#08x %RI32\n"
6224 "%s expected %#08x %RI32%s%s (%s)\n",
6225 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6226 FormatR32(&paTests[iTest].r32ValIn),
6227 iVar ? " " : "", fMxcsr, i32Dst,
6228 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6229 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6230 i32Dst != paTests[iTest].i32ValOut
6231 ? " - val" : "",
6232 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6233 }
6234 }
6235 }
6236}
6237
6238
6239/*
6240 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
6241 */
6242TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
6243
6244static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
6245{
6246 ENTRY_BIN(cvttss2si_i64_r32),
6247 ENTRY_BIN(cvtss2si_i64_r32),
6248};
6249
6250#ifdef TSTIEMAIMPL_WITH_GENERATOR
6251static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
6252{
6253 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6254
6255 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6256 {
6257 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6258 /** @todo More specials. */
6259 };
6260
6261 X86FXSTATE State;
6262 RT_ZERO(State);
6263 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6264 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6265 {
6266 PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
6267
6268 IEMBINARYOUTPUT BinOut;
6269 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName), RTEXITCODE_FAILURE);
6270
6271 uint32_t cNormalInputPairs = 0;
6272 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6273 {
6274 SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
6275
6276 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6277
6278 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6279 cNormalInputPairs++;
6280 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6281 {
6282 iTest -= 1;
6283 continue;
6284 }
6285
6286 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6287 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6288 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6289 for (uint8_t iFz = 0; iFz < 2; iFz++)
6290 {
6291 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6292 | (iRounding << X86_MXCSR_RC_SHIFT)
6293 | (iDaz ? X86_MXCSR_DAZ : 0)
6294 | (iFz ? X86_MXCSR_FZ : 0)
6295 | X86_MXCSR_XCPT_MASK;
6296 uint32_t fMxcsrM; int64_t i64OutM;
6297 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r32ValIn.u);
6298 TestData.fMxcsrIn = State.MXCSR;
6299 TestData.fMxcsrOut = fMxcsrM;
6300 TestData.i64ValOut = i64OutM;
6301 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6302
6303 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6304 uint32_t fMxcsrU; int64_t i64OutU;
6305 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r32ValIn.u);
6306 TestData.fMxcsrIn = State.MXCSR;
6307 TestData.fMxcsrOut = fMxcsrU;
6308 TestData.i64ValOut = i64OutU;
6309 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6310
6311 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6312 if (fXcpt)
6313 {
6314 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6315 uint32_t fMxcsr1; int64_t i64Out1;
6316 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r32ValIn.u);
6317 TestData.fMxcsrIn = State.MXCSR;
6318 TestData.fMxcsrOut = fMxcsr1;
6319 TestData.i64ValOut = i64Out1;
6320 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6321
6322 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6323 {
6324 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6325 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6326 uint32_t fMxcsr2; int64_t i64Out2;
6327 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r32ValIn.u);
6328 TestData.fMxcsrIn = State.MXCSR;
6329 TestData.fMxcsrOut = fMxcsr2;
6330 TestData.i64ValOut = i64Out2;
6331 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6332 }
6333 if (!RT_IS_POWER_OF_TWO(fXcpt))
6334 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6335 if (fUnmasked & fXcpt)
6336 {
6337 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6338 uint32_t fMxcsr3; int64_t i64Out3;
6339 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r32ValIn.u);
6340 TestData.fMxcsrIn = State.MXCSR;
6341 TestData.fMxcsrOut = fMxcsr3;
6342 TestData.i64ValOut = i64Out3;
6343 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6344 }
6345 }
6346 }
6347 }
6348 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6349 }
6350
6351 return RTEXITCODE_SUCCESS;
6352}
6353#endif
6354
6355
6356static void SseBinaryI64R32Test(void)
6357{
6358 X86FXSTATE State;
6359 RT_ZERO(State);
6360 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6361 {
6362 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6363 continue;
6364
6365 uint32_t const cTests = *g_aSseBinaryI64R32[iFn].pcTests;
6366 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6367 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6368 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6369 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6370 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6371 {
6372 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R32_TEST_T); iTest++)
6373 {
6374 uint32_t fMxcsr = 0;
6375 int64_t i64Dst = 0;
6376
6377 State.MXCSR = paTests[iTest].fMxcsrIn;
6378 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
6379 if ( fMxcsr != paTests[iTest].fMxcsrOut
6380 || i64Dst != paTests[iTest].i64ValOut)
6381 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6382 "%s -> mxcsr=%#08x %RI64\n"
6383 "%s expected %#08x %RI64%s%s (%s)\n",
6384 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6385 FormatR32(&paTests[iTest].r32ValIn),
6386 iVar ? " " : "", fMxcsr, i64Dst,
6387 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6388 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6389 i64Dst != paTests[iTest].i64ValOut
6390 ? " - val" : "",
6391 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6392 }
6393 }
6394 }
6395}
6396
6397
6398/*
6399 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6400 */
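/* Note: every 32-bit signed integer is exactly representable in double precision, so
   cvtsi2sd with a 32-bit source is always exact regardless of the rounding mode. */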
6401TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
6402
6403static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
6404{
6405 ENTRY_BIN(cvtsi2sd_r64_i32)
6406};
6407
6408#ifdef TSTIEMAIMPL_WITH_GENERATOR
6409static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6410{
6411 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6412
6413 static int32_t const s_aSpecials[] =
6414 {
6415 INT32_MIN,
6416 INT32_MAX,
6417 /** @todo More specials. */
6418 };
6419
6420 X86FXSTATE State;
6421 RT_ZERO(State);
6422 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6423 {
6424 PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;
6425
6426 IEMBINARYOUTPUT BinOut;
6427 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName), RTEXITCODE_FAILURE);
6428
6429 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6430 {
6431 SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);
6432
6433 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6434
6435 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6436 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6437 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6438 for (uint8_t iFz = 0; iFz < 2; iFz++)
6439 {
6440 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6441 | (iRounding << X86_MXCSR_RC_SHIFT)
6442 | (iDaz ? X86_MXCSR_DAZ : 0)
6443 | (iFz ? X86_MXCSR_FZ : 0)
6444 | X86_MXCSR_XCPT_MASK;
6445 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6446 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i32ValIn);
6447 TestData.fMxcsrIn = State.MXCSR;
6448 TestData.fMxcsrOut = fMxcsrM;
6449 TestData.r64ValOut = r64OutM;
6450 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6451
6452 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6453 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6454 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i32ValIn);
6455 TestData.fMxcsrIn = State.MXCSR;
6456 TestData.fMxcsrOut = fMxcsrU;
6457 TestData.r64ValOut = r64OutU;
6458 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6459
6460 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6461 if (fXcpt)
6462 {
6463 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6464 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6465 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i32ValIn);
6466 TestData.fMxcsrIn = State.MXCSR;
6467 TestData.fMxcsrOut = fMxcsr1;
6468 TestData.r64ValOut = r64Out1;
6469 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6470
6471 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6472 {
6473 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6474 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6475 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6476 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i32ValIn);
6477 TestData.fMxcsrIn = State.MXCSR;
6478 TestData.fMxcsrOut = fMxcsr2;
6479 TestData.r64ValOut = r64Out2;
6480 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6481 }
6482 if (!RT_IS_POWER_OF_TWO(fXcpt))
6483 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6484 if (fUnmasked & fXcpt)
6485 {
6486 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6487 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6488 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i32ValIn);
6489 TestData.fMxcsrIn = State.MXCSR;
6490 TestData.fMxcsrOut = fMxcsr3;
6491 TestData.r64ValOut = r64Out3;
6492 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6493 }
6494 }
6495 }
6496 }
6497 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6498 }
6499
6500 return RTEXITCODE_SUCCESS;
6501}
6502#endif
6503
6504
6505static void SseBinaryR64I32Test(void)
6506{
6507 X86FXSTATE State;
6508 RT_ZERO(State);
6509 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6510 {
6511 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
6512 continue;
6513
6514 uint32_t const cTests = *g_aSseBinaryR64I32[iFn].pcTests;
6515 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6516 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6517 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6518 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6519 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6520 {
6521 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I32_TEST_T); iTest++)
6522 {
6523 uint32_t fMxcsr = 0;
6524 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6525
6526 State.MXCSR = paTests[iTest].fMxcsrIn;
6527 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6528 if ( fMxcsr != paTests[iTest].fMxcsrOut
6529 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6530 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6531 "%s -> mxcsr=%#08x %s\n"
6532 "%s expected %#08x %s%s%s (%s)\n",
6533 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6534                             paTests[iTest].i32ValIn,
6535 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6536 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6537 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6538 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6539 ? " - val" : "",
6540 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6541 }
6542 }
6543 }
6544}
6545
6546
6547/*
6548 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6549 */
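/* Note: unlike the 32-bit variant, 64-bit integers with more than 53 significant bits cannot
   be represented exactly in double precision, so these conversions can be inexact and thus
   depend on the rounding mode. */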
6550TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
6551
6552static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
6553{
6554 ENTRY_BIN(cvtsi2sd_r64_i64),
6555};
6556
6557#ifdef TSTIEMAIMPL_WITH_GENERATOR
6558static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6559{
6560 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6561
6562 static int64_t const s_aSpecials[] =
6563 {
6564 INT64_MIN,
6565 INT64_MAX
6566 /** @todo More specials. */
6567 };
6568
6569 X86FXSTATE State;
6570 RT_ZERO(State);
6571 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6572 {
6573 PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;
6574
6575 IEMBINARYOUTPUT BinOut;
6576 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName), RTEXITCODE_FAILURE);
6577
6578 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6579 {
6580 SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);
6581
6582 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6583
6584 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6585 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6586 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6587 for (uint8_t iFz = 0; iFz < 2; iFz++)
6588 {
6589 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6590 | (iRounding << X86_MXCSR_RC_SHIFT)
6591 | (iDaz ? X86_MXCSR_DAZ : 0)
6592 | (iFz ? X86_MXCSR_FZ : 0)
6593 | X86_MXCSR_XCPT_MASK;
6594 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6595 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i64ValIn);
6596 TestData.fMxcsrIn = State.MXCSR;
6597 TestData.fMxcsrOut = fMxcsrM;
6598 TestData.r64ValOut = r64OutM;
6599 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6600
6601 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6602 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6603 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i64ValIn);
6604 TestData.fMxcsrIn = State.MXCSR;
6605 TestData.fMxcsrOut = fMxcsrU;
6606 TestData.r64ValOut = r64OutU;
6607 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6608
6609 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6610 if (fXcpt)
6611 {
6612 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6613 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6614 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i64ValIn);
6615 TestData.fMxcsrIn = State.MXCSR;
6616 TestData.fMxcsrOut = fMxcsr1;
6617 TestData.r64ValOut = r64Out1;
6618 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6619
6620 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6621 {
6622 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6623 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6624 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6625 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i64ValIn);
6626 TestData.fMxcsrIn = State.MXCSR;
6627 TestData.fMxcsrOut = fMxcsr2;
6628 TestData.r64ValOut = r64Out2;
6629 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6630 }
6631 if (!RT_IS_POWER_OF_TWO(fXcpt))
6632 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6633 if (fUnmasked & fXcpt)
6634 {
6635 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6636 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6637 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i64ValIn);
6638 TestData.fMxcsrIn = State.MXCSR;
6639 TestData.fMxcsrOut = fMxcsr3;
6640 TestData.r64ValOut = r64Out3;
6641 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6642 }
6643 }
6644 }
6645 }
6646 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6647 }
6648
6649 return RTEXITCODE_SUCCESS;
6650}
6651#endif
6652
6653
6654static void SseBinaryR64I64Test(void)
6655{
6656 X86FXSTATE State;
6657 RT_ZERO(State);
6658 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6659 {
6660 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
6661 continue;
6662
6663 uint32_t const cTests = *g_aSseBinaryR64I64[iFn].pcTests;
6664 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6665 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6666 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6667 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6668 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6669 {
6670 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I64_TEST_T); iTest++)
6671 {
6672 uint32_t fMxcsr = 0;
6673 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6674
6675 State.MXCSR = paTests[iTest].fMxcsrIn;
6676 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6677 if ( fMxcsr != paTests[iTest].fMxcsrOut
6678 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6679 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6680 "%s -> mxcsr=%#08x %s\n"
6681 "%s expected %#08x %s%s%s (%s)\n",
6682 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6683                             paTests[iTest].i64ValIn,
6684 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6685 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6686 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6687 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6688 ? " - val" : "",
6689 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6690 }
6691 }
6692 }
6693}
6694
6695
6696/*
6697 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6698 */
6699TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6700
6701static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6702{
6703 ENTRY_BIN(cvtsi2ss_r32_i32),
6704};
6705
6706#ifdef TSTIEMAIMPL_WITH_GENERATOR
6707static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6708{
6709 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6710
6711 static int32_t const s_aSpecials[] =
6712 {
6713 INT32_MIN,
6714 INT32_MAX,
6715 /** @todo More specials. */
6716 };
6717
6718 X86FXSTATE State;
6719 RT_ZERO(State);
6720 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6721 {
6722 PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;
6723
6724 IEMBINARYOUTPUT BinOut;
6725 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32I32[iFn].pszName), RTEXITCODE_FAILURE);
6726
6727 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6728 {
6729 SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);
6730
6731 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6732
6733 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6734 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6735 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6736 for (uint8_t iFz = 0; iFz < 2; iFz++)
6737 {
6738 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6739 | (iRounding << X86_MXCSR_RC_SHIFT)
6740 | (iDaz ? X86_MXCSR_DAZ : 0)
6741 | (iFz ? X86_MXCSR_FZ : 0)
6742 | X86_MXCSR_XCPT_MASK;
6743 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6744 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i32ValIn);
6745 TestData.fMxcsrIn = State.MXCSR;
6746 TestData.fMxcsrOut = fMxcsrM;
6747 TestData.r32ValOut = r32OutM;
6748 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6749
6750 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6751 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6752 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i32ValIn);
6753 TestData.fMxcsrIn = State.MXCSR;
6754 TestData.fMxcsrOut = fMxcsrU;
6755 TestData.r32ValOut = r32OutU;
6756 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6757
6758 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6759 if (fXcpt)
6760 {
6761 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6762 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6763 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i32ValIn);
6764 TestData.fMxcsrIn = State.MXCSR;
6765 TestData.fMxcsrOut = fMxcsr1;
6766 TestData.r32ValOut = r32Out1;
6767 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6768
6769 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6770 {
6771 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6772 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6773 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6774 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i32ValIn);
6775 TestData.fMxcsrIn = State.MXCSR;
6776 TestData.fMxcsrOut = fMxcsr2;
6777 TestData.r32ValOut = r32Out2;
6778 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6779 }
6780 if (!RT_IS_POWER_OF_TWO(fXcpt))
6781 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6782 if (fUnmasked & fXcpt)
6783 {
6784 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6785 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6786 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i32ValIn);
6787 TestData.fMxcsrIn = State.MXCSR;
6788 TestData.fMxcsrOut = fMxcsr3;
6789 TestData.r32ValOut = r32Out3;
6790 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6791 }
6792 }
6793 }
6794 }
6795 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6796 }
6797
6798 return RTEXITCODE_SUCCESS;
6799}
6800#endif
6801
6802
6803static void SseBinaryR32I32Test(void)
6804{
6805 X86FXSTATE State;
6806 RT_ZERO(State);
6807 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6808 {
6809 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
6810 continue;
6811
6812 uint32_t const cTests = *g_aSseBinaryR32I32[iFn].pcTests;
6813 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6814 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6815 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6816 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6817 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6818 {
6819 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I32_TEST_T); iTest++)
6820 {
6821 uint32_t fMxcsr = 0;
6822 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6823
6824 State.MXCSR = paTests[iTest].fMxcsrIn;
6825 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6826 if ( fMxcsr != paTests[iTest].fMxcsrOut
6827 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6828 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6829                             "%s -> mxcsr=%#08x %s\n"
6830                             "%s expected %#08x %s%s%s (%s)\n",
6831 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6832                             paTests[iTest].i32ValIn,
6833 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6834 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6835 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6836 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6837 ? " - val" : "",
6838 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6839 }
6840 }
6841 }
6842}
6843
6844
6845/*
6846 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6847 */
6848TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6849
6850static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6851{
6852 ENTRY_BIN(cvtsi2ss_r32_i64),
6853};
6854
6855#ifdef TSTIEMAIMPL_WITH_GENERATOR
6856static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6857{
6858 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6859
6860 static int64_t const s_aSpecials[] =
6861 {
6862 INT64_MIN,
6863 INT64_MAX
6864 /** @todo More specials. */
6865 };
6866
6867 X86FXSTATE State;
6868 RT_ZERO(State);
6869 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6870 {
6871 PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;
6872
6873 IEMBINARYOUTPUT BinOut;
6874 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32I64[iFn].pszName), RTEXITCODE_FAILURE);
6875
6876 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6877 {
6878 SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);
6879
6880 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6881
6882 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6883 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6884 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6885 for (uint8_t iFz = 0; iFz < 2; iFz++)
6886 {
6887 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6888 | (iRounding << X86_MXCSR_RC_SHIFT)
6889 | (iDaz ? X86_MXCSR_DAZ : 0)
6890 | (iFz ? X86_MXCSR_FZ : 0)
6891 | X86_MXCSR_XCPT_MASK;
6892 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6893 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i64ValIn);
6894 TestData.fMxcsrIn = State.MXCSR;
6895 TestData.fMxcsrOut = fMxcsrM;
6896 TestData.r32ValOut = r32OutM;
6897 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6898
6899 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6900 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6901 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i64ValIn);
6902 TestData.fMxcsrIn = State.MXCSR;
6903 TestData.fMxcsrOut = fMxcsrU;
6904 TestData.r32ValOut = r32OutU;
6905 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6906
6907 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6908 if (fXcpt)
6909 {
6910 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6911 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6912 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i64ValIn);
6913 TestData.fMxcsrIn = State.MXCSR;
6914 TestData.fMxcsrOut = fMxcsr1;
6915 TestData.r32ValOut = r32Out1;
6916 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6917
6918 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6919 {
6920 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6921 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6922 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6923 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i64ValIn);
6924 TestData.fMxcsrIn = State.MXCSR;
6925 TestData.fMxcsrOut = fMxcsr2;
6926 TestData.r32ValOut = r32Out2;
6927 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6928 }
6929 if (!RT_IS_POWER_OF_TWO(fXcpt))
6930 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6931 if (fUnmasked & fXcpt)
6932 {
6933 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6934 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6935 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i64ValIn);
6936 TestData.fMxcsrIn = State.MXCSR;
6937 TestData.fMxcsrOut = fMxcsr3;
6938 TestData.r32ValOut = r32Out3;
6939 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6940 }
6941 }
6942 }
6943 }
6944 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6945 }
6946
6947 return RTEXITCODE_SUCCESS;
6948}
6949#endif
6950
6951
6952static void SseBinaryR32I64Test(void)
6953{
6954 X86FXSTATE State;
6955 RT_ZERO(State);
6956 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6957 {
6958 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
6959 continue;
6960
6961 uint32_t const cTests = *g_aSseBinaryR32I64[iFn].pcTests;
6962 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
6963 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
6964 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
6965 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6966 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6967 {
6968 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I64_TEST_T); iTest++)
6969 {
6970 uint32_t fMxcsr = 0;
6971 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6972
6973 State.MXCSR = paTests[iTest].fMxcsrIn;
6974 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
6975 if ( fMxcsr != paTests[iTest].fMxcsrOut
6976 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6977 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6978                             "%s -> mxcsr=%#08x %s\n"
6979                             "%s expected %#08x %s%s%s (%s)\n",
6980 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6981                             paTests[iTest].i64ValIn,
6982 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6983 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6984 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6985 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6986 ? " - val" : "",
6987 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6988 }
6989 }
6990 }
6991}
6992
6993
6994/*
6995 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
6996 */
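/* Note: these instructions report the comparison result solely in EFLAGS: ZF, PF and CF are
   set from the compare (all three set for an unordered result) while OF, SF and AF are
   cleared.  comiss raises the invalid-operation exception for QNaN as well as SNaN operands,
   whereas ucomiss only does so for SNaNs. */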
6997TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6998
6999static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
7000{
7001 ENTRY_BIN(ucomiss_u128),
7002 ENTRY_BIN(comiss_u128),
7003 ENTRY_BIN_AVX(vucomiss_u128),
7004 ENTRY_BIN_AVX(vcomiss_u128),
7005};
7006
7007#ifdef TSTIEMAIMPL_WITH_GENERATOR
7008static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
7009{
7010 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7011
7012 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7013 {
7014 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7015 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7016 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7017 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7018 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7019 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7020 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7021 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7022 /** @todo More specials. */
7023 };
7024
7025 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7026 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7027 {
7028 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;
7029
7030 IEMBINARYOUTPUT BinOut;
7031 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareEflR32R32[iFn].pszName), RTEXITCODE_FAILURE);
7032
7033 uint32_t cNormalInputPairs = 0;
7034 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7035 {
7036 SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);
7037 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7038 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7039
7040 TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7041 TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7042
7043 ValIn1.ar32[0] = TestData.r32ValIn1;
7044 ValIn2.ar32[0] = TestData.r32ValIn2;
7045
7046 if ( RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
7047 && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
7048 cNormalInputPairs++;
7049 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7050 {
7051 iTest -= 1;
7052 continue;
7053 }
7054
7055 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7056 uint32_t const fEFlags = RandEFlags();
7057 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7058 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7059 for (uint8_t iFz = 0; iFz < 2; iFz++)
7060 {
7061 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7062 | (iRounding << X86_MXCSR_RC_SHIFT)
7063 | (iDaz ? X86_MXCSR_DAZ : 0)
7064 | (iFz ? X86_MXCSR_FZ : 0)
7065 | X86_MXCSR_XCPT_MASK;
7066 uint32_t fMxcsrM = fMxcsrIn;
7067 uint32_t fEFlagsM = fEFlags;
7068 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7069 TestData.fMxcsrIn = fMxcsrIn;
7070 TestData.fMxcsrOut = fMxcsrM;
7071 TestData.fEflIn = fEFlags;
7072 TestData.fEflOut = fEFlagsM;
7073 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7074
7075 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7076 uint32_t fMxcsrU = fMxcsrIn;
7077 uint32_t fEFlagsU = fEFlags;
7078 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7079 TestData.fMxcsrIn = fMxcsrIn;
7080 TestData.fMxcsrOut = fMxcsrU;
7081 TestData.fEflIn = fEFlags;
7082 TestData.fEflOut = fEFlagsU;
7083 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7084
7085 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7086 if (fXcpt)
7087 {
7088 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7089 uint32_t fMxcsr1 = fMxcsrIn;
7090 uint32_t fEFlags1 = fEFlags;
7091 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7092 TestData.fMxcsrIn = fMxcsrIn;
7093 TestData.fMxcsrOut = fMxcsr1;
7094 TestData.fEflIn = fEFlags;
7095 TestData.fEflOut = fEFlags1;
7096 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7097
7098 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7099 {
7100 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7101 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7102 uint32_t fMxcsr2 = fMxcsrIn;
7103 uint32_t fEFlags2 = fEFlags;
7104 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7105 TestData.fMxcsrIn = fMxcsrIn;
7106 TestData.fMxcsrOut = fMxcsr2;
7107 TestData.fEflIn = fEFlags;
7108 TestData.fEflOut = fEFlags2;
7109 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7110 }
7111 if (!RT_IS_POWER_OF_TWO(fXcpt))
7112 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7113 if (fUnmasked & fXcpt)
7114 {
7115 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7116 uint32_t fMxcsr3 = fMxcsrIn;
7117 uint32_t fEFlags3 = fEFlags;
7118 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7119 TestData.fMxcsrIn = fMxcsrIn;
7120 TestData.fMxcsrOut = fMxcsr3;
7121 TestData.fEflIn = fEFlags;
7122 TestData.fEflOut = fEFlags3;
7123 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7124 }
7125 }
7126 }
7127 }
7128 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7129 }
7130
7131 return RTEXITCODE_SUCCESS;
7132}
7133#endif
7134
7135static void SseCompareEflR32R32Test(void)
7136{
7137 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7138 {
7139 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7140 continue;
7141
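        /* Note: pcTests seems to hold the decompressed data size in bytes,
           hence the division by the test record size in the loop below. */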
7142 uint32_t const cTests = *g_aSseCompareEflR32R32[iFn].pcTests;
7143 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7144 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7145 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7146 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7147 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7148 {
7149 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R32_R32_TEST_T); iTest++)
7150 {
7151 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7152 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7153
7154 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
7155 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
7156 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7157 uint32_t fEFlags = paTests[iTest].fEflIn;
7158 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7159 if ( fMxcsr != paTests[iTest].fMxcsrOut
7160 || fEFlags != paTests[iTest].fEflOut)
7161 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7162 "%s -> mxcsr=%#08x %#08x\n"
7163 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7164 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7165 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7166 iVar ? " " : "", fMxcsr, fEFlags,
7167 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7168 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7169 FormatMxcsr(paTests[iTest].fMxcsrIn),
7170 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7171 }
7172 }
7173 }
7174}
7175
7176
7177/*
7178 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
7179 */
7180TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
7181
7182static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
7183{
7184 ENTRY_BIN(ucomisd_u128),
7185 ENTRY_BIN(comisd_u128),
7186 ENTRY_BIN_AVX(vucomisd_u128),
7187 ENTRY_BIN_AVX(vcomisd_u128)
7188};
7189
7190#ifdef TSTIEMAIMPL_WITH_GENERATOR
7191static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
7192{
7193 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7194
7195 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7196 {
7197 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7198 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7199 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7200 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7201 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7202 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7203 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7204 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7205 /** @todo More specials. */
7206 };
7207
7208 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7209 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7210 {
7211 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;
7212
7213 IEMBINARYOUTPUT BinOut;
7214 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareEflR64R64[iFn].pszName), RTEXITCODE_FAILURE);
7215
7216 uint32_t cNormalInputPairs = 0;
7217 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7218 {
7219 SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);
7220 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7221 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7222
7223 TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7224 TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7225
7226 ValIn1.ar64[0] = TestData.r64ValIn1;
7227 ValIn2.ar64[0] = TestData.r64ValIn2;
7228
7229 if ( RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
7230 && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
7231 cNormalInputPairs++;
7232 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7233 {
7234 iTest -= 1;
7235 continue;
7236 }
7237
7238 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7239 uint32_t const fEFlags = RandEFlags();
7240 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7241 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7242 for (uint8_t iFz = 0; iFz < 2; iFz++)
7243 {
7244 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7245 | (iRounding << X86_MXCSR_RC_SHIFT)
7246 | (iDaz ? X86_MXCSR_DAZ : 0)
7247 | (iFz ? X86_MXCSR_FZ : 0)
7248 | X86_MXCSR_XCPT_MASK;
7249 uint32_t fMxcsrM = fMxcsrIn;
7250 uint32_t fEFlagsM = fEFlags;
7251 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7252 TestData.fMxcsrIn = fMxcsrIn;
7253 TestData.fMxcsrOut = fMxcsrM;
7254 TestData.fEflIn = fEFlags;
7255 TestData.fEflOut = fEFlagsM;
7256 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7257
7258 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7259 uint32_t fMxcsrU = fMxcsrIn;
7260 uint32_t fEFlagsU = fEFlags;
7261 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7262 TestData.fMxcsrIn = fMxcsrIn;
7263 TestData.fMxcsrOut = fMxcsrU;
7264 TestData.fEflIn = fEFlags;
7265 TestData.fEflOut = fEFlagsU;
7266 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7267
7268 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7269 if (fXcpt)
7270 {
7271 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7272 uint32_t fMxcsr1 = fMxcsrIn;
7273 uint32_t fEFlags1 = fEFlags;
7274 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7275 TestData.fMxcsrIn = fMxcsrIn;
7276 TestData.fMxcsrOut = fMxcsr1;
7277 TestData.fEflIn = fEFlags;
7278 TestData.fEflOut = fEFlags1;
7279 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7280
7281 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7282 {
7283 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7284 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7285 uint32_t fMxcsr2 = fMxcsrIn;
7286 uint32_t fEFlags2 = fEFlags;
7287 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7288 TestData.fMxcsrIn = fMxcsrIn;
7289 TestData.fMxcsrOut = fMxcsr2;
7290 TestData.fEflIn = fEFlags;
7291 TestData.fEflOut = fEFlags2;
7292 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7293 }
7294 if (!RT_IS_POWER_OF_TWO(fXcpt))
7295 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7296 if (fUnmasked & fXcpt)
7297 {
7298 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7299 uint32_t fMxcsr3 = fMxcsrIn;
7300 uint32_t fEFlags3 = fEFlags;
7301 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7302 TestData.fMxcsrIn = fMxcsrIn;
7303 TestData.fMxcsrOut = fMxcsr3;
7304 TestData.fEflIn = fEFlags;
7305 TestData.fEflOut = fEFlags3;
7306 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7307 }
7308 }
7309 }
7310 }
7311 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7312 }
7313
7314 return RTEXITCODE_SUCCESS;
7315}
7316#endif
7317
7318static void SseCompareEflR64R64Test(void)
7319{
7320 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7321 {
7322 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7323 continue;
7324
7325 uint32_t const cTests = *g_aSseCompareEflR64R64[iFn].pcTests;
7326 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7327 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7328 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7329 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7330 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7331 {
7332 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R64_R64_TEST_T); iTest++)
7333 {
7334 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7335 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7336
7337 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
7338 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
7339 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7340 uint32_t fEFlags = paTests[iTest].fEflIn;
7341 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7342 if ( fMxcsr != paTests[iTest].fMxcsrOut
7343 || fEFlags != paTests[iTest].fEflOut)
7344 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7345 "%s -> mxcsr=%#08x %#08x\n"
7346 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7347 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7348 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7349 iVar ? " " : "", fMxcsr, fEFlags,
7350 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7351 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7352 FormatMxcsr(paTests[iTest].fMxcsrIn),
7353 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7354 }
7355 }
7356 }
7357}
7358
7359
7360/*
7361 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7362 */
7363/** Maximum immediate to try, to keep the test data size under control (at least a little bit). */
7364#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
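/*
 * Note: plain SSE cmpps/cmpss only define the eight compare predicates 0..7
 * (EQ, LT, LE, UNORD, NEQ, NLT, NLE, ORD); sweeping the immediate up to 0x1f
 * additionally exercises whatever the implementation does with the extended
 * AVX-style predicate range (0..31).
 */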
7365
7366TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
7367
7368static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
7369{
7370 ENTRY_BIN(cmpps_u128),
7371 ENTRY_BIN(cmpss_u128)
7372};
7373
7374#ifdef TSTIEMAIMPL_WITH_GENERATOR
7375static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
7376{
7377 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7378
7379 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7380 {
7381 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7382 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7383 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7384 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7385 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7386 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7387 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7388 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7389 /** @todo More specials. */
7390 };
7391
7392 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7393 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7394 {
7395 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7396
7397 IEMBINARYOUTPUT BinOut;
7398 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareF2XmmR32Imm8[iFn].pszName), RTEXITCODE_FAILURE);
7399
7400 uint32_t cNormalInputPairs = 0;
7401 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7402 {
7403 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7404
7405 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7406 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7407 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7408 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7409
7410 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7411 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7412 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7413 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7414
7415 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
7416 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
7417 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
7418 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
7419 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
7420 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
7421 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
7422 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
7423 cNormalInputPairs++;
7424 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7425 {
7426 iTest -= 1;
7427 continue;
7428 }
7429
7430 IEMMEDIAF2XMMSRC Src;
7431 Src.uSrc1 = TestData.InVal1;
7432 Src.uSrc2 = TestData.InVal2;
7433 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7434 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7435 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7436 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7437 for (uint8_t iFz = 0; iFz < 2; iFz++)
7438 {
7439 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7440 | (iRounding << X86_MXCSR_RC_SHIFT)
7441 | (iDaz ? X86_MXCSR_DAZ : 0)
7442 | (iFz ? X86_MXCSR_FZ : 0)
7443 | X86_MXCSR_XCPT_MASK;
7444 uint32_t fMxcsrM = fMxcsrIn;
7445 X86XMMREG ResM;
7446 pfn(&fMxcsrM, &ResM, &Src, bImm);
7447 TestData.fMxcsrIn = fMxcsrIn;
7448 TestData.fMxcsrOut = fMxcsrM;
7449 TestData.bImm = bImm;
7450 TestData.OutVal = ResM;
7451 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7452
7453 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7454 uint32_t fMxcsrU = fMxcsrIn;
7455 X86XMMREG ResU;
7456 pfn(&fMxcsrU, &ResU, &Src, bImm);
7457 TestData.fMxcsrIn = fMxcsrIn;
7458 TestData.fMxcsrOut = fMxcsrU;
7459 TestData.bImm = bImm;
7460 TestData.OutVal = ResU;
7461 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7462
7463 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7464 if (fXcpt)
7465 {
7466 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7467 uint32_t fMxcsr1 = fMxcsrIn;
7468 X86XMMREG Res1;
7469 pfn(&fMxcsr1, &Res1, &Src, bImm);
7470 TestData.fMxcsrIn = fMxcsrIn;
7471 TestData.fMxcsrOut = fMxcsr1;
7472 TestData.bImm = bImm;
7473 TestData.OutVal = Res1;
7474 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7475
7476 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7477 {
7478 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7479 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7480 uint32_t fMxcsr2 = fMxcsrIn;
7481 X86XMMREG Res2;
7482 pfn(&fMxcsr2, &Res2, &Src, bImm);
7483 TestData.fMxcsrIn = fMxcsrIn;
7484 TestData.fMxcsrOut = fMxcsr2;
7485 TestData.bImm = bImm;
7486 TestData.OutVal = Res2;
7487 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7488 }
7489 if (!RT_IS_POWER_OF_TWO(fXcpt))
7490 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7491 if (fUnmasked & fXcpt)
7492 {
7493 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7494 uint32_t fMxcsr3 = fMxcsrIn;
7495 X86XMMREG Res3;
7496 pfn(&fMxcsr3, &Res3, &Src, bImm);
7497 TestData.fMxcsrIn = fMxcsrIn;
7498 TestData.fMxcsrOut = fMxcsr3;
7499 TestData.bImm = bImm;
7500 TestData.OutVal = Res3;
7501 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7502 }
7503 }
7504 }
7505 }
7506 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7507 }
7508
7509 return RTEXITCODE_SUCCESS;
7510}
7511#endif
7512
7513static void SseCompareF2XmmR32Imm8Test(void)
7514{
7515 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7516 {
7517 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR32Imm8[iFn]))
7518 continue;
7519
7520 uint32_t const cTests = *g_aSseCompareF2XmmR32Imm8[iFn].pcTests;
7521 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7522 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7523 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7524 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7525 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7526 {
7527 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7528 {
7529 IEMMEDIAF2XMMSRC Src;
7530 X86XMMREG ValOut;
7531
7532 Src.uSrc1 = paTests[iTest].InVal1;
7533 Src.uSrc2 = paTests[iTest].InVal2;
7534 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7535 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7536 if ( fMxcsr != paTests[iTest].fMxcsrOut
7537 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7538 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7539 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7540 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7541 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7542 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7543 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7544 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7545 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7546 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7547 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7548 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7549 paTests[iTest].bImm,
7550 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7551 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7552 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7553 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7554 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7555 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7556 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7557 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7558 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7559 ? " - val" : "",
7560 FormatMxcsr(paTests[iTest].fMxcsrIn));
7561 }
7562 }
7563 }
7564}
7565
7566
7567/*
7568 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7569 */
7570static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7571{
7572 ENTRY_BIN(cmppd_u128),
7573 ENTRY_BIN(cmpsd_u128)
7574};
7575
7576#ifdef TSTIEMAIMPL_WITH_GENERATOR
7577static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
7578{
7579 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7580
7581 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7582 {
7583 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7584 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7585 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7586 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7587 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7588 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7589 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7590 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7591 /** @todo More specials. */
7592 };
7593
7594 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7595 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7596 {
7597 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative : g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7598
7599 IEMBINARYOUTPUT BinOut;
7600 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareF2XmmR64Imm8[iFn].pszName), RTEXITCODE_FAILURE);
7601
7602 uint32_t cNormalInputPairs = 0;
7603 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7604 {
7605 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7606
7607 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7608 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7609
7610 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7611 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7612
7613 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
7614 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
7615 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
7616 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
7617 cNormalInputPairs++;
7618 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7619 {
7620 iTest -= 1;
7621 continue;
7622 }
7623
7624 IEMMEDIAF2XMMSRC Src;
7625 Src.uSrc1 = TestData.InVal1;
7626 Src.uSrc2 = TestData.InVal2;
7627 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7628 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7629 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7630 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7631 for (uint8_t iFz = 0; iFz < 2; iFz++)
7632 {
7633 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7634 | (iRounding << X86_MXCSR_RC_SHIFT)
7635 | (iDaz ? X86_MXCSR_DAZ : 0)
7636 | (iFz ? X86_MXCSR_FZ : 0)
7637 | X86_MXCSR_XCPT_MASK;
7638 uint32_t fMxcsrM = fMxcsrIn;
7639 X86XMMREG ResM;
7640 pfn(&fMxcsrM, &ResM, &Src, bImm);
7641 TestData.fMxcsrIn = fMxcsrIn;
7642 TestData.fMxcsrOut = fMxcsrM;
7643 TestData.bImm = bImm;
7644 TestData.OutVal = ResM;
7645 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7646
7647 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7648 uint32_t fMxcsrU = fMxcsrIn;
7649 X86XMMREG ResU;
7650 pfn(&fMxcsrU, &ResU, &Src, bImm);
7651 TestData.fMxcsrIn = fMxcsrIn;
7652 TestData.fMxcsrOut = fMxcsrU;
7653 TestData.bImm = bImm;
7654 TestData.OutVal = ResU;
7655 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7656
7657 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7658 if (fXcpt)
7659 {
7660 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7661 uint32_t fMxcsr1 = fMxcsrIn;
7662 X86XMMREG Res1;
7663 pfn(&fMxcsr1, &Res1, &Src, bImm);
7664 TestData.fMxcsrIn = fMxcsrIn;
7665 TestData.fMxcsrOut = fMxcsr1;
7666 TestData.bImm = bImm;
7667 TestData.OutVal = Res1;
7668 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7669
7670 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7671 {
7672 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7673 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7674 uint32_t fMxcsr2 = fMxcsrIn;
7675 X86XMMREG Res2;
7676 pfn(&fMxcsr2, &Res2, &Src, bImm);
7677 TestData.fMxcsrIn = fMxcsrIn;
7678 TestData.fMxcsrOut = fMxcsr2;
7679 TestData.bImm = bImm;
7680 TestData.OutVal = Res2;
7681 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7682 }
7683 if (!RT_IS_POWER_OF_TWO(fXcpt))
7684 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7685 if (fUnmasked & fXcpt)
7686 {
7687 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7688 uint32_t fMxcsr3 = fMxcsrIn;
7689 X86XMMREG Res3;
7690 pfn(&fMxcsr3, &Res3, &Src, bImm);
7691 TestData.fMxcsrIn = fMxcsrIn;
7692 TestData.fMxcsrOut = fMxcsr3;
7693 TestData.bImm = bImm;
7694 TestData.OutVal = Res3;
7695 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7696 }
7697 }
7698 }
7699 }
7700 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7701 }
7702
7703 return RTEXITCODE_SUCCESS;
7704}
7705#endif
7706
7707static void SseCompareF2XmmR64Imm8Test(void)
7708{
7709 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7710 {
7711 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR64Imm8[iFn]))
7712 continue;
7713
7714 uint32_t const cTests = *g_aSseCompareF2XmmR64Imm8[iFn].pcTests;
7715 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7716 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7717 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7718 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7719 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7720 {
7721 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7722 {
7723 IEMMEDIAF2XMMSRC Src;
7724 X86XMMREG ValOut;
7725
7726 Src.uSrc1 = paTests[iTest].InVal1;
7727 Src.uSrc2 = paTests[iTest].InVal2;
7728 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7729 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7730 if ( fMxcsr != paTests[iTest].fMxcsrOut
7731 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7732 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7733 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7734 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7735 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7736 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7737 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7738 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7739 paTests[iTest].bImm,
7740 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7741 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7742 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7743 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7744 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7745 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7746 ? " - val" : "",
7747 FormatMxcsr(paTests[iTest].fMxcsrIn));
7748 }
7749 }
7750 }
7751}
7752
7753
7754/*
7755 * Convert SSE operations - signed double-words to single-precision floating point values.
7756 */
7757TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7758
7759static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7760{
7761 ENTRY_BIN(cvtdq2ps_u128)
7762};
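/*
 * Note for the reader: even an integer to single-precision conversion can be
 * inexact, since not every 32-bit integer is exactly representable in
 * binary32, which is why the generator below still sweeps the rounding
 * control bits and may see the precision flag being raised.
 */
#if 0 /* documentation-only sketch, not built */
/* Example: INT32_MAX (2147483647) has no exact binary32 representation; under
   round-to-nearest it converts to 2147483648.0f, i.e. an inexact result. */
static const float s_rCvtDq2PsExample = (float)INT32_MAX;
#endif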
7763
7764#ifdef TSTIEMAIMPL_WITH_GENERATOR
7765static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
7766{
7767 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7768
7769 static int32_t const s_aSpecials[] =
7770 {
7771 INT32_MIN,
7772 INT32_MIN / 2,
7773 0,
7774 INT32_MAX / 2,
7775 INT32_MAX,
7776 (int32_t)0x80000000
7777 /** @todo More specials. */
7778 };
7779
7780 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7781 {
7782 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative ? g_aSseConvertXmmI32R32[iFn].pfnNative : g_aSseConvertXmmI32R32[iFn].pfn;
7783
7784 IEMBINARYOUTPUT BinOut;
7785 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmI32R32[iFn].pszName), RTEXITCODE_FAILURE);
7786
7787 X86FXSTATE State;
7788 RT_ZERO(State);
7789 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7790 {
7791 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
7792
7793 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7794 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7795 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7796 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7797
7798 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7799 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7800 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7801 for (uint8_t iFz = 0; iFz < 2; iFz++)
7802 {
7803 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7804 | (iRounding << X86_MXCSR_RC_SHIFT)
7805 | (iDaz ? X86_MXCSR_DAZ : 0)
7806 | (iFz ? X86_MXCSR_FZ : 0)
7807 | X86_MXCSR_XCPT_MASK;
7808 IEMSSERESULT ResM; RT_ZERO(ResM);
7809 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
7810 TestData.fMxcsrIn = State.MXCSR;
7811 TestData.fMxcsrOut = ResM.MXCSR;
7812 TestData.OutVal = ResM.uResult;
7813 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7814
7815 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7816 IEMSSERESULT ResU; RT_ZERO(ResU);
7817 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
7818 TestData.fMxcsrIn = State.MXCSR;
7819 TestData.fMxcsrOut = ResU.MXCSR;
7820 TestData.OutVal = ResU.uResult;
7821 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7822
7823 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
7824 if (fXcpt)
7825 {
7826 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7827 IEMSSERESULT Res1; RT_ZERO(Res1);
7828 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
7829 TestData.fMxcsrIn = State.MXCSR;
7830 TestData.fMxcsrOut = Res1.MXCSR;
7831 TestData.OutVal = Res1.uResult;
7832 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7833
7834 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
7835 {
7836 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
7837 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7838 IEMSSERESULT Res2; RT_ZERO(Res2);
7839 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
7840 TestData.fMxcsrIn = State.MXCSR;
7841 TestData.fMxcsrOut = Res2.MXCSR;
7842 TestData.OutVal = Res2.uResult;
7843 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7844 }
7845 if (!RT_IS_POWER_OF_TWO(fXcpt))
7846 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7847 if (fUnmasked & fXcpt)
7848 {
7849 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7850 IEMSSERESULT Res3; RT_ZERO(Res3);
7851 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
7852 TestData.fMxcsrIn = State.MXCSR;
7853 TestData.fMxcsrOut = Res3.MXCSR;
7854 TestData.OutVal = Res3.uResult;
7855 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7856 }
7857 }
7858 }
7859 }
7860 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7861 }
7862
7863 return RTEXITCODE_SUCCESS;
7864}
7865#endif
7866
7867static void SseConvertXmmI32R32Test(void)
7868{
7869 X86FXSTATE State;
7870 RT_ZERO(State);
7871
7872 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7873 {
7874 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
7875 continue;
7876
7877 uint32_t const cTests = *g_aSseConvertXmmI32R32[iFn].pcTests;
7878 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7879 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7880 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7881 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7882 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7883 {
7884 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7885 {
7886 IEMSSERESULT Res; RT_ZERO(Res);
7887
7888 State.MXCSR = paTests[iTest].fMxcsrIn;
7889 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7890 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7891 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7892 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7893 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7894 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7895 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7896 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7897 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7898 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7899 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7900 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7901 iVar ? " " : "", Res.MXCSR,
7902 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7903 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7904 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7905 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7906 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7907 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7908 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7909 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7910 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7911 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7912 ? " - val" : "",
7913 FormatMxcsr(paTests[iTest].fMxcsrIn));
7914 }
7915 }
7916 }
7917}
7918
7919
7920/*
7921 * Convert SSE operations - single-precision floating point values to signed double-words.
7922 */
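/*
 * Note: the 'tt' variant (cvttps2dq) truncates, i.e. always rounds towards
 * zero regardless of MXCSR.RC, while cvtps2dq honours the rounding control;
 * NaNs and out-of-range inputs produce the integer indefinite value
 * 0x80000000 and raise the invalid-operation exception.
 */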
7923static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7924{
7925 ENTRY_BIN(cvtps2dq_u128),
7926 ENTRY_BIN(cvttps2dq_u128)
7927};
7928
7929#ifdef TSTIEMAIMPL_WITH_GENERATOR
7930static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
7931{
7932 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7933
7934 static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
7935 {
7936 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
7937 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
7938 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
7939 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
7940 /** @todo More specials. */
7941 };
7942
7943 X86FXSTATE State;
7944 RT_ZERO(State);
7945 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7946 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
7947 {
7948 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative ? g_aSseConvertXmmR32I32[iFn].pfnNative : g_aSseConvertXmmR32I32[iFn].pfn;
7949
7950 IEMBINARYOUTPUT BinOut;
7951 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR32I32[iFn].pszName), RTEXITCODE_FAILURE);
7952
7953 uint32_t cNormalInputPairs = 0;
7954 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7955 {
7956 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
7957
7958 TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
7959 TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
7960 TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
7961 TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
7962
7963 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
7964 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
7965 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
7966 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
7967 cNormalInputPairs++;
7968 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7969 {
7970 iTest -= 1;
7971 continue;
7972 }
7973
7974 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7975 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7976 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7977 for (uint8_t iFz = 0; iFz < 2; iFz++)
7978 {
7979 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7980 | (iRounding << X86_MXCSR_RC_SHIFT)
7981 | (iDaz ? X86_MXCSR_DAZ : 0)
7982 | (iFz ? X86_MXCSR_FZ : 0)
7983 | X86_MXCSR_XCPT_MASK;
7984 IEMSSERESULT ResM; RT_ZERO(ResM);
7985 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
7986 TestData.fMxcsrIn = State.MXCSR;
7987 TestData.fMxcsrOut = ResM.MXCSR;
7988 TestData.OutVal = ResM.uResult;
7989 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7990
7991 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7992 IEMSSERESULT ResU; RT_ZERO(ResU);
7993 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
7994 TestData.fMxcsrIn = State.MXCSR;
7995 TestData.fMxcsrOut = ResU.MXCSR;
7996 TestData.OutVal = ResU.uResult;
7997 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7998
7999 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8000 if (fXcpt)
8001 {
8002 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8003 IEMSSERESULT Res1; RT_ZERO(Res1);
8004 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8005 TestData.fMxcsrIn = State.MXCSR;
8006 TestData.fMxcsrOut = Res1.MXCSR;
8007 TestData.OutVal = Res1.uResult;
8008 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8009
8010 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8011 {
8012 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8013 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8014 IEMSSERESULT Res2; RT_ZERO(Res2);
8015 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8016 TestData.fMxcsrIn = State.MXCSR;
8017 TestData.fMxcsrOut = Res2.MXCSR;
8018 TestData.OutVal = Res2.uResult;
8019 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8020 }
8021 if (!RT_IS_POWER_OF_TWO(fXcpt))
8022 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8023 if (fUnmasked & fXcpt)
8024 {
8025 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8026 IEMSSERESULT Res3; RT_ZERO(Res3);
8027 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8028 TestData.fMxcsrIn = State.MXCSR;
8029 TestData.fMxcsrOut = Res3.MXCSR;
8030 TestData.OutVal = Res3.uResult;
8031 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8032 }
8033 }
8034 }
8035 }
8036 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8037 }
8038
8039 return RTEXITCODE_SUCCESS;
8040}
8041#endif
8042
8043static void SseConvertXmmR32I32Test(void)
8044{
8045 X86FXSTATE State;
8046 RT_ZERO(State);
8047
8048 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8049 {
8050 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
8051 continue;
8052
8053 uint32_t const cTests = *g_aSseConvertXmmR32I32[iFn].pcTests;
8054 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
8055 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
8056 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
8057 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8058 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8059 {
8060 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8061 {
8062 IEMSSERESULT Res; RT_ZERO(Res);
8063
8064 State.MXCSR = paTests[iTest].fMxcsrIn;
8065 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8066 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8067 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8068 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8069 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8070 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8071 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
8072 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8073 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8074 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8075 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
8076 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
8077 iVar ? " " : "", Res.MXCSR,
8078 Res.uResult.ai32[0], Res.uResult.ai32[1],
8079 Res.uResult.ai32[2], Res.uResult.ai32[3],
8080 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8081 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8082 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8083 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8084 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8085 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8086 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8087 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8088 ? " - val" : "",
8089 FormatMxcsr(paTests[iTest].fMxcsrIn));
8090 }
8091 }
8092 }
8093}
8094
8095
8096/*
8097 * Convert SSE operations - signed double-words to double-precision floating point values.
8098 */
8099static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
8100{
8101 ENTRY_BIN(cvtdq2pd_u128)
8102};
8103
8104#ifdef TSTIEMAIMPL_WITH_GENERATOR
8105static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
8106{
8107 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8108
8109 static int32_t const s_aSpecials[] =
8110 {
8111 INT32_MIN,
8112 INT32_MIN / 2,
8113 0,
8114 INT32_MAX / 2,
8115 INT32_MAX,
8116 (int32_t)0x80000000
8117 /** @todo More specials. */
8118 };
8119
8120 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8121 {
8122 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative ? g_aSseConvertXmmI32R64[iFn].pfnNative : g_aSseConvertXmmI32R64[iFn].pfn;
8123
8124 IEMBINARYOUTPUT BinOut;
8125 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmI32R64[iFn].pszName), RTEXITCODE_FAILURE);
8126
8127 X86FXSTATE State;
8128 RT_ZERO(State);
8129 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8130 {
8131 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8132
8133 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8134 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8135 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8136 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8137
8138 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8139 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8140 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8141 for (uint8_t iFz = 0; iFz < 2; iFz++)
8142 {
8143 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8144 | (iRounding << X86_MXCSR_RC_SHIFT)
8145 | (iDaz ? X86_MXCSR_DAZ : 0)
8146 | (iFz ? X86_MXCSR_FZ : 0)
8147 | X86_MXCSR_XCPT_MASK;
8148 IEMSSERESULT ResM; RT_ZERO(ResM);
8149 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8150 TestData.fMxcsrIn = State.MXCSR;
8151 TestData.fMxcsrOut = ResM.MXCSR;
8152 TestData.OutVal = ResM.uResult;
8153 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8154
8155 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8156 IEMSSERESULT ResU; RT_ZERO(ResU);
8157 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8158 TestData.fMxcsrIn = State.MXCSR;
8159 TestData.fMxcsrOut = ResU.MXCSR;
8160 TestData.OutVal = ResU.uResult;
8161 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8162
8163 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8164 if (fXcpt)
8165 {
8166 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8167 IEMSSERESULT Res1; RT_ZERO(Res1);
8168 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8169 TestData.fMxcsrIn = State.MXCSR;
8170 TestData.fMxcsrOut = Res1.MXCSR;
8171 TestData.OutVal = Res1.uResult;
8172 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8173
8174 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8175 {
8176 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8177 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8178 IEMSSERESULT Res2; RT_ZERO(Res2);
8179 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8180 TestData.fMxcsrIn = State.MXCSR;
8181 TestData.fMxcsrOut = Res2.MXCSR;
8182 TestData.OutVal = Res2.uResult;
8183 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8184 }
8185 if (!RT_IS_POWER_OF_TWO(fXcpt))
8186 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8187 if (fUnmasked & fXcpt)
8188 {
8189 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8190 IEMSSERESULT Res3; RT_ZERO(Res3);
8191 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8192 TestData.fMxcsrIn = State.MXCSR;
8193 TestData.fMxcsrOut = Res3.MXCSR;
8194 TestData.OutVal = Res3.uResult;
8195 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8196 }
8197 }
8198 }
8199 }
8200 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8201 }
8202
8203 return RTEXITCODE_SUCCESS;
8204}
8205#endif
8206
8207static void SseConvertXmmI32R64Test(void)
8208{
8209 X86FXSTATE State;
8210 RT_ZERO(State);
8211
8212 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8213 {
8214 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
8215 continue;
8216
8217 uint32_t const cTests = *g_aSseConvertXmmI32R64[iFn].pcTests;
8218 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8219 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8220 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8221 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8222 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8223 {
8224 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8225 {
8226 IEMSSERESULT Res; RT_ZERO(Res);
8227
8228 State.MXCSR = paTests[iTest].fMxcsrIn;
8229 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8230 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8231 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8232 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8233 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8234 "%s -> mxcsr=%#08x %s'%s\n"
8235 "%s expected %#08x %s'%s%s%s (%s)\n",
8236 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8237 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8238 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8239 iVar ? " " : "", Res.MXCSR,
8240 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
8241 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8242 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8243 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8244 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8245 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8246 ? " - val" : "",
8247 FormatMxcsr(paTests[iTest].fMxcsrIn));
8248 }
8249 }
8250 }
8251}
8252
8253
8254/*
8255 * Convert SSE operations - double-precision floating point values to signed double-words.
8256 */
8257static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8258{
8259 ENTRY_BIN(cvtpd2dq_u128),
8260 ENTRY_BIN(cvttpd2dq_u128)
8261};
8262
8263#ifdef TSTIEMAIMPL_WITH_GENERATOR
8264static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
8265{
8266 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8267
8268 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8269 {
8270 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8271 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8272 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8273 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8274 /** @todo More specials. */
8275 };
8276
8277 X86FXSTATE State;
8278 RT_ZERO(State);
8279 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8280 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8281 {
8282 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;
8283
8284 IEMBINARYOUTPUT BinOut;
8285 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName), RTEXITCODE_FAILURE);
8286
8287 uint32_t cNormalInputPairs = 0;
8288 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8289 {
8290 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8291
8292 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8293 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8294
8295 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8296 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8297 cNormalInputPairs++;
8298 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8299 {
8300 iTest -= 1;
8301 continue;
8302 }
8303
8304 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8305 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8306 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8307 for (uint8_t iFz = 0; iFz < 2; iFz++)
8308 {
8309 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8310 | (iRounding << X86_MXCSR_RC_SHIFT)
8311 | (iDaz ? X86_MXCSR_DAZ : 0)
8312 | (iFz ? X86_MXCSR_FZ : 0)
8313 | X86_MXCSR_XCPT_MASK;
8314 IEMSSERESULT ResM; RT_ZERO(ResM);
8315 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8316 TestData.fMxcsrIn = State.MXCSR;
8317 TestData.fMxcsrOut = ResM.MXCSR;
8318 TestData.OutVal = ResM.uResult;
8319 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8320
8321 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8322 IEMSSERESULT ResU; RT_ZERO(ResU);
8323 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8324 TestData.fMxcsrIn = State.MXCSR;
8325 TestData.fMxcsrOut = ResU.MXCSR;
8326 TestData.OutVal = ResU.uResult;
8327 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8328
8329 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8330 if (fXcpt)
8331 {
8332 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8333 IEMSSERESULT Res1; RT_ZERO(Res1);
8334 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8335 TestData.fMxcsrIn = State.MXCSR;
8336 TestData.fMxcsrOut = Res1.MXCSR;
8337 TestData.OutVal = Res1.uResult;
8338 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8339
8340 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8341 {
8342 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8343 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8344 IEMSSERESULT Res2; RT_ZERO(Res2);
8345 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8346 TestData.fMxcsrIn = State.MXCSR;
8347 TestData.fMxcsrOut = Res2.MXCSR;
8348 TestData.OutVal = Res2.uResult;
8349 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8350 }
8351 if (!RT_IS_POWER_OF_TWO(fXcpt))
8352 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8353 if (fUnmasked & fXcpt)
8354 {
8355 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8356 IEMSSERESULT Res3; RT_ZERO(Res3);
8357 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8358 TestData.fMxcsrIn = State.MXCSR;
8359 TestData.fMxcsrOut = Res3.MXCSR;
8360 TestData.OutVal = Res3.uResult;
8361 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8362 }
8363 }
8364 }
8365 }
8366 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8367 }
8368
8369 return RTEXITCODE_SUCCESS;
8370}
8371#endif
8372
8373static void SseConvertXmmR64I32Test(void)
8374{
8375 X86FXSTATE State;
8376 RT_ZERO(State);
8377
8378 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8379 {
8380 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
8381 continue;
8382
8383 uint32_t const cTests = *g_aSseConvertXmmR64I32[iFn].pcTests;
8384 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8385 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8386 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8387 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8388 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8389 {
8390 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8391 {
8392 IEMSSERESULT Res; RT_ZERO(Res);
8393
8394 State.MXCSR = paTests[iTest].fMxcsrIn;
8395 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8396 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8397 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8398 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8399 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8400 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8401 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8402 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8403 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8404 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8405 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8406 iVar ? " " : "", Res.MXCSR,
8407 Res.uResult.ai32[0], Res.uResult.ai32[1],
8408 Res.uResult.ai32[2], Res.uResult.ai32[3],
8409 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8410 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8411 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8412 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8413 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8414 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8415 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8416 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8417 ? " - val" : "",
8418 FormatMxcsr(paTests[iTest].fMxcsrIn));
8419 }
8420 }
8421 }
8422}
8423
8424
8425/*
8426 * Convert SSE operations - double-precision floating point values to signed double-words (64-bit MMX destination).
8427 */
8428TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8429
8430static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8431{
8432 ENTRY_BIN(cvtpd2pi_u128),
8433 ENTRY_BIN(cvttpd2pi_u128)
8434};
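/*
 * Note: these are the MMX-destination forms, so the helper returns the two
 * packed int32 results as a single 64-bit value via the uint64_t output
 * parameter rather than a full X86XMMREG, hence the PFNIEMAIMPLMXCSRU64U128
 * signature and the TestData.OutVal.u assignments in the generator below.
 */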
8435
8436#ifdef TSTIEMAIMPL_WITH_GENERATOR
8437static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8438{
8439 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8440
8441 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8442 {
8443 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8444 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8445 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8446 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8447 /** @todo More specials. */
8448 };
8449
8450 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8451 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8452 {
8453 PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;
8454
8455 IEMBINARYOUTPUT BinOut;
8456 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName), RTEXITCODE_FAILURE);
8457
8458 uint32_t cNormalInputPairs = 0;
8459 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8460 {
8461 SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);
8462
8463 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8464 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8465
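            /* Ensure a minimum share of the random inputs are normal/normal pairs: when we are
               close to running out of iterations and still short of cMinNormalPairs, retry the
               iteration until a normal pair comes up. */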
8466 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8467 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8468 cNormalInputPairs++;
8469 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8470 {
8471 iTest -= 1;
8472 continue;
8473 }
8474
8475 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
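            /* The nested loops below push this input through every rounding mode and DAZ/FZ
               combination: first with all exceptions masked, then with all of them unmasked,
               and - whenever exception flags were raised - again with those flags pre-set as
               sticky status bits and with assorted mask combinations (the status flags live in
               MXCSR bits 0..5 and their mask bits in 7..12, hence X86_MXCSR_XCPT_MASK_SHIFT).
               This captures both masked and unmasked exception behaviour for the same input. */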
8476 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8477 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8478 for (uint8_t iFz = 0; iFz < 2; iFz++)
8479 {
8480 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8481 | (iRounding << X86_MXCSR_RC_SHIFT)
8482 | (iDaz ? X86_MXCSR_DAZ : 0)
8483 | (iFz ? X86_MXCSR_FZ : 0)
8484 | X86_MXCSR_XCPT_MASK;
8485 uint32_t fMxcsrM = fMxcsrIn;
8486 uint64_t u64ResM;
8487 pfn(&fMxcsrM, &u64ResM, &TestData.InVal);
8488 TestData.fMxcsrIn = fMxcsrIn;
8489 TestData.fMxcsrOut = fMxcsrM;
8490 TestData.OutVal.u = u64ResM;
8491 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8492
8493 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8494 uint32_t fMxcsrU = fMxcsrIn;
8495 uint64_t u64ResU;
8496 pfn(&fMxcsrU, &u64ResU, &TestData.InVal);
8497 TestData.fMxcsrIn = fMxcsrIn;
8498 TestData.fMxcsrOut = fMxcsrU;
8499 TestData.OutVal.u = u64ResU;
8500 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8501
8502 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8503 if (fXcpt)
8504 {
8505 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8506 uint32_t fMxcsr1 = fMxcsrIn;
8507 uint64_t u64Res1;
8508 pfn(&fMxcsr1, &u64Res1, &TestData.InVal);
8509 TestData.fMxcsrIn = fMxcsrIn;
8510 TestData.fMxcsrOut = fMxcsr1;
8511 TestData.OutVal.u = u64Res1;
8512 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8513
8514 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8515 {
8516 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8517 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8518 uint32_t fMxcsr2 = fMxcsrIn;
8519 uint64_t u64Res2;
8520 pfn(&fMxcsr2, &u64Res2, &TestData.InVal);
8521 TestData.fMxcsrIn = fMxcsrIn;
8522 TestData.fMxcsrOut = fMxcsr2;
8523 TestData.OutVal.u = u64Res2;
8524 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8525 }
8526 if (!RT_IS_POWER_OF_TWO(fXcpt))
8527 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8528 if (fUnmasked & fXcpt)
8529 {
8530 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8531 uint32_t fMxcsr3 = fMxcsrIn;
8532 uint64_t u64Res3;
8533 pfn(&fMxcsr3, &u64Res3, &TestData.InVal);
8534 TestData.fMxcsrIn = fMxcsrIn;
8535 TestData.fMxcsrOut = fMxcsr3;
8536 TestData.OutVal.u = u64Res3;
8537 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8538 }
8539 }
8540 }
8541 }
8542 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8543 }
8544
8545 return RTEXITCODE_SUCCESS;
8546}
8547#endif
8548
8549static void SseConvertMmXmmTest(void)
8550{
8551 X86FXSTATE State;
8552 RT_ZERO(State);
8553
8554 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8555 {
8556 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
8557 continue;
8558
8559 uint32_t const cTests = *g_aSseConvertMmXmm[iFn].pcTests;
8560 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8561 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8562 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8563 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8564 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8565 {
8566 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8567 {
8568 RTUINT64U ValOut;
8569 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8570 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8571 if ( fMxcsr != paTests[iTest].fMxcsrOut
8572 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8573 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8574 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8575 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8576 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8577 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8578 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8579 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8580 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8581 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8582 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8583 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8584 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8585 ? " - val" : "",
8586 FormatMxcsr(paTests[iTest].fMxcsrIn));
8587 }
8588 }
8589 }
8590}
8591
8592
8593/*
8594 * SSE operations converting signed double-word values to double-precision floating-point values (probably only cvtpi2pd).
8595 */
8596TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8597
8598static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8599{
8600 ENTRY_BIN(cvtpi2pd_u128)
8601};
8602
8603#ifdef TSTIEMAIMPL_WITH_GENERATOR
8604static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8605{
8606 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8607
8608 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8609 {
8610 { { INT32_MIN, INT32_MIN } },
8611 { { INT32_MAX, INT32_MAX } }
8612 /** @todo More specials. */
8613 };
8614
8615 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8616 {
8617 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;
8618
8619 IEMBINARYOUTPUT BinOut;
8620 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName), RTEXITCODE_FAILURE);
8621
8622 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8623 {
8624 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8625
8626 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8627 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8628
8629 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8630 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8631 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8632 for (uint8_t iFz = 0; iFz < 2; iFz++)
8633 {
8634 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8635 | (iRounding << X86_MXCSR_RC_SHIFT)
8636 | (iDaz ? X86_MXCSR_DAZ : 0)
8637 | (iFz ? X86_MXCSR_FZ : 0)
8638 | X86_MXCSR_XCPT_MASK;
8639 uint32_t fMxcsrM = fMxcsrIn;
8640 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8641 TestData.fMxcsrIn = fMxcsrIn;
8642 TestData.fMxcsrOut = fMxcsrM;
8643 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8644
8645 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8646 uint32_t fMxcsrU = fMxcsrIn;
8647 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8648 TestData.fMxcsrIn = fMxcsrIn;
8649 TestData.fMxcsrOut = fMxcsrU;
8650 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8651
8652 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8653 if (fXcpt)
8654 {
8655 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8656 uint32_t fMxcsr1 = fMxcsrIn;
8657 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8658 TestData.fMxcsrIn = fMxcsrIn;
8659 TestData.fMxcsrOut = fMxcsr1;
8660 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8661
8662 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8663 {
8664 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8665 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8666 uint32_t fMxcsr2 = fMxcsrIn;
8667 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8668 TestData.fMxcsrIn = fMxcsrIn;
8669 TestData.fMxcsrOut = fMxcsr2;
8670 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8671 }
8672 if (!RT_IS_POWER_OF_TWO(fXcpt))
8673 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8674 if (fUnmasked & fXcpt)
8675 {
8676 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8677 uint32_t fMxcsr3 = fMxcsrIn;
8678 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8679 TestData.fMxcsrIn = fMxcsrIn;
8680 TestData.fMxcsrOut = fMxcsr3;
8681 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8682 }
8683 }
8684 }
8685 }
8686 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8687 }
8688
8689 return RTEXITCODE_SUCCESS;
8690}
8691#endif
8692
8693static void SseConvertXmmR64MmTest(void)
8694{
8695 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8696 {
8697 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
8698 continue;
8699
8700 uint32_t const cTests = *g_aSseConvertXmmR64Mm[iFn].pcTests;
8701 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8702 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8703 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8704 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8705 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8706 {
8707 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8708 {
8709 X86XMMREG ValOut;
8710 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8711 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8712 if ( fMxcsr != paTests[iTest].fMxcsrOut
8713 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8714 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8715 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8716 "%s -> mxcsr=%#08x %s'%s\n"
8717 "%s expected %#08x %s'%s%s%s (%s)\n",
8718 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8719 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8720 iVar ? " " : "", fMxcsr,
8721 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8722 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8723 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8724 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8725 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8726 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8727 ? " - val" : "",
8728 FormatMxcsr(paTests[iTest].fMxcsrIn));
8729 }
8730 }
8731 }
8732}
8733
8734
8735/*
8736 * SSE operations converting signed double-word values to single-precision floating-point values (probably only cvtpi2ps).
8737 */
8738TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8739
8740static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
8741{
8742 ENTRY_BIN(cvtpi2ps_u128)
8743};
8744
8745#ifdef TSTIEMAIMPL_WITH_GENERATOR
8746static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8747{
8748 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8749
8750 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8751 {
8752 { { INT32_MIN, INT32_MIN } },
8753 { { INT32_MAX, INT32_MAX } }
8754 /** @todo More specials. */
8755 };
8756
8757 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8758 {
8759 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;
8760
8761 IEMBINARYOUTPUT BinOut;
8762 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName), RTEXITCODE_FAILURE);
8763
8764 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8765 {
8766 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8767
8768 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8769 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8770
8771 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8772 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8773 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8774 for (uint8_t iFz = 0; iFz < 2; iFz++)
8775 {
8776 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8777 | (iRounding << X86_MXCSR_RC_SHIFT)
8778 | (iDaz ? X86_MXCSR_DAZ : 0)
8779 | (iFz ? X86_MXCSR_FZ : 0)
8780 | X86_MXCSR_XCPT_MASK;
8781 uint32_t fMxcsrM = fMxcsrIn;
8782 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8783 TestData.fMxcsrIn = fMxcsrIn;
8784 TestData.fMxcsrOut = fMxcsrM;
8785 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8786
8787 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8788 uint32_t fMxcsrU = fMxcsrIn;
8789 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8790 TestData.fMxcsrIn = fMxcsrIn;
8791 TestData.fMxcsrOut = fMxcsrU;
8792 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8793
8794 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8795 if (fXcpt)
8796 {
8797 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8798 uint32_t fMxcsr1 = fMxcsrIn;
8799 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8800 TestData.fMxcsrIn = fMxcsrIn;
8801 TestData.fMxcsrOut = fMxcsr1;
8802 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8803
8804 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8805 {
8806 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8807 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8808 uint32_t fMxcsr2 = fMxcsrIn;
8809 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8810 TestData.fMxcsrIn = fMxcsrIn;
8811 TestData.fMxcsrOut = fMxcsr2;
8812 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8813 }
8814 if (!RT_IS_POWER_OF_TWO(fXcpt))
8815 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8816 if (fUnmasked & fXcpt)
8817 {
8818 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8819 uint32_t fMxcsr3 = fMxcsrIn;
8820 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8821 TestData.fMxcsrIn = fMxcsrIn;
8822 TestData.fMxcsrOut = fMxcsr3;
8823 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8824 }
8825 }
8826 }
8827 }
8828 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8829 }
8830
8831 return RTEXITCODE_SUCCESS;
8832}
8833#endif
8834
8835static void SseConvertXmmR32MmTest(void)
8836{
8837 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8838 {
8839 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
8840 continue;
8841
8842 uint32_t const cTests = *g_aSseConvertXmmR32Mm[iFn].pcTests;
8843 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8844 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8845 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8846 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8847 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8848 {
8849 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8850 {
8851 X86XMMREG ValOut;
8852 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8853 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8854 if ( fMxcsr != paTests[iTest].fMxcsrOut
8855 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8856 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8857 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8858 "%s -> mxcsr=%#08x %s'%s\n"
8859 "%s expected %#08x %s'%s%s%s (%s)\n",
8860 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8861 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8862 iVar ? " " : "", fMxcsr,
8863 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8864 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8865 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8866 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8867 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8868 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8869 ? " - val" : "",
8870 FormatMxcsr(paTests[iTest].fMxcsrIn));
8871 }
8872 }
8873 }
8874}
8875
8876
8877/*
8878 * SSE operations converting single-precision floating-point values to signed double-word values (cvtps2pi, cvttps2pi).
8879 */
8880TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
8881
8882static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
8883{
8884 ENTRY_BIN(cvtps2pi_u128),
8885 ENTRY_BIN(cvttps2pi_u128)
8886};
8887
8888#ifdef TSTIEMAIMPL_WITH_GENERATOR
8889static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
8890{
8891 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8892
8893 static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
8894 {
8895 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
8896 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
8897 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
8898 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
8899 /** @todo More specials. */
8900 };
8901
8902 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8903 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8904 {
8905 PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;
8906
8907 IEMBINARYOUTPUT BinOut;
8908 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName), RTEXITCODE_FAILURE);
8909
8910 uint32_t cNormalInputPairs = 0;
8911 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8912 {
8913 SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);
8914
8915 TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8916 TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8917
8918 if ( RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
8919 && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
8920 cNormalInputPairs++;
8921 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8922 {
8923 iTest -= 1;
8924 continue;
8925 }
8926
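            /* The worker consumes its source as one 64-bit MM-register style value, so pack
               the two single-precision inputs into a single 64-bit container first. */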
8927 RTFLOAT64U TestVal;
8928 TestVal.au32[0] = TestData.ar32InVal[0].u;
8929 TestVal.au32[1] = TestData.ar32InVal[1].u;
8930
8931 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8932 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8933 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8934 for (uint8_t iFz = 0; iFz < 2; iFz++)
8935 {
8936 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8937 | (iRounding << X86_MXCSR_RC_SHIFT)
8938 | (iDaz ? X86_MXCSR_DAZ : 0)
8939 | (iFz ? X86_MXCSR_FZ : 0)
8940 | X86_MXCSR_XCPT_MASK;
8941 uint32_t fMxcsrM = fMxcsrIn;
8942 uint64_t u64ResM;
8943 pfn(&fMxcsrM, &u64ResM, TestVal.u);
8944 TestData.fMxcsrIn = fMxcsrIn;
8945 TestData.fMxcsrOut = fMxcsrM;
8946 TestData.OutVal.u = u64ResM;
8947 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8948
8949 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8950 uint32_t fMxcsrU = fMxcsrIn;
8951 uint64_t u64ResU;
8952 pfn(&fMxcsrU, &u64ResU, TestVal.u);
8953 TestData.fMxcsrIn = fMxcsrIn;
8954 TestData.fMxcsrOut = fMxcsrU;
8955 TestData.OutVal.u = u64ResU;
8956 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8957
8958 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8959 if (fXcpt)
8960 {
8961 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8962 uint32_t fMxcsr1 = fMxcsrIn;
8963 uint64_t u64Res1;
8964 pfn(&fMxcsr1, &u64Res1, TestVal.u);
8965 TestData.fMxcsrIn = fMxcsrIn;
8966 TestData.fMxcsrOut = fMxcsr1;
8967 TestData.OutVal.u = u64Res1;
8968 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8969
8970 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8971 {
8972 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8973 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8974 uint32_t fMxcsr2 = fMxcsrIn;
8975 uint64_t u64Res2;
8976 pfn(&fMxcsr2, &u64Res2, TestVal.u);
8977 TestData.fMxcsrIn = fMxcsrIn;
8978 TestData.fMxcsrOut = fMxcsr2;
8979 TestData.OutVal.u = u64Res2;
8980 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8981 }
8982 if (!RT_IS_POWER_OF_TWO(fXcpt))
8983 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8984 if (fUnmasked & fXcpt)
8985 {
8986 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8987 uint32_t fMxcsr3 = fMxcsrIn;
8988 uint64_t u64Res3;
8989 pfn(&fMxcsr3, &u64Res3, TestVal.u);
8990 TestData.fMxcsrIn = fMxcsrIn;
8991 TestData.fMxcsrOut = fMxcsr3;
8992 TestData.OutVal.u = u64Res3;
8993 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8994 }
8995 }
8996 }
8997 }
8998 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8999 }
9000
9001 return RTEXITCODE_SUCCESS;
9002}
9003#endif
9004
9005static void SseConvertMmI32XmmR32Test(void)
9006{
9007 X86FXSTATE State;
9008 RT_ZERO(State);
9009
9010 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9011 {
9012 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9013 continue;
9014
9015 uint32_t const cTests = *g_aSseConvertMmI32XmmR32[iFn].pcTests;
9016 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9017 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9018 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9019 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9020 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9021 {
9022 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9023 {
9024 RTUINT64U ValOut;
9025 RTUINT64U ValIn;
9026
9027 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9028 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9029
9030 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
9031 pfn(&fMxcsr, &ValOut.u, ValIn.u);
9032 if ( fMxcsr != paTests[iTest].fMxcsrOut
9033 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9034 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9035 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
9036 "%s -> mxcsr=%#08x %RI32'%RI32\n"
9037 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9038 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9039 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9040 iVar ? " " : "", fMxcsr,
9041 ValOut.ai32[0], ValOut.ai32[1],
9042 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9043 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9044 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9045 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9046 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9047 ? " - val" : "",
9048 FormatMxcsr(paTests[iTest].fMxcsrIn));
9049 }
9050 }
9051 }
9052}
9053
9054
9055/*
9056 * SSE 4.2 pcmpxstrx instructions.
9057 */
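/* Note: pcmpistri/pcmpistrm use implicit (NUL element terminated) string lengths, while
   pcmpestri/pcmpestrm take explicit lengths in RAX/RDX.  The ...i forms return an index
   in ECX and the ...m forms a mask in XMM0, which is why the subtests below record either
   u32EcxOut or an XMM OutVal. */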
9058TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
9059
9060static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
9061{
9062 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
9063};
9064
9065#ifdef TSTIEMAIMPL_WITH_GENERATOR
9066static RTEXITCODE SseComparePcmpistriGenerate(const char *pszDataFileFmt, uint32_t cTests)
9067{
9068 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9069
9070 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9071 {
9072 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9073 /** @todo More specials. */
9074 };
9075
9076 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9077 {
9078 PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;
9079
9080 IEMBINARYOUTPUT BinOut;
9081 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpistri[iFn].pszName), RTEXITCODE_FAILURE);
9082
9083 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9084 {
9085 SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);
9086
9087 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9088 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9089
9090 IEMPCMPISTRXSRC TestVal;
9091 TestVal.uSrc1 = TestData.InVal1.uXmm;
9092 TestVal.uSrc2 = TestData.InVal2.uXmm;
9093
9094 uint32_t const fEFlagsIn = RandEFlags();
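            /* Run all 256 immediate values so that every combination of the imm8 control
               fields (source data format, aggregation operation, polarity and output
               selection) ends up in the generated data. */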
9095 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9096 {
9097 uint32_t fEFlagsOut = fEFlagsIn;
9098 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9099 TestData.fEFlagsIn = fEFlagsIn;
9100 TestData.fEFlagsOut = fEFlagsOut;
9101 TestData.bImm = (uint8_t)u16Imm;
9102 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9103 }
9104
9105            /* Repeat the test with the second input identical to the first. */
9106 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9107 TestVal.uSrc1 = TestData.InVal1.uXmm;
9108 TestVal.uSrc2 = TestData.InVal2.uXmm;
9109
9110 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9111 {
9112 uint32_t fEFlagsOut = fEFlagsIn;
9113 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9114 TestData.fEFlagsIn = fEFlagsIn;
9115 TestData.fEFlagsOut = fEFlagsOut;
9116 TestData.bImm = (uint8_t)u16Imm;
9117 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9118 }
9119 }
9120 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9121 }
9122
9123 return RTEXITCODE_SUCCESS;
9124}
9125#endif
9126
9127static void SseComparePcmpistriTest(void)
9128{
9129 X86FXSTATE State;
9130 RT_ZERO(State);
9131
9132 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9133 {
9134 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9135 continue;
9136
9137 uint32_t const cTests = *g_aSsePcmpistri[iFn].pcTests;
9138 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9139 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9140 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9141 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9142 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9143 {
9144 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9145 {
9146 IEMPCMPISTRXSRC TestVal;
9147 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9148 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9149
9150 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9151 uint32_t u32EcxOut = 0;
9152 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9153 if ( fEFlags != paTests[iTest].fEFlagsOut
9154 || u32EcxOut != paTests[iTest].u32EcxOut)
9155 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9156 "%s -> efl=%#08x %RU32\n"
9157 "%s expected %#08x %RU32%s%s\n",
9158 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9159 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9160 iVar ? " " : "", fEFlags, u32EcxOut,
9161 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9162 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9163 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9164 }
9165 }
9166 }
9167}
9168
9169
9170TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
9171
9172static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
9173{
9174 ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
9175};
9176
9177#ifdef TSTIEMAIMPL_WITH_GENERATOR
9178static RTEXITCODE SseComparePcmpistrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9179{
9180 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9181
9182 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9183 {
9184 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9185 /** @todo More specials. */
9186 };
9187
9188 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9189 {
9190 PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;
9191
9192 IEMBINARYOUTPUT BinOut;
9193 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpistrm[iFn].pszName), RTEXITCODE_FAILURE);
9194
9195 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9196 {
9197 SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);
9198
9199 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9200 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9201
9202 IEMPCMPISTRXSRC TestVal;
9203 TestVal.uSrc1 = TestData.InVal1.uXmm;
9204 TestVal.uSrc2 = TestData.InVal2.uXmm;
9205
9206 uint32_t const fEFlagsIn = RandEFlags();
9207 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9208 {
9209 uint32_t fEFlagsOut = fEFlagsIn;
9210 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9211 TestData.fEFlagsIn = fEFlagsIn;
9212 TestData.fEFlagsOut = fEFlagsOut;
9213 TestData.bImm = (uint8_t)u16Imm;
9214 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9215 }
9216
9217            /* Repeat the test with the second input identical to the first. */
9218 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9219 TestVal.uSrc1 = TestData.InVal1.uXmm;
9220 TestVal.uSrc2 = TestData.InVal2.uXmm;
9221
9222 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9223 {
9224 uint32_t fEFlagsOut = fEFlagsIn;
9225 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9226 TestData.fEFlagsIn = fEFlagsIn;
9227 TestData.fEFlagsOut = fEFlagsOut;
9228 TestData.bImm = (uint8_t)u16Imm;
9229 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9230 }
9231 }
9232 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9233 }
9234
9235 return RTEXITCODE_SUCCESS;
9236}
9237#endif
9238
9239static void SseComparePcmpistrmTest(void)
9240{
9241 X86FXSTATE State;
9242 RT_ZERO(State);
9243
9244 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9245 {
9246 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9247 continue;
9248
9249 uint32_t const cTests = *g_aSsePcmpistrm[iFn].pcTests;
9250 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9251 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9252 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9253 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9254 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9255 {
9256 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9257 {
9258 IEMPCMPISTRXSRC TestVal;
9259 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9260 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9261
9262 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9263 RTUINT128U OutVal;
9264 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9265 if ( fEFlags != paTests[iTest].fEFlagsOut
9266 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9267 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9268 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9269 "%s -> efl=%#08x %s\n"
9270 "%s expected %#08x %s%s%s\n",
9271 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9272 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9273 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9274 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9275 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9276 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9277 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9278 }
9279 }
9280 }
9281}
9282
9283
9284TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9285
9286static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9287{
9288 ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9289};
9290
9291#ifdef TSTIEMAIMPL_WITH_GENERATOR
9292static RTEXITCODE SseComparePcmpestriGenerate(const char *pszDataFileFmt, uint32_t cTests)
9293{
9294 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9295
9296 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9297 {
9298 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9299 /** @todo More specials. */
9300 };
9301
9302 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9303 {
9304 PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;
9305
9306 IEMBINARYOUTPUT BinOut;
9307 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpestri[iFn].pszName), RTEXITCODE_FAILURE);
9308
9309 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9310 {
9311 SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);
9312
9313 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9314 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9315
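                /* The explicit-length forms take element counts from RAX and RDX; stepping
                   them through -20 and 0 exercises both negative and zero length inputs. */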
9316 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9317 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9318 {
9319 TestData.u64Rax = (uint64_t)i64Rax;
9320 TestData.u64Rdx = (uint64_t)i64Rdx;
9321
9322 IEMPCMPESTRXSRC TestVal;
9323 TestVal.uSrc1 = TestData.InVal1.uXmm;
9324 TestVal.uSrc2 = TestData.InVal2.uXmm;
9325 TestVal.u64Rax = TestData.u64Rax;
9326 TestVal.u64Rdx = TestData.u64Rdx;
9327
9328 uint32_t const fEFlagsIn = RandEFlags();
9329 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9330 {
9331 uint32_t fEFlagsOut = fEFlagsIn;
9332 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9333 TestData.fEFlagsIn = fEFlagsIn;
9334 TestData.fEFlagsOut = fEFlagsOut;
9335 TestData.bImm = (uint8_t)u16Imm;
9336 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9337 }
9338
9339                /* Repeat the test with the second input identical to the first. */
9340 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9341 TestVal.uSrc1 = TestData.InVal1.uXmm;
9342 TestVal.uSrc2 = TestData.InVal2.uXmm;
9343
9344 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9345 {
9346 uint32_t fEFlagsOut = fEFlagsIn;
9347 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9348 TestData.fEFlagsIn = fEFlagsIn;
9349 TestData.fEFlagsOut = fEFlagsOut;
9350 TestData.bImm = (uint8_t)u16Imm;
9351 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9352 }
9353 }
9354 }
9355 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9356 }
9357
9358 return RTEXITCODE_SUCCESS;
9359}
9360#endif
9361
9362static void SseComparePcmpestriTest(void)
9363{
9364 X86FXSTATE State;
9365 RT_ZERO(State);
9366
9367 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9368 {
9369 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9370 continue;
9371
9372 uint32_t const cTests = *g_aSsePcmpestri[iFn].pcTests;
9373 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9374 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9375 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9376 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9377 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9378 {
9379 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9380 {
9381 IEMPCMPESTRXSRC TestVal;
9382 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9383 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9384 TestVal.u64Rax = paTests[iTest].u64Rax;
9385 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9386
9387 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9388 uint32_t u32EcxOut = 0;
9389 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9390 if ( fEFlags != paTests[iTest].fEFlagsOut
9391 || u32EcxOut != paTests[iTest].u32EcxOut)
9392 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9393 "%s -> efl=%#08x %RU32\n"
9394 "%s expected %#08x %RU32%s%s\n",
9395 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9396 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9397 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9398 paTests[iTest].bImm,
9399 iVar ? " " : "", fEFlags, u32EcxOut,
9400 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9401 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9402 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9403 }
9404 }
9405 }
9406}
9407
9408
9409TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9410
9411static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9412{
9413 ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9414};
9415
9416#ifdef TSTIEMAIMPL_WITH_GENERATOR
9417static RTEXITCODE SseComparePcmpestrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9418{
9419 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9420
9421 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9422 {
9423 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9424 /** @todo More specials. */
9425 };
9426
9427 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9428 {
9429 PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;
9430
9431 IEMBINARYOUTPUT BinOut;
9432 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpestrm[iFn].pszName), RTEXITCODE_FAILURE);
9433
9434 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9435 {
9436 SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);
9437
9438 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9439 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9440
9441 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9442 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9443 {
9444 TestData.u64Rax = (uint64_t)i64Rax;
9445 TestData.u64Rdx = (uint64_t)i64Rdx;
9446
9447 IEMPCMPESTRXSRC TestVal;
9448 TestVal.uSrc1 = TestData.InVal1.uXmm;
9449 TestVal.uSrc2 = TestData.InVal2.uXmm;
9450 TestVal.u64Rax = TestData.u64Rax;
9451 TestVal.u64Rdx = TestData.u64Rdx;
9452
9453 uint32_t const fEFlagsIn = RandEFlags();
9454 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9455 {
9456 uint32_t fEFlagsOut = fEFlagsIn;
9457 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9458 TestData.fEFlagsIn = fEFlagsIn;
9459 TestData.fEFlagsOut = fEFlagsOut;
9460 TestData.bImm = (uint8_t)u16Imm;
9461 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9462 }
9463
9464                /* Repeat the test with the second input identical to the first. */
9465 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9466 TestVal.uSrc1 = TestData.InVal1.uXmm;
9467 TestVal.uSrc2 = TestData.InVal2.uXmm;
9468
9469 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9470 {
9471 uint32_t fEFlagsOut = fEFlagsIn;
9472 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9473 TestData.fEFlagsIn = fEFlagsIn;
9474 TestData.fEFlagsOut = fEFlagsOut;
9475 TestData.bImm = (uint8_t)u16Imm;
9476 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9477 }
9478 }
9479 }
9480 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9481 }
9482
9483 return RTEXITCODE_SUCCESS;
9484}
9485#endif
9486
9487static void SseComparePcmpestrmTest(void)
9488{
9489 X86FXSTATE State;
9490 RT_ZERO(State);
9491
9492 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9493 {
9494 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9495 continue;
9496
9497 uint32_t const cTests = *g_aSsePcmpestrm[iFn].pcTests;
9498 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9499 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9500 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9501 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9502 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9503 {
9504 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9505 {
9506 IEMPCMPESTRXSRC TestVal;
9507 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9508 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9509 TestVal.u64Rax = paTests[iTest].u64Rax;
9510 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9511
9512 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9513 RTUINT128U OutVal;
9514 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9515 if ( fEFlags != paTests[iTest].fEFlagsOut
9516 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9517 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9518 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9519 "%s -> efl=%#08x %s\n"
9520 "%s expected %#08x %s%s%s\n",
9521 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9522 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9523 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9524 paTests[iTest].bImm,
9525 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9526 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9527 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9528 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9529 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9530 }
9531 }
9532 }
9533}
9534
9535
9536
9537int main(int argc, char **argv)
9538{
9539 int rc = RTR3InitExe(argc, &argv, 0);
9540 if (RT_FAILURE(rc))
9541 return RTMsgInitFailure(rc);
9542
9543 /*
9544     * Determine the host CPU.
9545 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9546 */
9547#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9548 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9549 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9550 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9551#else
9552 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9553#endif
9554
9555 /*
9556 * Parse arguments.
9557 */
9558 enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9559 enmMode = kModeNotSet;
9560 bool fInt = true;
9561 bool fFpuLdSt = true;
9562 bool fFpuBinary1 = true;
9563 bool fFpuBinary2 = true;
9564 bool fFpuOther = true;
9565 bool fCpuData = true;
9566 bool fCommonData = true;
9567 bool fSseFpBinary = true;
9568 bool fSseFpOther = true;
9569 bool fSsePcmpxstrx = true;
9570 uint32_t const cDefaultTests = 96;
9571 uint32_t cTests = cDefaultTests;
9572 RTGETOPTDEF const s_aOptions[] =
9573 {
9574 // mode:
9575 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9576 { "--dump", 'G', RTGETOPT_REQ_NOTHING },
9577 { "--test", 't', RTGETOPT_REQ_NOTHING },
9578 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9579 // test selection (both)
9580 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9581 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9582 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9583 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9584 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9585 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9586 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9587 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9588 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9589 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9590 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9591 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9592 { "--include", 'I', RTGETOPT_REQ_STRING },
9593 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9594 // generation parameters
9595 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9596 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9597 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9598 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9599 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9600 };
9601
9602 RTGETOPTSTATE State;
9603 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9604 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9605
9606 RTGETOPTUNION ValueUnion;
9607 while ((rc = RTGetOpt(&State, &ValueUnion)))
9608 {
9609 switch (rc)
9610 {
9611 case 'g':
9612 enmMode = kModeGenerate;
9613 g_cPicoSecBenchmark = 0;
9614 break;
9615 case 'G':
9616 enmMode = kModeDump;
9617 g_cPicoSecBenchmark = 0;
9618 break;
9619 case 't':
9620 enmMode = kModeTest;
9621 g_cPicoSecBenchmark = 0;
9622 break;
9623 case 'b':
9624 enmMode = kModeTest;
9625                g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in picoseconds */
9626 break;
9627
9628 case 'a':
9629 fCpuData = true;
9630 fCommonData = true;
9631 fInt = true;
9632 fFpuLdSt = true;
9633 fFpuBinary1 = true;
9634 fFpuBinary2 = true;
9635 fFpuOther = true;
9636 fSseFpBinary = true;
9637 fSseFpOther = true;
9638 fSsePcmpxstrx = true;
9639 break;
9640 case 'z':
9641 fCpuData = false;
9642 fCommonData = false;
9643 fInt = false;
9644 fFpuLdSt = false;
9645 fFpuBinary1 = false;
9646 fFpuBinary2 = false;
9647 fFpuOther = false;
9648 fSseFpBinary = false;
9649 fSseFpOther = false;
9650 fSsePcmpxstrx = false;
9651 break;
9652
9653 case 'F':
9654 fFpuLdSt = true;
9655 break;
9656 case 'O':
9657 fFpuOther = true;
9658 break;
9659 case 'B':
9660 fFpuBinary1 = true;
9661 break;
9662 case 'P':
9663 fFpuBinary2 = true;
9664 break;
9665 case 'S':
9666 fSseFpBinary = true;
9667 break;
9668 case 'T':
9669 fSseFpOther = true;
9670 break;
9671 case 'C':
9672 fSsePcmpxstrx = true;
9673 break;
9674 case 'i':
9675 fInt = true;
9676 break;
9677
9678 case 'I':
9679 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
9680 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
9681 RT_ELEMENTS(g_apszIncludeTestPatterns));
9682 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
9683 break;
9684 case 'X':
9685 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
9686 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
9687 RT_ELEMENTS(g_apszExcludeTestPatterns));
9688 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
9689 break;
9690
9691 case 'm':
9692 fCommonData = true;
9693 break;
9694 case 'c':
9695 fCpuData = true;
9696 break;
9697 case 'n':
9698 cTests = ValueUnion.u32;
9699 break;
9700
9701 case 'q':
9702 g_cVerbosity = 0;
9703 break;
9704 case 'v':
9705 g_cVerbosity++;
9706 break;
9707
9708 case 'h':
9709 RTPrintf("usage: %s <-g|-t> [options]\n"
9710 "\n"
9711 "Mode:\n"
9712 " -g, --generate\n"
9713 " Generate test data.\n"
9714 " -t, --test\n"
9715 " Execute tests.\n"
9716 " -b, --benchmark\n"
9717 " Execute tests and do 1/2 seconds of benchmarking.\n"
9718 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
9719 "\n"
9720 "Test selection (both modes):\n"
9721 " -a, --all\n"
9722 " Enable all tests and generated test data. (default)\n"
9723 " -z, --zap, --none\n"
9724 " Disable all tests and test data types.\n"
9725 " -i, --int\n"
9726 " Enable non-FPU tests.\n"
9727 " -F, --fpu-ld-st\n"
9728 " Enable FPU load and store tests.\n"
9729 " -B, --fpu-binary-1\n"
9730 " Enable FPU binary 80-bit FP tests.\n"
9731 " -P, --fpu-binary-2\n"
9732 " Enable FPU binary 64- and 32-bit FP tests.\n"
9733 " -O, --fpu-other\n"
9734                      "    Enable other FPU tests.\n"
9735 " -S, --sse-fp-binary\n"
9736 " Enable SSE binary 64- and 32-bit FP tests.\n"
9737 " -T, --sse-fp-other\n"
9738 " Enable misc SSE 64- and 32-bit FP tests.\n"
9739 " -C, --sse-pcmpxstrx\n"
9740 " Enable SSE pcmpxstrx tests.\n"
9741                      " -I, --include=<test-pattern>\n"
9742 " Enable tests matching the given pattern.\n"
9743                      " -X, --exclude=<test-pattern>\n"
9744 " Skip tests matching the given pattern (overrides --include).\n"
9745 "\n"
9746 "Generation:\n"
9747 " -m, --common\n"
9748 " Enable generating common test data.\n"
9749                      " -c, --cpu\n"
9750 " Enable generating CPU specific test data.\n"
9751                      " -n, --number-of-tests <count>\n"
9752 " Number of tests to generate. Default: %u\n"
9753 "\n"
9754 "Other:\n"
9755 " -v, --verbose\n"
9756 " -q, --quiet\n"
9757 " Noise level. Default: --quiet\n"
9758 , argv[0], cDefaultTests);
9759 return RTEXITCODE_SUCCESS;
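            /* Illustrative invocations (assuming the binary is named tstIEMAImpl):
             *   tstIEMAImpl -t                    - run all tests
             *   tstIEMAImpl -t -z -C              - run only the SSE pcmpxstrx tests
             *   tstIEMAImpl -g -n 1024            - generate test data, 1024 tests per function
             *   tstIEMAImpl -t -I cvtpd2pi_u128   - run just the tests matching that pattern */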
9760 default:
9761 return RTGetOptPrintError(rc, &ValueUnion);
9762 }
9763 }
9764
9765 /*
9766 * Generate data?
9767 */
9768 if (enmMode == kModeGenerate)
9769 {
9770#ifdef TSTIEMAIMPL_WITH_GENERATOR
9771 char szCpuDesc[256] = {0};
9772 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
9773 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
9774# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
9775 const char * const pszBitBucket = "NUL";
9776# else
9777 const char * const pszBitBucket = "/dev/null";
9778# endif
9779
9780 if (cTests == 0)
9781 cTests = cDefaultTests;
9782 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
9783 g_cZeroSrcTests = g_cZeroDstTests * 2;
9784
9785 if (fInt)
9786 {
9787 const char * const apszNameFmts[] =
9788 {
9789 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataInt-%s.bin.gz" : NULL,
9790 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Intel.bin.gz" : NULL,
9791 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Amd.bin.gz" : NULL,
9792 };
9793 RTEXITCODE rcExit = BinU8Generate(cTests, apszNameFmts);
9794 if (rcExit == RTEXITCODE_SUCCESS)
9795 rcExit = BinU16Generate(cTests, apszNameFmts);
9796 if (rcExit == RTEXITCODE_SUCCESS)
9797 rcExit = BinU32Generate(cTests, apszNameFmts);
9798 if (rcExit == RTEXITCODE_SUCCESS)
9799 rcExit = BinU64Generate(cTests, apszNameFmts);
9800 if (rcExit == RTEXITCODE_SUCCESS)
9801 rcExit = ShiftDblGenerate(RT_MAX(cTests, 128), apszNameFmts);
9802 if (rcExit == RTEXITCODE_SUCCESS)
9803 rcExit = UnaryGenerate(cTests, apszNameFmts);
9804 if (rcExit == RTEXITCODE_SUCCESS)
9805 rcExit = ShiftGenerate(cTests, apszNameFmts);
9806 if (rcExit == RTEXITCODE_SUCCESS)
9807 rcExit = MulDivGenerate(cTests, apszNameFmts);
9808 if (rcExit != RTEXITCODE_SUCCESS)
9809 return rcExit;
9810 }
9811
9812 if (fFpuLdSt)
9813 {
9814 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
9815 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9816 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9817 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
9818 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9819 if (!pStrmData || !pStrmDataCpu)
9820 return RTEXITCODE_FAILURE;
9821
9822 FpuLdConstGenerate(pStrmData, cTests);
9823 FpuLdIntGenerate(pStrmData, cTests);
9824 FpuLdD80Generate(pStrmData, cTests);
9825 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
9826 FpuStD80Generate(pStrmData, cTests);
9827 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9828 FpuLdMemGenerate(pStrmData, cTests2);
9829 FpuStMemGenerate(pStrmData, cTests2);
9830
9831 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9832 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9833 if (rcExit != RTEXITCODE_SUCCESS)
9834 return rcExit;
9835 }
9836
9837 if (fFpuBinary1)
9838 {
9839 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
9840 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9841 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9842 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
9843 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9844 if (!pStrmData || !pStrmDataCpu)
9845 return RTEXITCODE_FAILURE;
9846
9847 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9848 FpuBinaryFswR80Generate(pStrmData, cTests);
9849 FpuBinaryEflR80Generate(pStrmData, cTests);
9850
9851 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9852 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9853 if (rcExit != RTEXITCODE_SUCCESS)
9854 return rcExit;
9855 }
9856
9857 if (fFpuBinary2)
9858 {
9859 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
9860 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9861 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9862 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
9863 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9864 if (!pStrmData || !pStrmDataCpu)
9865 return RTEXITCODE_FAILURE;
9866
9867 FpuBinaryR64Generate(pStrmData, cTests);
9868 FpuBinaryR32Generate(pStrmData, cTests);
9869 FpuBinaryI32Generate(pStrmData, cTests);
9870 FpuBinaryI16Generate(pStrmData, cTests);
9871 FpuBinaryFswR64Generate(pStrmData, cTests);
9872 FpuBinaryFswR32Generate(pStrmData, cTests);
9873 FpuBinaryFswI32Generate(pStrmData, cTests);
9874 FpuBinaryFswI16Generate(pStrmData, cTests);
9875
9876 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9877 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9878 if (rcExit != RTEXITCODE_SUCCESS)
9879 return rcExit;
9880 }
9881
9882 if (fFpuOther)
9883 {
9884 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
9885 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9886 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9887 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
9888 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9889 if (!pStrmData || !pStrmDataCpu)
9890 return RTEXITCODE_FAILURE;
9891
9892 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9893 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
9894 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
9895
9896 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9897 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9898 if (rcExit != RTEXITCODE_SUCCESS)
9899 return rcExit;
9900 }
9901
9902 if (fSseFpBinary)
9903 {
9904 const char * const pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin.gz" : NULL;
9905
9906 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9907 if (rcExit == RTEXITCODE_SUCCESS)
9908 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9909 if (rcExit == RTEXITCODE_SUCCESS)
9910 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9911 if (rcExit == RTEXITCODE_SUCCESS)
9912 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9913
9914 if (rcExit == RTEXITCODE_SUCCESS)
9915 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9916 if (rcExit == RTEXITCODE_SUCCESS)
9917 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9918 if (rcExit == RTEXITCODE_SUCCESS)
9919 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9920 if (rcExit == RTEXITCODE_SUCCESS)
9921 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9922
9923 if (rcExit == RTEXITCODE_SUCCESS)
9924 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9925 if (rcExit == RTEXITCODE_SUCCESS)
9926 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9927 if (rcExit == RTEXITCODE_SUCCESS)
9928 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9929 if (rcExit == RTEXITCODE_SUCCESS)
9930 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9931 if (rcExit != RTEXITCODE_SUCCESS)
9932 return rcExit;
9933 }
9934
9935 if (fSseFpOther)
9936 {
9937 const char * const pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin.gz" : NULL;
9938 const char * const pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin.gz" : NULL;
9939
9940 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
9941 if (rcExit == RTEXITCODE_SUCCESS)
9942 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
9943 if (rcExit == RTEXITCODE_SUCCESS)
9944 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
9945 if (rcExit == RTEXITCODE_SUCCESS)
9946 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
9947 if (rcExit == RTEXITCODE_SUCCESS)
9948 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
9949 if (rcExit == RTEXITCODE_SUCCESS)
9950 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
9951 if (rcExit == RTEXITCODE_SUCCESS)
9952 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
9953 if (rcExit == RTEXITCODE_SUCCESS)
9954 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
9955 if (rcExit == RTEXITCODE_SUCCESS)
9956 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
9957 if (rcExit == RTEXITCODE_SUCCESS)
9958 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
9959 if (rcExit == RTEXITCODE_SUCCESS)
9960 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
9961 if (rcExit == RTEXITCODE_SUCCESS)
9962 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
9963 if (rcExit != RTEXITCODE_SUCCESS)
9964 return rcExit;
9965 }
9966
9967 if (fSsePcmpxstrx)
9968 {
9969 const char * const pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz" : NULL;
9970
9971 RTEXITCODE rcExit = SseComparePcmpistriGenerate(pszDataFileFmtCmp, cTests);
9972 if (rcExit == RTEXITCODE_SUCCESS)
9973 rcExit = SseComparePcmpistrmGenerate(pszDataFileFmtCmp, cTests);
9974 if (rcExit == RTEXITCODE_SUCCESS)
9975 rcExit = SseComparePcmpestriGenerate(pszDataFileFmtCmp, cTests);
9976 if (rcExit == RTEXITCODE_SUCCESS)
9977 rcExit = SseComparePcmpestrmGenerate(pszDataFileFmtCmp, cTests);
9978 if (rcExit != RTEXITCODE_SUCCESS)
9979 return rcExit;
9980 }
9981
9982 return RTEXITCODE_SUCCESS;
9983#else
9984 return RTMsgErrorExitFailure("Test data generator not compiled in!");
9985#endif
9986 }
9987
9988 /*
9989 * Dump tables.
9990 */
9991 if (enmMode == kModeDump)
9992 {
9993#ifdef TSTIEMAIMPL_WITH_GENERATOR
9994 if (fInt)
9995 {
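                /* One output name format per EFLAGS behaviour flavour, indexed as the
                   inline comments indicate; entries left NULL because the corresponding
                   data set was not requested are presumably skipped by the dumpers. */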
9996 const char * const apszNameFmts[] =
9997 {
9998 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataInt-%s.bin.gz" : NULL,
9999 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Intel.bin.gz" : NULL,
10000 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Amd.bin.gz" : NULL,
10001 };
10002 RTEXITCODE rcExit = BinU8DumpAll(apszNameFmts);
10003 if (rcExit == RTEXITCODE_SUCCESS)
10004 rcExit = BinU16DumpAll(apszNameFmts);
10005 if (rcExit == RTEXITCODE_SUCCESS)
10006 rcExit = BinU32DumpAll(apszNameFmts);
10007 if (rcExit == RTEXITCODE_SUCCESS)
10008 rcExit = BinU64DumpAll(apszNameFmts);
10009 if (rcExit == RTEXITCODE_SUCCESS)
10010 rcExit = ShiftDblDumpAll(apszNameFmts);
10011 if (rcExit == RTEXITCODE_SUCCESS)
10012 rcExit = UnaryDumpAll(apszNameFmts);
10013 if (rcExit == RTEXITCODE_SUCCESS)
10014 rcExit = ShiftDumpAll(apszNameFmts);
10015 if (rcExit == RTEXITCODE_SUCCESS)
10016 rcExit = MulDivDumpAll(apszNameFmts);
10017 if (rcExit != RTEXITCODE_SUCCESS)
10018 return rcExit;
10019 }
10020
10021 return RTEXITCODE_SUCCESS;
10022#else
10023 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10024#endif
10025 }
10026
10027
10028 /*
10029	 * Do testing. Currently disabled by default as the data needs to be checked
10030	 * on both Intel and AMD systems first.
10031 */
10032 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
10033 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10034 if (enmMode == kModeTest)
10035 {
10036 RTTestBanner(g_hTest);
10037
10038 /* Allocate guarded memory for use in the tests. */
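        /* Each variable lives in its own guarded allocation, aligned to its own size,
           so stray accesses by the assembly workers fault immediately; fHead=false
           presumably puts the guard page after the block rather than in front of it. */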
10039#define ALLOC_GUARDED_VAR(a_puVar) do { \
10040 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
10041 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
10042 } while (0)
10043 ALLOC_GUARDED_VAR(g_pu8);
10044 ALLOC_GUARDED_VAR(g_pu16);
10045 ALLOC_GUARDED_VAR(g_pu32);
10046 ALLOC_GUARDED_VAR(g_pu64);
10047 ALLOC_GUARDED_VAR(g_pu128);
10048 ALLOC_GUARDED_VAR(g_pu8Two);
10049 ALLOC_GUARDED_VAR(g_pu16Two);
10050 ALLOC_GUARDED_VAR(g_pu32Two);
10051 ALLOC_GUARDED_VAR(g_pu64Two);
10052 ALLOC_GUARDED_VAR(g_pu128Two);
10053 ALLOC_GUARDED_VAR(g_pfEfl);
10054 if (RTTestErrorCount(g_hTest) == 0)
10055 {
10056 if (fInt)
10057 {
10058 BinU8Test();
10059 BinU16Test();
10060 BinU32Test();
10061 BinU64Test();
10062 XchgTest();
10063 XaddTest();
10064 CmpXchgTest();
10065 CmpXchg8bTest();
10066 CmpXchg16bTest();
10067 ShiftDblTest();
10068 UnaryTest();
10069 ShiftTest();
10070 MulDivTest();
10071 BswapTest();
10072 }
10073
10074 if (fFpuLdSt)
10075 {
10076 FpuLoadConstTest();
10077 FpuLdMemTest();
10078 FpuLdIntTest();
10079 FpuLdD80Test();
10080 FpuStMemTest();
10081 FpuStIntTest();
10082 FpuStD80Test();
10083 }
10084
10085 if (fFpuBinary1)
10086 {
10087 FpuBinaryR80Test();
10088 FpuBinaryFswR80Test();
10089 FpuBinaryEflR80Test();
10090 }
10091
10092 if (fFpuBinary2)
10093 {
10094 FpuBinaryR64Test();
10095 FpuBinaryR32Test();
10096 FpuBinaryI32Test();
10097 FpuBinaryI16Test();
10098 FpuBinaryFswR64Test();
10099 FpuBinaryFswR32Test();
10100 FpuBinaryFswI32Test();
10101 FpuBinaryFswI16Test();
10102 }
10103
10104 if (fFpuOther)
10105 {
10106 FpuUnaryR80Test();
10107 FpuUnaryFswR80Test();
10108 FpuUnaryTwoR80Test();
10109 }
10110
10111 if (fSseFpBinary)
10112 {
10113 SseBinaryR32Test();
10114 SseBinaryR64Test();
10115 SseBinaryU128R32Test();
10116 SseBinaryU128R64Test();
10117
10118 SseBinaryI32R64Test();
10119 SseBinaryI64R64Test();
10120 SseBinaryI32R32Test();
10121 SseBinaryI64R32Test();
10122
10123 SseBinaryR64I32Test();
10124 SseBinaryR64I64Test();
10125 SseBinaryR32I32Test();
10126 SseBinaryR32I64Test();
10127 }
10128
10129 if (fSseFpOther)
10130 {
10131 SseCompareEflR32R32Test();
10132 SseCompareEflR64R64Test();
10134 SseCompareF2XmmR32Imm8Test();
10135 SseCompareF2XmmR64Imm8Test();
10136 SseConvertXmmI32R32Test();
10137 SseConvertXmmR32I32Test();
10138 SseConvertXmmI32R64Test();
10139 SseConvertXmmR64I32Test();
10140 SseConvertMmXmmTest();
10141 SseConvertXmmR32MmTest();
10142 SseConvertXmmR64MmTest();
10143 SseConvertMmI32XmmR32Test();
10144 }
10145
10146 if (fSsePcmpxstrx)
10147 {
10148 SseComparePcmpistriTest();
10149 SseComparePcmpistrmTest();
10150 SseComparePcmpestriTest();
10151 SseComparePcmpestrmTest();
10152 }
10153 }
10154 return RTTestSummaryAndDestroy(g_hTest);
10155 }
10156 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10157}
10158