VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@103064

Last change on this file was 103064, checked in by vboxsync, 13 months ago

tstIEMAImpl: Working on converting the C++ data to compressed binary. FP. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 479.1 KB
1/* $Id: tstIEMAImpl.cpp 103064 2024-01-25 14:41:25Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/err.h>
39#include <iprt/getopt.h>
40#include <iprt/initterm.h>
41#include <iprt/file.h>
42#include <iprt/mem.h>
43#include <iprt/message.h>
44#include <iprt/mp.h>
45#include <iprt/rand.h>
46#include <iprt/stream.h>
47#include <iprt/string.h>
48#include <iprt/test.h>
49#include <iprt/time.h>
50#include <iprt/thread.h>
51#include <iprt/vfs.h>
52#include <iprt/zip.h>
53#include <VBox/version.h>
54
55#include "tstIEMAImpl.h"
56
57
58/*********************************************************************************************************************************
59* Defined Constants And Macros *
60*********************************************************************************************************************************/
61#define ENTRY_BIN_FIX(a_Name) ENTRY_BIN_FIX_EX(a_Name, 0)
62#ifdef TSTIEMAIMPL_WITH_GENERATOR
63# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) \
64 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
65 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
66 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
67 RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
68#else
69# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) ENTRY_BIN_EX(a_Name, a_uExtra)
70#endif
71
72#define ENTRY_BIN_PFN_CAST(a_Name, a_pfnType) ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, 0)
73#define ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
74 { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
75 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
76 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
77
78#define ENTRY_BIN(a_Name) ENTRY_BIN_EX(a_Name, 0)
79#define ENTRY_BIN_EX(a_Name, a_uExtra) \
80 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
81 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
82 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
83
84#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
85#ifndef IEM_WITHOUT_ASSEMBLY
86# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
87 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
88 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
89 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
90#else
91# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
92 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
93 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
94 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
95#endif
96
97#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
98#ifndef IEM_WITHOUT_ASSEMBLY
99# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
100 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
101 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
102 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
103#else
104# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
105 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
106 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
107 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
108#endif
109
110#define ENTRY_BIN_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, 0)
111#define ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
112 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
113 g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
114 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
115
116#define ENTRY_BIN_AMD(a_Name, a_fEflUndef) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, 0)
117#define ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
118 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
119 g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
120 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
121
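/* For illustration (a sketch based on the macros above): ENTRY_BIN(add_u8) expands
   to an initializer along the lines of
       { "add_u8", iemAImpl_add_u8, NULL,
         g_abTests_add_u8, &g_cbTests_add_u8,
         0, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE }
   while ENTRY_BIN_INTEL(bsf_u16, ...) names the entry "bsf_u16_intel" and pairs
   iemAImpl_bsf_u16_intel (pfn) with iemAImpl_bsf_u16 (pfnNative). */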
122#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
123 typedef struct a_TypeName \
124 { \
125 const char *pszName; \
126 const a_FunctionPtrType pfn; \
127 const a_FunctionPtrType pfnNative; \
128 void const * const pvCompressedTests; \
129 uint32_t const *pcbCompressedTests; \
130 uint32_t const uExtra; \
131 uint8_t const idxCpuEflFlavour; \
132 uint16_t const cFixedTests; \
133 a_TestType const * const paFixedTests; \
134 a_TestType const *paTests; /**< The decompressed info. */ \
135 uint32_t cTests; /**< The decompressed info. */ \
136 } a_TypeName
137
138#define COUNT_VARIATIONS(a_SubTest) \
139 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
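/* E.g. a subtest whose idxCpuEflFlavour equals the host's g_idxCpuEflFlavour and
   which has a pfnNative counts (and is run) as two variations, pfn first and then
   pfnNative; every other subtest counts as a single variation. */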
140
141
142/*********************************************************************************************************************************
143* Structures and Typedefs *
144*********************************************************************************************************************************/
145typedef struct IEMBINARYOUTPUT
146{
147 /** The output file. */
148 RTVFSFILE hVfsFile;
149 /** The stream we write uncompressed binary test data to. */
150 RTVFSIOSTREAM hVfsUncompressed;
151 /** Write status. */
152 int rcWrite;
154 /** Set if the output is being discarded (no output filename given). */
154 bool fNull;
155 /** Filename. */
156 char szFilename[79];
157} IEMBINARYOUTPUT;
158typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
159
160
161/*********************************************************************************************************************************
162* Global Variables *
163*********************************************************************************************************************************/
164static RTTEST g_hTest;
165static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
166#ifdef TSTIEMAIMPL_WITH_GENERATOR
167static uint32_t g_cZeroDstTests = 2;
168static uint32_t g_cZeroSrcTests = 4;
169#endif
170static uint8_t *g_pu8, *g_pu8Two;
171static uint16_t *g_pu16, *g_pu16Two;
172static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
173static uint64_t *g_pu64, *g_pu64Two;
174static RTUINT128U *g_pu128, *g_pu128Two;
175
176static char g_aszBuf[32][256];
177static unsigned g_idxBuf = 0;
178
179static uint32_t g_cIncludeTestPatterns;
180static uint32_t g_cExcludeTestPatterns;
181static const char *g_apszIncludeTestPatterns[64];
182static const char *g_apszExcludeTestPatterns[64];
183
184/** Higher value means longer benchmarking. */
185static uint64_t g_cPicoSecBenchmark = 0;
186
187static unsigned g_cVerbosity = 0;
188
189
190/*********************************************************************************************************************************
191* Internal Functions *
192*********************************************************************************************************************************/
193static const char *FormatR80(PCRTFLOAT80U pr80);
194static const char *FormatR64(PCRTFLOAT64U pr64);
195static const char *FormatR32(PCRTFLOAT32U pr32);
196
197
198/*
199 * Random helpers.
200 */
201
202static uint32_t RandEFlags(void)
203{
204 uint32_t fEfl = RTRandU32();
205 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
206}
207
208#ifdef TSTIEMAIMPL_WITH_GENERATOR
209
210static uint8_t RandU8(void)
211{
212 return RTRandU32Ex(0, 0xff);
213}
214
215
216static uint16_t RandU16(void)
217{
218 return RTRandU32Ex(0, 0xffff);
219}
220
221
222static uint32_t RandU32(void)
223{
224 return RTRandU32();
225}
226
227#endif
228
229static uint64_t RandU64(void)
230{
231 return RTRandU64();
232}
233
234
235static RTUINT128U RandU128(void)
236{
237 RTUINT128U Ret;
238 Ret.s.Hi = RTRandU64();
239 Ret.s.Lo = RTRandU64();
240 return Ret;
241}
242
243#ifdef TSTIEMAIMPL_WITH_GENERATOR
244
245static uint8_t RandU8Dst(uint32_t iTest)
246{
247 if (iTest < g_cZeroDstTests)
248 return 0;
249 return RandU8();
250}
251
252
253static uint8_t RandU8Src(uint32_t iTest)
254{
255 if (iTest < g_cZeroSrcTests)
256 return 0;
257 return RandU8();
258}
259
260
261static uint16_t RandU16Dst(uint32_t iTest)
262{
263 if (iTest < g_cZeroDstTests)
264 return 0;
265 return RandU16();
266}
267
268
269static uint16_t RandU16Src(uint32_t iTest)
270{
271 if (iTest < g_cZeroSrcTests)
272 return 0;
273 return RandU16();
274}
275
276
277static uint32_t RandU32Dst(uint32_t iTest)
278{
279 if (iTest < g_cZeroDstTests)
280 return 0;
281 return RandU32();
282}
283
284
285static uint32_t RandU32Src(uint32_t iTest)
286{
287 if (iTest < g_cZeroSrcTests)
288 return 0;
289 return RandU32();
290}
291
292
293static uint64_t RandU64Dst(uint32_t iTest)
294{
295 if (iTest < g_cZeroDstTests)
296 return 0;
297 return RandU64();
298}
299
300
301static uint64_t RandU64Src(uint32_t iTest)
302{
303 if (iTest < g_cZeroSrcTests)
304 return 0;
305 return RandU64();
306}
307
308
309/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
310static int16_t RandI16Src2(uint32_t iTest)
311{
312 if (iTest < 18 * 4)
313 switch (iTest % 4)
314 {
315 case 0: return 0;
316 case 1: return INT16_MAX;
317 case 2: return INT16_MIN;
318 case 3: break;
319 }
320 return (int16_t)RandU16();
321}
322
323
324/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
325static int32_t RandI32Src2(uint32_t iTest)
326{
327 if (iTest < 18 * 4)
328 switch (iTest % 4)
329 {
330 case 0: return 0;
331 case 1: return INT32_MAX;
332 case 2: return INT32_MIN;
333 case 3: break;
334 }
335 return (int32_t)RandU32();
336}
337
338
339static int64_t RandI64Src(uint32_t iTest)
340{
341 RT_NOREF(iTest);
342 return (int64_t)RandU64();
343}
344
345
346static uint16_t RandFcw(void)
347{
348 return RandU16() & ~X86_FCW_ZERO_MASK;
349}
350
351
352static uint16_t RandFsw(void)
353{
354 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
355 return RandU16();
356}
357
358
359static uint32_t RandMxcsr(void)
360{
361 return RandU32() & ~X86_MXCSR_ZERO_MASK;
362}
363
364
365static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
366{
367 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
368 pr80->sj64.uFraction >>= cShift;
369 else
370 pr80->sj64.uFraction = (cShift % 19) + 1;
371}
372
373
374
375static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
376{
377 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
378
379 RTFLOAT80U r80;
380 r80.au64[0] = RandU64();
381 r80.au16[4] = RandU16();
382
383 /*
384 * Adjust the random stuff according to bType.
385 */
386 bType &= 0x1f;
387 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
388 {
389 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
390 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
391 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
392 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
393 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
394 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
395 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
396 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
397 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
398 }
399 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
400 {
401 /* Denormals (4,5) and Pseudo denormals (6,7) */
402 if (bType & 1)
403 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
404 else if (r80.sj64.uFraction == 0 && bType < 6)
405 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
406 r80.sj64.uExponent = 0;
407 r80.sj64.fInteger = bType >= 6;
408 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
409 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
410 }
411 else if (bType == 8 || bType == 9)
412 {
413 /* Pseudo NaN. */
414 if (bType & 1)
415 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
416 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
417 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
418 r80.sj64.uExponent = 0x7fff;
419 if (r80.sj64.fInteger)
420 r80.sj64.uFraction |= RT_BIT_64(62);
421 else
422 r80.sj64.uFraction &= ~RT_BIT_64(62);
423 r80.sj64.fInteger = 0;
424 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
425 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
426 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
427 }
428 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
429 {
430 /* Quiet and signalling NaNs. */
431 if (bType & 1)
432 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
433 else if (r80.sj64.uFraction == 0)
434 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
435 r80.sj64.uExponent = 0x7fff;
436 if (bType < 12)
437 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
438 else
439 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
440 r80.sj64.fInteger = 1;
441 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
442 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
443 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
444 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
445 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
446 }
447 else if (bType == 14 || bType == 15)
448 {
449 /* Unnormals */
450 if (bType & 1)
451 SafeR80FractionShift(&r80, RandU8() % 62);
452 r80.sj64.fInteger = 0;
453 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
454 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
455 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
456 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
457 }
458 else if (bType < 26)
459 {
460 /* Make sure we have lots of normalized values. */
461 if (!fIntTarget)
462 {
463 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
464 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
465 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
466 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
467 r80.sj64.fInteger = 1;
468 if (r80.sj64.uExponent <= uMinExp)
469 r80.sj64.uExponent = uMinExp + 1;
470 else if (r80.sj64.uExponent >= uMaxExp)
471 r80.sj64.uExponent = uMaxExp - 1;
472
473 if (bType == 16)
474 { /* All 1s is useful for testing rounding. Also try to trigger special
475 behaviour by sometimes rounding out of range, while we're at it. */
476 r80.sj64.uFraction = RT_BIT_64(63) - 1;
477 uint8_t bExp = RandU8();
478 if ((bExp & 3) == 0)
479 r80.sj64.uExponent = uMaxExp - 1;
480 else if ((bExp & 3) == 1)
481 r80.sj64.uExponent = uMinExp + 1;
482 else if ((bExp & 3) == 2)
483 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
484 }
485 }
486 else
487 {
488 /* integer target: */
489 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
490 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
491 r80.sj64.fInteger = 1;
492 if (r80.sj64.uExponent < uMinExp)
493 r80.sj64.uExponent = uMinExp;
494 else if (r80.sj64.uExponent > uMaxExp)
495 r80.sj64.uExponent = uMaxExp;
496
497 if (bType == 16)
498 { /* All 1s is useful for testing rounding. Also try to trigger special
499 behaviour by sometimes rounding out of range, while we're at it. */
500 r80.sj64.uFraction = RT_BIT_64(63) - 1;
501 uint8_t bExp = RandU8();
502 if ((bExp & 3) == 0)
503 r80.sj64.uExponent = uMaxExp;
504 else if ((bExp & 3) == 1)
505 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
506 }
507 }
508
509 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
510 }
511 return r80;
512}
513
514
515static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
516{
517 /*
518 * Make it more likely that we get a good selection of special values.
519 */
520 return RandR80Ex(RandU8(), cTarget, fIntTarget);
521
522}
523
524
525static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
526{
527 /* Make sure we cover all the basic types first before going for random selection: */
528 if (iTest <= 18)
529 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
530 return RandR80(cTarget, fIntTarget);
531}
532
533
534/**
535 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
536 * to a 0..18 range, covering all the basic value types.
537 */
538static uint8_t RandR80Src12RemapType(uint8_t bType)
539{
540 switch (bType)
541 {
542 case 0: return 18; /* normal */
543 case 1: return 16; /* normal extreme rounding */
544 case 2: return 14; /* unnormal */
545 case 3: return 12; /* Signalling NaN */
546 case 4: return 10; /* Quiet NaN */
547 case 5: return 8; /* PseudoNaN */
548 case 6: return 6; /* Pseudo Denormal */
549 case 7: return 4; /* Denormal */
550 case 8: return 3; /* Indefinite */
551 case 9: return 2; /* Infinity */
552 case 10: return 1; /* Pseudo-Infinity */
553 case 11: return 0; /* Zero */
554 default: AssertFailedReturn(18);
555 }
556}
557
558
559/**
560 * This works in tandem with RandR80Src2 to make sure we cover all operand
561 * type mixes first before we venture into regular random testing.
562 *
563 * There are 11 basic variations when we leave out the five odd ones using
564 * SafeR80FractionShift. Because of the special normalized value targeting
565 * rounding, we make it an even 12, giving 144 combinations for two operands.
566 */
567static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
568{
569 if (cPartnerBits == 80)
570 {
571 Assert(!fPartnerInt);
572 if (iTest < 12 * 12)
573 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
574 }
575 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
576 {
577 if (iTest < 12 * 10)
578 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
579 }
580 else if (iTest < 18 * 4 && fPartnerInt)
581 return RandR80Ex(iTest / 4);
582 return RandR80();
583}
584
585
586/** Partner to RandR80Src1. */
587static RTFLOAT80U RandR80Src2(uint32_t iTest)
588{
589 if (iTest < 12 * 12)
590 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
591 return RandR80();
592}
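/* Worked example of the coverage scheme (80-bit partner): for iTest = 37,
   RandR80Src1 uses RandR80Src12RemapType(37 / 12) = remap(3) = 12 (signalling NaN)
   and RandR80Src2 uses RandR80Src12RemapType(37 % 12) = remap(1) = 16 (normal with
   extreme rounding), so the first 144 tests enumerate all 12 x 12 operand type
   pairs before both helpers fall back to purely random values. */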
593
594
595static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
596{
597 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
598 pr64->s64.uFraction >>= cShift;
599 else
600 pr64->s64.uFraction = (cShift % 19) + 1;
601}
602
603
604static RTFLOAT64U RandR64Ex(uint8_t bType)
605{
606 RTFLOAT64U r64;
607 r64.u = RandU64();
608
609 /*
610 * Make it more likely that we get a good selection of special values.
611 * On average 6 out of 16 calls should return a special value.
612 */
613 bType &= 0xf;
614 if (bType == 0 || bType == 1)
615 {
616 /* 0 or Infinity. We only keep fSign here. */
617 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
618 r64.s.uFractionHigh = 0;
619 r64.s.uFractionLow = 0;
620 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
621 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
622 }
623 else if (bType == 2 || bType == 3)
624 {
625 /* Subnormals */
626 if (bType == 3)
627 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
628 else if (r64.s64.uFraction == 0)
629 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
630 r64.s64.uExponent = 0;
631 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
632 }
633 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
634 {
635 /* NaNs */
636 if (bType & 1)
637 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
638 else if (r64.s64.uFraction == 0)
639 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
640 r64.s64.uExponent = 0x7ff;
641 if (bType < 6)
642 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
643 else
644 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
645 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
646 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
647 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
648 }
649 else if (bType < 12)
650 {
651 /* Make sure we have lots of normalized values. */
652 if (r64.s.uExponent == 0)
653 r64.s.uExponent = 1;
654 else if (r64.s.uExponent == 0x7ff)
655 r64.s.uExponent = 0x7fe;
656 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
657 }
658 return r64;
659}
660
661
662static RTFLOAT64U RandR64Src(uint32_t iTest)
663{
664 if (iTest < 16)
665 return RandR64Ex(iTest);
666 return RandR64Ex(RandU8());
667}
668
669
670/** Pairing with an 80-bit floating point arg. */
671static RTFLOAT64U RandR64Src2(uint32_t iTest)
672{
673 if (iTest < 12 * 10)
674 return RandR64Ex(9 - iTest % 10); /* start with normal values */
675 return RandR64Ex(RandU8());
676}
677
678
679static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
680{
681 if (pr32->s.uFraction >= RT_BIT_32(cShift))
682 pr32->s.uFraction >>= cShift;
683 else
684 pr32->s.uFraction = (cShift % 19) + 1;
685}
686
687
688static RTFLOAT32U RandR32Ex(uint8_t bType)
689{
690 RTFLOAT32U r32;
691 r32.u = RandU32();
692
693 /*
694 * Make it more likely that we get a good selection of special values.
695 * On average 6 out of 16 calls should return a special value.
696 */
697 bType &= 0xf;
698 if (bType == 0 || bType == 1)
699 {
700 /* 0 or Infinity. We only keep fSign here. */
701 r32.s.uExponent = bType == 0 ? 0 : 0xff;
702 r32.s.uFraction = 0;
703 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
704 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
705 }
706 else if (bType == 2 || bType == 3)
707 {
708 /* Subnormals */
709 if (bType == 3)
710 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
711 else if (r32.s.uFraction == 0)
712 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
713 r32.s.uExponent = 0;
714 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
715 }
716 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
717 {
718 /* NaNs */
719 if (bType & 1)
720 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
721 else if (r32.s.uFraction == 0)
722 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
723 r32.s.uExponent = 0xff;
724 if (bType < 6)
725 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
726 else
727 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
728 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
729 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
730 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
731 }
732 else if (bType < 12)
733 {
734 /* Make sure we have lots of normalized values. */
735 if (r32.s.uExponent == 0)
736 r32.s.uExponent = 1;
737 else if (r32.s.uExponent == 0xff)
738 r32.s.uExponent = 0xfe;
739 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
740 }
741 return r32;
742}
743
744
745static RTFLOAT32U RandR32Src(uint32_t iTest)
746{
747 if (iTest < 16)
748 return RandR32Ex(iTest);
749 return RandR32Ex(RandU8());
750}
751
752
753/** Pairing with an 80-bit floating point arg. */
754static RTFLOAT32U RandR32Src2(uint32_t iTest)
755{
756 if (iTest < 12 * 10)
757 return RandR32Ex(9 - iTest % 10); /* start with normal values */
758 return RandR32Ex(RandU8());
759}
760
761
762static RTPBCD80U RandD80Src(uint32_t iTest)
763{
764 if (iTest < 3)
765 {
766 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
767 return d80Zero;
768 }
769 if (iTest < 5)
770 {
771 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
772 return d80Ind;
773 }
774
775 RTPBCD80U d80;
776 uint8_t b = RandU8();
777 d80.s.fSign = b & 1;
778
779 if ((iTest & 7) >= 6)
780 {
781 /* Illegal */
782 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
783 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
784 d80.s.abPairs[iPair] = RandU8();
785 }
786 else
787 {
788 /* Normal */
789 d80.s.uPad = 0;
790 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
791 {
792 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
793 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
794 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
795 }
796 }
797 return d80;
798}
799
800
801static const char *GenFormatR80(PCRTFLOAT80U plrd)
802{
803 if (RTFLOAT80U_IS_ZERO(plrd))
804 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
805 if (RTFLOAT80U_IS_INF(plrd))
806 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
807 if (RTFLOAT80U_IS_INDEFINITE(plrd))
808 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
809 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
810 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
811 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
812 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
813
814 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
815 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
816 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
817 return pszBuf;
818}
819
820static const char *GenFormatR64(PCRTFLOAT64U prd)
821{
822 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
823 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
824 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
825 return pszBuf;
826}
827
828
829static const char *GenFormatR32(PCRTFLOAT32U pr)
830{
831 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
832 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
833 return pszBuf;
834}
835
836
837static const char *GenFormatD80(PCRTPBCD80U pd80)
838{
839 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
840 size_t off;
841 if (pd80->s.uPad == 0)
842 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
843 else
844 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
845 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
846 while (iPair-- > 0)
847 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
848 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
849 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
850 pszBuf[off++] = ')';
851 pszBuf[off++] = '\0';
852 return pszBuf;
853}
854
855
856static const char *GenFormatI64(int64_t i64)
857{
858 if (i64 == INT64_MIN) /* This one is problematic */
859 return "INT64_MIN";
860 if (i64 == INT64_MAX)
861 return "INT64_MAX";
862 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
863 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
864 return pszBuf;
865}
866
867#if 0 /* unused */
868static const char *GenFormatI64(int64_t const *pi64)
869{
870 return GenFormatI64(*pi64);
871}
872#endif
873
874static const char *GenFormatI32(int32_t i32)
875{
876 if (i32 == INT32_MIN) /* This one is problematic */
877 return "INT32_MIN";
878 if (i32 == INT32_MAX)
879 return "INT32_MAX";
880 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
881 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
882 return pszBuf;
883}
884
885
886const char *GenFormatI32(int32_t const *pi32)
887{
888 return GenFormatI32(*pi32);
889}
890
891
892const char *GenFormatI16(int16_t i16)
893{
894 if (i16 == INT16_MIN) /* This one is problematic */
895 return "INT16_MIN";
896 if (i16 == INT16_MAX)
897 return "INT16_MAX";
898 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
899 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
900 return pszBuf;
901}
902
903
904const char *GenFormatI16(int16_t const *pi16)
905{
906 return GenFormatI16(*pi16);
907}
908
909
910static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
911{
912 /* We want to tag the generated source code with the revision that produced it. */
913 static char s_szRev[] = "$Revision: 103064 $";
914 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
915 size_t cchRev = 0;
916 while (RT_C_IS_DIGIT(pszRev[cchRev]))
917 cchRev++;
918
919 RTStrmPrintf(pOut,
920 "/* $Id: tstIEMAImpl.cpp 103064 2024-01-25 14:41:25Z vboxsync $ */\n"
921 "/** @file\n"
922 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
923 " */\n"
924 "\n"
925 "/*\n"
926 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
927 " *\n"
928 " * This file is part of VirtualBox base platform packages, as\n"
929 " * available from https://www.virtualbox.org.\n"
930 " *\n"
931 " * This program is free software; you can redistribute it and/or\n"
932 " * modify it under the terms of the GNU General Public License\n"
933 " * as published by the Free Software Foundation, in version 3 of the\n"
934 " * License.\n"
935 " *\n"
936 " * This program is distributed in the hope that it will be useful, but\n"
937 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
938 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
939 " * General Public License for more details.\n"
940 " *\n"
941 " * You should have received a copy of the GNU General Public License\n"
942 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
943 " *\n"
944 " * SPDX-License-Identifier: GPL-3.0-only\n"
945 " */\n"
946 "\n"
947 "#include \"tstIEMAImpl.h\"\n"
948 "\n"
949 ,
950 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
951}
952
953
954static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
955{
956 PRTSTREAM pOut = NULL;
957 int rc = RTStrmOpen(pszFilename, "w", &pOut);
958 if (RT_SUCCESS(rc))
959 {
960 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
961 return pOut;
962 }
963 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
964 return NULL;
965}
966
967
968static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
969{
970 RTStrmPrintf(pOut,
971 "\n"
972 "/* end of file */\n");
973 int rc = RTStrmClose(pOut);
974 if (RT_SUCCESS(rc))
975 return rcExit;
976 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
977}
978
979
980static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
981{
982 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
983}
984
985
986static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
987{
988 RTStrmPrintf(pOut,
989 "};\n"
990 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
991 "\n",
992 pszName, pszName);
993}
994
995
996static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char *pszFilenameFmt, const char *pszName)
997{
998 pBinOut->hVfsFile = NIL_RTVFSFILE;
999 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1000 if (pszFilenameFmt)
1001 {
1002 pBinOut->fNull = false;
1003 if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
1004 {
1005 RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
1006 pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
1007 RTFILE_O_CREATE_REPLACE | RTFILE_O_WRITE | RTFILE_O_DENY_READWRITE,
1008 &pBinOut->hVfsFile);
1009 if (RT_SUCCESS(pBinOut->rcWrite))
1010 {
1011 RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
1012 if (hVfsIoFile != NIL_RTVFSIOSTREAM)
1013 {
1014 pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /*fFlags*/, 9, &pBinOut->hVfsUncompressed);
1015 RTVfsIoStrmRelease(hVfsIoFile);
1016 if (RT_SUCCESS(pBinOut->rcWrite))
1017 {
1018 pBinOut->rcWrite = VINF_SUCCESS;
1019 return true;
1020 }
1021
1022 RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
1023 }
1024 else
1025 {
1026 RTMsgError("RTVfsFileToIoStream failed!");
1027 pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
1028 }
1029 RTVfsFileRelease(pBinOut->hVfsFile);
1030 RTFileDelete(pBinOut->szFilename);
1031 }
1032 else
1033 RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1034 }
1035 else
1036 {
1037 RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
1038 pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
1039 }
1040 return false;
1041 }
1042 RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
1043 pBinOut->rcWrite = VERR_IGNORED;
1044 pBinOut->fNull = true;
1045 pBinOut->szFilename[0] = '\0';
1046 return true;
1047}
1048
1049# define GENERATE_BINARY_OPEN(a_pBinOut, a_papszNameFmts, a_Entry) \
1050 GenerateBinaryOpen((a_pBinOut), a_papszNameFmts[(a_Entry).idxCpuEflFlavour], (a_Entry).pszName)
1051
1052
1053static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1054{
1055 if (RT_SUCCESS_NP(pBinOut->rcWrite))
1056 {
1057 pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /*fBlocking*/, NULL);
1058 if (RT_SUCCESS(pBinOut->rcWrite))
1059 return;
1060 RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1061 }
1062}
1063
1064
1065static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
1066{
1067 if (!pBinOut->fNull)
1068 {
1069 /* This is rather lenient about rcWrite. */
1070 int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
1071 RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
1072 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1073 if (RT_FAILURE(rc1))
1074 RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);
1075
1076 int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
1077 RTVfsFileRelease(pBinOut->hVfsFile);
1078 pBinOut->hVfsFile = NIL_RTVFSFILE;
1079 if (RT_FAILURE(rc2))
1080 RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);
1081
1082 return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
1083 }
1084 return true;
1085}
1086
1087/* Helper for DumpAll. */
1088# define DUMP_TEST_ENTRY(a_Entry, a_papszNameFmts) \
1089 do { \
1090 AssertReturn(DECOMPRESS_TESTS(a_Entry), RTEXITCODE_FAILURE); \
1091 IEMBINARYOUTPUT BinOut; \
1092 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, a_papszNameFmts, (a_Entry)), RTEXITCODE_FAILURE); \
1093 GenerateBinaryWrite(&BinOut, (a_Entry).paTests, (a_Entry).cTests); \
1094 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1095 } while (0)
1096
1097#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1098
1099
1100/*
1101 * Test helpers.
1102 */
1103static bool IsTestEnabled(const char *pszName)
1104{
1105 /* Process excludes first: */
1106 uint32_t i = g_cExcludeTestPatterns;
1107 while (i-- > 0)
1108 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1109 return false;
1110
1111 /* If no include patterns, everything is included: */
1112 i = g_cIncludeTestPatterns;
1113 if (!i)
1114 return true;
1115
1116 /* Otherwise only tests matching the include patterns get tested: */
1117 while (i-- > 0)
1118 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1119 return true;
1120
1121 return false;
1122}
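/* E.g. assuming an include pattern "add_*" and an exclude pattern "*_locked" have
   been registered in the tables above, "add_u16" is enabled, while "add_u16_locked"
   (excludes win, being checked first) and "sub_u16" (no include match) are not. */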
1123
1124
1125static bool SubTestAndCheckIfEnabled(const char *pszName)
1126{
1127 RTTestSub(g_hTest, pszName);
1128 if (IsTestEnabled(pszName))
1129 return true;
1130 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1131 return false;
1132}
1133
1134
1135/** Decompresses test data before use as required. */
1136static int DecompressBinaryTest(void const *pvCompressed, uint32_t cbCompressed, size_t cbEntry,
1137 void **ppvTests, uint32_t *pcTests)
1138{
1139 /* Open a memory stream for the compressed binary data. */
1140 RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
1141 int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, pvCompressed, cbCompressed, &hVfsIos);
1142 RTTESTI_CHECK_RC_OK_RET(rc, rc);
1143
1144 /* Open a decompressed stream for it. */
1145 RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
1146 rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
1147 RTTESTI_CHECK_RC_OK(rc);
1148 if (RT_SUCCESS(rc))
1149 {
1150 /* Initial output buffer allocation. */
1151 size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
1152 uint8_t *pbDecompressed = (uint8_t *)RTMemAllocZ(cbDecompressedAlloc);
1153 if (pbDecompressed)
1154 {
1155 size_t off = 0;
1156 for (;;)
1157 {
1158 size_t cbRead = 0;
1159 rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /*fBlocking*/, &cbRead);
1160 if (RT_FAILURE(rc))
1161 break;
1162 if (rc == VINF_EOF && cbRead == 0)
1163 break;
1164 off += cbRead;
1165
1166 if (cbDecompressedAlloc < off + 256)
1167 {
1168 size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
1169 void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
1170 AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
1171 cbDecompressedAlloc = cbNew;
1172 pbDecompressed = (uint8_t *)pvNew;
1173 }
1174 }
1175 if (RT_SUCCESS(rc))
1176 {
1177 if ((off % cbEntry) == 0)
1178 {
1179 if (cbDecompressedAlloc - off > _512K)
1180 {
1181 void * const pvNew = RTMemRealloc(pbDecompressed, off);
1182 if (pvNew)
1183 pbDecompressed = (uint8_t *)pvNew;
1184 }
1185
1186 /* Done! */
1187 *pcTests = (uint32_t)(off / cbEntry);
1188 *ppvTests = pbDecompressed;
1189 RTMEM_WILL_LEAK(pbDecompressed);
1190
1191 pbDecompressed = NULL;
1192 rc = VINF_SUCCESS;
1193 }
1194 else
1195 {
1196 RTTestIFailed("Uneven decompressed data size: %#zx vs entry size %#zx -> %#zx", off, cbEntry, off % cbEntry);
1197 rc = VERR_IO_BAD_LENGTH;
1198 }
1199 }
1200 else
1201 RTTestIFailed("Failed to decompress binary stream: %Rrc (off=%#zx, cbCompressed=%#x)", rc, off, cbCompressed);
1202 RTMemFree(pbDecompressed);
1203 }
1204 else
1205 {
1206 RTTestIFailed("Out of memory decompressing test data");
1207 rc = VERR_NO_MEMORY;
1208 }
1209 RTVfsIoStrmRelease(hVfsIosDecomp);
1210 }
1211 RTVfsIoStrmRelease(hVfsIos);
1212 return rc;
1213}
1214
1215#define DECOMPRESS_TESTS(a_Entry) \
1216 RT_SUCCESS(DecompressBinaryTest((a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, sizeof((a_Entry).paTests[0]), \
1217 (void **)&(a_Entry).paTests, &(a_Entry).cTests))
1218
1219
1220/** Checks whether the subtest is enabled and, if so, decompresses its test data. */
1221static bool SubTestAndCheckIfEnabledAndDecompress(const char *pszName, void const *pvCompressed, uint32_t cbCompressed,
1222 size_t cbEntry, void **ppvTests, uint32_t *pcTests)
1223{
1224 if (SubTestAndCheckIfEnabled(pszName))
1225 {
1226 int const rc = DecompressBinaryTest(pvCompressed, cbCompressed, cbEntry, ppvTests, pcTests);
1227 if (RT_SUCCESS(rc))
1228 return true;
1229 }
1230 return false;
1231}
1232
1233#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
1234 SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, (a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
1235 sizeof((a_Entry).paTests[0]), (void **)&(a_Entry).paTests, &(a_Entry).cTests)
1236
1237
1238static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1239{
1240 if (fActual == fExpected)
1241 return "";
1242
1243 uint32_t const fXor = fActual ^ fExpected;
1244 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1245 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1246
1247 static struct
1248 {
1249 const char *pszName;
1250 uint32_t fFlag;
1251 } const s_aFlags[] =
1252 {
1253#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1254 EFL_ENTRY(CF),
1255 EFL_ENTRY(PF),
1256 EFL_ENTRY(AF),
1257 EFL_ENTRY(ZF),
1258 EFL_ENTRY(SF),
1259 EFL_ENTRY(TF),
1260 EFL_ENTRY(IF),
1261 EFL_ENTRY(DF),
1262 EFL_ENTRY(OF),
1263 EFL_ENTRY(IOPL),
1264 EFL_ENTRY(NT),
1265 EFL_ENTRY(RF),
1266 EFL_ENTRY(VM),
1267 EFL_ENTRY(AC),
1268 EFL_ENTRY(VIF),
1269 EFL_ENTRY(VIP),
1270 EFL_ENTRY(ID),
1271 };
1272 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1273 if (s_aFlags[i].fFlag & fXor)
1274 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1275 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1276 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1277 return pszBuf;
1278}
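/* E.g. EFlagsDiff(0x202, 0x203) returns " - 0x1/!CF": only CF differs, and it is
   clear in the actual value but set in the expected one. */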
1279
1280
1281static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1282{
1283 if (fActual == fExpected)
1284 return "";
1285
1286 uint16_t const fXor = fActual ^ fExpected;
1287 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1288 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1289
1290 static struct
1291 {
1292 const char *pszName;
1293 uint32_t fFlag;
1294 } const s_aFlags[] =
1295 {
1296#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1297 FSW_ENTRY(IE),
1298 FSW_ENTRY(DE),
1299 FSW_ENTRY(ZE),
1300 FSW_ENTRY(OE),
1301 FSW_ENTRY(UE),
1302 FSW_ENTRY(PE),
1303 FSW_ENTRY(SF),
1304 FSW_ENTRY(ES),
1305 FSW_ENTRY(C0),
1306 FSW_ENTRY(C1),
1307 FSW_ENTRY(C2),
1308 FSW_ENTRY(C3),
1309 FSW_ENTRY(B),
1310 };
1311 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1312 if (s_aFlags[i].fFlag & fXor)
1313 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1314 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1315 if (fXor & X86_FSW_TOP_MASK)
1316 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1317 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1318#if 0 /* For debugging fprem & fprem1 */
1319 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1320 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1321#endif
1322 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1323 return pszBuf;
1324}
1325
1326
1327static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1328{
1329 if (fActual == fExpected)
1330 return "";
1331
1332 uint16_t const fXor = fActual ^ fExpected;
1333 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1334 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1335
1336 static struct
1337 {
1338 const char *pszName;
1339 uint32_t fFlag;
1340 } const s_aFlags[] =
1341 {
1342#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1343 MXCSR_ENTRY(IE),
1344 MXCSR_ENTRY(DE),
1345 MXCSR_ENTRY(ZE),
1346 MXCSR_ENTRY(OE),
1347 MXCSR_ENTRY(UE),
1348 MXCSR_ENTRY(PE),
1349
1350 MXCSR_ENTRY(IM),
1351 MXCSR_ENTRY(DM),
1352 MXCSR_ENTRY(ZM),
1353 MXCSR_ENTRY(OM),
1354 MXCSR_ENTRY(UM),
1355 MXCSR_ENTRY(PM),
1356
1357 MXCSR_ENTRY(DAZ),
1358 MXCSR_ENTRY(FZ),
1359#undef MXCSR_ENTRY
1360 };
1361 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1362 if (s_aFlags[i].fFlag & fXor)
1363 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1364 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1365 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1366 return pszBuf;
1367}
1368
1369
1370static const char *FormatFcw(uint16_t fFcw)
1371{
1372 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1373
1374 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1375 switch (fFcw & X86_FCW_PC_MASK)
1376 {
1377 case X86_FCW_PC_24: pszPC = "PC24"; break;
1378 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1379 case X86_FCW_PC_53: pszPC = "PC53"; break;
1380 case X86_FCW_PC_64: pszPC = "PC64"; break;
1381 }
1382
1383 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1384 switch (fFcw & X86_FCW_RC_MASK)
1385 {
1386 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1387 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1388 case X86_FCW_RC_UP: pszRC = "UP"; break;
1389 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1390 }
1391 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1392
1393 static struct
1394 {
1395 const char *pszName;
1396 uint32_t fFlag;
1397 } const s_aFlags[] =
1398 {
1399#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1400 FCW_ENTRY(IM),
1401 FCW_ENTRY(DM),
1402 FCW_ENTRY(ZM),
1403 FCW_ENTRY(OM),
1404 FCW_ENTRY(UM),
1405 FCW_ENTRY(PM),
1406 { "6M", 64 },
1407 };
1408 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1409 if (fFcw & s_aFlags[i].fFlag)
1410 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1411
1412 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1413 return pszBuf;
1414}
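/* E.g. FormatFcw(0x037f), the FINIT/power-on default control word, should come out
   as "PC64 NEAR IM DM ZM OM UM PM 6M" (the reserved bit 6 is reported as "6M"). */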
1415
1416
1417static const char *FormatMxcsr(uint32_t fMxcsr)
1418{
1419 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1420
1421 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1422 switch (fMxcsr & X86_MXCSR_RC_MASK)
1423 {
1424 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1425 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1426 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1427 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1428 }
1429
1430 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1431 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1432 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1433
1434 static struct
1435 {
1436 const char *pszName;
1437 uint32_t fFlag;
1438 } const s_aFlags[] =
1439 {
1440#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1441 MXCSR_ENTRY(IE),
1442 MXCSR_ENTRY(DE),
1443 MXCSR_ENTRY(ZE),
1444 MXCSR_ENTRY(OE),
1445 MXCSR_ENTRY(UE),
1446 MXCSR_ENTRY(PE),
1447
1448 MXCSR_ENTRY(IM),
1449 MXCSR_ENTRY(DM),
1450 MXCSR_ENTRY(ZM),
1451 MXCSR_ENTRY(OM),
1452 MXCSR_ENTRY(UM),
1453 MXCSR_ENTRY(PM),
1454 { "6M", 64 },
1455 };
1456 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1457 if (fMxcsr & s_aFlags[i].fFlag)
1458 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1459
1460 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1461 return pszBuf;
1462}
1463
1464
1465static const char *FormatR80(PCRTFLOAT80U pr80)
1466{
1467 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1468 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1469 return pszBuf;
1470}
1471
1472
1473static const char *FormatR64(PCRTFLOAT64U pr64)
1474{
1475 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1476 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1477 return pszBuf;
1478}
1479
1480
1481static const char *FormatR32(PCRTFLOAT32U pr32)
1482{
1483 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1484 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1485 return pszBuf;
1486}
1487
1488
1489static const char *FormatD80(PCRTPBCD80U pd80)
1490{
1491 /* There is only one indefinite encoding (same as for 80-bit
1492 floating point), so get it out of the way first: */
1493 if (RTPBCD80U_IS_INDEFINITE(pd80))
1494 return "Ind";
1495
1496 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1497 size_t off = 0;
1498 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1499 unsigned cBadDigits = 0;
1500 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1501 while (iPair-- > 0)
1502 {
1503 static const char s_szDigits[] = "0123456789abcdef";
1504 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1505 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1506 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1507 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1508 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1509 }
1510 if (cBadDigits || pd80->s.uPad != 0)
1511 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1512 pszBuf[off] = '\0';
1513 return pszBuf;
1514}
1515
1516
1517#if 0
1518static const char *FormatI64(int64_t const *piVal)
1519{
1520 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1521 RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1522 return pszBuf;
1523}
1524#endif
1525
1526
1527static const char *FormatI32(int32_t const *piVal)
1528{
1529 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1530 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1531 return pszBuf;
1532}
1533
1534
1535static const char *FormatI16(int16_t const *piVal)
1536{
1537 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1538 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1539 return pszBuf;
1540}
1541
1542
1543static const char *FormatU128(PCRTUINT128U puVal)
1544{
1545 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1546 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1547 return pszBuf;
1548}
1549
1550
1551/*
1552 * Binary operations.
1553 */
1554TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1555TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1556TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1557TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1558
1559#ifdef TSTIEMAIMPL_WITH_GENERATOR
1560# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1561static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
1562{ \
1563 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1564 { \
1565 PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
1566 ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
1567 IEMBINARYOUTPUT BinOut; \
1568 if ( g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1569 && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1570 continue; \
1571 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
1572 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1573 { \
1574 a_TestType Test; \
1575 Test.fEflIn = RandEFlags(); \
1576 Test.fEflOut = Test.fEflIn; \
1577 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1578 Test.uDstOut = Test.uDstIn; \
1579 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1580 if (g_aBinU ## a_cBits[iFn].uExtra) \
1581 Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
1582 Test.uMisc = 0; \
1583 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1584 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1585 } \
1586 for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
1587 { \
1588 a_TestType Test; \
1589 Test.fEflIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
1590 : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
1591 Test.fEflOut = Test.fEflIn; \
1592 Test.uDstIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
1593 Test.uDstOut = Test.uDstIn; \
1594 Test.uSrcIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
1595 Test.uMisc = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
1596 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1597 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1598 } \
1599 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1600 } \
1601 return RTEXITCODE_SUCCESS; \
1602} \
1603/* Temp for conversion. */ \
1604static RTEXITCODE BinU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
1605{ \
1606 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1607 DUMP_TEST_ENTRY(g_aBinU ## a_cBits[iFn], papszNameFmts); \
1608 return RTEXITCODE_SUCCESS; \
1609}
1610
1611#else
1612# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
1613#endif
1614
1615
1616/** Based on a quick probe run, guess how long to run the benchmark. */
1617static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1618{
1619 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1620 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1621 if (cIterations > _2G)
1622 return _2G;
1623 if (cIterations < _4K)
1624 return _4K;
1625 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1626}
1627
1628
1629#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
1630GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1631\
1632static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
1633{ \
1634 uint32_t const fEflIn = pEntry->fEflIn; \
1635 a_uType const uDstIn = pEntry->uDstIn; \
1636 a_uType const uSrcIn = pEntry->uSrcIn; \
1637 cIterations /= 4; \
1638 RTThreadYield(); \
1639 uint64_t const nsStart = RTTimeNanoTS(); \
1640 for (uint32_t i = 0; i < cIterations; i++) \
1641 { \
1642 uint32_t fBenchEfl = fEflIn; \
1643 a_uType uBenchDst = uDstIn; \
1644 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1645 \
1646 fBenchEfl = fEflIn; \
1647 uBenchDst = uDstIn; \
1648 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1649 \
1650 fBenchEfl = fEflIn; \
1651 uBenchDst = uDstIn; \
1652 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1653 \
1654 fBenchEfl = fEflIn; \
1655 uBenchDst = uDstIn; \
1656 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1657 } \
1658 return RTTimeNanoTS() - nsStart; \
1659} \
1660\
1661static void BinU ## a_cBits ## Test(void) \
1662{ \
1663 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1664 { \
1665 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
1666 continue; \
1667 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1668 uint32_t const cTests = a_aSubTests[iFn].cTests; \
1669 PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1670 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1671 if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
1672 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1673 { \
1674 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1675 { \
1676 uint32_t fEfl = paTests[iTest].fEflIn; \
1677 a_uType uDst = paTests[iTest].uDstIn; \
1678 pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
1679 if ( uDst != paTests[iTest].uDstOut \
1680 || fEfl != paTests[iTest].fEflOut ) \
1681 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
1682 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1683 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1684 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
1685 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
1686 else \
1687 { \
1688 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1689 *g_pfEfl = paTests[iTest].fEflIn; \
1690 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
1691 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1692 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1693 } \
1694 } \
1695 \
1696 /* Benchmark if all succeeded. */ \
1697 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
1698 { \
1699 uint32_t const iTest = cTests / 2; \
1700 uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
1701 uint64_t const cNsRealRun = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
1702 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
1703 "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
1704 } \
1705 \
1706 /* Next variation is native. */ \
1707 pfn = a_aSubTests[iFn].pfnNative; \
1708 } \
1709 } \
1710}
1711
1712
1713/*
1714 * 8-bit binary operations.
1715 */
1716static BINU8_T g_aBinU8[] =
1717{
1718 ENTRY_BIN(add_u8),
1719 ENTRY_BIN(add_u8_locked),
1720 ENTRY_BIN(adc_u8),
1721 ENTRY_BIN(adc_u8_locked),
1722 ENTRY_BIN(sub_u8),
1723 ENTRY_BIN(sub_u8_locked),
1724 ENTRY_BIN(sbb_u8),
1725 ENTRY_BIN(sbb_u8_locked),
1726 ENTRY_BIN(or_u8),
1727 ENTRY_BIN(or_u8_locked),
1728 ENTRY_BIN(xor_u8),
1729 ENTRY_BIN(xor_u8_locked),
1730 ENTRY_BIN(and_u8),
1731 ENTRY_BIN(and_u8_locked),
1732 ENTRY_BIN_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
1733 ENTRY_BIN_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
1734};
1735TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1736
1737
1738/*
1739 * 16-bit binary operations.
1740 */
1741#ifdef TSTIEMAIMPL_WITH_GENERATOR
1742static const BINU16_TEST_T g_aFixedTests_add_u16[] =
1743{
1744 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1745 { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
1746};
1747#endif
1748static BINU16_T g_aBinU16[] =
1749{
1750 ENTRY_BIN_FIX(add_u16),
1751 ENTRY_BIN(add_u16_locked),
1752 ENTRY_BIN(adc_u16),
1753 ENTRY_BIN(adc_u16_locked),
1754 ENTRY_BIN(sub_u16),
1755 ENTRY_BIN(sub_u16_locked),
1756 ENTRY_BIN(sbb_u16),
1757 ENTRY_BIN(sbb_u16_locked),
1758 ENTRY_BIN(or_u16),
1759 ENTRY_BIN(or_u16_locked),
1760 ENTRY_BIN(xor_u16),
1761 ENTRY_BIN(xor_u16_locked),
1762 ENTRY_BIN(and_u16),
1763 ENTRY_BIN(and_u16_locked),
1764 ENTRY_BIN_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
1765 ENTRY_BIN_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
1766 ENTRY_BIN_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
1767 ENTRY_BIN_EX(btc_u16, 1),
1768 ENTRY_BIN_EX(btc_u16_locked, 1),
1769 ENTRY_BIN_EX(btr_u16, 1),
1770 ENTRY_BIN_EX(btr_u16_locked, 1),
1771 ENTRY_BIN_EX(bts_u16, 1),
1772 ENTRY_BIN_EX(bts_u16_locked, 1),
1773 ENTRY_BIN_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1774 ENTRY_BIN_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1775 ENTRY_BIN_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1776 ENTRY_BIN_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1777 ENTRY_BIN_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1778 ENTRY_BIN_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1779 ENTRY_BIN(arpl),
1780};
1781TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1782
1783
1784/*
1785 * 32-bit binary operations.
1786 */
1787#ifdef TSTIEMAIMPL_WITH_GENERATOR
1788static const BINU32_TEST_T g_aFixedTests_add_u32[] =
1789{
1790 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1791 { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
1792};
1793#endif
1794static BINU32_T g_aBinU32[] =
1795{
1796 ENTRY_BIN_FIX(add_u32),
1797 ENTRY_BIN(add_u32_locked),
1798 ENTRY_BIN(adc_u32),
1799 ENTRY_BIN(adc_u32_locked),
1800 ENTRY_BIN(sub_u32),
1801 ENTRY_BIN(sub_u32_locked),
1802 ENTRY_BIN(sbb_u32),
1803 ENTRY_BIN(sbb_u32_locked),
1804 ENTRY_BIN(or_u32),
1805 ENTRY_BIN(or_u32_locked),
1806 ENTRY_BIN(xor_u32),
1807 ENTRY_BIN(xor_u32_locked),
1808 ENTRY_BIN(and_u32),
1809 ENTRY_BIN(and_u32_locked),
1810 ENTRY_BIN_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
1811 ENTRY_BIN_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
1812 ENTRY_BIN_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
1813 ENTRY_BIN_EX(btc_u32, 1),
1814 ENTRY_BIN_EX(btc_u32_locked, 1),
1815 ENTRY_BIN_EX(btr_u32, 1),
1816 ENTRY_BIN_EX(btr_u32_locked, 1),
1817 ENTRY_BIN_EX(bts_u32, 1),
1818 ENTRY_BIN_EX(bts_u32_locked, 1),
1819 ENTRY_BIN_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1820 ENTRY_BIN_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1821 ENTRY_BIN_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1822 ENTRY_BIN_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1823 ENTRY_BIN_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1824 ENTRY_BIN_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1825 ENTRY_BIN(adcx_u32),
1826 ENTRY_BIN(adox_u32),
1827};
1828TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1829
1830
1831/*
1832 * 64-bit binary operations.
1833 */
1834#ifdef TSTIEMAIMPL_WITH_GENERATOR
1835static const BINU64_TEST_T g_aFixedTests_add_u64[] =
1836{
1837 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1838 { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
1839};
1840#endif
1841static BINU64_T g_aBinU64[] =
1842{
1843 ENTRY_BIN_FIX(add_u64),
1844 ENTRY_BIN(add_u64_locked),
1845 ENTRY_BIN(adc_u64),
1846 ENTRY_BIN(adc_u64_locked),
1847 ENTRY_BIN(sub_u64),
1848 ENTRY_BIN(sub_u64_locked),
1849 ENTRY_BIN(sbb_u64),
1850 ENTRY_BIN(sbb_u64_locked),
1851 ENTRY_BIN(or_u64),
1852 ENTRY_BIN(or_u64_locked),
1853 ENTRY_BIN(xor_u64),
1854 ENTRY_BIN(xor_u64_locked),
1855 ENTRY_BIN(and_u64),
1856 ENTRY_BIN(and_u64_locked),
1857 ENTRY_BIN_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
1858 ENTRY_BIN_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
1859 ENTRY_BIN_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
1860 ENTRY_BIN_EX(btc_u64, 1),
1861 ENTRY_BIN_EX(btc_u64_locked, 1),
1862 ENTRY_BIN_EX(btr_u64, 1),
1863 ENTRY_BIN_EX(btr_u64_locked, 1),
1864 ENTRY_BIN_EX(bts_u64, 1),
1865 ENTRY_BIN_EX(bts_u64_locked, 1),
1866 ENTRY_BIN_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1867 ENTRY_BIN_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1868 ENTRY_BIN_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1869 ENTRY_BIN_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1870 ENTRY_BIN_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1871 ENTRY_BIN_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1872 ENTRY_BIN(adcx_u64),
1873 ENTRY_BIN(adox_u64),
1874};
1875TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1876
1877
1878/*
1879 * XCHG
1880 */
1881static void XchgTest(void)
1882{
1883 if (!SubTestAndCheckIfEnabled("xchg"))
1884 return;
1885 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1886 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1887 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1888 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1889
1890 static struct
1891 {
1892 uint8_t cb; uint64_t fMask;
1893 union
1894 {
1895 uintptr_t pfn;
1896 FNIEMAIMPLXCHGU8 *pfnU8;
1897 FNIEMAIMPLXCHGU16 *pfnU16;
1898 FNIEMAIMPLXCHGU32 *pfnU32;
1899 FNIEMAIMPLXCHGU64 *pfnU64;
1900 } u;
1901 }
1902 s_aXchgWorkers[] =
1903 {
1904 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1905 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1906 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1907 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1908 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1909 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1910 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1911 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1912 };
1913 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1914 {
1915 RTUINT64U uIn1, uIn2, uMem, uDst;
1916 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1917 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1918 if (uIn1.u == uIn2.u)
1919 uDst.u = uIn2.u = ~uIn2.u;
1920
1921 switch (s_aXchgWorkers[i].cb)
1922 {
1923 case 1:
1924 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1925 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1926 break;
1927 case 2:
1928 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1929 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1930 break;
1931 case 4:
1932 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1933 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1934 break;
1935 case 8:
1936 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1937 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1938 break;
1939 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1940 }
1941
1942 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1943 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1944 }
1945}
1946
1947
1948/*
1949 * XADD
1950 */
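/* XADD stores the sum in the destination and returns the old destination
   value in the source operand, so the checks below reuse the add_u* test
   data and additionally require uSrc == uDstIn after the call. */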
1951static void XaddTest(void)
1952{
1953#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1954 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1955 static struct \
1956 { \
1957 const char * const pszName; \
1958 FNIEMAIMPLXADDU ## a_cBits * const pfn; \
1959 void const * const pvCompressedTests; \
1960 uint32_t const * const pcbCompressedTests; \
1961 BINU ## a_cBits ## _TEST_T const *paTests; \
1962 uint32_t cTests; \
1963 } s_aFuncs[] = \
1964 { \
1965 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1966 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
1967 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1968 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
1969 }; \
1970 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1971 { \
1972 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
1973 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1974 uint32_t const cTests = s_aFuncs[iFn].cTests; \
1975 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1976 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1977 { \
1978 uint32_t fEfl = paTests[iTest].fEflIn; \
1979 a_Type uSrc = paTests[iTest].uSrcIn; \
1980 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1981 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1982 if ( fEfl != paTests[iTest].fEflOut \
1983 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1984 || uSrc != paTests[iTest].uDstIn) \
1985 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1986 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1987 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1988 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1989 } \
1990 } \
1991 } while(0)
1992 TEST_XADD(8, uint8_t, "%#04x");
1993 TEST_XADD(16, uint16_t, "%#06x");
1994 TEST_XADD(32, uint32_t, "%#010RX32");
1995 TEST_XADD(64, uint64_t, "%#018RX64");
1996}
1997
1998
1999/*
2000 * CMPXCHG
2001 */
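/* CMPXCHG compares the accumulator with the destination: on a match ZF is
   set and the new value is stored, otherwise the destination is loaded into
   the accumulator.  EFLAGS are updated as for CMP (i.e. SUB), which is why
   the tests below reuse the cmp_u* data and call iemAImpl_sub_u* to compute
   the expected flags for the matching case. */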
2002
2003static void CmpXchgTest(void)
2004{
2005#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
2006 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
2007 static struct \
2008 { \
2009 const char * const pszName; \
2010 FNIEMAIMPLCMPXCHGU ## a_cBits * const pfn; \
2011 PFNIEMAIMPLBINU ## a_cBits const pfnSub; \
2012 void const * const pvCompressedTests; \
2013 uint32_t const * const pcbCompressedTests; \
2014 BINU ## a_cBits ## _TEST_T const *paTests; \
2015 uint32_t cTests; \
2016 } s_aFuncs[] = \
2017 { \
2018 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
2019 g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2020 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
2021 g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2022 }; \
2023 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2024 { \
2025 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2026 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2027 uint32_t const cTests = s_aFuncs[iFn].cTests; \
2028 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2029 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2030 { \
2031 /* as is (99% likely to be negative). */ \
2032 uint32_t fEfl = paTests[iTest].fEflIn; \
2033 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
2034 a_Type uA = paTests[iTest].uDstIn; \
2035 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
2036 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
2037 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2038 if ( fEfl != paTests[iTest].fEflOut \
2039 || *g_pu ## a_cBits != uExpect \
2040 || uA != paTests[iTest].uSrcIn) \
2041 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2042 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
2043 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
2044 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2045 /* positive */ \
2046 uint32_t fEflExpect = paTests[iTest].fEflIn; \
2047 uA = paTests[iTest].uDstIn; \
2048 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
2049 fEfl = paTests[iTest].fEflIn; \
2050 uA = paTests[iTest].uDstIn; \
2051 *g_pu ## a_cBits = uA; \
2052 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2053 if ( fEfl != fEflExpect \
2054 || *g_pu ## a_cBits != uNew \
2055 || uA != paTests[iTest].uDstIn) \
2056 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2057 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
2058 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
2059 EFlagsDiff(fEfl, fEflExpect)); \
2060 } \
2061 } \
2062 } while(0)
2063 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
2064 TEST_CMPXCHG(16, uint16_t, "%#06x");
2065 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
2066 #if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
2067 TEST_CMPXCHG(64, uint64_t, "%#018RX64");
2068#endif
2069}
2070
2071static void CmpXchg8bTest(void)
2072{
2073 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2074 static struct
2075 {
2076 const char *pszName;
2077 FNIEMAIMPLCMPXCHG8B *pfn;
2078 } const s_aFuncs[] =
2079 {
2080 { "cmpxchg8b", iemAImpl_cmpxchg8b },
2081 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
2082 };
2083 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2084 {
2085 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2086 continue;
2087 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2088 {
2089 uint64_t const uOldValue = RandU64();
2090 uint64_t const uNewValue = RandU64();
2091
2092 /* positive test. */
2093 RTUINT64U uA, uB;
2094 uB.u = uNewValue;
2095 uA.u = uOldValue;
2096 *g_pu64 = uOldValue;
2097 uint32_t fEflIn = RandEFlags();
2098 uint32_t fEfl = fEflIn;
2099 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2100 if ( fEfl != (fEflIn | X86_EFL_ZF)
2101 || *g_pu64 != uNewValue
2102 || uA.u != uOldValue)
2103 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2104 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
2105 fEfl, *g_pu64, uA.u,
2106 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2107 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2108
2109 /* negative */
2110 uint64_t const uExpect = ~uOldValue;
2111 *g_pu64 = uExpect;
2112 uA.u = uOldValue;
2113 uB.u = uNewValue;
2114 fEfl = fEflIn = RandEFlags();
2115 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2116 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2117 || *g_pu64 != uExpect
2118 || uA.u != uExpect)
2119 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2120 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
2121 fEfl, *g_pu64, uA.u,
2122 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2123 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2124 }
2125 }
2126}
2127
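/* CMPXCHG8B compares EDX:EAX against the 64-bit memory operand: on a match
   ZF is set and ECX:EBX is stored, otherwise ZF is cleared and the memory
   value is loaded into EDX:EAX.  ZF is the only flag defined to change,
   hence the (fEflIn | X86_EFL_ZF) and (fEflIn & ~X86_EFL_ZF) expectations
   below.  CMPXCHG16B works the same way with 128-bit operands and register
   pairs. */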
2128static void CmpXchg16bTest(void)
2129{
2130 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
2131 static struct
2132 {
2133 const char *pszName;
2134 FNIEMAIMPLCMPXCHG16B *pfn;
2135 } const s_aFuncs[] =
2136 {
2137 { "cmpxchg16b", iemAImpl_cmpxchg16b },
2138 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
2139#if !defined(RT_ARCH_ARM64)
2140 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
2141#endif
2142 };
2143 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2144 {
2145 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2146 continue;
2147#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
2148 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
2149 {
2150 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
2151 continue;
2152 }
2153#endif
2154 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2155 {
2156 RTUINT128U const uOldValue = RandU128();
2157 RTUINT128U const uNewValue = RandU128();
2158
2159 /* positive test. */
2160 RTUINT128U uA, uB;
2161 uB = uNewValue;
2162 uA = uOldValue;
2163 *g_pu128 = uOldValue;
2164 uint32_t fEflIn = RandEFlags();
2165 uint32_t fEfl = fEflIn;
2166 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2167 if ( fEfl != (fEflIn | X86_EFL_ZF)
2168 || g_pu128->s.Lo != uNewValue.s.Lo
2169 || g_pu128->s.Hi != uNewValue.s.Hi
2170 || uA.s.Lo != uOldValue.s.Lo
2171 || uA.s.Hi != uOldValue.s.Hi)
2172 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2173 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2174 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2175 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2176 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2177 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
2178 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2179 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2180
2181 /* negative */
2182 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
2183 *g_pu128 = uExpect;
2184 uA = uOldValue;
2185 uB = uNewValue;
2186 fEfl = fEflIn = RandEFlags();
2187 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2188 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2189 || g_pu128->s.Lo != uExpect.s.Lo
2190 || g_pu128->s.Hi != uExpect.s.Hi
2191 || uA.s.Lo != uExpect.s.Lo
2192 || uA.s.Hi != uExpect.s.Hi)
2193 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2194 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2195 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2196 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2197 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2198 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
2199 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2200 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2201 }
2202 }
2203}
2204
2205
2206/*
2207 * Double shifts.
2208 *
2209 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2210 */
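/* SHLD/SHRD shift the destination by uMisc bits while shifting in bits from
   the source operand.  The generator masks a random byte with
   (a_cBits * 4 - 1) so that counts beyond the operand width are covered as
   well; the behaviour for such counts is undefined and apparently differs
   between vendors, hence the separate AMD/Intel entries below. */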
2211#ifdef TSTIEMAIMPL_WITH_GENERATOR
2212# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2213static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2214{ \
2215 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2216 { \
2217 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2218 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2219 continue; \
2220 IEMBINARYOUTPUT BinOut; \
2221 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2222 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2223 { \
2224 a_TestType Test; \
2225 Test.fEflIn = RandEFlags(); \
2226 Test.fEflOut = Test.fEflIn; \
2227 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2228 Test.uDstOut = Test.uDstIn; \
2229 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2230 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2231 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
2232 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2233 } \
2234 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2235 } \
2236 return RTEXITCODE_SUCCESS; \
2237} \
2238static RTEXITCODE ShiftDblU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2239{ \
2240 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2241 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
2242 return RTEXITCODE_SUCCESS; \
2243}
2244
2245#else
2246# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2247#endif
2248
2249#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2250TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2251\
2252static a_SubTestType a_aSubTests[] = \
2253{ \
2254 ENTRY_BIN_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2255 ENTRY_BIN_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2256 ENTRY_BIN_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2257 ENTRY_BIN_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2258}; \
2259\
2260GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2261\
2262static void ShiftDblU ## a_cBits ## Test(void) \
2263{ \
2264 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2265 { \
2266 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2267 continue; \
2268 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2269 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2270 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2271 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2272 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2273 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2274 { \
2275 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2276 { \
2277 uint32_t fEfl = paTests[iTest].fEflIn; \
2278 a_Type uDst = paTests[iTest].uDstIn; \
2279 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2280 if ( uDst != paTests[iTest].uDstOut \
2281 || fEfl != paTests[iTest].fEflOut) \
2282 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2283 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2284 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2285 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2286 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2287 else \
2288 { \
2289 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2290 *g_pfEfl = paTests[iTest].fEflIn; \
2291 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2292 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2293 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2294 } \
2295 } \
2296 pfn = a_aSubTests[iFn].pfnNative; \
2297 } \
2298 } \
2299}
2300TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2301TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2302TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2303
2304#ifdef TSTIEMAIMPL_WITH_GENERATOR
2305static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
2306{
2307 RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
2308 if (rcExit == RTEXITCODE_SUCCESS)
2309 rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
2310 if (rcExit == RTEXITCODE_SUCCESS)
2311 rcExit = ShiftDblU64Generate(cTests, papszNameFmts);
2312 return rcExit;
2313}
2314
2315static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
2316{
2317 RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
2318 if (rcExit == RTEXITCODE_SUCCESS)
2319 rcExit = ShiftDblU32DumpAll(papszNameFmts);
2320 if (rcExit == RTEXITCODE_SUCCESS)
2321 rcExit = ShiftDblU64DumpAll(papszNameFmts);
2322 return rcExit;
2323}
2324#endif
2325
2326static void ShiftDblTest(void)
2327{
2328 ShiftDblU16Test();
2329 ShiftDblU32Test();
2330 ShiftDblU64Test();
2331}
2332
2333
2334/*
2335 * Unary operators.
2336 *
2337 * Note! We use BINUxx_TEST_T ignoring uSrcIn and uMisc.
2338 */
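/* Flag behaviour differs between the workers: INC/DEC leave CF untouched,
   NOT modifies no flags at all, and NEG sets CF unless the operand is zero,
   so the recorded EFLAGS are an important part of these tests even though
   all workers share the simple destination-only signature. */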
2339#ifdef TSTIEMAIMPL_WITH_GENERATOR
2340# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2341static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2342{ \
2343 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2344 { \
2345 IEMBINARYOUTPUT BinOut; \
2346 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
2347 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2348 { \
2349 a_TestType Test; \
2350 Test.fEflIn = RandEFlags(); \
2351 Test.fEflOut = Test.fEflIn; \
2352 Test.uDstIn = RandU ## a_cBits(); \
2353 Test.uDstOut = Test.uDstIn; \
2354 Test.uSrcIn = 0; \
2355 Test.uMisc = 0; \
2356 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
2357 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2358 } \
2359 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2360 } \
2361 return RTEXITCODE_SUCCESS; \
2362} \
2363static RTEXITCODE UnaryU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2364{ \
2365 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2366 DUMP_TEST_ENTRY(g_aUnaryU ## a_cBits[iFn], papszNameFmts); \
2367 return RTEXITCODE_SUCCESS; \
2368}
2369#else
2370# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
2371#endif
2372
2373#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2374TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2375static a_SubTestType g_aUnaryU ## a_cBits [] = \
2376{ \
2377 ENTRY_BIN(inc_u ## a_cBits), \
2378 ENTRY_BIN(inc_u ## a_cBits ## _locked), \
2379 ENTRY_BIN(dec_u ## a_cBits), \
2380 ENTRY_BIN(dec_u ## a_cBits ## _locked), \
2381 ENTRY_BIN(not_u ## a_cBits), \
2382 ENTRY_BIN(not_u ## a_cBits ## _locked), \
2383 ENTRY_BIN(neg_u ## a_cBits), \
2384 ENTRY_BIN(neg_u ## a_cBits ## _locked), \
2385}; \
2386\
2387GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2388\
2389static void UnaryU ## a_cBits ## Test(void) \
2390{ \
2391 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2392 { \
2393 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aUnaryU ## a_cBits[iFn])) \
2394 continue; \
2395 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2396 uint32_t const cTests = g_aUnaryU ## a_cBits[iFn].cTests; \
2397 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2398 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2399 { \
2400 uint32_t fEfl = paTests[iTest].fEflIn; \
2401 a_Type uDst = paTests[iTest].uDstIn; \
2402 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2403 if ( uDst != paTests[iTest].uDstOut \
2404 || fEfl != paTests[iTest].fEflOut) \
2405 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2406 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2407 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2408 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2409 else \
2410 { \
2411 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2412 *g_pfEfl = paTests[iTest].fEflIn; \
2413 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2414 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2415 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2416 } \
2417 } \
2418 } \
2419}
2420TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2421TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2422TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2423TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2424
2425#ifdef TSTIEMAIMPL_WITH_GENERATOR
2426static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
2427{
2428 RTEXITCODE rcExit = UnaryU8Generate(cTests, papszNameFmts);
2429 if (rcExit == RTEXITCODE_SUCCESS)
2430 rcExit = UnaryU16Generate(cTests, papszNameFmts);
2431 if (rcExit == RTEXITCODE_SUCCESS)
2432 rcExit = UnaryU32Generate(cTests, papszNameFmts);
2433 if (rcExit == RTEXITCODE_SUCCESS)
2434 rcExit = UnaryU64Generate(cTests, papszNameFmts);
2435 return rcExit;
2436}
2437
2438static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
2439{
2440 RTEXITCODE rcExit = UnaryU8DumpAll(papszNameFmts);
2441 if (rcExit == RTEXITCODE_SUCCESS)
2442 rcExit = UnaryU16DumpAll(papszNameFmts);
2443 if (rcExit == RTEXITCODE_SUCCESS)
2444 rcExit = UnaryU32DumpAll(papszNameFmts);
2445 if (rcExit == RTEXITCODE_SUCCESS)
2446 rcExit = UnaryU64DumpAll(papszNameFmts);
2447 return rcExit;
2448}
2449#endif
2450
2451static void UnaryTest(void)
2452{
2453 UnaryU8Test();
2454 UnaryU16Test();
2455 UnaryU32Test();
2456 UnaryU64Test();
2457}
2458
2459
2460/*
2461 * Shifts.
2462 *
2463 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2464 */
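/* The generator below emits two entries per input: one with random starting
   EFLAGS and one with all live flags inverted, presumably to exercise flag
   pass-through (e.g. for a zero shift count) with both polarities.  As with
   the double shifts, the count is masked with (a_cBits * 4 - 1) to also
   cover counts beyond the operand width. */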
2465#ifdef TSTIEMAIMPL_WITH_GENERATOR
2466# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2467static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2468{ \
2469 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2470 { \
2471 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2472 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2473 continue; \
2474 IEMBINARYOUTPUT BinOut; \
2475 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2476 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2477 { \
2478 a_TestType Test; \
2479 Test.fEflIn = RandEFlags(); \
2480 Test.fEflOut = Test.fEflIn; \
2481 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2482 Test.uDstOut = Test.uDstIn; \
2483 Test.uSrcIn = 0; \
2484 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2485 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2486 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2487 \
2488 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2489 Test.fEflOut = Test.fEflIn; \
2490 Test.uDstOut = Test.uDstIn; \
2491 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2492 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2493 } \
2494 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2495 } \
2496 return RTEXITCODE_SUCCESS; \
2497} \
2498static RTEXITCODE ShiftU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2499{ \
2500 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2501 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
2502 return RTEXITCODE_SUCCESS; \
2503}
2504#else
2505# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2506#endif
2507
2508#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2509TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2510static a_SubTestType a_aSubTests[] = \
2511{ \
2512 ENTRY_BIN_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2513 ENTRY_BIN_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2514 ENTRY_BIN_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2515 ENTRY_BIN_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2516 ENTRY_BIN_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2517 ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2518 ENTRY_BIN_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2519 ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2520 ENTRY_BIN_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2521 ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2522 ENTRY_BIN_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2523 ENTRY_BIN_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2524 ENTRY_BIN_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2525 ENTRY_BIN_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2526}; \
2527\
2528GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2529\
2530static void ShiftU ## a_cBits ## Test(void) \
2531{ \
2532 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2533 { \
2534 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2535 continue; \
2536 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2537 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2538 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2539 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2540 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2541 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2542 { \
2543 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2544 { \
2545 uint32_t fEfl = paTests[iTest].fEflIn; \
2546 a_Type uDst = paTests[iTest].uDstIn; \
2547 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2548 if ( uDst != paTests[iTest].uDstOut \
2549 || fEfl != paTests[iTest].fEflOut ) \
2550 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2551 iTest, iVar == 0 ? "" : "/n", \
2552 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2553 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2554 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2555 else \
2556 { \
2557 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2558 *g_pfEfl = paTests[iTest].fEflIn; \
2559 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2560 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2561 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2562 } \
2563 } \
2564 pfn = a_aSubTests[iFn].pfnNative; \
2565 } \
2566 } \
2567}
2568TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2569TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2570TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2571TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2572
2573#ifdef TSTIEMAIMPL_WITH_GENERATOR
2574static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
2575{
2576 RTEXITCODE rcExit = ShiftU8Generate(cTests, papszNameFmts);
2577 if (rcExit == RTEXITCODE_SUCCESS)
2578 rcExit = ShiftU16Generate(cTests, papszNameFmts);
2579 if (rcExit == RTEXITCODE_SUCCESS)
2580 rcExit = ShiftU32Generate(cTests, papszNameFmts);
2581 if (rcExit == RTEXITCODE_SUCCESS)
2582 rcExit = ShiftU64Generate(cTests, papszNameFmts);
2583 return rcExit;
2584}
2585
2586static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
2587{
2588 RTEXITCODE rcExit = ShiftU8DumpAll(papszNameFmts);
2589 if (rcExit == RTEXITCODE_SUCCESS)
2590 rcExit = ShiftU16DumpAll(papszNameFmts);
2591 if (rcExit == RTEXITCODE_SUCCESS)
2592 rcExit = ShiftU32DumpAll(papszNameFmts);
2593 if (rcExit == RTEXITCODE_SUCCESS)
2594 rcExit = ShiftU64DumpAll(papszNameFmts);
2595 return rcExit;
2596}
2597#endif
2598
2599static void ShiftTest(void)
2600{
2601 ShiftU8Test();
2602 ShiftU16Test();
2603 ShiftU32Test();
2604 ShiftU64Test();
2605}
2606
2607
2608/*
2609 * Multiplication and division.
2610 *
2611 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2612 * Note! Currently ignoring undefined bits.
2613 */
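/* The 8-bit workers take AX as a single 16-bit destination together with an
   8-bit source, and all mul/div workers return an int status so division can
   signal a #DE condition; that is why MULDIVU8_TEST_T carries a 16-bit
   uDstIn/uDstOut and an rc field, while the wider variants below use two
   destination operands. */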
2614
2615/* U8 */
2616TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2617static INT_MULDIV_U8_T g_aMulDivU8[] =
2618{
2619 ENTRY_BIN_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2620 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2621 ENTRY_BIN_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2622 ENTRY_BIN_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2623 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2624 ENTRY_BIN_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2625 ENTRY_BIN_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2626 ENTRY_BIN_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2627 ENTRY_BIN_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2628 ENTRY_BIN_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2629};
2630
2631#ifdef TSTIEMAIMPL_WITH_GENERATOR
2632static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
2633{
2634 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2635 {
2636 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2637 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2638 continue;
2639 IEMBINARYOUTPUT BinOut;
2640 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aMulDivU8[iFn]), RTEXITCODE_FAILURE);
2641 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2642 {
2643 MULDIVU8_TEST_T Test;
2644 Test.fEflIn = RandEFlags();
2645 Test.fEflOut = Test.fEflIn;
2646 Test.uDstIn = RandU16Dst(iTest);
2647 Test.uDstOut = Test.uDstIn;
2648 Test.uSrcIn = RandU8Src(iTest);
2649 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2650 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2651 }
2652 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
2653 }
2654 return RTEXITCODE_SUCCESS;
2655}
2656static RTEXITCODE MulDivU8DumpAll(const char * const * papszNameFmts)
2657{
2658 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2659 DUMP_TEST_ENTRY(g_aMulDivU8[iFn], papszNameFmts);
2660 return RTEXITCODE_SUCCESS;
2661}
2662#endif
2663
2664static void MulDivU8Test(void)
2665{
2666 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2667 {
2668 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
2669 continue;
2670 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2671 uint32_t const cTests = g_aMulDivU8[iFn].cTests;
2672 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2673 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2674 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
2675 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2676 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2677 {
2678 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2679 {
2680 uint32_t fEfl = paTests[iTest].fEflIn;
2681 uint16_t uDst = paTests[iTest].uDstIn;
2682 int rc = pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2683 if ( uDst != paTests[iTest].uDstOut
2684 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2685 || rc != paTests[iTest].rc)
2686 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2687 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2688 "%sexpected %#08x %#06RX16 %d%s\n",
2689 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2690 iVar ? " " : "", fEfl, uDst, rc,
2691 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2692 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2693 else
2694 {
2695 *g_pu16 = paTests[iTest].uDstIn;
2696 *g_pfEfl = paTests[iTest].fEflIn;
2697 rc = pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2698 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2699 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2700 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2701 }
2702 }
2703 pfn = g_aMulDivU8[iFn].pfnNative;
2704 }
2705 }
2706}
2707
2708#ifdef TSTIEMAIMPL_WITH_GENERATOR
2709# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2710static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2711{ \
2712 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2713 { \
2714 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2715 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2716 continue; \
2717 IEMBINARYOUTPUT BinOut; \
2718 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2719 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2720 { \
2721 a_TestType Test; \
2722 Test.fEflIn = RandEFlags(); \
2723 Test.fEflOut = Test.fEflIn; \
2724 Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2725 Test.uDst1Out = Test.uDst1In; \
2726 Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2727 Test.uDst2Out = Test.uDst2In; \
2728 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2729 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2730 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2731 } \
2732 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2733 } \
2734 return RTEXITCODE_SUCCESS; \
2735} \
2736static RTEXITCODE MulDivU ## a_cBits ## DumpAll(const char * const * papszNameFmts) \
2737{ \
2738 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2739 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
2740 return RTEXITCODE_SUCCESS; \
2741}
2742
2743#else
2744# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2745#endif
2746
2747#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2748TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2749static a_SubTestType a_aSubTests [] = \
2750{ \
2751 ENTRY_BIN_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2752 ENTRY_BIN_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2753 ENTRY_BIN_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2754 ENTRY_BIN_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2755 ENTRY_BIN_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2756 ENTRY_BIN_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2757 ENTRY_BIN_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2758 ENTRY_BIN_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2759}; \
2760\
2761GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2762\
2763static void MulDivU ## a_cBits ## Test(void) \
2764{ \
2765 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2766 { \
2767 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2768 continue; \
2769 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2770 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2771 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2772 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2773 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2774 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2775 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2776 { \
2777 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2778 { \
2779 uint32_t fEfl = paTests[iTest].fEflIn; \
2780 a_Type uDst1 = paTests[iTest].uDst1In; \
2781 a_Type uDst2 = paTests[iTest].uDst2In; \
2782 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2783 if ( uDst1 != paTests[iTest].uDst1Out \
2784 || uDst2 != paTests[iTest].uDst2Out \
2785 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2786 || rc != paTests[iTest].rc) \
2787 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2788 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2789 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2790 iTest, iVar == 0 ? "" : "/n", \
2791 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2792 fEfl, uDst1, uDst2, rc, \
2793 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2794 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2795 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2796 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2797 else \
2798 { \
2799 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2800 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2801 *g_pfEfl = paTests[iTest].fEflIn; \
2802 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2803 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2804 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2805 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2806 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2807 } \
2808 } \
2809 pfn = a_aSubTests[iFn].pfnNative; \
2810 } \
2811 } \
2812}
2813TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2814TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2815TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2816
2817#ifdef TSTIEMAIMPL_WITH_GENERATOR
2818static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
2819{
2820 RTEXITCODE rcExit = MulDivU8Generate(cTests, papszNameFmts);
2821 if (rcExit == RTEXITCODE_SUCCESS)
2822 rcExit = MulDivU16Generate(cTests, papszNameFmts);
2823 if (rcExit == RTEXITCODE_SUCCESS)
2824 rcExit = MulDivU32Generate(cTests, papszNameFmts);
2825 if (rcExit == RTEXITCODE_SUCCESS)
2826 rcExit = MulDivU64Generate(cTests, papszNameFmts);
2827 return rcExit;
2828}
2829
2830static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
2831{
2832 RTEXITCODE rcExit = MulDivU8DumpAll(papszNameFmts);
2833 if (rcExit == RTEXITCODE_SUCCESS)
2834 rcExit = MulDivU16DumpAll(papszNameFmts);
2835 if (rcExit == RTEXITCODE_SUCCESS)
2836 rcExit = MulDivU32DumpAll(papszNameFmts);
2837 if (rcExit == RTEXITCODE_SUCCESS)
2838 rcExit = MulDivU64DumpAll(papszNameFmts);
2839 return rcExit;
2840}
2841#endif
2842
2843static void MulDivTest(void)
2844{
2845 MulDivU8Test();
2846 MulDivU16Test();
2847 MulDivU32Test();
2848 MulDivU64Test();
2849}
2850
2851
2852/*
2853 * BSWAP
2854 */
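/* Note: BSWAP with a 16-bit operand is architecturally undefined.  The
   enabled checks below match the IEM implementation, which clears the low
   word, while the #if 0 alternatives document the byte-swapping
   interpretation. */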
2855static void BswapTest(void)
2856{
2857 if (SubTestAndCheckIfEnabled("bswap_u16"))
2858 {
2859 *g_pu32 = UINT32_C(0x12345678);
2860 iemAImpl_bswap_u16(g_pu32);
2861#if 0
2862 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2863#else
2864 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2865#endif
2866 *g_pu32 = UINT32_C(0xffff1122);
2867 iemAImpl_bswap_u16(g_pu32);
2868#if 0
2869 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2870#else
2871 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2872#endif
2873 }
2874
2875 if (SubTestAndCheckIfEnabled("bswap_u32"))
2876 {
2877 *g_pu32 = UINT32_C(0x12345678);
2878 iemAImpl_bswap_u32(g_pu32);
2879 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2880 }
2881
2882 if (SubTestAndCheckIfEnabled("bswap_u64"))
2883 {
2884 *g_pu64 = UINT64_C(0x0123456789abcdef);
2885 iemAImpl_bswap_u64(g_pu64);
2886 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2887 }
2888}
2889
2890
2891
2892/*********************************************************************************************************************************
2893* Floating point (x87 style) *
2894*********************************************************************************************************************************/
2895
2896/*
2897 * FPU constant loading.
2898 */
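/* The workers below load the classic x87 constants: fld1 = 1.0,
   fldl2t = log2(10), fldl2e = log2(e), fldpi = pi, fldlg2 = log10(2),
   fldln2 = ln(2) and fldz = +0.0.  Rounding control matters for the
   irrational ones, which is why the generator iterates all four FCW.RC
   values for each random FCW/FSW input. */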
2899TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2900
2901static FPU_LD_CONST_T g_aFpuLdConst[] =
2902{
2903 ENTRY_BIN(fld1),
2904 ENTRY_BIN(fldl2t),
2905 ENTRY_BIN(fldl2e),
2906 ENTRY_BIN(fldpi),
2907 ENTRY_BIN(fldlg2),
2908 ENTRY_BIN(fldln2),
2909 ENTRY_BIN(fldz),
2910};
2911
2912#ifdef TSTIEMAIMPL_WITH_GENERATOR
2913static RTEXITCODE FpuLdConstGenerate(uint32_t cTests, const char * const *papszNameFmts)
2914{
2915 X86FXSTATE State;
2916 RT_ZERO(State);
2917 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2918 {
2919 IEMBINARYOUTPUT BinOut;
2920 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdConst[iFn]), RTEXITCODE_FAILURE);
2921 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2922 {
2923 State.FCW = RandFcw();
2924 State.FSW = RandFsw();
2925
2926 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2927 {
2928 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2929 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2930 g_aFpuLdConst[iFn].pfn(&State, &Res);
2931 FPU_LD_CONST_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result };
2932 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2933 }
2934 }
2935 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
2936 }
2937 return RTEXITCODE_SUCCESS;
2938}
2939
2940static RTEXITCODE FpuLdConstDumpAll(const char * const *papszNameFmts)
2941{
2942 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2943 DUMP_TEST_ENTRY(g_aFpuLdConst[iFn], papszNameFmts);
2944 return RTEXITCODE_SUCCESS;
2945}
2946#endif
2947
2948static void FpuLoadConstTest(void)
2949{
2950 /*
2951 * Inputs:
2952 * - FSW: C0, C1, C2, C3
2953 * - FCW: Exception masks, Precision control, Rounding control.
2954 *
2955 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2956 */
2957 X86FXSTATE State;
2958 RT_ZERO(State);
2959 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2960 {
2961 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
2962 continue;
2963
2964 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2965 uint32_t const cTests = g_aFpuLdConst[iFn].cTests;
2966 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2967 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]);
2968 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2969 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2970 {
2971 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2972 {
2973 State.FCW = paTests[iTest].fFcw;
2974 State.FSW = paTests[iTest].fFswIn;
2975 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2976 pfn(&State, &Res);
2977 if ( Res.FSW != paTests[iTest].fFswOut
2978 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2979 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2980 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2981 Res.FSW, FormatR80(&Res.r80Result),
2982 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2983 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2984 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2985 FormatFcw(paTests[iTest].fFcw) );
2986 }
2987 pfn = g_aFpuLdConst[iFn].pfnNative;
2988 }
2989 }
2990}
2991
2992
2993/*
2994 * Load floating point values from memory.
2995 */
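/* These cover the fld conversions from the 32-, 64- and 80-bit memory
   formats to the 80-bit register format, again iterating all four rounding
   modes per input. */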
2996#ifdef TSTIEMAIMPL_WITH_GENERATOR
2997# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2998static RTEXITCODE FpuLdR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
2999{ \
3000 X86FXSTATE State; \
3001 RT_ZERO(State); \
3002 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3003 { \
3004 IEMBINARYOUTPUT BinOut; \
3005 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3006 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3007 { \
3008 State.FCW = RandFcw(); \
3009 State.FSW = RandFsw(); \
3010 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
3011 \
3012 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3013 { \
3014 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3015 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3016 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3017 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
3018 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3019 } \
3020 } \
3021 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3022 } \
3023 return RTEXITCODE_SUCCESS; \
3024} \
3025static RTEXITCODE FpuLdR ## a_cBits ## DumpAll(const char * const *papszNameFmts) \
3026{ \
3027 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3028 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
3029 return RTEXITCODE_SUCCESS; \
3030}
3031#else
3032# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
3033#endif
3034
3035#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
3036typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
3037typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
3038TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
3039\
3040static a_SubTestType a_aSubTests[] = \
3041{ \
3042 ENTRY_BIN(RT_CONCAT(fld_r80_from_r,a_cBits)) \
3043}; \
3044GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
3045\
3046static void FpuLdR ## a_cBits ## Test(void) \
3047{ \
3048 X86FXSTATE State; \
3049 RT_ZERO(State); \
3050 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3051 { \
3052 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3053 continue; \
3054 \
3055 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3056 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3057 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3058 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3059 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3060 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3061 { \
3062 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3063 { \
3064 a_rdTypeIn const InVal = paTests[iTest].InVal; \
3065 State.FCW = paTests[iTest].fFcw; \
3066 State.FSW = paTests[iTest].fFswIn; \
3067 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3068 pfn(&State, &Res, &InVal); \
3069 if ( Res.FSW != paTests[iTest].fFswOut \
3070 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3071 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3072 "%s -> fsw=%#06x %s\n" \
3073 "%s expected %#06x %s%s%s (%s)\n", \
3074 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3075 FormatR ## a_cBits(&paTests[iTest].InVal), \
3076 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3077 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3078 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3079 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3080 FormatFcw(paTests[iTest].fFcw) ); \
3081 } \
3082 pfn = a_aSubTests[iFn].pfnNative; \
3083 } \
3084 } \
3085}
3086
3087TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
3088TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
3089TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
3090
3091#ifdef TSTIEMAIMPL_WITH_GENERATOR
3092static RTEXITCODE FpuLdMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3093{
3094 RTEXITCODE rcExit = FpuLdR80Generate(cTests, papszNameFmts);
3095 if (rcExit == RTEXITCODE_SUCCESS)
3096 rcExit = FpuLdR64Generate(cTests, papszNameFmts);
3097 if (rcExit == RTEXITCODE_SUCCESS)
3098 rcExit = FpuLdR32Generate(cTests, papszNameFmts);
3099 return rcExit;
3100}
3101
3102static RTEXITCODE FpuLdMemDumpAll(const char * const *papszNameFmts)
3103{
3104 RTEXITCODE rcExit = FpuLdR80DumpAll(papszNameFmts);
3105 if (rcExit == RTEXITCODE_SUCCESS)
3106 rcExit = FpuLdR64DumpAll(papszNameFmts);
3107 if (rcExit == RTEXITCODE_SUCCESS)
3108 rcExit = FpuLdR32DumpAll(papszNameFmts);
3109 return rcExit;
3110}
3111#endif
3112
3113static void FpuLdMemTest(void)
3114{
3115 FpuLdR80Test();
3116 FpuLdR64Test();
3117 FpuLdR32Test();
3118}
3119
3120
3121/*
3122 * Load integer values from memory.
3123 */
3124#ifdef TSTIEMAIMPL_WITH_GENERATOR
3125# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3126static RTEXITCODE FpuLdI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3127{ \
3128 X86FXSTATE State; \
3129 RT_ZERO(State); \
3130 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3131 { \
3132 IEMBINARYOUTPUT BinOut; \
3133 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3134 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3135 { \
3136 State.FCW = RandFcw(); \
3137 State.FSW = RandFsw(); \
3138 a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
3139 \
3140 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3141 { \
3142 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3143 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3144 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3145 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result }; \
3146 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3147 } \
3148 } \
3149 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3150 } \
3151 return RTEXITCODE_SUCCESS; \
3152} \
3153static RTEXITCODE FpuLdI ## a_cBits ## DumpAll(const char * const *papszNameFmts) \
3154{ \
3155 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3156 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
3157 return RTEXITCODE_SUCCESS; \
3158}
3159#else
3160# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
3161#endif
3162
3163#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
3164typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
3165typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
3166TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
3167\
3168static a_SubTestType a_aSubTests[] = \
3169{ \
3170 ENTRY_BIN(RT_CONCAT(fild_r80_from_i,a_cBits)) \
3171}; \
3172GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3173\
3174static void FpuLdI ## a_cBits ## Test(void) \
3175{ \
3176 X86FXSTATE State; \
3177 RT_ZERO(State); \
3178 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3179 { \
3180 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3181 continue; \
3182 \
3183 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3184 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3185 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3186 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3187 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3188 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3189 { \
3190 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3191 { \
3192 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
3193 State.FCW = paTests[iTest].fFcw; \
3194 State.FSW = paTests[iTest].fFswIn; \
3195 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3196 pfn(&State, &Res, &iInVal); \
3197 if ( Res.FSW != paTests[iTest].fFswOut \
3198 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3199 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
3200 "%s -> fsw=%#06x %s\n" \
3201 "%s expected %#06x %s%s%s (%s)\n", \
3202 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
3203 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3204 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3205 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3206 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3207 FormatFcw(paTests[iTest].fFcw) ); \
3208 } \
3209 pfn = a_aSubTests[iFn].pfnNative; \
3210 } \
3211 } \
3212}
3213
3214TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
3215TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
3216TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
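/*
 * Note: despite the g_aFpuLdU64/U32/U16 names above, the input type handed to the fild
 * workers is signed (int64_t/int32_t/int16_t), matching FILD which only loads signed
 * integers; the test record types are named FPU_I64/I32/I16_IN_TEST_T accordingly.
 */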
3217
3218#ifdef TSTIEMAIMPL_WITH_GENERATOR
3219static RTEXITCODE FpuLdIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3220{
3221 RTEXITCODE rcExit = FpuLdI64Generate(cTests, papszNameFmts);
3222 if (rcExit == RTEXITCODE_SUCCESS)
3223 rcExit = FpuLdI32Generate(cTests, papszNameFmts);
3224 if (rcExit == RTEXITCODE_SUCCESS)
3225 rcExit = FpuLdI16Generate(cTests, papszNameFmts);
3226 return rcExit;
3227}
3228
3229static RTEXITCODE FpuLdIntDumpAll(const char * const *papszNameFmts)
3230{
3231 RTEXITCODE rcExit = FpuLdI64DumpAll(papszNameFmts);
3232 if (rcExit == RTEXITCODE_SUCCESS)
3233 rcExit = FpuLdI32DumpAll(papszNameFmts);
3234 if (rcExit == RTEXITCODE_SUCCESS)
3235 rcExit = FpuLdI16DumpAll(papszNameFmts);
3236 return rcExit;
3237}
3238#endif
3239
3240static void FpuLdIntTest(void)
3241{
3242 FpuLdI64Test();
3243 FpuLdI32Test();
3244 FpuLdI16Test();
3245}
3246
3247
3248/*
3249 * Load binary coded decimal values from memory.
3250 */
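/*
 * Background note (general FBLD knowledge, not derived from this file): RTPBCD80U holds
 * 18 packed BCD digits plus a sign, so the largest magnitude is 10^18 - 1, which is well
 * below 2^63 and therefore always exactly representable in the 80-bit format; for valid
 * BCD inputs the rounding-control loop in the generator below should thus never see an
 * inexact result.
 */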
3251typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
3252typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
3253TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
3254
3255static FPU_LD_D80_T g_aFpuLdD80[] =
3256{
3257 ENTRY_BIN(fld_r80_from_d80)
3258};
3259
3260#ifdef TSTIEMAIMPL_WITH_GENERATOR
3261static RTEXITCODE FpuLdD80Generate(uint32_t cTests, const char * const *papszNameFmts)
3262{
3263 X86FXSTATE State;
3264 RT_ZERO(State);
3265 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3266 {
3267 IEMBINARYOUTPUT BinOut;
3268 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdD80[iFn]), RTEXITCODE_FAILURE);
3269 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3270 {
3271 State.FCW = RandFcw();
3272 State.FSW = RandFsw();
3273 RTPBCD80U InVal = RandD80Src(iTest);
3274
3275 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3276 {
3277 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3278 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3279 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
3280 FPU_D80_IN_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal };
3281 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3282 }
3283 }
3284 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3285 }
3286 return RTEXITCODE_SUCCESS;
3287}
3288static RTEXITCODE FpuLdD80DumpAll(const char * const *papszNameFmts)
3289{
3290 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3291 DUMP_TEST_ENTRY(g_aFpuLdD80[iFn], papszNameFmts);
3292 return RTEXITCODE_SUCCESS;
3293}
3294#endif
3295
3296static void FpuLdD80Test(void)
3297{
3298 X86FXSTATE State;
3299 RT_ZERO(State);
3300 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3301 {
3302 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
3303 continue;
3304
3305 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
3306 uint32_t const cTests = g_aFpuLdD80[iFn].cTests;
3307 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
3308 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
3309 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3310 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3311 {
3312 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3313 {
3314 RTPBCD80U const InVal = paTests[iTest].InVal;
3315 State.FCW = paTests[iTest].fFcw;
3316 State.FSW = paTests[iTest].fFswIn;
3317 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3318 pfn(&State, &Res, &InVal);
3319 if ( Res.FSW != paTests[iTest].fFswOut
3320 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3321 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
3322 "%s -> fsw=%#06x %s\n"
3323 "%s expected %#06x %s%s%s (%s)\n",
3324 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3325 FormatD80(&paTests[iTest].InVal),
3326 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3327 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3328 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3329 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3330 FormatFcw(paTests[iTest].fFcw) );
3331 }
3332 pfn = g_aFpuLdD80[iFn].pfnNative;
3333 }
3334 }
3335}
3336
3337
3338/*
3339 * Store floating point values to memory.
3340 */
3341#ifdef TSTIEMAIMPL_WITH_GENERATOR
3342static const RTFLOAT80U g_aFpuStR32Specials[] =
3343{
3344 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3345 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3346 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3347 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3348};
3349static const RTFLOAT80U g_aFpuStR64Specials[] =
3350{
3351 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3352 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3353 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3354 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3355 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
3356};
3357static const RTFLOAT80U g_aFpuStR80Specials[] =
3358{
3359 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
3360};
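/* Worked example for the "near rounding" entries above (what they appear to exercise):
   0xffffff8000000000 at the bias has the top 24 significand bits all set and the guard
   bit set, so rounding to nearest in single precision rounds up and the carry turns
   1.11...1 * 2^0 into 1.0 * 2^1; the R64 entries do the same around the 53-bit boundary
   (0xfffffffffffffc00), while the plain "near rounding" entries sit on the same halfway
   point without the all-ones pattern, so no carry can occur whichever way they round. */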
3361# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3362static RTEXITCODE FpuStR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3363{ \
3364 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
3365 X86FXSTATE State; \
3366 RT_ZERO(State); \
3367 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3368 { \
3369 IEMBINARYOUTPUT BinOut; \
3370 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3371 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3372 { \
3373 uint16_t const fFcw = RandFcw(); \
3374 State.FSW = RandFsw(); \
3375 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
3376 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
3377 \
3378 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3379 { \
3380 /* PC doesn't influence these, so leave as is. */ \
3381 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3382 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3383 { \
3384 uint16_t uFswOut = 0; \
3385 a_rdType OutVal; \
3386 RT_ZERO(OutVal); \
3387 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3388 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3389 | (iRounding << X86_FCW_RC_SHIFT); \
3390 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3391 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3392 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
3393 a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal }; \
3394 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3395 } \
3396 } \
3397 } \
3398 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3399 } \
3400 return RTEXITCODE_SUCCESS; \
3401} \
3402static RTEXITCODE FpuStR ## a_cBits ## DumpAll(const char * const *papszNameFmts) \
3403{ \
3404 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3405 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
3406 return RTEXITCODE_SUCCESS; \
3407}
3408#else
3409# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
3410#endif
3411
3412#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
3413typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
3414 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
3415typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
3416TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
3417\
3418static a_SubTestType a_aSubTests[] = \
3419{ \
3420 ENTRY_BIN(RT_CONCAT(fst_r80_to_r,a_cBits)) \
3421}; \
3422GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3423\
3424static void FpuStR ## a_cBits ## Test(void) \
3425{ \
3426 X86FXSTATE State; \
3427 RT_ZERO(State); \
3428 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3429 { \
3430 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3431 continue; \
3432 \
3433 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3434 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3435 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3436 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3437 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3438 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3439 { \
3440 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3441 { \
3442 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3443 uint16_t uFswOut = 0; \
3444 a_rdType OutVal; \
3445 RT_ZERO(OutVal); \
3446 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3447 State.FCW = paTests[iTest].fFcw; \
3448 State.FSW = paTests[iTest].fFswIn; \
3449 pfn(&State, &uFswOut, &OutVal, &InVal); \
3450 if ( uFswOut != paTests[iTest].fFswOut \
3451 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
3452 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3453 "%s -> fsw=%#06x %s\n" \
3454 "%s expected %#06x %s%s%s (%s)\n", \
3455 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3456 FormatR80(&paTests[iTest].InVal), \
3457 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
3458 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
3459 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3460 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
3461 FormatFcw(paTests[iTest].fFcw) ); \
3462 } \
3463 pfn = a_aSubTests[iFn].pfnNative; \
3464 } \
3465 } \
3466}
3467
3468TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3469TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3470TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3471
3472#ifdef TSTIEMAIMPL_WITH_GENERATOR
3473static RTEXITCODE FpuStMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3474{
3475 RTEXITCODE rcExit = FpuStR80Generate(cTests, papszNameFmts);
3476 if (rcExit == RTEXITCODE_SUCCESS)
3477 rcExit = FpuStR64Generate(cTests, papszNameFmts);
3478 if (rcExit == RTEXITCODE_SUCCESS)
3479 rcExit = FpuStR32Generate(cTests, papszNameFmts);
3480 return rcExit;
3481}
3482
3483static RTEXITCODE FpuStMemDumpAll(const char * const *papszNameFmts)
3484{
3485 RTEXITCODE rcExit = FpuStR80DumpAll(papszNameFmts);
3486 if (rcExit == RTEXITCODE_SUCCESS)
3487 rcExit = FpuStR64DumpAll(papszNameFmts);
3488 if (rcExit == RTEXITCODE_SUCCESS)
3489 rcExit = FpuStR32DumpAll(papszNameFmts);
3490 return rcExit;
3491}
3492#endif
3493
3494static void FpuStMemTest(void)
3495{
3496 FpuStR80Test();
3497 FpuStR64Test();
3498 FpuStR32Test();
3499}
3500
3501
3502/*
3503 * Store integer values to memory or register.
3504 */
3505TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3506TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3507TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3508
3509static FPU_ST_I16_T g_aFpuStI16[] =
3510{
3511 ENTRY_BIN(fist_r80_to_i16),
3512 ENTRY_BIN_AMD( fistt_r80_to_i16, 0),
3513 ENTRY_BIN_INTEL(fistt_r80_to_i16, 0),
3514};
3515static FPU_ST_I32_T g_aFpuStI32[] =
3516{
3517 ENTRY_BIN(fist_r80_to_i32),
3518 ENTRY_BIN(fistt_r80_to_i32),
3519};
3520static FPU_ST_I64_T g_aFpuStI64[] =
3521{
3522 ENTRY_BIN(fist_r80_to_i64),
3523 ENTRY_BIN(fistt_r80_to_i64),
3524};
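/*
 * Note: fist honours FCW.RC, while fistt (presumably backing FISTTP from SSE3) always
 * truncates towards zero regardless of FCW.RC. The 16-bit fistt variant gets separate
 * AMD and Intel tables because the resulting status bits apparently differ between the
 * two (see the remark further down).
 */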
3525
3526#ifdef TSTIEMAIMPL_WITH_GENERATOR
3527static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
3528{
3529 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
3530 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
3531 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3532 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3533 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3534 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3535 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3536 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3537 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3538 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3539 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3540 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3541 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3542 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3543 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
3544 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3545 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3546 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3547 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3548 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3549 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3550 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3551 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3552 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3553 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3554 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3555 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3556 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3557 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3558 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3559 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3560 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3561 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3562 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3563 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3564 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3565 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3566 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3567 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3568 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3569 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3570 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3571 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3572 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3573 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3574 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3575 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3576};
3577static const RTFLOAT80U g_aFpuStI32Specials[] =
3578{
3579 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3580 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3581 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3582 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3583 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3584 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3585 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3586 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3587 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3588 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3589 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3590 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3591 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3592 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3593 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3594 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3595};
3596static const RTFLOAT80U g_aFpuStI64Specials[] =
3597{
3598 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3599 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3600 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3601 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3602 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3603 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3604 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3605 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3606 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3607 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3608 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3609 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3610 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3611 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3612 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3613 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3614 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3615};
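/* A couple of the i16 boundary values above, worked out (the wider tables follow the
   same pattern): 0x8000000000000000 with exponent 15+bias is exactly 2^15 = 32768, one
   above INT16_MAX, so a masked store should yield the integer indefinite value (0x8000)
   with IE set, while the 14+bias entries with significands just below 2.0 land just
   below 32768, so the rounding mode decides between 32767 and that overflow response. */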
3616
3617# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3618static RTEXITCODE FpuStI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3619{ \
3620 X86FXSTATE State; \
3621 RT_ZERO(State); \
3622 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3623 { \
3624 PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
3625 ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
3626 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
3627 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
3628 continue; \
3629 \
3630 IEMBINARYOUTPUT BinOut; \
3631 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3632 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
3633 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3634 { \
3635 uint16_t const fFcw = RandFcw(); \
3636 State.FSW = RandFsw(); \
3637 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
3638 : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
3639 \
3640 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3641 { \
3642 /* PC doesn't influence these, so leave as is. */ \
3643 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3644 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3645 { \
3646 uint16_t uFswOut = 0; \
3647 a_iType iOutVal = ~(a_iType)2; \
3648 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3649 | (iRounding << X86_FCW_RC_SHIFT); \
3650 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3651 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3652 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3653 a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, iOutVal }; \
3654 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3655 } \
3656 } \
3657 } \
3658 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3659 } \
3660 return RTEXITCODE_SUCCESS; \
3661} \
3662static RTEXITCODE FpuStI ## a_cBits ## DumpAll(const char * const *papszNameFmts) \
3663{ \
3664 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3665 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
3666 return RTEXITCODE_SUCCESS; \
3667}
3668#else
3669# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3670#endif
3671
3672#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3673GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3674\
3675static void FpuStI ## a_cBits ## Test(void) \
3676{ \
3677 X86FXSTATE State; \
3678 RT_ZERO(State); \
3679 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3680 { \
3681 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3682 continue; \
3683 \
3684 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3685 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3686 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3687 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3688 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3689 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3690 { \
3691 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3692 { \
3693 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3694 uint16_t uFswOut = 0; \
3695 a_iType iOutVal = ~(a_iType)2; \
3696 State.FCW = paTests[iTest].fFcw; \
3697 State.FSW = paTests[iTest].fFswIn; \
3698 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3699 if ( uFswOut != paTests[iTest].fFswOut \
3700 || iOutVal != paTests[iTest].iOutVal) \
3701 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3702 "%s -> fsw=%#06x " a_szFmt "\n" \
3703 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3704 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3705 FormatR80(&paTests[iTest].InVal), \
3706 iVar ? " " : "", uFswOut, iOutVal, \
3707 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3708 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3709 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3710 } \
3711 pfn = a_aSubTests[iFn].pfnNative; \
3712 } \
3713 } \
3714}
3715
3716//fistt_r80_to_i16 diffs for AMD, of course :-)
3717
3718TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3719TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3720TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3721
3722#ifdef TSTIEMAIMPL_WITH_GENERATOR
3723static RTEXITCODE FpuStIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3724{
3725 RTEXITCODE rcExit = FpuStI64Generate(cTests, papszNameFmts);
3726 if (rcExit == RTEXITCODE_SUCCESS)
3727 rcExit = FpuStI32Generate(cTests, papszNameFmts);
3728 if (rcExit == RTEXITCODE_SUCCESS)
3729 rcExit = FpuStI16Generate(cTests, papszNameFmts);
3730 return rcExit;
3731}
3732static RTEXITCODE FpuStIntDumpAll(const char * const *papszNameFmts)
3733{
3734 RTEXITCODE rcExit = FpuStI64DumpAll(papszNameFmts);
3735 if (rcExit == RTEXITCODE_SUCCESS)
3736 rcExit = FpuStI32DumpAll(papszNameFmts);
3737 if (rcExit == RTEXITCODE_SUCCESS)
3738 rcExit = FpuStI16DumpAll(papszNameFmts);
3739 return rcExit;
3740}
3741#endif
3742
3743static void FpuStIntTest(void)
3744{
3745 FpuStI64Test();
3746 FpuStI32Test();
3747 FpuStI16Test();
3748}
3749
3750
3751/*
3752 * Store as packed BCD value (memory).
3753 */
3754typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3755typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3756TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3757
3758static FPU_ST_D80_T g_aFpuStD80[] =
3759{
3760 ENTRY_BIN(fst_r80_to_d80),
3761};
3762
3763#ifdef TSTIEMAIMPL_WITH_GENERATOR
3764static RTEXITCODE FpuStD80Generate(uint32_t cTests, const char * const *papszNameFmts)
3765{
3766 static RTFLOAT80U const s_aSpecials[] =
3767 {
3768 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
3769 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
3770 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
3771 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
3772 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
3773 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
3774 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
3775 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
3776 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
3777 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
3778 RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
3779 RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
3780 };
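    /* Note on the specials above: 0xde0b6b3a763ffff0 with exponent bias+59 equals
       0xde0b6b3a763ffff0 * 2^(59-63) = 0x0de0b6b3a763ffff = 999999999999999999 = 10^18 - 1,
       the largest magnitude packed BCD can hold, so these probe rounding right at the
       fbstp overflow boundary. */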
3781
3782 X86FXSTATE State;
3783 RT_ZERO(State);
3784 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3785 {
3786 IEMBINARYOUTPUT BinOut;
3787 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuStD80[iFn]), RTEXITCODE_FAILURE);
3788 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3789 {
3790 uint16_t const fFcw = RandFcw();
3791 State.FSW = RandFsw();
3792 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
3793
3794 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3795 {
3796 /* PC doesn't influence these, so leave as is. */
3797 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
3798 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
3799 {
3800 uint16_t uFswOut = 0;
3801 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3802 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
3803 | (iRounding << X86_FCW_RC_SHIFT);
3804 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
3805 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
3806 g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
3807 FPU_ST_D80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal };
3808 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3809 }
3810 }
3811 }
3812 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3813 }
3814 return RTEXITCODE_SUCCESS;
3815}
3816
3817static RTEXITCODE FpuStD80DumpAll(const char * const *papszNameFmts)
3818{
3819 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3820 DUMP_TEST_ENTRY(g_aFpuStD80[iFn], papszNameFmts);
3821 return RTEXITCODE_SUCCESS;
3822}
3823#endif
3824
3825
3826static void FpuStD80Test(void)
3827{
3828 X86FXSTATE State;
3829 RT_ZERO(State);
3830 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3831 {
3832 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
3833 continue;
3834
3835 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3836 uint32_t const cTests = g_aFpuStD80[iFn].cTests;
3837 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3838 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3839 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3840 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3841 {
3842 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3843 {
3844 RTFLOAT80U const InVal = paTests[iTest].InVal;
3845 uint16_t uFswOut = 0;
3846 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3847 State.FCW = paTests[iTest].fFcw;
3848 State.FSW = paTests[iTest].fFswIn;
3849 pfn(&State, &uFswOut, &OutVal, &InVal);
3850 if ( uFswOut != paTests[iTest].fFswOut
3851 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3852 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3853 "%s -> fsw=%#06x %s\n"
3854 "%s expected %#06x %s%s%s (%s)\n",
3855 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3856 FormatR80(&paTests[iTest].InVal),
3857 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3858 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3859 FswDiff(uFswOut, paTests[iTest].fFswOut),
3860                                 !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3861 FormatFcw(paTests[iTest].fFcw) );
3862 }
3863 pfn = g_aFpuStD80[iFn].pfnNative;
3864 }
3865 }
3866}
3867
3868
3869
3870/*********************************************************************************************************************************
3871* x87 FPU Binary Operations *
3872*********************************************************************************************************************************/
3873
3874/*
3875 * Binary FPU operations on two 80-bit floating point values.
3876 */
3877TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3878enum { kFpuBinaryHint_fprem = 1, };
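/* Note: kFpuBinaryHint_fprem tags the fprem/fprem1 entries so the generator below can
   (a) steer random inputs towards exponent differences the partial remainder can reduce,
   and (b) feed each result back in as the dividend while FSW.C2 stays set (C2=1 meaning
   the reduction is still incomplete). */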
3879
3880static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3881{
3882 ENTRY_BIN(fadd_r80_by_r80),
3883 ENTRY_BIN(fsub_r80_by_r80),
3884 ENTRY_BIN(fsubr_r80_by_r80),
3885 ENTRY_BIN(fmul_r80_by_r80),
3886 ENTRY_BIN(fdiv_r80_by_r80),
3887 ENTRY_BIN(fdivr_r80_by_r80),
3888 ENTRY_BIN_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3889 ENTRY_BIN_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3890 ENTRY_BIN(fscale_r80_by_r80),
3891 ENTRY_BIN_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3892 ENTRY_BIN_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3893 ENTRY_BIN_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3894 ENTRY_BIN_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3895 ENTRY_BIN_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3896 ENTRY_BIN_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3897};
3898
3899#ifdef TSTIEMAIMPL_WITH_GENERATOR
3900static RTEXITCODE FpuBinaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
3901{
3902 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3903
3904 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3905 {
3906 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3907 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3908 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3909 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3910 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3911 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3912 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3913 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3914 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3915 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3916 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3917 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3918 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3919 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3920 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3921 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3922 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3923 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3924 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3925 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3926 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3927 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3928 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3929 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3930 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3931 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3932 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3933 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3934 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3935 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3936 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3937 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3938 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3939 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3940 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3941 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3942 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3943 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3944 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3945 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3946        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
3947 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3948 /* fscale: Negative variants for the essentials of the above. */
3949 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3950 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3951 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3952 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3953 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3954 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3955        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
3956 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3957 /* fscale: Some fun with denormals and pseudo-denormals. */
3958 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3959 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3960 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3961 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3962 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3963 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3964 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3965 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3966 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3967 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3968 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3969 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3970 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3971 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3972 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3973 };
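    /* The magic fscale constants above: fscale truncates ST(1) towards zero, and
       0xc000000000000000 with exponent 14+bias is 1.5 * 2^14 = 24576, i.e. exactly
       RTFLOAT80U_EXP_BIAS_ADJUST, while 0xc002000000000000 gives (1.5 + 2^-14) * 2^14 = 24577,
       one beyond it, which is where the "behaviour changes" remarks come from. */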
3974
3975 X86FXSTATE State;
3976 RT_ZERO(State);
3977 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3978 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3979 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3980 {
3981 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3982 if ( g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
3983 && g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3984 continue;
3985
3986 IEMBINARYOUTPUT BinOut;
3987 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryR80[iFn]), RTEXITCODE_FAILURE);
3988 uint32_t cNormalInputPairs = 0;
3989 uint32_t cTargetRangeInputs = 0;
3990 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3991 {
3992 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3993 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3994 bool fTargetRange = false;
3995 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3996 {
3997 cNormalInputPairs++;
3998 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3999 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
4000 cTargetRangeInputs += fTargetRange = true;
4001 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
4002 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4003 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
4004 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
4005 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
4006 cTargetRangeInputs += fTargetRange = true;
4007 }
4008 }
4009 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4010 {
4011 iTest -= 1;
4012 continue;
4013 }
4014
4015 uint16_t const fFcwExtra = 0;
4016 uint16_t const fFcw = RandFcw();
4017 State.FSW = RandFsw();
4018
4019 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4020 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4021 {
4022 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4023 | (iRounding << X86_FCW_RC_SHIFT)
4024 | (iPrecision << X86_FCW_PC_SHIFT)
4025 | X86_FCW_MASK_ALL;
4026 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4027 pfn(&State, &ResM, &InVal1, &InVal2);
4028 FPU_BINARY_R80_TEST_T const TestM
4029 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal1, InVal2, ResM.r80Result };
4030 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
4031
4032 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4033 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4034 pfn(&State, &ResU, &InVal1, &InVal2);
4035 FPU_BINARY_R80_TEST_T const TestU
4036 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal1, InVal2, ResU.r80Result };
4037 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
4038
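                /* Probe exception-mask sensitivity: take the union of the exceptions the
                   masked and unmasked runs raised (minus the stack-fault flag), re-run with
                   exactly those masked, widen the mask once if that run raises new ones, and
                   finally, when several exceptions are involved, re-run with each of them
                   unmasked one at a time. */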
4039 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4040 if (fXcpt)
4041 {
4042 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4043 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4044 pfn(&State, &Res1, &InVal1, &InVal2);
4045 FPU_BINARY_R80_TEST_T const Test1
4046 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal1, InVal2, Res1.r80Result };
4047 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
4048
4049 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4050 {
4051 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4052 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4053 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4054 pfn(&State, &Res2, &InVal1, &InVal2);
4055 FPU_BINARY_R80_TEST_T const Test2
4056 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal1, InVal2, Res2.r80Result };
4057 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
4058 }
4059 if (!RT_IS_POWER_OF_TWO(fXcpt))
4060 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4061 if (fUnmasked & fXcpt)
4062 {
4063 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4064 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4065 pfn(&State, &Res3, &InVal1, &InVal2);
4066 FPU_BINARY_R80_TEST_T const Test3
4067 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal1, InVal2, Res3.r80Result };
4068 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
4069 }
4070 }
4071
4072 /* If the values are in range and caused no exceptions, do the whole series of
4073                   partial remainders till we get the non-partial one or run into an exception. */
4074 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4075 {
4076 IEMFPURESULT ResPrev = ResM;
4077 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
4078 {
4079 State.FCW = State.FCW | X86_FCW_MASK_ALL;
4080 State.FSW = ResPrev.FSW;
4081 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4082 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
4083 FPU_BINARY_R80_TEST_T const TestSeq
4084 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResSeq.FSW, ResPrev.r80Result, InVal2, ResSeq.r80Result };
4085 GenerateBinaryWrite(&BinOut, &TestSeq, sizeof(TestSeq));
4086 ResPrev = ResSeq;
4087 }
4088 }
4089 }
4090 }
4091 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4092 }
4093 return RTEXITCODE_SUCCESS;
4094}
4095
4096static RTEXITCODE FpuBinaryR80DumpAll(const char * const *papszNameFmts)
4097{
4098 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4099 DUMP_TEST_ENTRY(g_aFpuBinaryR80[iFn], papszNameFmts);
4100 return RTEXITCODE_SUCCESS;
4101}
4102#endif
4103
4104
4105static void FpuBinaryR80Test(void)
4106{
4107 X86FXSTATE State;
4108 RT_ZERO(State);
4109 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4110 {
4111 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
4112 continue;
4113
4114 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
4115 uint32_t const cTests = g_aFpuBinaryR80[iFn].cTests;
4116 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
4117 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
4118 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4119 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4120 {
4121 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4122 {
4123 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4124 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4125 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4126 State.FCW = paTests[iTest].fFcw;
4127 State.FSW = paTests[iTest].fFswIn;
4128 pfn(&State, &Res, &InVal1, &InVal2);
4129 if ( Res.FSW != paTests[iTest].fFswOut
4130 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
4131 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4132 "%s -> fsw=%#06x %s\n"
4133 "%s expected %#06x %s%s%s (%s)\n",
4134 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4135 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4136 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4137 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4138 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4139 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4140 FormatFcw(paTests[iTest].fFcw) );
4141 }
4142 pfn = g_aFpuBinaryR80[iFn].pfnNative;
4143 }
4144 }
4145}
4146
4147
4148/*
4149 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
4150 */
4151#define int64_t_IS_NORMAL(a) 1
4152#define int32_t_IS_NORMAL(a) 1
4153#define int16_t_IS_NORMAL(a) 1
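/* These stubs exist so the GEN_FPU_BINARY_* templates can write a_Type2 ## _IS_NORMAL()
   uniformly for every second-operand type; for the integer variants every input counts
   as "normal". */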
4154
4155#ifdef TSTIEMAIMPL_WITH_GENERATOR
4156static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
4157{
4158 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4159 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4160};
4161static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
4162{
4163 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4164 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4165};
4166static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
4167{
4168 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4169};
4170static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
4171{
4172 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4173};
4174
4175# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4176static RTEXITCODE FpuBinary ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
4177{ \
4178 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4179 \
4180 X86FXSTATE State; \
4181 RT_ZERO(State); \
4182 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4183 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4184 { \
4185 IEMBINARYOUTPUT BinOut; \
4186 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
4187 uint32_t cNormalInputPairs = 0; \
4188 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
4189 { \
4190 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4191 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
4192 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4193 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
4194 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4195 cNormalInputPairs++; \
4196 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4197 { \
4198 iTest -= 1; \
4199 continue; \
4200 } \
4201 \
4202 uint16_t const fFcw = RandFcw(); \
4203 State.FSW = RandFsw(); \
4204 \
4205 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
4206 { \
4207 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
4208 { \
4209 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4210 { \
4211 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
4212 | (iRounding << X86_FCW_RC_SHIFT) \
4213 | (iPrecision << X86_FCW_PC_SHIFT) \
4214 | iMask; \
4215 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4216 a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
4217 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, InVal1, InVal2, Res.r80Result }; \
4218 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
4219 } \
4220 } \
4221 } \
4222 } \
4223 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
4224 } \
4225 return RTEXITCODE_SUCCESS; \
4226} \
4227static RTEXITCODE FpuBinary ## a_UpBits ## DumpAll(const char * const *papszNameFmts) \
4228{ \
4229 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4230 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
4231 return RTEXITCODE_SUCCESS; \
4232}
4233#else
4234# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4235#endif
4236
4237#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
4238TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
4239\
4240static a_SubTestType a_aSubTests[] = \
4241{ \
4242 ENTRY_BIN(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
4243 ENTRY_BIN(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
4244 ENTRY_BIN(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
4245 ENTRY_BIN(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
4246 ENTRY_BIN(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
4247 ENTRY_BIN(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
4248}; \
4249\
4250GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4251\
4252static void FpuBinary ## a_UpBits ## Test(void) \
4253{ \
4254 X86FXSTATE State; \
4255 RT_ZERO(State); \
4256 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4257 { \
4258 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4259 continue; \
4260 \
4261 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4262 uint32_t const cTests = a_aSubTests[iFn].cTests; \
4263 PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
4264 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4265 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4266 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4267 { \
4268 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4269 { \
4270 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4271 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4272 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4273 State.FCW = paTests[iTest].fFcw; \
4274 State.FSW = paTests[iTest].fFswIn; \
4275 pfn(&State, &Res, &InVal1, &InVal2); \
4276 if ( Res.FSW != paTests[iTest].fFswOut \
4277 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
4278 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4279 "%s -> fsw=%#06x %s\n" \
4280 "%s expected %#06x %s%s%s (%s)\n", \
4281 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4282 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4283 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
4284 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
4285 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
4286 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
4287 FormatFcw(paTests[iTest].fFcw) ); \
4288 } \
4289 pfn = a_aSubTests[iFn].pfnNative; \
4290 } \
4291 } \
4292}
4293
4294TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
4295TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
4296TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
4297TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
4298
4299
4300/*
4301 * Binary operations on 80-bit floating point by 80-, 64- and 32-bit floating point or 32- and 16-bit integer values, only affecting the FSW.
4302 */
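/*
 * Roughly how these FSW-only workers are driven (illustrative sketch only,
 * with placeholder input values; the real tests use the binary test data):
 *
 *      X86FXSTATE State;
 *      RT_ZERO(State);
 *      State.FCW = X86_FCW_MASK_ALL;       // all exceptions masked
 *      uint16_t   fFswOut = 0;
 *      RTFLOAT80U r80Val1 = RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS); // 1.0
 *      RTFLOAT80U r80Val2 = r80Val1;
 *      iemAImpl_fcom_r80_by_r80(&State, &fFswOut, &r80Val1, &r80Val2);
 *      // Only the FSW comes back; for fcom the C0/C2/C3 bits encode the comparison result.
 */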
4303#ifdef TSTIEMAIMPL_WITH_GENERATOR
4304static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
4305{
4306 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4307 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4308};
4309static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
4310{
4311 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4312 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4313};
4314static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
4315{
4316 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4317 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4318};
4319static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
4320{
4321 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4322};
4323static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
4324{
4325 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4326};
4327
4328# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4329static RTEXITCODE FpuBinaryFsw ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
4330{ \
4331 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4332 \
4333 X86FXSTATE State; \
4334 RT_ZERO(State); \
4335 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4336 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4337 { \
4338 IEMBINARYOUTPUT BinOut; \
4339 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
4340 uint32_t cNormalInputPairs = 0; \
4341 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
4342 { \
4343 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4344 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
4345 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4346 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
4347 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4348 cNormalInputPairs++; \
4349 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4350 { \
4351 iTest -= 1; \
4352 continue; \
4353 } \
4354 \
4355 uint16_t const fFcw = RandFcw(); \
4356 State.FSW = RandFsw(); \
4357 \
4358 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
4359 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4360 { \
4361 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
4362 uint16_t fFswOut = 0; \
4363 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
4364 a_TestType const Test = { State.FCW, State.FSW, fFswOut, InVal1, InVal2 }; \
4365 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
4366 } \
4367 } \
4368 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
4369 } \
4370 return RTEXITCODE_SUCCESS; \
4371} \
4372static RTEXITCODE FpuBinaryFsw ## a_UpBits ## DumpAll(const char * const *papszNameFmts) \
4373{ \
4374 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4375 DUMP_TEST_ENTRY(a_aSubTests[iFn], papszNameFmts); \
4376 return RTEXITCODE_SUCCESS; \
4377}
4378#else
4379# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4380#endif
4381
4382#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
4383TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
4384\
4385static a_SubTestType a_aSubTests[] = \
4386{ \
4387 __VA_ARGS__ \
4388}; \
4389\
4390GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4391\
4392static void FpuBinaryFsw ## a_UpBits ## Test(void) \
4393{ \
4394 X86FXSTATE State; \
4395 RT_ZERO(State); \
4396 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4397 { \
4398 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4399 continue; \
4400 \
4401 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4402 uint32_t const cTests = a_aSubTests[iFn].cTests; \
4403 PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
4404 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4405 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4406 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4407 { \
4408 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4409 { \
4410 uint16_t fFswOut = 0; \
4411 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4412 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4413 State.FCW = paTests[iTest].fFcw; \
4414 State.FSW = paTests[iTest].fFswIn; \
4415 pfn(&State, &fFswOut, &InVal1, &InVal2); \
4416 if (fFswOut != paTests[iTest].fFswOut) \
4417 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4418 "%s -> fsw=%#06x\n" \
4419 "%s expected %#06x %s (%s)\n", \
4420 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4421 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4422 iVar ? " " : "", fFswOut, \
4423 iVar ? " " : "", paTests[iTest].fFswOut, \
4424 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
4425 } \
4426 pfn = a_aSubTests[iFn].pfnNative; \
4427 } \
4428 } \
4429}
4430
4431TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY_BIN(fcom_r80_by_r80), ENTRY_BIN(fucom_r80_by_r80))
4432TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY_BIN(fcom_r80_by_r64))
4433TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY_BIN(fcom_r80_by_r32))
4434TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY_BIN(ficom_r80_by_i32))
4435TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY_BIN(ficom_r80_by_i16))
4436
4437
4438/*
4439 * Binary operations on 80-bit floating point values that affect only EFLAGS and possibly FSW.
4440 */
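/*
 * Call shape for these workers (sketch with placeholder operands): the EFLAGS
 * are returned and the FSW is updated through the output parameter:
 *
 *      uint16_t fFswOut = 0;
 *      uint32_t fEfl = iemAImpl_fcomi_r80_by_r80(&State, &fFswOut, &r80Val1, &r80Val2);
 *      // fcomi/fucomi report the comparison in ZF, PF and CF of the returned EFLAGS.
 */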
4441TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
4442
4443static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
4444{
4445 ENTRY_BIN(fcomi_r80_by_r80),
4446 ENTRY_BIN(fucomi_r80_by_r80),
4447};
4448
4449#ifdef TSTIEMAIMPL_WITH_GENERATOR
4450static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
4451{
4452 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4453 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4454};
4455
4456static RTEXITCODE FpuBinaryEflR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4457{
4458 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
4459
4460 X86FXSTATE State;
4461 RT_ZERO(State);
4462 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4463 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4464 {
4465 IEMBINARYOUTPUT BinOut;
4466 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryEflR80[iFn]), RTEXITCODE_FAILURE);
4467 uint32_t cNormalInputPairs = 0;
4468 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
4469 {
4470 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
4471 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
4472 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4473 cNormalInputPairs++;
4474 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4475 {
4476 iTest -= 1;
4477 continue;
4478 }
4479
4480 uint16_t const fFcw = RandFcw();
4481 State.FSW = RandFsw();
4482
4483 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
4484 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4485 {
4486 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
4487 uint16_t uFswOut = 0;
4488 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
4489 FPU_BINARY_EFL_R80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal1, InVal2, fEflOut, };
4490 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4491 }
4492 }
4493 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4494 }
4495 return RTEXITCODE_SUCCESS;
4496}
4497
4498static RTEXITCODE FpuBinaryEflR80DumpAll(const char * const *papszNameFmts)
4499{
4500 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4501 DUMP_TEST_ENTRY(g_aFpuBinaryEflR80[iFn], papszNameFmts);
4502 return RTEXITCODE_SUCCESS;
4503}
4504#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4505
4506static void FpuBinaryEflR80Test(void)
4507{
4508 X86FXSTATE State;
4509 RT_ZERO(State);
4510 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4511 {
4512 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
4513 continue;
4514
4515 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4516 uint32_t const cTests = g_aFpuBinaryEflR80[iFn].cTests;
4517 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4518 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4519 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4520 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4521 {
4522 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4523 {
4524 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4525 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4526 State.FCW = paTests[iTest].fFcw;
4527 State.FSW = paTests[iTest].fFswIn;
4528 uint16_t uFswOut = 0;
4529 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4530 if ( uFswOut != paTests[iTest].fFswOut
4531 || fEflOut != paTests[iTest].fEflOut)
4532 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4533 "%s -> fsw=%#06x efl=%#08x\n"
4534 "%s expected %#06x %#08x %s%s (%s)\n",
4535 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4536 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4537 iVar ? " " : "", uFswOut, fEflOut,
4538 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4539 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4540 FormatFcw(paTests[iTest].fFcw));
4541 }
4542 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4543 }
4544 }
4545}
4546
4547
4548/*********************************************************************************************************************************
4549* x87 FPU Unary Operations *
4550*********************************************************************************************************************************/
4551
4552/*
4553 * Unary FPU operations on one 80-bit floating point value.
4554 *
4555 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
4556 * a rounding error or not.
4557 */
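/*
 * Sketch of the FCW bit 7 convention mentioned above (the bit is stripped
 * again before the FCW reaches the worker, see FpuUnaryR80Test):
 *
 *      // generator: tag tests that may be off by a rounding error
 *      uint16_t const fFcwExtra = fMayRound ? 0x80 : 0;    // fMayRound: placeholder name
 *      ... = { (uint16_t)(State.FCW | fFcwExtra), ... };
 *
 *      // test: read the tag and strip it before use
 *      bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
 *      State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
 */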
4558TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
4559
4560enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
4561static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
4562{
4563 ENTRY_BIN_EX( fabs_r80, kUnary_Accurate),
4564 ENTRY_BIN_EX( fchs_r80, kUnary_Accurate),
4565 ENTRY_BIN_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
4566 ENTRY_BIN_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
4567 ENTRY_BIN_EX( fsqrt_r80, kUnary_Accurate),
4568 ENTRY_BIN_EX( frndint_r80, kUnary_Accurate),
4569 ENTRY_BIN_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
4570 ENTRY_BIN_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
4571 ENTRY_BIN_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
4572 ENTRY_BIN_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
4573};
4574
4575#ifdef TSTIEMAIMPL_WITH_GENERATOR
4576
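/**
 * Checks whether a generated test may legitimately be off by a rounding error,
 * i.e. f2xm1 on a normal input with magnitude in [2^-69, 1); see the relaxed
 * comparison helpers used by FpuUnaryR80Test.
 */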
4577static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
4578{
4579 if ( enmKind == kUnary_Rounding_F2xm1
4580 && RTFLOAT80U_IS_NORMAL(pr80Val)
4581 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
4582 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4583 return true;
4584 return false;
4585}
4586
4587static RTEXITCODE FpuUnaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4588{
4589 static RTFLOAT80U const s_aSpecials[] =
4590 {
4591 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4592 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4593 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4594 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4595 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4596 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4597 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4598 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4599 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4600 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4601 };
4602 X86FXSTATE State;
4603 RT_ZERO(State);
4604 uint32_t cMinNormals = cTests / 4;
4605 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4606 {
4607 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4608 if ( g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4609 && g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4610 continue;
4611
4612 IEMBINARYOUTPUT BinOut;
4613 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryR80[iFn]), RTEXITCODE_FAILURE);
4614 uint32_t cNormalInputs = 0;
4615 uint32_t cTargetRangeInputs = 0;
4616 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4617 {
4618 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4619 if (RTFLOAT80U_IS_NORMAL(&InVal))
4620 {
4621 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4622 {
4623 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4624 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4625 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4626 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4627 cTargetRangeInputs++;
4628 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4629 {
4630 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4631 cTargetRangeInputs++;
4632 }
4633 }
4634 cNormalInputs++;
4635 }
4636 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4637 {
4638 iTest -= 1;
4639 continue;
4640 }
4641
4642 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4643 uint16_t const fFcw = RandFcw();
4644 State.FSW = RandFsw();
4645
4646 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4647 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4648 {
4649 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4650 | (iRounding << X86_FCW_RC_SHIFT)
4651 | (iPrecision << X86_FCW_PC_SHIFT)
4652 | X86_FCW_MASK_ALL;
4653 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4654 pfn(&State, &ResM, &InVal);
4655 FPU_UNARY_R80_TEST_T const TestM
4656 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result };
4657 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
4658
4659 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4660 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4661 pfn(&State, &ResU, &InVal);
4662 FPU_UNARY_R80_TEST_T const TestU
4663 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result };
4664 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
4665
4666 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4667 if (fXcpt)
4668 {
4669 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4670 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4671 pfn(&State, &Res1, &InVal);
4672 FPU_UNARY_R80_TEST_T const Test1
4673 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result };
4674 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
4675 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4676 {
4677 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4678 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4679 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4680 pfn(&State, &Res2, &InVal);
4681 FPU_UNARY_R80_TEST_T const Test2
4682 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result };
4683 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
4684 }
4685 if (!RT_IS_POWER_OF_TWO(fXcpt))
4686 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4687 if (fUnmasked & fXcpt)
4688 {
4689 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4690 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4691 pfn(&State, &Res3, &InVal);
4692 FPU_UNARY_R80_TEST_T const Test3
4693 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result };
4694 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
4695 }
4696 }
4697 }
4698 }
4699 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4700 }
4701 return RTEXITCODE_SUCCESS;
4702}
4703
4704static RTEXITCODE FpuUnaryR80DumpAll(const char * const *papszNameFmts)
4705{
4706 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4707 DUMP_TEST_ENTRY(g_aFpuUnaryR80[iFn], papszNameFmts);
4708 return RTEXITCODE_SUCCESS;
4709}
4710#endif
4711
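/*
 * Relaxed comparisons used by FpuUnaryR80Test for tests flagged via FCW bit 7:
 * the status words may differ in C1 only, and the 80-bit values may differ by
 * one unit in the last mantissa bit (including the carry case where a mantissa
 * of UINT64_MAX in one value corresponds to 1.0 * 2^(exponent + 1) in the
 * other).  *pfRndErr is set when such a tolerated difference is seen.
 */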
4712static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4713{
4714 if (fFcw1 == fFcw2)
4715 return true;
4716 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4717 {
4718 *pfRndErr = true;
4719 return true;
4720 }
4721 return false;
4722}
4723
4724static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4725{
4726 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4727 return true;
4728 if ( fRndErrOk
4729 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4730 {
4731 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4732 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4733 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4734 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4735 ||
4736 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4737 && pr80Val1->s.uMantissa == UINT64_MAX
4738 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4739 ||
4740 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4741 && pr80Val2->s.uMantissa == UINT64_MAX
4742 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4743 {
4744 *pfRndErr = true;
4745 return true;
4746 }
4747 }
4748 return false;
4749}
4750
4751
4752static void FpuUnaryR80Test(void)
4753{
4754 X86FXSTATE State;
4755 RT_ZERO(State);
4756 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4757 {
4758 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
4759 continue;
4760
4761 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4762 uint32_t const cTests = g_aFpuUnaryR80[iFn].cTests;
4763 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4764 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4765 uint32_t cRndErrs = 0;
4766 uint32_t cPossibleRndErrs = 0;
4767 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4768 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4769 {
4770 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4771 {
4772 RTFLOAT80U const InVal = paTests[iTest].InVal;
4773 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4774 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4775 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4776 State.FSW = paTests[iTest].fFswIn;
4777 pfn(&State, &Res, &InVal);
4778 bool fRndErr = false;
4779 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4780 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4781 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4782 "%s -> fsw=%#06x %s\n"
4783 "%s expected %#06x %s%s%s%s (%s)\n",
4784 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4785 FormatR80(&paTests[iTest].InVal),
4786 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4787 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4788 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4789 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4790 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4791 cRndErrs += fRndErr;
4792 cPossibleRndErrs += fRndErrOk;
4793 }
4794 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4795 }
4796 if (cPossibleRndErrs > 0)
4797 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4798 }
4799}
4800
4801
4802/*
4803 * Unary FPU operations on one 80-bit floating point value that only affect the FSW.
4804 */
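/*
 * Quick sketch (placeholder value): ftst/fxam only report through the FSW:
 *
 *      uint16_t fFswOut = 0;
 *      iemAImpl_fxam_r80(&State, &fFswOut, &r80Val);
 *      // fxam classifies the value (and the FTW tag of the TOP register) into C0/C2/C3, C1 = sign.
 */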
4805TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4806
4807static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4808{
4809 ENTRY_BIN(ftst_r80),
4810 ENTRY_BIN_EX(fxam_r80, 1),
4811};
4812
4813#ifdef TSTIEMAIMPL_WITH_GENERATOR
4814static RTEXITCODE FpuUnaryFswR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4815{
4816 static RTFLOAT80U const s_aSpecials[] =
4817 {
4818 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4819 };
4820
4821 X86FXSTATE State;
4822 RT_ZERO(State);
4823 uint32_t cMinNormals = cTests / 4;
4824 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4825 {
4826 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4827 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4828 if ( g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4829 && g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4830 continue;
4831 State.FTW = 0;
4832
4833 IEMBINARYOUTPUT BinOut;
4834 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryFswR80[iFn]), RTEXITCODE_FAILURE);
4835 uint32_t cNormalInputs = 0;
4836 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4837 {
4838 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4839 if (RTFLOAT80U_IS_NORMAL(&InVal))
4840 cNormalInputs++;
4841 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4842 {
4843 iTest -= 1;
4844 continue;
4845 }
4846
4847 uint16_t const fFcw = RandFcw();
4848 State.FSW = RandFsw();
4849 if (!fIsFxam)
4850 {
4851 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4852 {
4853 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4854 {
4855 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4856 {
4857 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4858 | (iRounding << X86_FCW_RC_SHIFT)
4859 | (iPrecision << X86_FCW_PC_SHIFT)
4860 | iMask;
4861 uint16_t fFswOut = 0;
4862 pfn(&State, &fFswOut, &InVal);
4863 FPU_UNARY_R80_TEST_T const Test = { State.FCW, State.FSW, fFswOut, InVal };
4864 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4865 }
4866 }
4867 }
4868 }
4869 else
4870 {
4871 uint16_t fFswOut = 0;
4872 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4873 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4874 State.FCW = fFcw;
4875 pfn(&State, &fFswOut, &InVal);
4876 FPU_UNARY_R80_TEST_T const Test = { (uint16_t)(fFcw | fEmpty), State.FSW, fFswOut, InVal };
4877 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4878 }
4879 }
4880 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4881 }
4882 return RTEXITCODE_SUCCESS;
4883}
4884
4885static RTEXITCODE FpuUnaryFswR80DumpAll(const char * const *papszNameFmts)
4886{
4887 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4888 DUMP_TEST_ENTRY(g_aFpuUnaryFswR80[iFn], papszNameFmts);
4889 return RTEXITCODE_SUCCESS;
4890}
4891#endif
4892
4893
4894static void FpuUnaryFswR80Test(void)
4895{
4896 X86FXSTATE State;
4897 RT_ZERO(State);
4898 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4899 {
4900 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
4901 continue;
4902
4903 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4904 uint32_t const cTests = g_aFpuUnaryFswR80[iFn].cTests;
4905 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4906 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4907 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4908 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4909 {
4910 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4911 {
4912 RTFLOAT80U const InVal = paTests[iTest].InVal;
4913 uint16_t fFswOut = 0;
4914 State.FSW = paTests[iTest].fFswIn;
4915 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4916 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4917 pfn(&State, &fFswOut, &InVal);
4918 if (fFswOut != paTests[iTest].fFswOut)
4919 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4920 "%s -> fsw=%#06x\n"
4921 "%s expected %#06x %s (%s%s)\n",
4922 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4923 FormatR80(&paTests[iTest].InVal),
4924 iVar ? " " : "", fFswOut,
4925 iVar ? " " : "", paTests[iTest].fFswOut,
4926 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4927 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4928 }
4929 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4930 }
4931 }
4932}
4933
4934/*
4935 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4936 */
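/*
 * Call shape sketch (placeholder value): both results come back in one struct,
 * e.g. for fxtract the exponent and the significand of the input:
 *
 *      IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
 *      iemAImpl_fxtract_r80_r80(&State, &Res, &r80Val);
 *      // Res.r80Result1 and Res.r80Result2 hold the two values, Res.FSW the status word.
 */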
4937TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4938
4939static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4940{
4941 ENTRY_BIN(fxtract_r80_r80),
4942 ENTRY_BIN_AMD( fptan_r80_r80, 0), // rounding differences
4943 ENTRY_BIN_INTEL(fptan_r80_r80, 0),
4944 ENTRY_BIN_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4945 ENTRY_BIN_INTEL(fsincos_r80_r80, 0),
4946};
4947
4948#ifdef TSTIEMAIMPL_WITH_GENERATOR
4949static RTEXITCODE FpuUnaryTwoR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4950{
4951 static RTFLOAT80U const s_aSpecials[] =
4952 {
4953 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4954 };
4955
4956 X86FXSTATE State;
4957 RT_ZERO(State);
4958 uint32_t cMinNormals = cTests / 4;
4959 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4960 {
4961 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4962 if ( g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4963 && g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4964 continue;
4965
4966 IEMBINARYOUTPUT BinOut;
4967 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryTwoR80[iFn]), RTEXITCODE_FAILURE);
4968 uint32_t cNormalInputs = 0;
4969 uint32_t cTargetRangeInputs = 0;
4970 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4971 {
4972 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4973 if (RTFLOAT80U_IS_NORMAL(&InVal))
4974 {
4975 if (iFn != 0)
4976 {
4977 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4978 unsigned cTargetExp = g_aFpuUnaryTwoR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4979 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4980 cTargetRangeInputs++;
4981 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4982 {
4983 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4984 cTargetRangeInputs++;
4985 }
4986 }
4987 cNormalInputs++;
4988 }
4989 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4990 {
4991 iTest -= 1;
4992 continue;
4993 }
4994
4995 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4996 uint16_t const fFcw = RandFcw();
4997 State.FSW = RandFsw();
4998
4999 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5000 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
5001 {
5002 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
5003 | (iRounding << X86_FCW_RC_SHIFT)
5004 | (iPrecision << X86_FCW_PC_SHIFT)
5005 | X86_FCW_MASK_ALL;
5006 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5007 pfn(&State, &ResM, &InVal);
5008 FPU_UNARY_TWO_R80_TEST_T const TestM
5009 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result1, ResM.r80Result2 };
5010 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
5011
5012 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
5013 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5014 pfn(&State, &ResU, &InVal);
5015 FPU_UNARY_TWO_R80_TEST_T const TestU
5016 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result1, ResU.r80Result2 };
5017 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
5018
5019 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
5020 if (fXcpt)
5021 {
5022 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5023 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5024 pfn(&State, &Res1, &InVal);
5025 FPU_UNARY_TWO_R80_TEST_T const Test1
5026 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result1, Res1.r80Result2 };
5027 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
5028
5029 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
5030 {
5031 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
5032 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5033 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5034 pfn(&State, &Res2, &InVal);
5035 FPU_UNARY_TWO_R80_TEST_T const Test2
5036 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result1, Res2.r80Result2 };
5037 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
5038 }
5039 if (!RT_IS_POWER_OF_TWO(fXcpt))
5040 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
5041 if (fUnmasked & fXcpt)
5042 {
5043 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
5044 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5045 pfn(&State, &Res3, &InVal);
5046 FPU_UNARY_TWO_R80_TEST_T const Test3
5047 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result1, Res3.r80Result2 };
5048 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
5049 }
5050 }
5051 }
5052 }
5053 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5054 }
5055 return RTEXITCODE_SUCCESS;
5056}
5057
5058static RTEXITCODE FpuUnaryTwoR80DumpAll(const char * const *papszNameFmts)
5059{
5060 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5061 DUMP_TEST_ENTRY(g_aFpuUnaryTwoR80[iFn], papszNameFmts);
5062 return RTEXITCODE_SUCCESS;
5063}
5064#endif
5065
5066
5067static void FpuUnaryTwoR80Test(void)
5068{
5069 X86FXSTATE State;
5070 RT_ZERO(State);
5071 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5072 {
5073 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
5074 continue;
5075
5076 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
5077 uint32_t const cTests = g_aFpuUnaryTwoR80[iFn].cTests;
5078 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
5079 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
5080 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5081 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5082 {
5083 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5084 {
5085 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5086 RTFLOAT80U const InVal = paTests[iTest].InVal;
5087 State.FCW = paTests[iTest].fFcw;
5088 State.FSW = paTests[iTest].fFswIn;
5089 pfn(&State, &Res, &InVal);
5090 if ( Res.FSW != paTests[iTest].fFswOut
5091 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
5092 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
5093 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5094 "%s -> fsw=%#06x %s %s\n"
5095 "%s expected %#06x %s %s %s%s%s (%s)\n",
5096 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5097 FormatR80(&paTests[iTest].InVal),
5098 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
5099 iVar ? " " : "", paTests[iTest].fFswOut,
5100 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
5101 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
5102 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
5103 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
5104 }
5105 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
5106 }
5107 }
5108}
5109
5110
5111/*********************************************************************************************************************************
5112* SSE floating point Binary Operations *
5113*********************************************************************************************************************************/
5114
5115/*
5116 * Binary SSE operations on packed single precision floating point values.
5117 */
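/*
 * Rough call sketch shared by the SSE binary workers below (the operands come
 * from the binary test data; shown here only for orientation):
 *
 *      IEMSSERESULT Res; RT_ZERO(Res);
 *      State.MXCSR = X86_MXCSR_XCPT_MASK;      // all exceptions masked, round to nearest
 *      iemAImpl_addps_u128(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
 *      // Res.uResult holds the four packed results and Res.MXCSR the updated MXCSR.
 */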
5118TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5119
5120static SSE_BINARY_R32_T g_aSseBinaryR32[] =
5121{
5122 ENTRY_BIN(addps_u128),
5123 ENTRY_BIN(mulps_u128),
5124 ENTRY_BIN(subps_u128),
5125 ENTRY_BIN(minps_u128),
5126 ENTRY_BIN(divps_u128),
5127 ENTRY_BIN(maxps_u128),
5128 ENTRY_BIN(haddps_u128),
5129 ENTRY_BIN(hsubps_u128),
5130 ENTRY_BIN(sqrtps_u128),
5131 ENTRY_BIN(addsubps_u128),
5132 ENTRY_BIN(cvtps2pd_u128),
5133};
5134
5135#ifdef TSTIEMAIMPL_WITH_GENERATOR
5136static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
5137{
5138 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5139
5140 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
5141 {
5142 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
5143 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
5144 /** @todo More specials. */
5145 };
5146
5147 X86FXSTATE State;
5148 RT_ZERO(State);
5149 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5150 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5151 {
5152 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
5153
5154 IEMBINARYOUTPUT BinOut;
5155 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName), RTEXITCODE_FAILURE);
5156
5157 uint32_t cNormalInputPairs = 0;
5158 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5159 {
5160 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5161
5162 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5163 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5164 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5165 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5166
5167 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5168 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5169 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
5170 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
5171
5172 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
5173 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
5174 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
5175 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
5176 cNormalInputPairs++;
5177 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5178 {
5179 iTest -= 1;
5180 continue;
5181 }
5182
5183 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5184 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5185 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5186 for (uint8_t iFz = 0; iFz < 2; iFz++)
5187 {
5188 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5189 | (iRounding << X86_MXCSR_RC_SHIFT)
5190 | (iDaz ? X86_MXCSR_DAZ : 0)
5191 | (iFz ? X86_MXCSR_FZ : 0)
5192 | X86_MXCSR_XCPT_MASK;
5193 IEMSSERESULT ResM; RT_ZERO(ResM);
5194 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5195 TestData.fMxcsrIn = State.MXCSR;
5196 TestData.fMxcsrOut = ResM.MXCSR;
5197 TestData.OutVal = ResM.uResult;
5198 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5199
5200 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5201 IEMSSERESULT ResU; RT_ZERO(ResU);
5202 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5203 TestData.fMxcsrIn = State.MXCSR;
5204 TestData.fMxcsrOut = ResU.MXCSR;
5205 TestData.OutVal = ResU.uResult;
5206 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5207
5208 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5209 if (fXcpt)
5210 {
5211 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5212 IEMSSERESULT Res1; RT_ZERO(Res1);
5213 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5214 TestData.fMxcsrIn = State.MXCSR;
5215 TestData.fMxcsrOut = Res1.MXCSR;
5216 TestData.OutVal = Res1.uResult;
5217 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5218
5219 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5220 {
5221 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5222 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5223 IEMSSERESULT Res2; RT_ZERO(Res2);
5224 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5225 TestData.fMxcsrIn = State.MXCSR;
5226 TestData.fMxcsrOut = Res2.MXCSR;
5227 TestData.OutVal = Res2.uResult;
5228 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5229 }
5230 if (!RT_IS_POWER_OF_TWO(fXcpt))
5231 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5232 if (fUnmasked & fXcpt)
5233 {
5234 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5235 IEMSSERESULT Res3; RT_ZERO(Res3);
5236 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5237 TestData.fMxcsrIn = State.MXCSR;
5238 TestData.fMxcsrOut = Res3.MXCSR;
5239 TestData.OutVal = Res3.uResult;
5240 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5241 }
5242 }
5243 }
5244 }
5245 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5246 }
5247
5248 return RTEXITCODE_SUCCESS;
5249}
5250#endif
5251
5252static void SseBinaryR32Test(void)
5253{
5254 X86FXSTATE State;
5255 RT_ZERO(State);
5256 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5257 {
5258 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
5259 continue;
5260
5261 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
5262 uint32_t const cTests = g_aSseBinaryR32[iFn].cTests;
5263 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
5264 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
5265 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5266 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5267 {
5268 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5269 {
5270 IEMSSERESULT Res; RT_ZERO(Res);
5271
5272 State.MXCSR = paTests[iTest].fMxcsrIn;
5273 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5274 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5275 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5276 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5277 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5278 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5279 || !fValsIdentical)
5280 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
5281 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5282 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5283 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5284 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5285 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5286 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
5287 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
5288 iVar ? " " : "", Res.MXCSR,
5289 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5290 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5291 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5292 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5293 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5294 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5295 !fValsIdentical ? " - val" : "",
5296 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5297 }
5298 pfn = g_aSseBinaryR32[iFn].pfnNative;
5299 }
5300 }
5301}
5302
5303
5304/*
5305 * Binary SSE operations on packed double precision floating point values.
5306 */
5307TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5308
5309static SSE_BINARY_R64_T g_aSseBinaryR64[] =
5310{
5311 ENTRY_BIN(addpd_u128),
5312 ENTRY_BIN(mulpd_u128),
5313 ENTRY_BIN(subpd_u128),
5314 ENTRY_BIN(minpd_u128),
5315 ENTRY_BIN(divpd_u128),
5316 ENTRY_BIN(maxpd_u128),
5317 ENTRY_BIN(haddpd_u128),
5318 ENTRY_BIN(hsubpd_u128),
5319 ENTRY_BIN(sqrtpd_u128),
5320 ENTRY_BIN(addsubpd_u128),
5321 ENTRY_BIN(cvtpd2ps_u128),
5322};
5323
5324#ifdef TSTIEMAIMPL_WITH_GENERATOR
5325static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
5326{
5327 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5328
5329 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
5330 {
5331 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
5332 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
5333 /** @todo More specials. */
5334 };
5335
5336 X86FXSTATE State;
5337 RT_ZERO(State);
5338 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5339 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5340 {
5341 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
5342
5343 IEMBINARYOUTPUT BinOut;
5344 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName), RTEXITCODE_FAILURE);
5345
5346 uint32_t cNormalInputPairs = 0;
5347 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5348 {
5349 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5350
5351 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5352 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5353 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5354 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5355
5356 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5357 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
5358 cNormalInputPairs++;
5359 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5360 {
5361 iTest -= 1;
5362 continue;
5363 }
5364
5365 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5366 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5367 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5368 for (uint8_t iFz = 0; iFz < 2; iFz++)
5369 {
5370 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5371 | (iRounding << X86_MXCSR_RC_SHIFT)
5372 | (iDaz ? X86_MXCSR_DAZ : 0)
5373 | (iFz ? X86_MXCSR_FZ : 0)
5374 | X86_MXCSR_XCPT_MASK;
5375 IEMSSERESULT ResM; RT_ZERO(ResM);
5376 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5377 TestData.fMxcsrIn = State.MXCSR;
5378 TestData.fMxcsrOut = ResM.MXCSR;
5379 TestData.OutVal = ResM.uResult;
5380 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5381
5382 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5383 IEMSSERESULT ResU; RT_ZERO(ResU);
5384 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5385 TestData.fMxcsrIn = State.MXCSR;
5386 TestData.fMxcsrOut = ResU.MXCSR;
5387 TestData.OutVal = ResU.uResult;
5388 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5389
5390 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5391 if (fXcpt)
5392 {
5393 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5394 IEMSSERESULT Res1; RT_ZERO(Res1);
5395 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5396 TestData.fMxcsrIn = State.MXCSR;
5397 TestData.fMxcsrOut = Res1.MXCSR;
5398 TestData.OutVal = Res1.uResult;
5399 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5400
5401 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5402 {
5403 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5404 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5405 IEMSSERESULT Res2; RT_ZERO(Res2);
5406 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5407 TestData.fMxcsrIn = State.MXCSR;
5408 TestData.fMxcsrOut = Res2.MXCSR;
5409 TestData.OutVal = Res2.uResult;
5410 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5411 }
5412 if (!RT_IS_POWER_OF_TWO(fXcpt))
5413 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5414 if (fUnmasked & fXcpt)
5415 {
5416 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5417 IEMSSERESULT Res3; RT_ZERO(Res3);
5418 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5419 TestData.fMxcsrIn = State.MXCSR;
5420 TestData.fMxcsrOut = Res3.MXCSR;
5421 TestData.OutVal = Res3.uResult;
5422 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5423 }
5424 }
5425 }
5426 }
5427 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5428 }
5429
5430 return RTEXITCODE_SUCCESS;
5431}
5432#endif
5433
5434
5435static void SseBinaryR64Test(void)
5436{
5437 X86FXSTATE State;
5438 RT_ZERO(State);
5439 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5440 {
5441 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
5442 continue;
5443
5444 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
5445 uint32_t const cTests = g_aSseBinaryR64[iFn].cTests;
5446 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
5447 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
5448 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5449 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5450 {
5451 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5452 {
5453 IEMSSERESULT Res; RT_ZERO(Res);
5454
5455 State.MXCSR = paTests[iTest].fMxcsrIn;
5456 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5457 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5458 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5459 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5460 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5461 "%s -> mxcsr=%#08x %s'%s\n"
5462 "%s expected %#08x %s'%s%s%s (%s)\n",
5463 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5464 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5465 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5466 iVar ? " " : "", Res.MXCSR,
5467 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5468 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5469 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5470 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5471 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5472 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5473 ? " - val" : "",
5474 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5475 }
5476 pfn = g_aSseBinaryR64[iFn].pfnNative;
5477 }
5478 }
5479}
5480
5481
5482/*
5483 * Binary SSE operations on packed single precision floating point values.
5484 */
5485TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5486
5487static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5488{
5489 ENTRY_BIN(addss_u128_r32),
5490 ENTRY_BIN(mulss_u128_r32),
5491 ENTRY_BIN(subss_u128_r32),
5492 ENTRY_BIN(minss_u128_r32),
5493 ENTRY_BIN(divss_u128_r32),
5494 ENTRY_BIN(maxss_u128_r32),
5495 ENTRY_BIN(cvtss2sd_u128_r32),
5496 ENTRY_BIN(sqrtss_u128_r32),
5497};
5498
5499#ifdef TSTIEMAIMPL_WITH_GENERATOR
5500static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
5501{
5502 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5503
5504 static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
5505 {
5506 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5507 /** @todo More specials. */
5508 };
5509
5510 X86FXSTATE State;
5511 RT_ZERO(State);
5512 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5513 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5514 {
5515 PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;
5516
5517 IEMBINARYOUTPUT BinOut;
5518 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName), RTEXITCODE_FAILURE);
5519
5520 uint32_t cNormalInputPairs = 0;
5521 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5522 {
5523 SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);
5524
5525 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5526 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5527 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5528 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5529
5530 TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5531
5532 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
5533 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
5534 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
5535 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
5536 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
5537 cNormalInputPairs++;
5538 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5539 {
5540 iTest -= 1;
5541 continue;
5542 }
5543
5544 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5545 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5546 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5547 for (uint8_t iFz = 0; iFz < 2; iFz++)
5548 {
5549 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5550 | (iRounding << X86_MXCSR_RC_SHIFT)
5551 | (iDaz ? X86_MXCSR_DAZ : 0)
5552 | (iFz ? X86_MXCSR_FZ : 0)
5553 | X86_MXCSR_XCPT_MASK;
5554 IEMSSERESULT ResM; RT_ZERO(ResM);
5555 pfn(&State, &ResM, &TestData.InVal1, &TestData.r32Val2);
5556 TestData.fMxcsrIn = State.MXCSR;
5557 TestData.fMxcsrOut = ResM.MXCSR;
5558 TestData.OutVal = ResM.uResult;
5559 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5560
5561 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5562 IEMSSERESULT ResU; RT_ZERO(ResU);
5563 pfn(&State, &ResU, &TestData.InVal1, &TestData.r32Val2);
5564 TestData.fMxcsrIn = State.MXCSR;
5565 TestData.fMxcsrOut = ResU.MXCSR;
5566 TestData.OutVal = ResU.uResult;
5567 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5568
5569 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5570 if (fXcpt)
5571 {
5572 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5573 IEMSSERESULT Res1; RT_ZERO(Res1);
5574 pfn(&State, &Res1, &TestData.InVal1, &TestData.r32Val2);
5575 TestData.fMxcsrIn = State.MXCSR;
5576 TestData.fMxcsrOut = Res1.MXCSR;
5577 TestData.OutVal = Res1.uResult;
5578 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5579
5580 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5581 {
5582 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5583 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5584 IEMSSERESULT Res2; RT_ZERO(Res2);
5585 pfn(&State, &Res2, &TestData.InVal1, &TestData.r32Val2);
5586 TestData.fMxcsrIn = State.MXCSR;
5587 TestData.fMxcsrOut = Res2.MXCSR;
5588 TestData.OutVal = Res2.uResult;
5589 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5590 }
5591 if (!RT_IS_POWER_OF_TWO(fXcpt))
5592 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5593 if (fUnmasked & fXcpt)
5594 {
5595 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5596 IEMSSERESULT Res3; RT_ZERO(Res3);
5597 pfn(&State, &Res3, &TestData.InVal1, &TestData.r32Val2);
5598 TestData.fMxcsrIn = State.MXCSR;
5599 TestData.fMxcsrOut = Res3.MXCSR;
5600 TestData.OutVal = Res3.uResult;
5601 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5602 }
5603 }
5604 }
5605 }
5606 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5607 }
5608
5609 return RTEXITCODE_SUCCESS;
5610}
5611#endif
5612
5613static void SseBinaryU128R32Test(void)
5614{
5615 X86FXSTATE State;
5616 RT_ZERO(State);
5617 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5618 {
5619 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
5620 continue;
5621
5622 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5623 uint32_t const cTests = g_aSseBinaryU128R32[iFn].cTests;
5624 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5625 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5626 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5627 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5628 {
5629 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5630 {
5631 IEMSSERESULT Res; RT_ZERO(Res);
5632
5633 State.MXCSR = paTests[iTest].fMxcsrIn;
5634 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
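                /* Compare all four lanes: the scalar r32 ops are expected to update only the low
                   lane, so this also verifies that the upper lanes pass through unmodified. */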
5635 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5636 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5637 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5638 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5639 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5640 || !fValsIdentical)
5641 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5642 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5643 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5644 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5645 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5646 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5647 FormatR32(&paTests[iTest].r32Val2),
5648 iVar ? " " : "", Res.MXCSR,
5649 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5650 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5651 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5652 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5653 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5654 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5655 !fValsIdentical ? " - val" : "",
5656 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5657 }
5658 }
5659 }
5660}
5661
5662
5663/*
5664 * Binary SSE operations on single double-precision floating point values (xxxsd xmm1, r/m64).
5665 */
5666TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
5667
5668static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
5669{
5670 ENTRY_BIN(addsd_u128_r64),
5671 ENTRY_BIN(mulsd_u128_r64),
5672 ENTRY_BIN(subsd_u128_r64),
5673 ENTRY_BIN(minsd_u128_r64),
5674 ENTRY_BIN(divsd_u128_r64),
5675 ENTRY_BIN(maxsd_u128_r64),
5676 ENTRY_BIN(cvtsd2ss_u128_r64),
5677 ENTRY_BIN(sqrtsd_u128_r64),
5678};
5679
5680#ifdef TSTIEMAIMPL_WITH_GENERATOR
5681static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
5682{
5683 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5684
5685 static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
5686 {
5687 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5688 /** @todo More specials. */
5689 };
5690
5691 X86FXSTATE State;
5692 RT_ZERO(State);
5693 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5694 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5695 {
5696 PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;
5697
5698 IEMBINARYOUTPUT BinOut;
5699 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName), RTEXITCODE_FAILURE);
5700
5701 uint32_t cNormalInputPairs = 0;
5702 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5703 {
5704 SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);
5705
5706 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5707 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5708 TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5709
5710 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5711 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
5712 cNormalInputPairs++;
5713 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5714 {
5715 iTest -= 1;
5716 continue;
5717 }
5718
5719 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5720 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5721 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5722 for (uint8_t iFz = 0; iFz < 2; iFz++)
5723 {
5724 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5725 | (iRounding << X86_MXCSR_RC_SHIFT)
5726 | (iDaz ? X86_MXCSR_DAZ : 0)
5727 | (iFz ? X86_MXCSR_FZ : 0)
5728 | X86_MXCSR_XCPT_MASK;
5729 IEMSSERESULT ResM; RT_ZERO(ResM);
5730 pfn(&State, &ResM, &TestData.InVal1, &TestData.r64Val2);
5731 TestData.fMxcsrIn = State.MXCSR;
5732 TestData.fMxcsrOut = ResM.MXCSR;
5733 TestData.OutVal = ResM.uResult;
5734 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5735
5736 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5737 IEMSSERESULT ResU; RT_ZERO(ResU);
5738 pfn(&State, &ResU, &TestData.InVal1, &TestData.r64Val2);
5739 TestData.fMxcsrIn = State.MXCSR;
5740 TestData.fMxcsrOut = ResU.MXCSR;
5741 TestData.OutVal = ResU.uResult;
5742 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5743
5744 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5745 if (fXcpt)
5746 {
5747 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5748 IEMSSERESULT Res1; RT_ZERO(Res1);
5749 pfn(&State, &Res1, &TestData.InVal1, &TestData.r64Val2);
5750 TestData.fMxcsrIn = State.MXCSR;
5751 TestData.fMxcsrOut = Res1.MXCSR;
5752 TestData.OutVal = Res1.uResult;
5753 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5754
5755 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5756 {
5757 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5758 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5759 IEMSSERESULT Res2; RT_ZERO(Res2);
5760 pfn(&State, &Res2, &TestData.InVal1, &TestData.r64Val2);
5761 TestData.fMxcsrIn = State.MXCSR;
5762 TestData.fMxcsrOut = Res2.MXCSR;
5763 TestData.OutVal = Res2.uResult;
5764 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5765 }
5766 if (!RT_IS_POWER_OF_TWO(fXcpt))
5767 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5768 if (fUnmasked & fXcpt)
5769 {
5770 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5771 IEMSSERESULT Res3; RT_ZERO(Res3);
5772 pfn(&State, &Res3, &TestData.InVal1, &TestData.r64Val2);
5773 TestData.fMxcsrIn = State.MXCSR;
5774 TestData.fMxcsrOut = Res3.MXCSR;
5775 TestData.OutVal = Res3.uResult;
5776 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5777 }
5778 }
5779 }
5780 }
5781 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5782 }
5783
5784 return RTEXITCODE_SUCCESS;
5785}
5786#endif
5787
5788
5789static void SseBinaryU128R64Test(void)
5790{
5791 X86FXSTATE State;
5792 RT_ZERO(State);
5793 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5794 {
5795 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
5796 continue;
5797
5798 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5799 uint32_t const cTests = g_aSseBinaryU128R64[iFn].cTests;
5800 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5801 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5802 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5803 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5804 {
5805 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5806 {
5807 IEMSSERESULT Res; RT_ZERO(Res);
5808
5809 State.MXCSR = paTests[iTest].fMxcsrIn;
5810 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5811 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5812 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5813 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5814 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5815 "%s -> mxcsr=%#08x %s'%s\n"
5816 "%s expected %#08x %s'%s%s%s (%s)\n",
5817 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5818 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5819 FormatR64(&paTests[iTest].r64Val2),
5820 iVar ? " " : "", Res.MXCSR,
5821 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5822 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5823 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5824 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5825 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5826 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5827 ? " - val" : "",
5828 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5829 }
5830 }
5831 }
5832}
5833
5834
5835/*
5836 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
5837 */
5838TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
5839
5840static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
5841{
5842 ENTRY_BIN(cvttsd2si_i32_r64),
5843 ENTRY_BIN(cvtsd2si_i32_r64),
5844};
5845
5846#ifdef TSTIEMAIMPL_WITH_GENERATOR
5847static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
5848{
5849 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5850
5851 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
5852 {
5853 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5854 /** @todo More specials. */
5855 };
5856
5857 X86FXSTATE State;
5858 RT_ZERO(State);
5859 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5860 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5861 {
5862 PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;
5863
5864 IEMBINARYOUTPUT BinOut;
5865 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName), RTEXITCODE_FAILURE);
5866
5867 uint32_t cNormalInputPairs = 0;
5868 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5869 {
5870 SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);
5871
5872 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
5873
5874 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
5875 cNormalInputPairs++;
5876 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5877 {
5878 iTest -= 1;
5879 continue;
5880 }
5881
5882 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5883 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5884 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5885 for (uint8_t iFz = 0; iFz < 2; iFz++)
5886 {
5887 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5888 | (iRounding << X86_MXCSR_RC_SHIFT)
5889 | (iDaz ? X86_MXCSR_DAZ : 0)
5890 | (iFz ? X86_MXCSR_FZ : 0)
5891 | X86_MXCSR_XCPT_MASK;
5892 uint32_t fMxcsrM; int32_t i32OutM;
5893 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r64ValIn.u);
5894 TestData.fMxcsrIn = State.MXCSR;
5895 TestData.fMxcsrOut = fMxcsrM;
5896 TestData.i32ValOut = i32OutM;
5897 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5898
5899 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5900 uint32_t fMxcsrU; int32_t i32OutU;
5901 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r64ValIn.u);
5902 TestData.fMxcsrIn = State.MXCSR;
5903 TestData.fMxcsrOut = fMxcsrU;
5904 TestData.i32ValOut = i32OutU;
5905 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5906
5907 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
5908 if (fXcpt)
5909 {
5910 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5911 uint32_t fMxcsr1; int32_t i32Out1;
5912 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r64ValIn.u);
5913 TestData.fMxcsrIn = State.MXCSR;
5914 TestData.fMxcsrOut = fMxcsr1;
5915 TestData.i32ValOut = i32Out1;
5916 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5917
5918 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
5919 {
5920 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
5921 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5922 uint32_t fMxcsr2; int32_t i32Out2;
5923 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r64ValIn.u);
5924 TestData.fMxcsrIn = State.MXCSR;
5925 TestData.fMxcsrOut = fMxcsr2;
5926 TestData.i32ValOut = i32Out2;
5927 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5928 }
5929 if (!RT_IS_POWER_OF_TWO(fXcpt))
5930 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5931 if (fUnmasked & fXcpt)
5932 {
5933 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5934 uint32_t fMxcsr3; int32_t i32Out3;
5935 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r64ValIn.u);
5936 TestData.fMxcsrIn = State.MXCSR;
5937 TestData.fMxcsrOut = fMxcsr3;
5938 TestData.i32ValOut = i32Out3;
5939 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5940 }
5941 }
5942 }
5943 }
5944 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5945 }
5946
5947 return RTEXITCODE_SUCCESS;
5948}
5949#endif
5950
5951
5952static void SseBinaryI32R64Test(void)
5953{
5954 X86FXSTATE State;
5955 RT_ZERO(State);
5956 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5957 {
5958 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
5959 continue;
5960
5961 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5962 uint32_t const cTests = g_aSseBinaryI32R64[iFn].cTests;
5963 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5964 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5965 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5966 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5967 {
5968 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5969 {
5970 uint32_t fMxcsr = 0;
5971 int32_t i32Dst = 0;
5972
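                /* The conversion workers take the raw 64-bit bit pattern (hence the .u member)
                   and return the integer result and MXCSR via separate output parameters. */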
5973 State.MXCSR = paTests[iTest].fMxcsrIn;
5974 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5975 if ( fMxcsr != paTests[iTest].fMxcsrOut
5976 || i32Dst != paTests[iTest].i32ValOut)
5977 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5978 "%s -> mxcsr=%#08x %RI32\n"
5979 "%s expected %#08x %RI32%s%s (%s)\n",
5980 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5981 FormatR64(&paTests[iTest].r64ValIn),
5982 iVar ? " " : "", fMxcsr, i32Dst,
5983 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5984 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5985 i32Dst != paTests[iTest].i32ValOut
5986 ? " - val" : "",
5987 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5988 }
5989 }
5990 }
5991}
5992
5993
5994/*
5995 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
5996 */
5997TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);
5998
5999static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
6000{
6001 ENTRY_BIN(cvttsd2si_i64_r64),
6002 ENTRY_BIN(cvtsd2si_i64_r64),
6003};
6004
6005#ifdef TSTIEMAIMPL_WITH_GENERATOR
6006static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
6007{
6008 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6009
6010 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
6011 {
6012 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6013 /** @todo More specials. */
6014 };
6015
6016 X86FXSTATE State;
6017 RT_ZERO(State);
6018 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6019 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6020 {
6021 PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;
6022
6023 IEMBINARYOUTPUT BinOut;
6024 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName), RTEXITCODE_FAILURE);
6025
6026 uint32_t cNormalInputPairs = 0;
6027 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6028 {
6029 SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);
6030
6031 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
6032
6033 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
6034 cNormalInputPairs++;
6035 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6036 {
6037 iTest -= 1;
6038 continue;
6039 }
6040
6041 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6042 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6043 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6044 for (uint8_t iFz = 0; iFz < 2; iFz++)
6045 {
6046 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6047 | (iRounding << X86_MXCSR_RC_SHIFT)
6048 | (iDaz ? X86_MXCSR_DAZ : 0)
6049 | (iFz ? X86_MXCSR_FZ : 0)
6050 | X86_MXCSR_XCPT_MASK;
6051 uint32_t fMxcsrM; int64_t i64OutM;
6052 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r64ValIn.u);
6053 TestData.fMxcsrIn = State.MXCSR;
6054 TestData.fMxcsrOut = fMxcsrM;
6055 TestData.i64ValOut = i64OutM;
6056 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6057
6058 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6059 uint32_t fMxcsrU; int64_t i64OutU;
6060 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r64ValIn.u);
6061 TestData.fMxcsrIn = State.MXCSR;
6062 TestData.fMxcsrOut = fMxcsrU;
6063 TestData.i64ValOut = i64OutU;
6064 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6065
6066 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6067 if (fXcpt)
6068 {
6069 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6070 uint32_t fMxcsr1; int64_t i64Out1;
6071 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r64ValIn.u);
6072 TestData.fMxcsrIn = State.MXCSR;
6073 TestData.fMxcsrOut = fMxcsr1;
6074 TestData.i64ValOut = i64Out1;
6075 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6076
6077 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6078 {
6079 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6080 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6081 uint32_t fMxcsr2; int64_t i64Out2;
6082 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r64ValIn.u);
6083 TestData.fMxcsrIn = State.MXCSR;
6084 TestData.fMxcsrOut = fMxcsr2;
6085 TestData.i64ValOut = i64Out2;
6086 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6087 }
6088 if (!RT_IS_POWER_OF_TWO(fXcpt))
6089 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6090 if (fUnmasked & fXcpt)
6091 {
6092 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6093 uint32_t fMxcsr3; int64_t i64Out3;
6094 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r64ValIn.u);
6095 TestData.fMxcsrIn = State.MXCSR;
6096 TestData.fMxcsrOut = fMxcsr3;
6097 TestData.i64ValOut = i64Out3;
6098 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6099 }
6100 }
6101 }
6102 }
6103 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6104 }
6105
6106 return RTEXITCODE_SUCCESS;
6107}
6108#endif
6109
6110
6111static void SseBinaryI64R64Test(void)
6112{
6113 X86FXSTATE State;
6114 RT_ZERO(State);
6115 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6116 {
6117 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6118 continue;
6119
6120 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6121 uint32_t const cTests = g_aSseBinaryI64R64[iFn].cTests;
6122 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6123 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R64[iFn]);
6124 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6125 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6126 {
6127 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6128 {
6129 uint32_t fMxcsr = 0;
6130 int64_t i64Dst = 0;
6131
6132 State.MXCSR = paTests[iTest].fMxcsrIn;
6133 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
6134 if ( fMxcsr != paTests[iTest].fMxcsrOut
6135 || i64Dst != paTests[iTest].i64ValOut)
6136 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6137 "%s -> mxcsr=%#08x %RI64\n"
6138 "%s expected %#08x %RI64%s%s (%s)\n",
6139 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6140 FormatR64(&paTests[iTest].r64ValIn),
6141 iVar ? " " : "", fMxcsr, i64Dst,
6142 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6143 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6144 i64Dst != paTests[iTest].i64ValOut
6145 ? " - val" : "",
6146 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6147 }
6148 }
6149 }
6150}
6151
6152
6153/*
6154 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
6155 */
6156TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
6157
6158static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
6159{
6160 ENTRY_BIN(cvttss2si_i32_r32),
6161 ENTRY_BIN(cvtss2si_i32_r32),
6162};
6163
6164#ifdef TSTIEMAIMPL_WITH_GENERATOR
6165static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
6166{
6167 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6168
6169 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6170 {
6171 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6172 /** @todo More specials. */
6173 };
6174
6175 X86FXSTATE State;
6176 RT_ZERO(State);
6177 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6178 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6179 {
6180 PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;
6181
6182 IEMBINARYOUTPUT BinOut;
6183 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName), RTEXITCODE_FAILURE);
6184
6185 uint32_t cNormalInputPairs = 0;
6186 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6187 {
6188 SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);
6189
6190 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6191
6192 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6193 cNormalInputPairs++;
6194 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6195 {
6196 iTest -= 1;
6197 continue;
6198 }
6199
6200 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6201 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6202 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6203 for (uint8_t iFz = 0; iFz < 2; iFz++)
6204 {
6205 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6206 | (iRounding << X86_MXCSR_RC_SHIFT)
6207 | (iDaz ? X86_MXCSR_DAZ : 0)
6208 | (iFz ? X86_MXCSR_FZ : 0)
6209 | X86_MXCSR_XCPT_MASK;
6210 uint32_t fMxcsrM; int32_t i32OutM;
6211 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r32ValIn.u);
6212 TestData.fMxcsrIn = State.MXCSR;
6213 TestData.fMxcsrOut = fMxcsrM;
6214 TestData.i32ValOut = i32OutM;
6215 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6216
6217 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6218 uint32_t fMxcsrU; int32_t i32OutU;
6219 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r32ValIn.u);
6220 TestData.fMxcsrIn = State.MXCSR;
6221 TestData.fMxcsrOut = fMxcsrU;
6222 TestData.i32ValOut = i32OutU;
6223 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6224
6225 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6226 if (fXcpt)
6227 {
6228 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6229 uint32_t fMxcsr1; int32_t i32Out1;
6230 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r32ValIn.u);
6231 TestData.fMxcsrIn = State.MXCSR;
6232 TestData.fMxcsrOut = fMxcsr1;
6233 TestData.i32ValOut = i32Out1;
6234 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6235
6236 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6237 {
6238 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6239 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6240 uint32_t fMxcsr2; int32_t i32Out2;
6241 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r32ValIn.u);
6242 TestData.fMxcsrIn = State.MXCSR;
6243 TestData.fMxcsrOut = fMxcsr2;
6244 TestData.i32ValOut = i32Out2;
6245 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6246 }
6247 if (!RT_IS_POWER_OF_TWO(fXcpt))
6248 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6249 if (fUnmasked & fXcpt)
6250 {
6251 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6252 uint32_t fMxcsr3; int32_t i32Out3;
6253 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r32ValIn.u);
6254 TestData.fMxcsrIn = State.MXCSR;
6255 TestData.fMxcsrOut = fMxcsr3;
6256 TestData.i32ValOut = i32Out3;
6257 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6258 }
6259 }
6260 }
6261 }
6262 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6263 }
6264
6265 return RTEXITCODE_SUCCESS;
6266}
6267#endif
6268
6269
6270static void SseBinaryI32R32Test(void)
6271{
6272 X86FXSTATE State;
6273 RT_ZERO(State);
6274 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6275 {
6276 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6277 continue;
6278
6279 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6280 uint32_t const cTests = g_aSseBinaryI32R32[iFn].cTests;
6281 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6282 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6283 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6284 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6285 {
6286 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6287 {
6288 uint32_t fMxcsr = 0;
6289 int32_t i32Dst = 0;
6290
6291 State.MXCSR = paTests[iTest].fMxcsrIn;
6292 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
6293 if ( fMxcsr != paTests[iTest].fMxcsrOut
6294 || i32Dst != paTests[iTest].i32ValOut)
6295 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6296 "%s -> mxcsr=%#08x %RI32\n"
6297 "%s expected %#08x %RI32%s%s (%s)\n",
6298 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6299 FormatR32(&paTests[iTest].r32ValIn),
6300 iVar ? " " : "", fMxcsr, i32Dst,
6301 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6302 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6303 i32Dst != paTests[iTest].i32ValOut
6304 ? " - val" : "",
6305 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6306 }
6307 }
6308 }
6309}
6310
6311
6312/*
6313 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
6314 */
6315TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
6316
6317static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
6318{
6319 ENTRY_BIN(cvttss2si_i64_r32),
6320 ENTRY_BIN(cvtss2si_i64_r32),
6321};
6322
6323#ifdef TSTIEMAIMPL_WITH_GENERATOR
6324static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
6325{
6326 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6327
6328 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6329 {
6330 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6331 /** @todo More specials. */
6332 };
6333
6334 X86FXSTATE State;
6335 RT_ZERO(State);
6336 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6337 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6338 {
6339 PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
6340
6341 IEMBINARYOUTPUT BinOut;
6342 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName), RTEXITCODE_FAILURE);
6343
6344 uint32_t cNormalInputPairs = 0;
6345 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6346 {
6347 SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
6348
6349 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6350
6351 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6352 cNormalInputPairs++;
6353 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6354 {
6355 iTest -= 1;
6356 continue;
6357 }
6358
6359 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6360 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6361 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6362 for (uint8_t iFz = 0; iFz < 2; iFz++)
6363 {
6364 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6365 | (iRounding << X86_MXCSR_RC_SHIFT)
6366 | (iDaz ? X86_MXCSR_DAZ : 0)
6367 | (iFz ? X86_MXCSR_FZ : 0)
6368 | X86_MXCSR_XCPT_MASK;
6369 uint32_t fMxcsrM; int64_t i64OutM;
6370 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r32ValIn.u);
6371 TestData.fMxcsrIn = State.MXCSR;
6372 TestData.fMxcsrOut = fMxcsrM;
6373 TestData.i64ValOut = i64OutM;
6374 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6375
6376 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6377 uint32_t fMxcsrU; int64_t i64OutU;
6378 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r32ValIn.u);
6379 TestData.fMxcsrIn = State.MXCSR;
6380 TestData.fMxcsrOut = fMxcsrU;
6381 TestData.i64ValOut = i64OutU;
6382 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6383
6384 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6385 if (fXcpt)
6386 {
6387 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6388 uint32_t fMxcsr1; int64_t i64Out1;
6389 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r32ValIn.u);
6390 TestData.fMxcsrIn = State.MXCSR;
6391 TestData.fMxcsrOut = fMxcsr1;
6392 TestData.i64ValOut = i64Out1;
6393 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6394
6395 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6396 {
6397 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6398 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6399 uint32_t fMxcsr2; int64_t i64Out2;
6400 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r32ValIn.u);
6401 TestData.fMxcsrIn = State.MXCSR;
6402 TestData.fMxcsrOut = fMxcsr2;
6403 TestData.i64ValOut = i64Out2;
6404 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6405 }
6406 if (!RT_IS_POWER_OF_TWO(fXcpt))
6407 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6408 if (fUnmasked & fXcpt)
6409 {
6410 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6411 uint32_t fMxcsr3; int64_t i64Out3;
6412 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r32ValIn.u);
6413 TestData.fMxcsrIn = State.MXCSR;
6414 TestData.fMxcsrOut = fMxcsr3;
6415 TestData.i64ValOut = i64Out3;
6416 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6417 }
6418 }
6419 }
6420 }
6421 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6422 }
6423
6424 return RTEXITCODE_SUCCESS;
6425}
6426#endif
6427
6428
6429static void SseBinaryI64R32Test(void)
6430{
6431 X86FXSTATE State;
6432 RT_ZERO(State);
6433 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6434 {
6435 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6436 continue;
6437
6438 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6439 uint32_t const cTests = g_aSseBinaryI64R32[iFn].cTests;
6440 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6441 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6442 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6443 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6444 {
6445 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6446 {
6447 uint32_t fMxcsr = 0;
6448 int64_t i64Dst = 0;
6449
6450 State.MXCSR = paTests[iTest].fMxcsrIn;
6451 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
6452 if ( fMxcsr != paTests[iTest].fMxcsrOut
6453 || i64Dst != paTests[iTest].i64ValOut)
6454 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6455 "%s -> mxcsr=%#08x %RI64\n"
6456 "%s expected %#08x %RI64%s%s (%s)\n",
6457 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6458 FormatR32(&paTests[iTest].r32ValIn),
6459 iVar ? " " : "", fMxcsr, i64Dst,
6460 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6461 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6462 i64Dst != paTests[iTest].i64ValOut
6463 ? " - val" : "",
6464 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6465 }
6466 }
6467 }
6468}
6469
6470
6471/*
6472 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6473 */
6474TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
6475
6476static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
6477{
6478 ENTRY_BIN(cvtsi2sd_r64_i32)
6479};
6480
6481#ifdef TSTIEMAIMPL_WITH_GENERATOR
6482static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6483{
6484 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6485
6486 static int32_t const s_aSpecials[] =
6487 {
6488 INT32_MIN,
6489 INT32_MAX,
6490 /** @todo More specials. */
6491 };
6492
6493 X86FXSTATE State;
6494 RT_ZERO(State);
6495 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6496 {
6497 PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;
6498
6499 IEMBINARYOUTPUT BinOut;
6500 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName), RTEXITCODE_FAILURE);
6501
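        /* Unlike the floating point generators above, no minimum quota of "normal" inputs is
           needed here since every signed 32-bit integer is a valid input. */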
6502 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6503 {
6504 SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);
6505
6506 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6507
6508 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6509 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6510 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6511 for (uint8_t iFz = 0; iFz < 2; iFz++)
6512 {
6513 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6514 | (iRounding << X86_MXCSR_RC_SHIFT)
6515 | (iDaz ? X86_MXCSR_DAZ : 0)
6516 | (iFz ? X86_MXCSR_FZ : 0)
6517 | X86_MXCSR_XCPT_MASK;
6518 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6519 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i32ValIn);
6520 TestData.fMxcsrIn = State.MXCSR;
6521 TestData.fMxcsrOut = fMxcsrM;
6522 TestData.r64ValOut = r64OutM;
6523 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6524
6525 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6526 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6527 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i32ValIn);
6528 TestData.fMxcsrIn = State.MXCSR;
6529 TestData.fMxcsrOut = fMxcsrU;
6530 TestData.r64ValOut = r64OutU;
6531 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6532
6533 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6534 if (fXcpt)
6535 {
6536 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6537 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6538 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i32ValIn);
6539 TestData.fMxcsrIn = State.MXCSR;
6540 TestData.fMxcsrOut = fMxcsr1;
6541 TestData.r64ValOut = r64Out1;
6542 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6543
6544 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6545 {
6546 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6547 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6548 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6549 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i32ValIn);
6550 TestData.fMxcsrIn = State.MXCSR;
6551 TestData.fMxcsrOut = fMxcsr2;
6552 TestData.r64ValOut = r64Out2;
6553 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6554 }
6555 if (!RT_IS_POWER_OF_TWO(fXcpt))
6556 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6557 if (fUnmasked & fXcpt)
6558 {
6559 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6560 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6561 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i32ValIn);
6562 TestData.fMxcsrIn = State.MXCSR;
6563 TestData.fMxcsrOut = fMxcsr3;
6564 TestData.r64ValOut = r64Out3;
6565 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6566 }
6567 }
6568 }
6569 }
6570 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6571 }
6572
6573 return RTEXITCODE_SUCCESS;
6574}
6575#endif
6576
6577
6578static void SseBinaryR64I32Test(void)
6579{
6580 X86FXSTATE State;
6581 RT_ZERO(State);
6582 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6583 {
6584 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
6585 continue;
6586
6587 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6588 uint32_t const cTests = g_aSseBinaryR64I32[iFn].cTests;
6589 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6590 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6591 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6592 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6593 {
6594 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6595 {
6596 uint32_t fMxcsr = 0;
6597 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6598
6599 State.MXCSR = paTests[iTest].fMxcsrIn;
6600 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6601 if ( fMxcsr != paTests[iTest].fMxcsrOut
6602 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6603 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6604 "%s -> mxcsr=%#08x %s\n"
6605 "%s expected %#08x %s%s%s (%s)\n",
6606 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6607 paTests[iTest].i32ValIn,
6608 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6609 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6610 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6611 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6612 ? " - val" : "",
6613 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6614 }
6615 }
6616 }
6617}
6618
6619
6620/*
6621 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6622 */
6623TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
6624
6625static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
6626{
6627 ENTRY_BIN(cvtsi2sd_r64_i64),
6628};
6629
6630#ifdef TSTIEMAIMPL_WITH_GENERATOR
6631static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6632{
6633 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6634
6635 static int64_t const s_aSpecials[] =
6636 {
6637 INT64_MIN,
6638 INT64_MAX
6639 /** @todo More specials. */
6640 };
6641
6642 X86FXSTATE State;
6643 RT_ZERO(State);
6644 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6645 {
6646 PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;
6647
6648 IEMBINARYOUTPUT BinOut;
6649 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName), RTEXITCODE_FAILURE);
6650
6651 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6652 {
6653 SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);
6654
6655 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6656
6657 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6658 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6659 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6660 for (uint8_t iFz = 0; iFz < 2; iFz++)
6661 {
6662 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6663 | (iRounding << X86_MXCSR_RC_SHIFT)
6664 | (iDaz ? X86_MXCSR_DAZ : 0)
6665 | (iFz ? X86_MXCSR_FZ : 0)
6666 | X86_MXCSR_XCPT_MASK;
6667 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6668 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i64ValIn);
6669 TestData.fMxcsrIn = State.MXCSR;
6670 TestData.fMxcsrOut = fMxcsrM;
6671 TestData.r64ValOut = r64OutM;
6672 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6673
6674 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6675 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6676 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i64ValIn);
6677 TestData.fMxcsrIn = State.MXCSR;
6678 TestData.fMxcsrOut = fMxcsrU;
6679 TestData.r64ValOut = r64OutU;
6680 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6681
6682 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6683 if (fXcpt)
6684 {
6685 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6686 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6687 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i64ValIn);
6688 TestData.fMxcsrIn = State.MXCSR;
6689 TestData.fMxcsrOut = fMxcsr1;
6690 TestData.r64ValOut = r64Out1;
6691 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6692
6693 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6694 {
6695 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6696 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6697 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6698 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i64ValIn);
6699 TestData.fMxcsrIn = State.MXCSR;
6700 TestData.fMxcsrOut = fMxcsr2;
6701 TestData.r64ValOut = r64Out2;
6702 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6703 }
6704 if (!RT_IS_POWER_OF_TWO(fXcpt))
6705 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6706 if (fUnmasked & fXcpt)
6707 {
6708 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6709 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6710 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i64ValIn);
6711 TestData.fMxcsrIn = State.MXCSR;
6712 TestData.fMxcsrOut = fMxcsr3;
6713 TestData.r64ValOut = r64Out3;
6714 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6715 }
6716 }
6717 }
6718 }
6719 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6720 }
6721
6722 return RTEXITCODE_SUCCESS;
6723}
6724#endif
6725
6726
6727static void SseBinaryR64I64Test(void)
6728{
6729 X86FXSTATE State;
6730 RT_ZERO(State);
6731 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6732 {
6733 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
6734 continue;
6735
6736 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6737 uint32_t const cTests = g_aSseBinaryR64I64[iFn].cTests;
6738 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6739 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6740 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6741 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6742 {
6743 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6744 {
6745 uint32_t fMxcsr = 0;
6746 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6747
6748 State.MXCSR = paTests[iTest].fMxcsrIn;
6749 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6750 if ( fMxcsr != paTests[iTest].fMxcsrOut
6751 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6752 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6753 "%s -> mxcsr=%#08x %s\n"
6754 "%s expected %#08x %s%s%s (%s)\n",
6755 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6756 paTests[iTest].i64ValIn,
6757 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6758 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6759 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6760 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6761 ? " - val" : "",
6762 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6763 }
6764 }
6765 }
6766}
6767
6768
6769/*
6770 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6771 */
6772TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6773
6774static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6775{
6776 ENTRY_BIN(cvtsi2ss_r32_i32),
6777};
6778
6779#ifdef TSTIEMAIMPL_WITH_GENERATOR
6780static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6781{
6782 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6783
6784 static int32_t const s_aSpecials[] =
6785 {
6786 INT32_MIN,
6787 INT32_MAX,
6788 /** @todo More specials. */
6789 };
6790
6791 X86FXSTATE State;
6792 RT_ZERO(State);
6793 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6794 {
6795 PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;
6796
6797 IEMBINARYOUTPUT BinOut;
6798 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32I32[iFn].pszName), RTEXITCODE_FAILURE);
6799
6800 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6801 {
6802 SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);
6803
6804 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6805
6806 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6807 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6808 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6809 for (uint8_t iFz = 0; iFz < 2; iFz++)
6810 {
6811 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6812 | (iRounding << X86_MXCSR_RC_SHIFT)
6813 | (iDaz ? X86_MXCSR_DAZ : 0)
6814 | (iFz ? X86_MXCSR_FZ : 0)
6815 | X86_MXCSR_XCPT_MASK;
6816 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6817 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i32ValIn);
6818 TestData.fMxcsrIn = State.MXCSR;
6819 TestData.fMxcsrOut = fMxcsrM;
6820 TestData.r32ValOut = r32OutM;
6821 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6822
6823 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6824 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6825 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i32ValIn);
6826 TestData.fMxcsrIn = State.MXCSR;
6827 TestData.fMxcsrOut = fMxcsrU;
6828 TestData.r32ValOut = r32OutU;
6829 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6830
6831 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6832 if (fXcpt)
6833 {
6834 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6835 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6836 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i32ValIn);
6837 TestData.fMxcsrIn = State.MXCSR;
6838 TestData.fMxcsrOut = fMxcsr1;
6839 TestData.r32ValOut = r32Out1;
6840 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6841
6842 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6843 {
6844 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6845 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6846 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6847 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i32ValIn);
6848 TestData.fMxcsrIn = State.MXCSR;
6849 TestData.fMxcsrOut = fMxcsr2;
6850 TestData.r32ValOut = r32Out2;
6851 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6852 }
6853 if (!RT_IS_POWER_OF_TWO(fXcpt))
6854 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6855 if (fUnmasked & fXcpt)
6856 {
6857 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6858 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6859 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i32ValIn);
6860 TestData.fMxcsrIn = State.MXCSR;
6861 TestData.fMxcsrOut = fMxcsr3;
6862 TestData.r32ValOut = r32Out3;
6863 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6864 }
6865 }
6866 }
6867 }
6868 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6869 }
6870
6871 return RTEXITCODE_SUCCESS;
6872}
6873#endif
6874
6875
6876static void SseBinaryR32I32Test(void)
6877{
6878 X86FXSTATE State;
6879 RT_ZERO(State);
6880 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6881 {
6882 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
6883 continue;
6884
6885 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6886 uint32_t const cTests = g_aSseBinaryR32I32[iFn].cTests;
6887 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6888 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6889 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6890 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6891 {
6892 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6893 {
6894 uint32_t fMxcsr = 0;
6895 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6896
6897 State.MXCSR = paTests[iTest].fMxcsrIn;
6898 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6899 if ( fMxcsr != paTests[iTest].fMxcsrOut
6900 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6901 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6902 "%s -> mxcsr=%#08x %s\n"
6903 "%s expected %#08x %s%s%s (%s)\n",
6904 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6905 paTests[iTest].i32ValIn,
6906 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6907 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6908 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6909 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6910 ? " - val" : "",
6911 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6912 }
6913 }
6914 }
6915}
6916
6917
6918/*
6919 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6920 */
6921TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6922
6923static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6924{
6925 ENTRY_BIN(cvtsi2ss_r32_i64),
6926};
6927
6928#ifdef TSTIEMAIMPL_WITH_GENERATOR
6929static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6930{
6931 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6932
6933 static int64_t const s_aSpecials[] =
6934 {
6935 INT64_MIN,
6936 INT64_MAX
6937 /** @todo More specials. */
6938 };
6939
6940 X86FXSTATE State;
6941 RT_ZERO(State);
6942 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6943 {
6944 PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;
6945
6946 IEMBINARYOUTPUT BinOut;
6947 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseBinaryR32I64[iFn].pszName), RTEXITCODE_FAILURE);
6948
6949 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6950 {
6951 SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);
6952
6953 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6954
6955 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6956 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6957 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6958 for (uint8_t iFz = 0; iFz < 2; iFz++)
6959 {
6960 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6961 | (iRounding << X86_MXCSR_RC_SHIFT)
6962 | (iDaz ? X86_MXCSR_DAZ : 0)
6963 | (iFz ? X86_MXCSR_FZ : 0)
6964 | X86_MXCSR_XCPT_MASK;
6965 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6966 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i64ValIn);
6967 TestData.fMxcsrIn = State.MXCSR;
6968 TestData.fMxcsrOut = fMxcsrM;
6969 TestData.r32ValOut = r32OutM;
6970 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6971
6972 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6973 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6974 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i64ValIn);
6975 TestData.fMxcsrIn = State.MXCSR;
6976 TestData.fMxcsrOut = fMxcsrU;
6977 TestData.r32ValOut = r32OutU;
6978 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6979
6980 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6981 if (fXcpt)
6982 {
6983 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6984 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6985 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i64ValIn);
6986 TestData.fMxcsrIn = State.MXCSR;
6987 TestData.fMxcsrOut = fMxcsr1;
6988 TestData.r32ValOut = r32Out1;
6989 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6990
6991 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6992 {
6993 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6994 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6995 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6996 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i64ValIn);
6997 TestData.fMxcsrIn = State.MXCSR;
6998 TestData.fMxcsrOut = fMxcsr2;
6999 TestData.r32ValOut = r32Out2;
7000 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7001 }
7002 if (!RT_IS_POWER_OF_TWO(fXcpt))
7003 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7004 if (fUnmasked & fXcpt)
7005 {
7006 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7007 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
7008 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i64ValIn);
7009 TestData.fMxcsrIn = State.MXCSR;
7010 TestData.fMxcsrOut = fMxcsr3;
7011 TestData.r32ValOut = r32Out3;
7012 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7013 }
7014 }
7015 }
7016 }
7017 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7018 }
7019
7020 return RTEXITCODE_SUCCESS;
7021}
7022#endif
7023
7024
7025static void SseBinaryR32I64Test(void)
7026{
7027 X86FXSTATE State;
7028 RT_ZERO(State);
7029 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7030 {
7031 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
7032 continue;
7033
7034 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
7035 uint32_t const cTests = g_aSseBinaryR32I64[iFn].cTests;
7036 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
7037 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
7038 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7039 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7040 {
7041 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7042 {
7043 uint32_t fMxcsr = 0;
7044 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7045
7046 State.MXCSR = paTests[iTest].fMxcsrIn;
7047 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
7048 if ( fMxcsr != paTests[iTest].fMxcsrOut
7049 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7050 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7051 "%s -> mxcsr=%#08x %s\n"
7052 "%s expected %#08x %s%s%s (%s)\n",
7053 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7054 paTests[iTest].i64ValIn,
7055 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7056 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7057 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7058 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7059 ? " - val" : "",
7060 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7061 }
7062 }
7063 }
7064}
7065
7066
7067/*
7068 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
7069 */
7070TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
7071
7072static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
7073{
7074 ENTRY_BIN(ucomiss_u128),
7075 ENTRY_BIN(comiss_u128),
7076 ENTRY_BIN_AVX(vucomiss_u128),
7077 ENTRY_BIN_AVX(vcomiss_u128),
7078};
7079
7080#ifdef TSTIEMAIMPL_WITH_GENERATOR
7081static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
7082{
7083 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7084
7085 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7086 {
7087 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7088 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7089 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7090 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7091 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7092 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7093 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7094 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7095 /** @todo More specials. */
7096 };
7097
7098 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7099 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7100 {
7101 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;
7102
7103 IEMBINARYOUTPUT BinOut;
7104 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareEflR32R32[iFn].pszName), RTEXITCODE_FAILURE);
7105
7106 uint32_t cNormalInputPairs = 0;
7107 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7108 {
7109 SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);
7110 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7111 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7112
7113 TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7114 TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7115
7116 ValIn1.ar32[0] = TestData.r32ValIn1;
7117 ValIn2.ar32[0] = TestData.r32ValIn2;
7118
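            /* Make sure we end up with at least cMinNormalPairs pairs of normal inputs: if we
               are running out of random test slots without enough normals, redo this iteration
               with freshly generated values. */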
7119 if ( RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
7120 && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
7121 cNormalInputPairs++;
7122 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7123 {
7124 iTest -= 1;
7125 continue;
7126 }
7127
7128 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7129 uint32_t const fEFlags = RandEFlags();
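            /* Every input pair is run through all rounding mode and DAZ/FZ combinations.  Each
               combination is tried first with all exceptions masked and then with all of them
               unmasked; if any exception flags come back set, additional runs are made with
               those flags pre-set on input and with the raised exceptions masked or unmasked
               one at a time, presumably to cover the flag accumulation and unmasked-exception
               code paths. */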
7130 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7131 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7132 for (uint8_t iFz = 0; iFz < 2; iFz++)
7133 {
7134 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7135 | (iRounding << X86_MXCSR_RC_SHIFT)
7136 | (iDaz ? X86_MXCSR_DAZ : 0)
7137 | (iFz ? X86_MXCSR_FZ : 0)
7138 | X86_MXCSR_XCPT_MASK;
7139 uint32_t fMxcsrM = fMxcsrIn;
7140 uint32_t fEFlagsM = fEFlags;
7141 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7142 TestData.fMxcsrIn = fMxcsrIn;
7143 TestData.fMxcsrOut = fMxcsrM;
7144 TestData.fEflIn = fEFlags;
7145 TestData.fEflOut = fEFlagsM;
7146 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7147
7148 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7149 uint32_t fMxcsrU = fMxcsrIn;
7150 uint32_t fEFlagsU = fEFlags;
7151 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7152 TestData.fMxcsrIn = fMxcsrIn;
7153 TestData.fMxcsrOut = fMxcsrU;
7154 TestData.fEflIn = fEFlags;
7155 TestData.fEflOut = fEFlagsU;
7156 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7157
7158 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7159 if (fXcpt)
7160 {
7161 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7162 uint32_t fMxcsr1 = fMxcsrIn;
7163 uint32_t fEFlags1 = fEFlags;
7164 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7165 TestData.fMxcsrIn = fMxcsrIn;
7166 TestData.fMxcsrOut = fMxcsr1;
7167 TestData.fEflIn = fEFlags;
7168 TestData.fEflOut = fEFlags1;
7169 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7170
7171 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7172 {
7173 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7174 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7175 uint32_t fMxcsr2 = fMxcsrIn;
7176 uint32_t fEFlags2 = fEFlags;
7177 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7178 TestData.fMxcsrIn = fMxcsrIn;
7179 TestData.fMxcsrOut = fMxcsr2;
7180 TestData.fEflIn = fEFlags;
7181 TestData.fEflOut = fEFlags2;
7182 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7183 }
7184 if (!RT_IS_POWER_OF_TWO(fXcpt))
7185 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7186 if (fUnmasked & fXcpt)
7187 {
7188 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7189 uint32_t fMxcsr3 = fMxcsrIn;
7190 uint32_t fEFlags3 = fEFlags;
7191 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7192 TestData.fMxcsrIn = fMxcsrIn;
7193 TestData.fMxcsrOut = fMxcsr3;
7194 TestData.fEflIn = fEFlags;
7195 TestData.fEflOut = fEFlags3;
7196 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7197 }
7198 }
7199 }
7200 }
7201 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7202 }
7203
7204 return RTEXITCODE_SUCCESS;
7205}
7206#endif
7207
7208static void SseCompareEflR32R32Test(void)
7209{
7210 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7211 {
7212 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7213 continue;
7214
7215 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7216 uint32_t const cTests = g_aSseCompareEflR32R32[iFn].cTests;
7217 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7218 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7219 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7220 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7221 {
7222 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7223 {
7224 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7225 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7226
7227 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
7228 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
7229 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7230 uint32_t fEFlags = paTests[iTest].fEflIn;
7231 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7232 if ( fMxcsr != paTests[iTest].fMxcsrOut
7233 || fEFlags != paTests[iTest].fEflOut)
7234 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7235 "%s -> mxcsr=%#08x %#08x\n"
7236 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7237 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7238 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7239 iVar ? " " : "", fMxcsr, fEFlags,
7240 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7241 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7242 FormatMxcsr(paTests[iTest].fMxcsrIn),
7243 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7244 }
7245 }
7246 }
7247}
7248
7249
7250/*
7251 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
7252 */
7253TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
7254
7255static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
7256{
7257 ENTRY_BIN(ucomisd_u128),
7258 ENTRY_BIN(comisd_u128),
7259 ENTRY_BIN_AVX(vucomisd_u128),
7260 ENTRY_BIN_AVX(vcomisd_u128)
7261};
7262
7263#ifdef TSTIEMAIMPL_WITH_GENERATOR
7264static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
7265{
7266 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7267
7268 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7269 {
7270 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7271 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7272 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7273 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7274 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7275 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7276 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7277 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7278 /** @todo More specials. */
7279 };
7280
7281 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7282 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7283 {
7284 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;
7285
7286 IEMBINARYOUTPUT BinOut;
7287 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareEflR64R64[iFn].pszName), RTEXITCODE_FAILURE);
7288
7289 uint32_t cNormalInputPairs = 0;
7290 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7291 {
7292 SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);
7293 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7294 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7295
7296 TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7297 TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7298
7299 ValIn1.ar64[0] = TestData.r64ValIn1;
7300 ValIn2.ar64[0] = TestData.r64ValIn2;
7301
7302 if ( RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
7303 && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
7304 cNormalInputPairs++;
7305 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7306 {
7307 iTest -= 1;
7308 continue;
7309 }
7310
7311 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7312 uint32_t const fEFlags = RandEFlags();
7313 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7314 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7315 for (uint8_t iFz = 0; iFz < 2; iFz++)
7316 {
7317 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7318 | (iRounding << X86_MXCSR_RC_SHIFT)
7319 | (iDaz ? X86_MXCSR_DAZ : 0)
7320 | (iFz ? X86_MXCSR_FZ : 0)
7321 | X86_MXCSR_XCPT_MASK;
7322 uint32_t fMxcsrM = fMxcsrIn;
7323 uint32_t fEFlagsM = fEFlags;
7324 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7325 TestData.fMxcsrIn = fMxcsrIn;
7326 TestData.fMxcsrOut = fMxcsrM;
7327 TestData.fEflIn = fEFlags;
7328 TestData.fEflOut = fEFlagsM;
7329 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7330
7331 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7332 uint32_t fMxcsrU = fMxcsrIn;
7333 uint32_t fEFlagsU = fEFlags;
7334 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7335 TestData.fMxcsrIn = fMxcsrIn;
7336 TestData.fMxcsrOut = fMxcsrU;
7337 TestData.fEflIn = fEFlags;
7338 TestData.fEflOut = fEFlagsU;
7339 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7340
7341 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7342 if (fXcpt)
7343 {
7344 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7345 uint32_t fMxcsr1 = fMxcsrIn;
7346 uint32_t fEFlags1 = fEFlags;
7347 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7348 TestData.fMxcsrIn = fMxcsrIn;
7349 TestData.fMxcsrOut = fMxcsr1;
7350 TestData.fEflIn = fEFlags;
7351 TestData.fEflOut = fEFlags1;
7352 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7353
7354 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7355 {
7356 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7357 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7358 uint32_t fMxcsr2 = fMxcsrIn;
7359 uint32_t fEFlags2 = fEFlags;
7360 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7361 TestData.fMxcsrIn = fMxcsrIn;
7362 TestData.fMxcsrOut = fMxcsr2;
7363 TestData.fEflIn = fEFlags;
7364 TestData.fEflOut = fEFlags2;
7365 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7366 }
7367 if (!RT_IS_POWER_OF_TWO(fXcpt))
7368 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7369 if (fUnmasked & fXcpt)
7370 {
7371 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7372 uint32_t fMxcsr3 = fMxcsrIn;
7373 uint32_t fEFlags3 = fEFlags;
7374 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7375 TestData.fMxcsrIn = fMxcsrIn;
7376 TestData.fMxcsrOut = fMxcsr3;
7377 TestData.fEflIn = fEFlags;
7378 TestData.fEflOut = fEFlags3;
7379 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7380 }
7381 }
7382 }
7383 }
7384 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7385 }
7386
7387 return RTEXITCODE_SUCCESS;
7388}
7389#endif
7390
7391static void SseCompareEflR64R64Test(void)
7392{
7393 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7394 {
7395 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7396 continue;
7397
7398 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7399 uint32_t const cTests = g_aSseCompareEflR64R64[iFn].cTests;
7400 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7401 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7402 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7403 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7404 {
7405 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7406 {
7407 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7408 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7409
7410 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
7411 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
7412 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7413 uint32_t fEFlags = paTests[iTest].fEflIn;
7414 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7415 if ( fMxcsr != paTests[iTest].fMxcsrOut
7416 || fEFlags != paTests[iTest].fEflOut)
7417 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7418 "%s -> mxcsr=%#08x %#08x\n"
7419 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7420 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7421 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7422 iVar ? " " : "", fMxcsr, fEFlags,
7423 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7424 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7425 FormatMxcsr(paTests[iTest].fMxcsrIn),
7426 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7427 }
7428 }
7429 }
7430}
7431
7432
7433/*
7434 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7435 */
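/* Note: the cmpps/cmpss style workers produce an all-ones or all-zeroes mask per element, so the
   checks below compare the raw au32/au64 words rather than interpreting them as floats. */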
7436/** Maximum immediate to try, to keep the testdata size under control (at least a little bit). */
7437#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
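/* Note: 0x1f spans the 32 comparison predicates defined for the AVX encodings; the legacy SSE
   encodings only define predicates 0..7, but the workers appear to accept the full range and
   are fed all of it here. */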
7438
7439TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
7440
7441static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
7442{
7443 ENTRY_BIN(cmpps_u128),
7444 ENTRY_BIN(cmpss_u128)
7445};
7446
7447#ifdef TSTIEMAIMPL_WITH_GENERATOR
7448static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
7449{
7450 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7451
7452 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7453 {
7454 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7455 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7456 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7457 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7458 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7459 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7460 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7461 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7462 /** @todo More specials. */
7463 };
7464
7465 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7466 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7467 {
7468 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7469
7470 IEMBINARYOUTPUT BinOut;
7471 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareF2XmmR32Imm8[iFn].pszName), RTEXITCODE_FAILURE);
7472
7473 uint32_t cNormalInputPairs = 0;
7474 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7475 {
7476 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7477
7478 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7479 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7480 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7481 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7482
7483 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7484 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7485 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7486 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7487
7488 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
7489 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
7490 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
7491 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
7492 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
7493 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
7494 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
7495 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
7496 cNormalInputPairs++;
7497 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7498 {
7499 iTest -= 1;
7500 continue;
7501 }
7502
7503 IEMMEDIAF2XMMSRC Src;
7504 Src.uSrc1 = TestData.InVal1;
7505 Src.uSrc2 = TestData.InVal2;
7506 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
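            /* Combine every immediate up to SSE_COMPARE_F2_XMM_IMM8_MAX with all rounding mode
               and DAZ/FZ variations for this input pair. */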
7507 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7508 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7509 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7510 for (uint8_t iFz = 0; iFz < 2; iFz++)
7511 {
7512 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7513 | (iRounding << X86_MXCSR_RC_SHIFT)
7514 | (iDaz ? X86_MXCSR_DAZ : 0)
7515 | (iFz ? X86_MXCSR_FZ : 0)
7516 | X86_MXCSR_XCPT_MASK;
7517 uint32_t fMxcsrM = fMxcsrIn;
7518 X86XMMREG ResM;
7519 pfn(&fMxcsrM, &ResM, &Src, bImm);
7520 TestData.fMxcsrIn = fMxcsrIn;
7521 TestData.fMxcsrOut = fMxcsrM;
7522 TestData.bImm = bImm;
7523 TestData.OutVal = ResM;
7524 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7525
7526 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7527 uint32_t fMxcsrU = fMxcsrIn;
7528 X86XMMREG ResU;
7529 pfn(&fMxcsrU, &ResU, &Src, bImm);
7530 TestData.fMxcsrIn = fMxcsrIn;
7531 TestData.fMxcsrOut = fMxcsrU;
7532 TestData.bImm = bImm;
7533 TestData.OutVal = ResU;
7534 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7535
7536 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7537 if (fXcpt)
7538 {
7539 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7540 uint32_t fMxcsr1 = fMxcsrIn;
7541 X86XMMREG Res1;
7542 pfn(&fMxcsr1, &Res1, &Src, bImm);
7543 TestData.fMxcsrIn = fMxcsrIn;
7544 TestData.fMxcsrOut = fMxcsr1;
7545 TestData.bImm = bImm;
7546 TestData.OutVal = Res1;
7547 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7548
7549 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7550 {
7551 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7552 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7553 uint32_t fMxcsr2 = fMxcsrIn;
7554 X86XMMREG Res2;
7555 pfn(&fMxcsr2, &Res2, &Src, bImm);
7556 TestData.fMxcsrIn = fMxcsrIn;
7557 TestData.fMxcsrOut = fMxcsr2;
7558 TestData.bImm = bImm;
7559 TestData.OutVal = Res2;
7560 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7561 }
7562 if (!RT_IS_POWER_OF_TWO(fXcpt))
7563 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7564 if (fUnmasked & fXcpt)
7565 {
7566 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7567 uint32_t fMxcsr3 = fMxcsrIn;
7568 X86XMMREG Res3;
7569 pfn(&fMxcsr3, &Res3, &Src, bImm);
7570 TestData.fMxcsrIn = fMxcsrIn;
7571 TestData.fMxcsrOut = fMxcsr3;
7572 TestData.bImm = bImm;
7573 TestData.OutVal = Res3;
7574 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7575 }
7576 }
7577 }
7578 }
7579 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7580 }
7581
7582 return RTEXITCODE_SUCCESS;
7583}
7584#endif
7585
7586static void SseCompareF2XmmR32Imm8Test(void)
7587{
7588 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7589 {
7590 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR32Imm8[iFn]))
7591 continue;
7592
7593 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7594 uint32_t const cTests = g_aSseCompareF2XmmR32Imm8[iFn].cTests;
7595 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7596 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7597 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7598 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7599 {
7600 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7601 {
7602 IEMMEDIAF2XMMSRC Src;
7603 X86XMMREG ValOut;
7604
7605 Src.uSrc1 = paTests[iTest].InVal1;
7606 Src.uSrc2 = paTests[iTest].InVal2;
7607 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7608 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7609 if ( fMxcsr != paTests[iTest].fMxcsrOut
7610 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7611 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7612 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7613 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7614 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7615 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7616 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7617 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7618 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7619 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7620 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7621 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7622 paTests[iTest].bImm,
7623 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7624 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7625 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7626 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7627 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7628 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7629 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7630 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7631 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7632 ? " - val" : "",
7633 FormatMxcsr(paTests[iTest].fMxcsrIn));
7634 }
7635 }
7636 }
7637}
7638
7639
7640/*
7641 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7642 */
7643static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7644{
7645 ENTRY_BIN(cmppd_u128),
7646 ENTRY_BIN(cmpsd_u128)
7647};
7648
7649#ifdef TSTIEMAIMPL_WITH_GENERATOR
7650static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
7651{
7652 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7653
7654 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7655 {
7656 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7657 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7658 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7659 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7660 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7661 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7662 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7663 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7664 /** @todo More specials. */
7665 };
7666
7667 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7668 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7669 {
7670 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative : g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7671
7672 IEMBINARYOUTPUT BinOut;
7673 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseCompareF2XmmR64Imm8[iFn].pszName), RTEXITCODE_FAILURE);
7674
7675 uint32_t cNormalInputPairs = 0;
7676 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7677 {
7678 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7679
7680 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7681 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7682
7683 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7684 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7685
7686 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
7687 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
7688 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
7689 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
7690 cNormalInputPairs++;
7691 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7692 {
7693 iTest -= 1;
7694 continue;
7695 }
7696
7697 IEMMEDIAF2XMMSRC Src;
7698 Src.uSrc1 = TestData.InVal1;
7699 Src.uSrc2 = TestData.InVal2;
7700 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7701 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7702 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7703 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7704 for (uint8_t iFz = 0; iFz < 2; iFz++)
7705 {
7706 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7707 | (iRounding << X86_MXCSR_RC_SHIFT)
7708 | (iDaz ? X86_MXCSR_DAZ : 0)
7709 | (iFz ? X86_MXCSR_FZ : 0)
7710 | X86_MXCSR_XCPT_MASK;
7711 uint32_t fMxcsrM = fMxcsrIn;
7712 X86XMMREG ResM;
7713 pfn(&fMxcsrM, &ResM, &Src, bImm);
7714 TestData.fMxcsrIn = fMxcsrIn;
7715 TestData.fMxcsrOut = fMxcsrM;
7716 TestData.bImm = bImm;
7717 TestData.OutVal = ResM;
7718 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7719
7720 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7721 uint32_t fMxcsrU = fMxcsrIn;
7722 X86XMMREG ResU;
7723 pfn(&fMxcsrU, &ResU, &Src, bImm);
7724 TestData.fMxcsrIn = fMxcsrIn;
7725 TestData.fMxcsrOut = fMxcsrU;
7726 TestData.bImm = bImm;
7727 TestData.OutVal = ResU;
7728 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7729
7730 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7731 if (fXcpt)
7732 {
7733 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7734 uint32_t fMxcsr1 = fMxcsrIn;
7735 X86XMMREG Res1;
7736 pfn(&fMxcsr1, &Res1, &Src, bImm);
7737 TestData.fMxcsrIn = fMxcsrIn;
7738 TestData.fMxcsrOut = fMxcsr1;
7739 TestData.bImm = bImm;
7740 TestData.OutVal = Res1;
7741 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7742
7743 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7744 {
7745 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7746 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7747 uint32_t fMxcsr2 = fMxcsrIn;
7748 X86XMMREG Res2;
7749 pfn(&fMxcsr2, &Res2, &Src, bImm);
7750 TestData.fMxcsrIn = fMxcsrIn;
7751 TestData.fMxcsrOut = fMxcsr2;
7752 TestData.bImm = bImm;
7753 TestData.OutVal = Res2;
7754 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7755 }
7756 if (!RT_IS_POWER_OF_TWO(fXcpt))
7757 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7758 if (fUnmasked & fXcpt)
7759 {
7760 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7761 uint32_t fMxcsr3 = fMxcsrIn;
7762 X86XMMREG Res3;
7763 pfn(&fMxcsr3, &Res3, &Src, bImm);
7764 TestData.fMxcsrIn = fMxcsrIn;
7765 TestData.fMxcsrOut = fMxcsr3;
7766 TestData.bImm = bImm;
7767 TestData.OutVal = Res3;
7768 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7769 }
7770 }
7771 }
7772 }
7773 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7774 }
7775
7776 return RTEXITCODE_SUCCESS;
7777}
7778#endif
7779
7780static void SseCompareF2XmmR64Imm8Test(void)
7781{
7782 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7783 {
7784 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR64Imm8[iFn]))
7785 continue;
7786
7787 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7788 uint32_t const cTests = g_aSseCompareF2XmmR64Imm8[iFn].cTests;
7789 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7790 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7791 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7792 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7793 {
7794 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7795 {
7796 IEMMEDIAF2XMMSRC Src;
7797 X86XMMREG ValOut;
7798
7799 Src.uSrc1 = paTests[iTest].InVal1;
7800 Src.uSrc2 = paTests[iTest].InVal2;
7801 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7802 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7803 if ( fMxcsr != paTests[iTest].fMxcsrOut
7804 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7805 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7806 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7807 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7808 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7809 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7810 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7811 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7812 paTests[iTest].bImm,
7813 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7814 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7815 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7816 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7817 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7818 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7819 ? " - val" : "",
7820 FormatMxcsr(paTests[iTest].fMxcsrIn));
7821 }
7822 }
7823 }
7824}
7825
7826
7827/*
7828 * SSE operations converting signed double-words to single-precision floating point values.
7829 */
7830TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7831
7832static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7833{
7834 ENTRY_BIN(cvtdq2ps_u128)
7835};
7836
7837#ifdef TSTIEMAIMPL_WITH_GENERATOR
7838static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
7839{
7840 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7841
7842 static int32_t const s_aSpecials[] =
7843 {
7844 INT32_MIN,
7845 INT32_MIN / 2,
7846 0,
7847 INT32_MAX / 2,
7848 INT32_MAX,
7849 (int32_t)0x80000000
7850 /** @todo More specials. */
7851 };
7852
7853 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7854 {
7855 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative ? g_aSseConvertXmmI32R32[iFn].pfnNative : g_aSseConvertXmmI32R32[iFn].pfn;
7856
7857 IEMBINARYOUTPUT BinOut;
7858 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmI32R32[iFn].pszName), RTEXITCODE_FAILURE);
7859
7860 X86FXSTATE State;
7861 RT_ZERO(State);
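        /* The worker takes a whole FXSTATE, but only MXCSR appears to matter for these
           conversions, so a single zeroed state is reused and just the MXCSR field is
           rewritten for every variation. */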
7862 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7863 {
7864 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
7865
7866 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7867 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7868 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7869 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7870
7871 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7872 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7873 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7874 for (uint8_t iFz = 0; iFz < 2; iFz++)
7875 {
7876 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7877 | (iRounding << X86_MXCSR_RC_SHIFT)
7878 | (iDaz ? X86_MXCSR_DAZ : 0)
7879 | (iFz ? X86_MXCSR_FZ : 0)
7880 | X86_MXCSR_XCPT_MASK;
7881 IEMSSERESULT ResM; RT_ZERO(ResM);
7882 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
7883 TestData.fMxcsrIn = State.MXCSR;
7884 TestData.fMxcsrOut = ResM.MXCSR;
7885 TestData.OutVal = ResM.uResult;
7886 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7887
7888 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7889 IEMSSERESULT ResU; RT_ZERO(ResU);
7890 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
7891 TestData.fMxcsrIn = State.MXCSR;
7892 TestData.fMxcsrOut = ResU.MXCSR;
7893 TestData.OutVal = ResU.uResult;
7894 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7895
7896 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
7897 if (fXcpt)
7898 {
7899 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7900 IEMSSERESULT Res1; RT_ZERO(Res1);
7901 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
7902 TestData.fMxcsrIn = State.MXCSR;
7903 TestData.fMxcsrOut = Res1.MXCSR;
7904 TestData.OutVal = Res1.uResult;
7905 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7906
7907 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
7908 {
7909 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
7910 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7911 IEMSSERESULT Res2; RT_ZERO(Res2);
7912 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
7913 TestData.fMxcsrIn = State.MXCSR;
7914 TestData.fMxcsrOut = Res2.MXCSR;
7915 TestData.OutVal = Res2.uResult;
7916 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7917 }
7918 if (!RT_IS_POWER_OF_TWO(fXcpt))
7919 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7920 if (fUnmasked & fXcpt)
7921 {
7922 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7923 IEMSSERESULT Res3; RT_ZERO(Res3);
7924 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
7925 TestData.fMxcsrIn = State.MXCSR;
7926 TestData.fMxcsrOut = Res3.MXCSR;
7927 TestData.OutVal = Res3.uResult;
7928 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7929 }
7930 }
7931 }
7932 }
7933 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7934 }
7935
7936 return RTEXITCODE_SUCCESS;
7937}
7938#endif
7939
7940static void SseConvertXmmI32R32Test(void)
7941{
7942 X86FXSTATE State;
7943 RT_ZERO(State);
7944
7945 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7946 {
7947 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
7948 continue;
7949
7950 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7951 uint32_t const cTests = g_aSseConvertXmmI32R32[iFn].cTests;
7952 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7953 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7954 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7955 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7956 {
7957 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7958 {
7959 IEMSSERESULT Res; RT_ZERO(Res);
7960
7961 State.MXCSR = paTests[iTest].fMxcsrIn;
7962 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7963 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7964 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7965 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7966 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7967 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7968 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7969 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7970 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7971 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7972 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7973 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7974 iVar ? " " : "", Res.MXCSR,
7975 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7976 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7977 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7978 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7979 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7980 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7981 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7982 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7983 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7984 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7985 ? " - val" : "",
7986 FormatMxcsr(paTests[iTest].fMxcsrIn));
7987 }
7988 }
7989 }
7990}
7991
7992
7993/*
7994 * SSE operations converting single-precision floating point values to signed double-words.
7995 */
7996static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7997{
7998 ENTRY_BIN(cvtps2dq_u128),
7999 ENTRY_BIN(cvttps2dq_u128)
8000};
8001
8002#ifdef TSTIEMAIMPL_WITH_GENERATOR
8003static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
8004{
8005 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8006
8007 static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
8008 {
8009 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
8010 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
8011 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
8012 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
8013 /** @todo More specials. */
8014 };
8015
8016 X86FXSTATE State;
8017 RT_ZERO(State);
8018 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8019 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8020 {
8021 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative ? g_aSseConvertXmmR32I32[iFn].pfnNative : g_aSseConvertXmmR32I32[iFn].pfn;
8022
8023 IEMBINARYOUTPUT BinOut;
8024 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR32I32[iFn].pszName), RTEXITCODE_FAILURE);
8025
8026 uint32_t cNormalInputPairs = 0;
8027 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8028 {
8029 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8030
8031 TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8032 TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8033 TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
8034 TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
8035
8036 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
8037 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
8038 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
8039 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
8040 cNormalInputPairs++;
8041 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8042 {
8043 iTest -= 1;
8044 continue;
8045 }
8046
8047 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8048 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8049 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8050 for (uint8_t iFz = 0; iFz < 2; iFz++)
8051 {
8052 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8053 | (iRounding << X86_MXCSR_RC_SHIFT)
8054 | (iDaz ? X86_MXCSR_DAZ : 0)
8055 | (iFz ? X86_MXCSR_FZ : 0)
8056 | X86_MXCSR_XCPT_MASK;
8057 IEMSSERESULT ResM; RT_ZERO(ResM);
8058 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8059 TestData.fMxcsrIn = State.MXCSR;
8060 TestData.fMxcsrOut = ResM.MXCSR;
8061 TestData.OutVal = ResM.uResult;
8062 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8063
8064 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8065 IEMSSERESULT ResU; RT_ZERO(ResU);
8066 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8067 TestData.fMxcsrIn = State.MXCSR;
8068 TestData.fMxcsrOut = ResU.MXCSR;
8069 TestData.OutVal = ResU.uResult;
8070 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8071
8072 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8073 if (fXcpt)
8074 {
8075 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8076 IEMSSERESULT Res1; RT_ZERO(Res1);
8077 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8078 TestData.fMxcsrIn = State.MXCSR;
8079 TestData.fMxcsrOut = Res1.MXCSR;
8080 TestData.OutVal = Res1.uResult;
8081 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8082
8083 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8084 {
8085 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8086 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8087 IEMSSERESULT Res2; RT_ZERO(Res2);
8088 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8089 TestData.fMxcsrIn = State.MXCSR;
8090 TestData.fMxcsrOut = Res2.MXCSR;
8091 TestData.OutVal = Res2.uResult;
8092 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8093 }
8094 if (!RT_IS_POWER_OF_TWO(fXcpt))
8095 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8096 if (fUnmasked & fXcpt)
8097 {
8098 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8099 IEMSSERESULT Res3; RT_ZERO(Res3);
8100 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8101 TestData.fMxcsrIn = State.MXCSR;
8102 TestData.fMxcsrOut = Res3.MXCSR;
8103 TestData.OutVal = Res3.uResult;
8104 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8105 }
8106 }
8107 }
8108 }
8109 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8110 }
8111
8112 return RTEXITCODE_SUCCESS;
8113}
8114#endif
8115
8116static void SseConvertXmmR32I32Test(void)
8117{
8118 X86FXSTATE State;
8119 RT_ZERO(State);
8120
8121 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8122 {
8123 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
8124 continue;
8125
8126 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
8127 uint32_t const cTests = g_aSseConvertXmmR32I32[iFn].cTests;
8128 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
8129 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
8130 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8131 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8132 {
8133 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8134 {
8135 IEMSSERESULT Res; RT_ZERO(Res);
8136
8137 State.MXCSR = paTests[iTest].fMxcsrIn;
8138 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8139 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8140 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8141 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8142 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8143 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8144 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
8145 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8146 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8147 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8148 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
8149 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
8150 iVar ? " " : "", Res.MXCSR,
8151 Res.uResult.ai32[0], Res.uResult.ai32[1],
8152 Res.uResult.ai32[2], Res.uResult.ai32[3],
8153 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8154 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8155 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8156 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8157 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8158 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8159 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8160 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8161 ? " - val" : "",
8162 FormatMxcsr(paTests[iTest].fMxcsrIn));
8163 }
8164 }
8165 }
8166}
8167
8168
8169/*
8170 * SSE operations converting signed double-words to double-precision floating point values.
8171 */
8172static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
8173{
8174 ENTRY_BIN(cvtdq2pd_u128)
8175};
8176
8177#ifdef TSTIEMAIMPL_WITH_GENERATOR
8178static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
8179{
8180 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8181
8182 static int32_t const s_aSpecials[] =
8183 {
8184 INT32_MIN,
8185 INT32_MIN / 2,
8186 0,
8187 INT32_MAX / 2,
8188 INT32_MAX,
8189 (int32_t)0x80000000
8190 /** @todo More specials. */
8191 };
8192
8193 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8194 {
8195 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative ? g_aSseConvertXmmI32R64[iFn].pfnNative : g_aSseConvertXmmI32R64[iFn].pfn;
8196
8197 IEMBINARYOUTPUT BinOut;
8198 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmI32R64[iFn].pszName), RTEXITCODE_FAILURE);
8199
8200 X86FXSTATE State;
8201 RT_ZERO(State);
8202 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8203 {
8204 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8205
8206 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8207 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8208 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8209 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8210
8211 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8212 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8213 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8214 for (uint8_t iFz = 0; iFz < 2; iFz++)
8215 {
8216 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8217 | (iRounding << X86_MXCSR_RC_SHIFT)
8218 | (iDaz ? X86_MXCSR_DAZ : 0)
8219 | (iFz ? X86_MXCSR_FZ : 0)
8220 | X86_MXCSR_XCPT_MASK;
8221 IEMSSERESULT ResM; RT_ZERO(ResM);
8222 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8223 TestData.fMxcsrIn = State.MXCSR;
8224 TestData.fMxcsrOut = ResM.MXCSR;
8225 TestData.OutVal = ResM.uResult;
8226 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8227
8228 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8229 IEMSSERESULT ResU; RT_ZERO(ResU);
8230 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8231 TestData.fMxcsrIn = State.MXCSR;
8232 TestData.fMxcsrOut = ResU.MXCSR;
8233 TestData.OutVal = ResU.uResult;
8234 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8235
8236 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8237 if (fXcpt)
8238 {
8239 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8240 IEMSSERESULT Res1; RT_ZERO(Res1);
8241 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8242 TestData.fMxcsrIn = State.MXCSR;
8243 TestData.fMxcsrOut = Res1.MXCSR;
8244 TestData.OutVal = Res1.uResult;
8245 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8246
8247 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8248 {
8249 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8250 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8251 IEMSSERESULT Res2; RT_ZERO(Res2);
8252 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8253 TestData.fMxcsrIn = State.MXCSR;
8254 TestData.fMxcsrOut = Res2.MXCSR;
8255 TestData.OutVal = Res2.uResult;
8256 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8257 }
8258 if (!RT_IS_POWER_OF_TWO(fXcpt))
8259 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8260 if (fUnmasked & fXcpt)
8261 {
8262 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8263 IEMSSERESULT Res3; RT_ZERO(Res3);
8264 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8265 TestData.fMxcsrIn = State.MXCSR;
8266 TestData.fMxcsrOut = Res3.MXCSR;
8267 TestData.OutVal = Res3.uResult;
8268 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8269 }
8270 }
8271 }
8272 }
8273 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8274 }
8275
8276 return RTEXITCODE_SUCCESS;
8277}
8278#endif
8279
8280static void SseConvertXmmI32R64Test(void)
8281{
8282 X86FXSTATE State;
8283 RT_ZERO(State);
8284
8285 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8286 {
8287 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
8288 continue;
8289
8290 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8291 uint32_t const cTests = g_aSseConvertXmmI32R64[iFn].cTests;
8292 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8293 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8294 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8295 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8296 {
8297 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8298 {
8299 IEMSSERESULT Res; RT_ZERO(Res);
8300
8301 State.MXCSR = paTests[iTest].fMxcsrIn;
8302 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8303 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8304 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8305 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8306 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8307 "%s -> mxcsr=%#08x %s'%s\n"
8308 "%s expected %#08x %s'%s%s%s (%s)\n",
8309 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8310 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8311 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8312 iVar ? " " : "", Res.MXCSR,
8313 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
8314 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8315 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8316 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8317 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8318 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8319 ? " - val" : "",
8320 FormatMxcsr(paTests[iTest].fMxcsrIn));
8321 }
8322 }
8323 }
8324}
8325
8326
8327/*
8328 * SSE operations converting double-precision floating point values to signed double-words.
8329 */
8330static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8331{
8332 ENTRY_BIN(cvtpd2dq_u128),
8333 ENTRY_BIN(cvttpd2dq_u128)
8334};
8335
8336#ifdef TSTIEMAIMPL_WITH_GENERATOR
8337static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
8338{
8339 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8340
8341 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8342 {
8343 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8344 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8345 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8346 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8347 /** @todo More specials. */
8348 };
8349
8350 X86FXSTATE State;
8351 RT_ZERO(State);
8352 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8353 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8354 {
8355 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;
8356
8357 IEMBINARYOUTPUT BinOut;
8358 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName), RTEXITCODE_FAILURE);
8359
8360 uint32_t cNormalInputPairs = 0;
8361 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8362 {
8363 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8364
8365 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8366 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8367
8368 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8369 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8370 cNormalInputPairs++;
8371 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8372 {
8373 iTest -= 1;
8374 continue;
8375 }
8376
8377 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8378 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8379 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8380 for (uint8_t iFz = 0; iFz < 2; iFz++)
8381 {
8382 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8383 | (iRounding << X86_MXCSR_RC_SHIFT)
8384 | (iDaz ? X86_MXCSR_DAZ : 0)
8385 | (iFz ? X86_MXCSR_FZ : 0)
8386 | X86_MXCSR_XCPT_MASK;
8387 IEMSSERESULT ResM; RT_ZERO(ResM);
8388 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8389 TestData.fMxcsrIn = State.MXCSR;
8390 TestData.fMxcsrOut = ResM.MXCSR;
8391 TestData.OutVal = ResM.uResult;
8392 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8393
8394 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8395 IEMSSERESULT ResU; RT_ZERO(ResU);
8396 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8397 TestData.fMxcsrIn = State.MXCSR;
8398 TestData.fMxcsrOut = ResU.MXCSR;
8399 TestData.OutVal = ResU.uResult;
8400 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8401
8402 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8403 if (fXcpt)
8404 {
8405 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8406 IEMSSERESULT Res1; RT_ZERO(Res1);
8407 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8408 TestData.fMxcsrIn = State.MXCSR;
8409 TestData.fMxcsrOut = Res1.MXCSR;
8410 TestData.OutVal = Res1.uResult;
8411 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8412
8413 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8414 {
8415 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8416 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8417 IEMSSERESULT Res2; RT_ZERO(Res2);
8418 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8419 TestData.fMxcsrIn = State.MXCSR;
8420 TestData.fMxcsrOut = Res2.MXCSR;
8421 TestData.OutVal = Res2.uResult;
8422 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8423 }
8424 if (!RT_IS_POWER_OF_TWO(fXcpt))
8425 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8426 if (fUnmasked & fXcpt)
8427 {
8428 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8429 IEMSSERESULT Res3; RT_ZERO(Res3);
8430 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8431 TestData.fMxcsrIn = State.MXCSR;
8432 TestData.fMxcsrOut = Res3.MXCSR;
8433 TestData.OutVal = Res3.uResult;
8434 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8435 }
8436 }
8437 }
8438 }
8439 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8440 }
8441
8442 return RTEXITCODE_SUCCESS;
8443}
8444#endif
8445
8446static void SseConvertXmmR64I32Test(void)
8447{
8448 X86FXSTATE State;
8449 RT_ZERO(State);
8450
8451 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8452 {
8453 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
8454 continue;
8455
8456 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8457 uint32_t const cTests = g_aSseConvertXmmR64I32[iFn].cTests;
8458 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8459 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8460 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8461 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8462 {
8463 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8464 {
8465 IEMSSERESULT Res; RT_ZERO(Res);
8466
8467 State.MXCSR = paTests[iTest].fMxcsrIn;
8468 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8469 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8470 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8471 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8472 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8473 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8474 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8475 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8476 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8477 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8478 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8479 iVar ? " " : "", Res.MXCSR,
8480 Res.uResult.ai32[0], Res.uResult.ai32[1],
8481 Res.uResult.ai32[2], Res.uResult.ai32[3],
8482 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8483 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8484 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8485 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8486 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8487 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8488 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8489 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8490 ? " - val" : "",
8491 FormatMxcsr(paTests[iTest].fMxcsrIn));
8492 }
8493 }
8494 }
8495}
8496
8497
8498/*
8499 * SSE operations converting double-precision floating-point values to signed double-word values (cvtpd2pi, cvttpd2pi).
8500 */
8501TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8502
8503static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8504{
8505 ENTRY_BIN(cvtpd2pi_u128),
8506 ENTRY_BIN(cvttpd2pi_u128)
8507};
8508
8509#ifdef TSTIEMAIMPL_WITH_GENERATOR
8510static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8511{
8512 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8513
8514 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8515 {
8516 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8517 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8518 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8519 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8520 /** @todo More specials. */
8521 };
8522
8523 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8524 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8525 {
8526 PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;
8527
8528 IEMBINARYOUTPUT BinOut;
8529 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName), RTEXITCODE_FAILURE);
8530
8531 uint32_t cNormalInputPairs = 0;
8532 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8533 {
8534 SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);
8535
8536 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8537 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8538
8539 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8540 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8541 cNormalInputPairs++;
8542 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8543 {
8544 iTest -= 1;
8545 continue;
8546 }
8547
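            /*
             * Each input pair is run through all four rounding modes and every DAZ/FZ combination,
             * first with all MXCSR exceptions masked and then with all of them unmasked.  Whenever
             * exception flags were raised, additional records are generated: one with the raised
             * flags pre-set on input while everything is unmasked; one with exactly those exceptions
             * masked (the flags shifted into the mask bits via X86_MXCSR_XCPT_MASK_SHIFT) if the
             * previous run raised new flags; and, when more than one flag was raised, one per flag
             * where that exception is left unmasked while the other raised ones are masked.  The
             * other SSE generators in this file follow the same pattern.
             */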
8548 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8549 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8550 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8551 for (uint8_t iFz = 0; iFz < 2; iFz++)
8552 {
8553 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8554 | (iRounding << X86_MXCSR_RC_SHIFT)
8555 | (iDaz ? X86_MXCSR_DAZ : 0)
8556 | (iFz ? X86_MXCSR_FZ : 0)
8557 | X86_MXCSR_XCPT_MASK;
8558 uint32_t fMxcsrM = fMxcsrIn;
8559 uint64_t u64ResM;
8560 pfn(&fMxcsrM, &u64ResM, &TestData.InVal);
8561 TestData.fMxcsrIn = fMxcsrIn;
8562 TestData.fMxcsrOut = fMxcsrM;
8563 TestData.OutVal.u = u64ResM;
8564 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8565
8566 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8567 uint32_t fMxcsrU = fMxcsrIn;
8568 uint64_t u64ResU;
8569 pfn(&fMxcsrU, &u64ResU, &TestData.InVal);
8570 TestData.fMxcsrIn = fMxcsrIn;
8571 TestData.fMxcsrOut = fMxcsrU;
8572 TestData.OutVal.u = u64ResU;
8573 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8574
8575 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8576 if (fXcpt)
8577 {
8578 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8579 uint32_t fMxcsr1 = fMxcsrIn;
8580 uint64_t u64Res1;
8581 pfn(&fMxcsr1, &u64Res1, &TestData.InVal);
8582 TestData.fMxcsrIn = fMxcsrIn;
8583 TestData.fMxcsrOut = fMxcsr1;
8584 TestData.OutVal.u = u64Res1;
8585 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8586
8587 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8588 {
8589 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8590 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8591 uint32_t fMxcsr2 = fMxcsrIn;
8592 uint64_t u64Res2;
8593 pfn(&fMxcsr2, &u64Res2, &TestData.InVal);
8594 TestData.fMxcsrIn = fMxcsrIn;
8595 TestData.fMxcsrOut = fMxcsr2;
8596 TestData.OutVal.u = u64Res2;
8597 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8598 }
8599 if (!RT_IS_POWER_OF_TWO(fXcpt))
8600 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8601 if (fUnmasked & fXcpt)
8602 {
8603 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8604 uint32_t fMxcsr3 = fMxcsrIn;
8605 uint64_t u64Res3;
8606 pfn(&fMxcsr3, &u64Res3, &TestData.InVal);
8607 TestData.fMxcsrIn = fMxcsrIn;
8608 TestData.fMxcsrOut = fMxcsr3;
8609 TestData.OutVal.u = u64Res3;
8610 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8611 }
8612 }
8613 }
8614 }
8615 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8616 }
8617
8618 return RTEXITCODE_SUCCESS;
8619}
8620#endif
8621
8622static void SseConvertMmXmmTest(void)
8623{
8624 X86FXSTATE State;
8625 RT_ZERO(State);
8626
8627 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8628 {
8629 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
8630 continue;
8631
8632 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8633 uint32_t const cTests = g_aSseConvertMmXmm[iFn].cTests;
8634 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8635 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8636 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8637 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8638 {
8639 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8640 {
8641 RTUINT64U ValOut;
8642 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8643 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8644 if ( fMxcsr != paTests[iTest].fMxcsrOut
8645 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8646 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8647 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8648 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8649 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8650 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8651 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8652 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8653 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8654 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8655 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8656 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8657 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8658 ? " - val" : "",
8659 FormatMxcsr(paTests[iTest].fMxcsrIn));
8660 }
8661 }
8662 }
8663}
8664
8665
8666/*
8667 * SSE operations converting signed double-word values to double-precision floating-point values (cvtpi2pd).
8668 */
8669TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8670
8671static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8672{
8673 ENTRY_BIN(cvtpi2pd_u128)
8674};
8675
8676#ifdef TSTIEMAIMPL_WITH_GENERATOR
8677static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8678{
8679 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8680
8681 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8682 {
8683 { { INT32_MIN, INT32_MIN } },
8684 { { INT32_MAX, INT32_MAX } }
8685 /** @todo More specials. */
8686 };
8687
8688 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8689 {
8690 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;
8691
8692 IEMBINARYOUTPUT BinOut;
8693 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName), RTEXITCODE_FAILURE);
8694
8695 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8696 {
8697 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8698
8699 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8700 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8701
8702 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8703 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8704 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8705 for (uint8_t iFz = 0; iFz < 2; iFz++)
8706 {
8707 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8708 | (iRounding << X86_MXCSR_RC_SHIFT)
8709 | (iDaz ? X86_MXCSR_DAZ : 0)
8710 | (iFz ? X86_MXCSR_FZ : 0)
8711 | X86_MXCSR_XCPT_MASK;
8712 uint32_t fMxcsrM = fMxcsrIn;
8713 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8714 TestData.fMxcsrIn = fMxcsrIn;
8715 TestData.fMxcsrOut = fMxcsrM;
8716 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8717
8718 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8719 uint32_t fMxcsrU = fMxcsrIn;
8720 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8721 TestData.fMxcsrIn = fMxcsrIn;
8722 TestData.fMxcsrOut = fMxcsrU;
8723 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8724
8725 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8726 if (fXcpt)
8727 {
8728 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8729 uint32_t fMxcsr1 = fMxcsrIn;
8730 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8731 TestData.fMxcsrIn = fMxcsrIn;
8732 TestData.fMxcsrOut = fMxcsr1;
8733 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8734
8735 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8736 {
8737 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8738 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8739 uint32_t fMxcsr2 = fMxcsrIn;
8740 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8741 TestData.fMxcsrIn = fMxcsrIn;
8742 TestData.fMxcsrOut = fMxcsr2;
8743 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8744 }
8745 if (!RT_IS_POWER_OF_TWO(fXcpt))
8746 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8747 if (fUnmasked & fXcpt)
8748 {
8749 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8750 uint32_t fMxcsr3 = fMxcsrIn;
8751 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8752 TestData.fMxcsrIn = fMxcsrIn;
8753 TestData.fMxcsrOut = fMxcsr3;
8754 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8755 }
8756 }
8757 }
8758 }
8759 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8760 }
8761
8762 return RTEXITCODE_SUCCESS;
8763}
8764#endif
8765
8766static void SseConvertXmmR64MmTest(void)
8767{
8768 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8769 {
8770 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
8771 continue;
8772
8773 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8774 uint32_t const cTests = g_aSseConvertXmmR64Mm[iFn].cTests;
8775 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8776 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8777 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8778 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8779 {
8780 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8781 {
8782 X86XMMREG ValOut;
8783 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8784 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8785 if ( fMxcsr != paTests[iTest].fMxcsrOut
8786 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8787 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8788 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8789 "%s -> mxcsr=%#08x %s'%s\n"
8790 "%s expected %#08x %s'%s%s%s (%s)\n",
8791 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8792 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8793 iVar ? " " : "", fMxcsr,
8794 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8795 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8796 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8797 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8798 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8799 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8800 ? " - val" : "",
8801 FormatMxcsr(paTests[iTest].fMxcsrIn));
8802 }
8803 }
8804 }
8805}
8806
8807
8808/*
8809 * SSE operations converting signed double-word values to single-precision floating-point values (cvtpi2ps).
8810 */
8811TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8812
8813static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
8814{
8815 ENTRY_BIN(cvtpi2ps_u128)
8816};
8817
8818#ifdef TSTIEMAIMPL_WITH_GENERATOR
8819static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8820{
8821 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8822
8823 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8824 {
8825 { { INT32_MIN, INT32_MIN } },
8826 { { INT32_MAX, INT32_MAX } }
8827 /** @todo More specials. */
8828 };
8829
8830 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8831 {
8832 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;
8833
8834 IEMBINARYOUTPUT BinOut;
8835 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName), RTEXITCODE_FAILURE);
8836
8837 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8838 {
8839 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8840
8841 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8842 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8843
8844 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8845 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8846 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8847 for (uint8_t iFz = 0; iFz < 2; iFz++)
8848 {
8849 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8850 | (iRounding << X86_MXCSR_RC_SHIFT)
8851 | (iDaz ? X86_MXCSR_DAZ : 0)
8852 | (iFz ? X86_MXCSR_FZ : 0)
8853 | X86_MXCSR_XCPT_MASK;
8854 uint32_t fMxcsrM = fMxcsrIn;
8855 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8856 TestData.fMxcsrIn = fMxcsrIn;
8857 TestData.fMxcsrOut = fMxcsrM;
8858 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8859
8860 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8861 uint32_t fMxcsrU = fMxcsrIn;
8862 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8863 TestData.fMxcsrIn = fMxcsrIn;
8864 TestData.fMxcsrOut = fMxcsrU;
8865 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8866
8867 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8868 if (fXcpt)
8869 {
8870 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8871 uint32_t fMxcsr1 = fMxcsrIn;
8872 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8873 TestData.fMxcsrIn = fMxcsrIn;
8874 TestData.fMxcsrOut = fMxcsr1;
8875 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8876
8877 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8878 {
8879 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8880 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8881 uint32_t fMxcsr2 = fMxcsrIn;
8882 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8883 TestData.fMxcsrIn = fMxcsrIn;
8884 TestData.fMxcsrOut = fMxcsr2;
8885 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8886 }
8887 if (!RT_IS_POWER_OF_TWO(fXcpt))
8888 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8889 if (fUnmasked & fXcpt)
8890 {
8891 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8892 uint32_t fMxcsr3 = fMxcsrIn;
8893 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8894 TestData.fMxcsrIn = fMxcsrIn;
8895 TestData.fMxcsrOut = fMxcsr3;
8896 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8897 }
8898 }
8899 }
8900 }
8901 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8902 }
8903
8904 return RTEXITCODE_SUCCESS;
8905}
8906#endif
8907
8908static void SseConvertXmmR32MmTest(void)
8909{
8910 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8911 {
8912 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
8913 continue;
8914
8915 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8916 uint32_t const cTests = g_aSseConvertXmmR32Mm[iFn].cTests;
8917 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8918 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8919 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8920 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8921 {
8922 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8923 {
8924 X86XMMREG ValOut;
8925 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8926 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8927 if ( fMxcsr != paTests[iTest].fMxcsrOut
8928 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8929 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8930 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8931 "%s -> mxcsr=%#08x %s'%s\n"
8932 "%s expected %#08x %s'%s%s%s (%s)\n",
8933 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8934 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8935 iVar ? " " : "", fMxcsr,
8936 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8937 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8938 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8939 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8940 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8941 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8942 ? " - val" : "",
8943 FormatMxcsr(paTests[iTest].fMxcsrIn));
8944 }
8945 }
8946 }
8947}
8948
8949
8950/*
8951 * SSE operations converting single-precision floating-point values to signed double-word values (cvtps2pi, cvttps2pi).
8952 */
8953TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
8954
8955static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
8956{
8957 ENTRY_BIN(cvtps2pi_u128),
8958 ENTRY_BIN(cvttps2pi_u128)
8959};
8960
8961#ifdef TSTIEMAIMPL_WITH_GENERATOR
8962static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
8963{
8964 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8965
8966 static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
8967 {
8968 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
8969 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
8970 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
8971 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
8972 /** @todo More specials. */
8973 };
8974
8975 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8976 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8977 {
8978 PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;
8979
8980 IEMBINARYOUTPUT BinOut;
8981 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName), RTEXITCODE_FAILURE);
8982
8983 uint32_t cNormalInputPairs = 0;
8984 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8985 {
8986 SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);
8987
8988 TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8989 TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8990
8991 if ( RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
8992 && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
8993 cNormalInputPairs++;
8994 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8995 {
8996 iTest -= 1;
8997 continue;
8998 }
8999
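            /* The PFNIEMAIMPLMXCSRU64U64 worker takes its two packed single-precision inputs as a
               single 64-bit value, so pack the two test floats into a 64-bit container first. */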
9000 RTFLOAT64U TestVal;
9001 TestVal.au32[0] = TestData.ar32InVal[0].u;
9002 TestVal.au32[1] = TestData.ar32InVal[1].u;
9003
9004 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
9005 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
9006 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
9007 for (uint8_t iFz = 0; iFz < 2; iFz++)
9008 {
9009 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
9010 | (iRounding << X86_MXCSR_RC_SHIFT)
9011 | (iDaz ? X86_MXCSR_DAZ : 0)
9012 | (iFz ? X86_MXCSR_FZ : 0)
9013 | X86_MXCSR_XCPT_MASK;
9014 uint32_t fMxcsrM = fMxcsrIn;
9015 uint64_t u64ResM;
9016 pfn(&fMxcsrM, &u64ResM, TestVal.u);
9017 TestData.fMxcsrIn = fMxcsrIn;
9018 TestData.fMxcsrOut = fMxcsrM;
9019 TestData.OutVal.u = u64ResM;
9020 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9021
9022 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
9023 uint32_t fMxcsrU = fMxcsrIn;
9024 uint64_t u64ResU;
9025 pfn(&fMxcsrU, &u64ResU, TestVal.u);
9026 TestData.fMxcsrIn = fMxcsrIn;
9027 TestData.fMxcsrOut = fMxcsrU;
9028 TestData.OutVal.u = u64ResU;
9029 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9030
9031 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
9032 if (fXcpt)
9033 {
9034 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
9035 uint32_t fMxcsr1 = fMxcsrIn;
9036 uint64_t u64Res1;
9037 pfn(&fMxcsr1, &u64Res1, TestVal.u);
9038 TestData.fMxcsrIn = fMxcsrIn;
9039 TestData.fMxcsrOut = fMxcsr1;
9040 TestData.OutVal.u = u64Res1;
9041 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9042
9043 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
9044 {
9045 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
9046 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
9047 uint32_t fMxcsr2 = fMxcsrIn;
9048 uint64_t u64Res2;
9049 pfn(&fMxcsr2, &u64Res2, TestVal.u);
9050 TestData.fMxcsrIn = fMxcsrIn;
9051 TestData.fMxcsrOut = fMxcsr2;
9052 TestData.OutVal.u = u64Res2;
9053 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9054 }
9055 if (!RT_IS_POWER_OF_TWO(fXcpt))
9056 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
9057 if (fUnmasked & fXcpt)
9058 {
9059 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
9060 uint32_t fMxcsr3 = fMxcsrIn;
9061 uint64_t u64Res3;
9062 pfn(&fMxcsr3, &u64Res3, TestVal.u);
9063 TestData.fMxcsrIn = fMxcsrIn;
9064 TestData.fMxcsrOut = fMxcsr3;
9065 TestData.OutVal.u = u64Res3;
9066 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9067 }
9068 }
9069 }
9070 }
9071 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9072 }
9073
9074 return RTEXITCODE_SUCCESS;
9075}
9076#endif
9077
9078static void SseConvertMmI32XmmR32Test(void)
9079{
9080 X86FXSTATE State;
9081 RT_ZERO(State);
9082
9083 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9084 {
9085 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9086 continue;
9087
9088 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9089 uint32_t const cTests = g_aSseConvertMmI32XmmR32[iFn].cTests;
9090 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9091 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9092 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9093 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9094 {
9095 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9096 {
9097 RTUINT64U ValOut;
9098 RTUINT64U ValIn;
9099
9100 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9101 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9102
9103 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
9104 pfn(&fMxcsr, &ValOut.u, ValIn.u);
9105 if ( fMxcsr != paTests[iTest].fMxcsrOut
9106 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9107 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9108 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
9109 "%s -> mxcsr=%#08x %RI32'%RI32\n"
9110 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9111 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9112 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9113 iVar ? " " : "", fMxcsr,
9114 ValOut.ai32[0], ValOut.ai32[1],
9115 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9116 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9117 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9118 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9119 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9120 ? " - val" : "",
9121 FormatMxcsr(paTests[iTest].fMxcsrIn));
9122 }
9123 }
9124 }
9125}
9126
9127
9128/*
9129 * SSE 4.2 pcmpxstrx instructions.
9130 */
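/*
 * The pcmpXstrY family below splits along two axes: implicit-length inputs (pcmpistri,
 * pcmpistrm) versus explicit lengths passed in (R)AX/(R)DX (pcmpestri, pcmpestrm), and an
 * index result in ECX ('i') versus a mask result in an XMM register ('m').  Each combination
 * gets its own subtest table and generator/test pair below.
 */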
9131TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
9132
9133static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
9134{
9135 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
9136};
9137
9138#ifdef TSTIEMAIMPL_WITH_GENERATOR
9139static RTEXITCODE SseComparePcmpistriGenerate(const char *pszDataFileFmt, uint32_t cTests)
9140{
9141 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9142
9143 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9144 {
9145 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9146 /** @todo More specials. */
9147 };
9148
9149 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9150 {
9151 PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;
9152
9153 IEMBINARYOUTPUT BinOut;
9154 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpistri[iFn].pszName), RTEXITCODE_FAILURE);
9155
9156 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9157 {
9158 SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);
9159
9160 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9161 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9162
9163 IEMPCMPISTRXSRC TestVal;
9164 TestVal.uSrc1 = TestData.InVal1.uXmm;
9165 TestVal.uSrc2 = TestData.InVal2.uXmm;
9166
9167 uint32_t const fEFlagsIn = RandEFlags();
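            /* The imm8 operand selects the source data format, aggregation operation, polarity and
               output selection, so run the input pair through all 256 possible immediate values. */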
9168 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9169 {
9170 uint32_t fEFlagsOut = fEFlagsIn;
9171 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9172 TestData.fEFlagsIn = fEFlagsIn;
9173 TestData.fEFlagsOut = fEFlagsOut;
9174 TestData.bImm = (uint8_t)u16Imm;
9175 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9176 }
9177
9178 /* Repeat the tests with the two input values being identical. */
9179 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9180 TestVal.uSrc1 = TestData.InVal1.uXmm;
9181 TestVal.uSrc2 = TestData.InVal2.uXmm;
9182
9183 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9184 {
9185 uint32_t fEFlagsOut = fEFlagsIn;
9186 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9187 TestData.fEFlagsIn = fEFlagsIn;
9188 TestData.fEFlagsOut = fEFlagsOut;
9189 TestData.bImm = (uint8_t)u16Imm;
9190 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9191 }
9192 }
9193 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9194 }
9195
9196 return RTEXITCODE_SUCCESS;
9197}
9198#endif
9199
9200static void SseComparePcmpistriTest(void)
9201{
9202 X86FXSTATE State;
9203 RT_ZERO(State);
9204
9205 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9206 {
9207 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9208 continue;
9209
9210 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9211 uint32_t const cTests = g_aSsePcmpistri[iFn].cTests;
9212 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9213 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9214 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9215 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9216 {
9217 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9218 {
9219 IEMPCMPISTRXSRC TestVal;
9220 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9221 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9222
9223 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9224 uint32_t u32EcxOut = 0;
9225 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9226 if ( fEFlags != paTests[iTest].fEFlagsOut
9227 || u32EcxOut != paTests[iTest].u32EcxOut)
9228 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9229 "%s -> efl=%#08x %RU32\n"
9230 "%s expected %#08x %RU32%s%s\n",
9231 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9232 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9233 iVar ? " " : "", fEFlags, u32EcxOut,
9234 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9235 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9236 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9237 }
9238 }
9239 }
9240}
9241
9242
9243TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
9244
9245static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
9246{
9247 ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
9248};
9249
9250#ifdef TSTIEMAIMPL_WITH_GENERATOR
9251static RTEXITCODE SseComparePcmpistrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9252{
9253 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9254
9255 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9256 {
9257 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9258 /** @todo More specials. */
9259 };
9260
9261 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9262 {
9263 PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;
9264
9265 IEMBINARYOUTPUT BinOut;
9266 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpistrm[iFn].pszName), RTEXITCODE_FAILURE);
9267
9268 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9269 {
9270 SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);
9271
9272 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9273 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9274
9275 IEMPCMPISTRXSRC TestVal;
9276 TestVal.uSrc1 = TestData.InVal1.uXmm;
9277 TestVal.uSrc2 = TestData.InVal2.uXmm;
9278
9279 uint32_t const fEFlagsIn = RandEFlags();
9280 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9281 {
9282 uint32_t fEFlagsOut = fEFlagsIn;
9283 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9284 TestData.fEFlagsIn = fEFlagsIn;
9285 TestData.fEFlagsOut = fEFlagsOut;
9286 TestData.bImm = (uint8_t)u16Imm;
9287 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9288 }
9289
9290 /* Repeat the tests with the two input values being identical. */
9291 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9292 TestVal.uSrc1 = TestData.InVal1.uXmm;
9293 TestVal.uSrc2 = TestData.InVal2.uXmm;
9294
9295 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9296 {
9297 uint32_t fEFlagsOut = fEFlagsIn;
9298 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9299 TestData.fEFlagsIn = fEFlagsIn;
9300 TestData.fEFlagsOut = fEFlagsOut;
9301 TestData.bImm = (uint8_t)u16Imm;
9302 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9303 }
9304 }
9305 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9306 }
9307
9308 return RTEXITCODE_SUCCESS;
9309}
9310#endif
9311
9312static void SseComparePcmpistrmTest(void)
9313{
9314 X86FXSTATE State;
9315 RT_ZERO(State);
9316
9317 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9318 {
9319 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9320 continue;
9321
9322 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9323 uint32_t const cTests = g_aSsePcmpistrm[iFn].cTests;
9324 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9325 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9326 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9327 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9328 {
9329 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9330 {
9331 IEMPCMPISTRXSRC TestVal;
9332 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9333 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9334
9335 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9336 RTUINT128U OutVal;
9337 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9338 if ( fEFlags != paTests[iTest].fEFlagsOut
9339 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9340 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9341 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9342 "%s -> efl=%#08x %s\n"
9343 "%s expected %#08x %s%s%s\n",
9344 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9345 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9346 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9347 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9348 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9349 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9350 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9351 }
9352 }
9353 }
9354}
9355
9356
9357TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9358
9359static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9360{
9361 ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9362};
9363
9364#ifdef TSTIEMAIMPL_WITH_GENERATOR
9365static RTEXITCODE SseComparePcmpestriGenerate(const char *pszDataFileFmt, uint32_t cTests)
9366{
9367 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9368
9369 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9370 {
9371 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9372 /** @todo More specials. */
9373 };
9374
9375 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9376 {
9377 PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;
9378
9379 IEMBINARYOUTPUT BinOut;
9380 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpestri[iFn].pszName), RTEXITCODE_FAILURE);
9381
9382 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9383 {
9384 SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);
9385
9386 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9387 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9388
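            /* The explicit-length variants take the number of valid elements in (R)AX and (R)DX
               (the CPU uses the absolute value, saturated to the vector element count); the loops
               below cover a negative value and zero for each of the two registers. */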
9389 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9390 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9391 {
9392 TestData.u64Rax = (uint64_t)i64Rax;
9393 TestData.u64Rdx = (uint64_t)i64Rdx;
9394
9395 IEMPCMPESTRXSRC TestVal;
9396 TestVal.uSrc1 = TestData.InVal1.uXmm;
9397 TestVal.uSrc2 = TestData.InVal2.uXmm;
9398 TestVal.u64Rax = TestData.u64Rax;
9399 TestVal.u64Rdx = TestData.u64Rdx;
9400
9401 uint32_t const fEFlagsIn = RandEFlags();
9402 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9403 {
9404 uint32_t fEFlagsOut = fEFlagsIn;
9405 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9406 TestData.fEFlagsIn = fEFlagsIn;
9407 TestData.fEFlagsOut = fEFlagsOut;
9408 TestData.bImm = (uint8_t)u16Imm;
9409 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9410 }
9411
9412 /* Repeat the tests with the two input values being identical. */
9413 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9414 TestVal.uSrc1 = TestData.InVal1.uXmm;
9415 TestVal.uSrc2 = TestData.InVal2.uXmm;
9416
9417 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9418 {
9419 uint32_t fEFlagsOut = fEFlagsIn;
9420 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9421 TestData.fEFlagsIn = fEFlagsIn;
9422 TestData.fEFlagsOut = fEFlagsOut;
9423 TestData.bImm = (uint8_t)u16Imm;
9424 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9425 }
9426 }
9427 }
9428 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9429 }
9430
9431 return RTEXITCODE_SUCCESS;
9432}
9433#endif
9434
9435static void SseComparePcmpestriTest(void)
9436{
9437 X86FXSTATE State;
9438 RT_ZERO(State);
9439
9440 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9441 {
9442 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9443 continue;
9444
9445 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9446 uint32_t const cTests = g_aSsePcmpestri[iFn].cTests;
9447 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9448 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9449 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9450 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9451 {
9452 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9453 {
9454 IEMPCMPESTRXSRC TestVal;
9455 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9456 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9457 TestVal.u64Rax = paTests[iTest].u64Rax;
9458 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9459
9460 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9461 uint32_t u32EcxOut = 0;
9462 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9463 if ( fEFlags != paTests[iTest].fEFlagsOut
9464 || u32EcxOut != paTests[iTest].u32EcxOut)
9465 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9466 "%s -> efl=%#08x %RU32\n"
9467 "%s expected %#08x %RU32%s%s\n",
9468 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9469 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9470 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9471 paTests[iTest].bImm,
9472 iVar ? " " : "", fEFlags, u32EcxOut,
9473 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9474 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9475 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9476 }
9477 }
9478 }
9479}
9480
9481
9482TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9483
9484static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9485{
9486 ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9487};
9488
9489#ifdef TSTIEMAIMPL_WITH_GENERATOR
9490static RTEXITCODE SseComparePcmpestrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9491{
9492 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9493
9494 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9495 {
9496 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9497 /** @todo More specials. */
9498 };
9499
9500 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9501 {
9502 PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;
9503
9504 IEMBINARYOUTPUT BinOut;
9505 AssertReturn(GenerateBinaryOpen(&BinOut, pszDataFileFmt, g_aSsePcmpestrm[iFn].pszName), RTEXITCODE_FAILURE);
9506
9507 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9508 {
9509 SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);
9510
9511 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9512 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9513
9514 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9515 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9516 {
9517 TestData.u64Rax = (uint64_t)i64Rax;
9518 TestData.u64Rdx = (uint64_t)i64Rdx;
9519
9520 IEMPCMPESTRXSRC TestVal;
9521 TestVal.uSrc1 = TestData.InVal1.uXmm;
9522 TestVal.uSrc2 = TestData.InVal2.uXmm;
9523 TestVal.u64Rax = TestData.u64Rax;
9524 TestVal.u64Rdx = TestData.u64Rdx;
9525
9526 uint32_t const fEFlagsIn = RandEFlags();
9527 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9528 {
9529 uint32_t fEFlagsOut = fEFlagsIn;
9530 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9531 TestData.fEFlagsIn = fEFlagsIn;
9532 TestData.fEFlagsOut = fEFlagsOut;
9533 TestData.bImm = (uint8_t)u16Imm;
9534 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9535 }
9536
9537 /* Repeat the tests with the two input values being identical. */
9538 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9539 TestVal.uSrc1 = TestData.InVal1.uXmm;
9540 TestVal.uSrc2 = TestData.InVal2.uXmm;
9541
9542 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9543 {
9544 uint32_t fEFlagsOut = fEFlagsIn;
9545 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9546 TestData.fEFlagsIn = fEFlagsIn;
9547 TestData.fEFlagsOut = fEFlagsOut;
9548 TestData.bImm = (uint8_t)u16Imm;
9549 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9550 }
9551 }
9552 }
9553 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9554 }
9555
9556 return RTEXITCODE_SUCCESS;
9557}
9558#endif
9559
9560static void SseComparePcmpestrmTest(void)
9561{
9562 X86FXSTATE State;
9563 RT_ZERO(State);
9564
9565 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9566 {
9567 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9568 continue;
9569
9570 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9571 uint32_t const cTests = g_aSsePcmpestrm[iFn].cTests;
9572 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9573 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9574 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9575 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9576 {
9577 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9578 {
9579 IEMPCMPESTRXSRC TestVal;
9580 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9581 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9582 TestVal.u64Rax = paTests[iTest].u64Rax;
9583 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9584
9585 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9586 RTUINT128U OutVal;
9587 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9588 if ( fEFlags != paTests[iTest].fEFlagsOut
9589 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9590 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9591 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9592 "%s -> efl=%#08x %s\n"
9593 "%s expected %#08x %s%s%s\n",
9594 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9595 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9596 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9597 paTests[iTest].bImm,
9598 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9599 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9600 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9601 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9602 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9603 }
9604 }
9605 }
9606}
9607
9608
9609
9610int main(int argc, char **argv)
9611{
9612 int rc = RTR3InitExe(argc, &argv, 0);
9613 if (RT_FAILURE(rc))
9614 return RTMsgInitFailure(rc);
9615
9616 /*
9617 * Determine the host CPU.
9618 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9619 */
9620#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9621 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9622 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9623 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9624#else
9625 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9626#endif
9627
9628 /*
9629 * Parse arguments.
9630 */
9631 enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9632 enmMode = kModeNotSet;
9633 bool fInt = true;
9634 bool fFpuLdSt = true;
9635 bool fFpuBinary1 = true;
9636 bool fFpuBinary2 = true;
9637 bool fFpuOther = true;
9638 bool fCpuData = true;
9639 bool fCommonData = true;
9640 bool fSseFpBinary = true;
9641 bool fSseFpOther = true;
9642 bool fSsePcmpxstrx = true;
9643 uint32_t const cDefaultTests = 96;
9644 uint32_t cTests = cDefaultTests;
9645 RTGETOPTDEF const s_aOptions[] =
9646 {
9647 // mode:
9648 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9649 { "--dump", 'G', RTGETOPT_REQ_NOTHING },
9650 { "--test", 't', RTGETOPT_REQ_NOTHING },
9651 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9652 // test selection (both)
9653 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9654 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9655 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9656 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9657 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9658 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9659 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9660 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9661 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9662 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9663 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9664 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9665 { "--include", 'I', RTGETOPT_REQ_STRING },
9666 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9667 // generation parameters
9668 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9669 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9670 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9671 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9672 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9673 };
9674
9675 RTGETOPTSTATE State;
9676 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9677 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9678
9679 RTGETOPTUNION ValueUnion;
9680 while ((rc = RTGetOpt(&State, &ValueUnion)))
9681 {
9682 switch (rc)
9683 {
9684 case 'g':
9685 enmMode = kModeGenerate;
9686 g_cPicoSecBenchmark = 0;
9687 break;
9688 case 'G':
9689 enmMode = kModeDump;
9690 g_cPicoSecBenchmark = 0;
9691 break;
9692 case 't':
9693 enmMode = kModeTest;
9694 g_cPicoSecBenchmark = 0;
9695 break;
9696 case 'b':
9697 enmMode = kModeTest;
9698 g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
9699 break;
9700
9701 case 'a':
9702 fCpuData = true;
9703 fCommonData = true;
9704 fInt = true;
9705 fFpuLdSt = true;
9706 fFpuBinary1 = true;
9707 fFpuBinary2 = true;
9708 fFpuOther = true;
9709 fSseFpBinary = true;
9710 fSseFpOther = true;
9711 fSsePcmpxstrx = true;
9712 break;
9713 case 'z':
9714 fCpuData = false;
9715 fCommonData = false;
9716 fInt = false;
9717 fFpuLdSt = false;
9718 fFpuBinary1 = false;
9719 fFpuBinary2 = false;
9720 fFpuOther = false;
9721 fSseFpBinary = false;
9722 fSseFpOther = false;
9723 fSsePcmpxstrx = false;
9724 break;
9725
9726 case 'F':
9727 fFpuLdSt = true;
9728 break;
9729 case 'O':
9730 fFpuOther = true;
9731 break;
9732 case 'B':
9733 fFpuBinary1 = true;
9734 break;
9735 case 'P':
9736 fFpuBinary2 = true;
9737 break;
9738 case 'S':
9739 fSseFpBinary = true;
9740 break;
9741 case 'T':
9742 fSseFpOther = true;
9743 break;
9744 case 'C':
9745 fSsePcmpxstrx = true;
9746 break;
9747 case 'i':
9748 fInt = true;
9749 break;
9750
9751 case 'I':
9752 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
9753 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
9754 RT_ELEMENTS(g_apszIncludeTestPatterns));
9755 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
9756 break;
9757 case 'X':
9758 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
9759 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
9760 RT_ELEMENTS(g_apszExcludeTestPatterns));
9761 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
9762 break;
9763
9764 case 'm':
9765 fCommonData = true;
9766 break;
9767 case 'c':
9768 fCpuData = true;
9769 break;
9770 case 'n':
9771 cTests = ValueUnion.u32;
9772 break;
9773
9774 case 'q':
9775 g_cVerbosity = 0;
9776 break;
9777 case 'v':
9778 g_cVerbosity++;
9779 break;
9780
9781 case 'h':
9782 RTPrintf("usage: %s <-g|-t> [options]\n"
9783 "\n"
9784 "Mode:\n"
9785 " -g, --generate\n"
9786 " Generate test data.\n"
9787 " -t, --test\n"
9788 " Execute tests.\n"
9789 " -b, --benchmark\n"
9790 " Execute tests and do 1/2 seconds of benchmarking.\n"
9791 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
9792 "\n"
9793 "Test selection (both modes):\n"
9794 " -a, --all\n"
9795 " Enable all tests and generated test data. (default)\n"
9796 " -z, --zap, --none\n"
9797 " Disable all tests and test data types.\n"
9798 " -i, --int\n"
9799 " Enable non-FPU tests.\n"
9800 " -F, --fpu-ld-st\n"
9801 " Enable FPU load and store tests.\n"
9802 " -B, --fpu-binary-1\n"
9803 " Enable FPU binary 80-bit FP tests.\n"
9804 " -P, --fpu-binary-2\n"
9805 " Enable FPU binary 64- and 32-bit FP tests.\n"
9806 " -O, --fpu-other\n"
9807 " Enable FPU binary 64- and 32-bit FP tests.\n"
9808 " -S, --sse-fp-binary\n"
9809 " Enable SSE binary 64- and 32-bit FP tests.\n"
9810 " -T, --sse-fp-other\n"
9811 " Enable misc SSE 64- and 32-bit FP tests.\n"
9812 " -C, --sse-pcmpxstrx\n"
9813 " Enable SSE pcmpxstrx tests.\n"
9814 " -I,--include=<test-patter>\n"
9815 " Enable tests matching the given pattern.\n"
9816 " -X,--exclude=<test-patter>\n"
9817 " Skip tests matching the given pattern (overrides --include).\n"
9818 "\n"
9819 "Generation:\n"
9820 " -m, --common\n"
9821 " Enable generating common test data.\n"
9822 " -c, --only-cpu\n"
9823 " Enable generating CPU specific test data.\n"
9824 " -n, --number-of-test <count>\n"
9825 " Number of tests to generate. Default: %u\n"
9826 "\n"
9827 "Other:\n"
9828 " -v, --verbose\n"
9829 " -q, --quiet\n"
9830 " Noise level. Default: --quiet\n"
9831 , argv[0], cDefaultTests);
9832 return RTEXITCODE_SUCCESS;
9833 default:
9834 return RTGetOptPrintError(rc, &ValueUnion);
9835 }
9836 }
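
    /*
     * Illustrative invocations (not exhaustive, derived from the option table above):
     *   tstIEMAImpl -t              run all tests against the bundled test data
     *   tstIEMAImpl -t -z -C        run only the SSE pcmpxstrx tests
     *   tstIEMAImpl -g -n 192       regenerate test data with 192 tests per case
     */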
9837
9838 /*
9839 * Generate data?
9840 */
9841 if (enmMode == kModeGenerate)
9842 {
9843#ifdef TSTIEMAIMPL_WITH_GENERATOR
9844 if (cTests == 0)
9845 cTests = cDefaultTests;
9846 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
9847 g_cZeroSrcTests = g_cZeroDstTests * 2;
9848
9849 if (fInt)
9850 {
9851 const char * const apszNameFmts[] =
9852 {
9853 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataInt-%s.bin.gz" : NULL,
9854 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Intel.bin.gz" : NULL,
9855 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Amd.bin.gz" : NULL,
9856 };
9857 RTEXITCODE rcExit = BinU8Generate(cTests, apszNameFmts);
9858 if (rcExit == RTEXITCODE_SUCCESS)
9859 rcExit = BinU16Generate(cTests, apszNameFmts);
9860 if (rcExit == RTEXITCODE_SUCCESS)
9861 rcExit = BinU32Generate(cTests, apszNameFmts);
9862 if (rcExit == RTEXITCODE_SUCCESS)
9863 rcExit = BinU64Generate(cTests, apszNameFmts);
9864 if (rcExit == RTEXITCODE_SUCCESS)
9865 rcExit = ShiftDblGenerate(RT_MAX(cTests, 128), apszNameFmts);
9866 if (rcExit == RTEXITCODE_SUCCESS)
9867 rcExit = UnaryGenerate(cTests, apszNameFmts);
9868 if (rcExit == RTEXITCODE_SUCCESS)
9869 rcExit = ShiftGenerate(cTests, apszNameFmts);
9870 if (rcExit == RTEXITCODE_SUCCESS)
9871 rcExit = MulDivGenerate(cTests, apszNameFmts);
9872 if (rcExit != RTEXITCODE_SUCCESS)
9873 return rcExit;
9874 }
9875
9876 if (fFpuLdSt)
9877 {
9878 const char * const apszNameFmts[] =
9879 {
9880 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
9881 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
9882 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
9883 };
9884 RTEXITCODE rcExit = FpuLdConstGenerate(cTests, apszNameFmts);
9885 if (rcExit == RTEXITCODE_SUCCESS)
9886 rcExit = FpuLdIntGenerate(cTests, apszNameFmts);
9887 if (rcExit == RTEXITCODE_SUCCESS)
9888 rcExit = FpuLdD80Generate(cTests, apszNameFmts);
9889 if (rcExit == RTEXITCODE_SUCCESS)
9890 rcExit = FpuStIntGenerate(cTests, apszNameFmts);
9891 if (rcExit == RTEXITCODE_SUCCESS)
9892 rcExit = FpuStD80Generate(cTests, apszNameFmts);
9893 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9894 if (rcExit == RTEXITCODE_SUCCESS)
9895 rcExit = FpuLdMemGenerate(cTests2, apszNameFmts);
9896 if (rcExit == RTEXITCODE_SUCCESS)
9897 rcExit = FpuStMemGenerate(cTests2, apszNameFmts);
9898 if (rcExit != RTEXITCODE_SUCCESS)
9899 return rcExit;
9900 }
9901
9902 if (fFpuBinary1)
9903 {
9904 const char * const apszNameFmts[] =
9905 {
9906 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
9907 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
9908 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
9909 };
9910 RTEXITCODE rcExit = FpuBinaryR80Generate(cTests, apszNameFmts);
9911 if (rcExit == RTEXITCODE_SUCCESS)
9912 rcExit = FpuBinaryFswR80Generate(cTests, apszNameFmts);
9913 if (rcExit == RTEXITCODE_SUCCESS)
9914 rcExit = FpuBinaryEflR80Generate(cTests, apszNameFmts);
9915 if (rcExit != RTEXITCODE_SUCCESS)
9916 return rcExit;
9917 }
9918
9919 if (fFpuBinary2)
9920 {
9921 const char * const apszNameFmts[] =
9922 {
9923 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
9924 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
9925 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
9926 };
9927 RTEXITCODE rcExit = FpuBinaryR64Generate(cTests, apszNameFmts);
9928 if (rcExit == RTEXITCODE_SUCCESS)
9929 rcExit = FpuBinaryR32Generate(cTests, apszNameFmts);
9930 if (rcExit == RTEXITCODE_SUCCESS)
9931 rcExit = FpuBinaryI32Generate(cTests, apszNameFmts);
9932 if (rcExit == RTEXITCODE_SUCCESS)
9933 rcExit = FpuBinaryI16Generate(cTests, apszNameFmts);
9934 if (rcExit == RTEXITCODE_SUCCESS)
9935 rcExit = FpuBinaryFswR64Generate(cTests, apszNameFmts);
9936 if (rcExit == RTEXITCODE_SUCCESS)
9937 rcExit = FpuBinaryFswR32Generate(cTests, apszNameFmts);
9938 if (rcExit == RTEXITCODE_SUCCESS)
9939 rcExit = FpuBinaryFswI32Generate(cTests, apszNameFmts);
9940 if (rcExit == RTEXITCODE_SUCCESS)
9941 rcExit = FpuBinaryFswI16Generate(cTests, apszNameFmts);
9942 if (rcExit != RTEXITCODE_SUCCESS)
9943 return rcExit;
9944 }
9945
9946 if (fFpuOther)
9947 {
9948 const char * const apszNameFmts[] =
9949 {
9950 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
9951 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
9952 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
9953 };
9954 RTEXITCODE rcExit = FpuUnaryR80Generate(cTests, apszNameFmts);
9955 if (rcExit == RTEXITCODE_SUCCESS)
9956 rcExit = FpuUnaryFswR80Generate(cTests, apszNameFmts);
9957 if (rcExit == RTEXITCODE_SUCCESS)
9958 rcExit = FpuUnaryTwoR80Generate(cTests, apszNameFmts);
9959 if (rcExit != RTEXITCODE_SUCCESS)
9960 return rcExit;
9961 }
9962
9963 if (fSseFpBinary)
9964 {
9965 const char * const pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin.gz" : NULL;
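            /* Note: the '%s' is replaced with the individual subtest name by GenerateBinaryOpen();
               judging by the .bin.gz suffix and the iprt/zip.h include, the data is written in
               gzip-compressed form. */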
9966
9967 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9968 if (rcExit == RTEXITCODE_SUCCESS)
9969 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9970 if (rcExit == RTEXITCODE_SUCCESS)
9971 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9972 if (rcExit == RTEXITCODE_SUCCESS)
9973 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9974
9975 if (rcExit == RTEXITCODE_SUCCESS)
9976 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9977 if (rcExit == RTEXITCODE_SUCCESS)
9978 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9979 if (rcExit == RTEXITCODE_SUCCESS)
9980 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9981 if (rcExit == RTEXITCODE_SUCCESS)
9982 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9983
9984 if (rcExit == RTEXITCODE_SUCCESS)
9985 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9986 if (rcExit == RTEXITCODE_SUCCESS)
9987 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9988 if (rcExit == RTEXITCODE_SUCCESS)
9989 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9990 if (rcExit == RTEXITCODE_SUCCESS)
9991 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9992 if (rcExit != RTEXITCODE_SUCCESS)
9993 return rcExit;
9994 }
9995
9996 if (fSseFpOther)
9997 {
9998 const char * const pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin.gz" : NULL;
9999 const char * const pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin.gz" : NULL;
10000
10001 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
10002 if (rcExit == RTEXITCODE_SUCCESS)
10003 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
10004 if (rcExit == RTEXITCODE_SUCCESS)
10005 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
10006 if (rcExit == RTEXITCODE_SUCCESS)
10007 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
10008 if (rcExit == RTEXITCODE_SUCCESS)
10009 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
10010 if (rcExit == RTEXITCODE_SUCCESS)
10011 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
10012 if (rcExit == RTEXITCODE_SUCCESS)
10013 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
10014 if (rcExit == RTEXITCODE_SUCCESS)
10015 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
10016 if (rcExit == RTEXITCODE_SUCCESS)
10017 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
10018 if (rcExit == RTEXITCODE_SUCCESS)
10019 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
10020 if (rcExit == RTEXITCODE_SUCCESS)
10021 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
10022 if (rcExit == RTEXITCODE_SUCCESS)
10023 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
10024 if (rcExit != RTEXITCODE_SUCCESS)
10025 return rcExit;
10026 }
10027
10028 if (fSsePcmpxstrx)
10029 {
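            /* SSE4.2 string-compare family: PCMPISTRI/PCMPISTRM operate on implicitly
               (NUL-)terminated strings, while PCMPESTRI/PCMPESTRM take explicit lengths
               in RAX/RDX; all four share the same compressed data file. */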
10030 const char * const pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz" : NULL;
10031
10032 RTEXITCODE rcExit = SseComparePcmpistriGenerate(pszDataFileFmtCmp, cTests);
10033 if (rcExit == RTEXITCODE_SUCCESS)
10034 rcExit = SseComparePcmpistrmGenerate(pszDataFileFmtCmp, cTests);
10035 if (rcExit == RTEXITCODE_SUCCESS)
10036 rcExit = SseComparePcmpestriGenerate(pszDataFileFmtCmp, cTests);
10037 if (rcExit == RTEXITCODE_SUCCESS)
10038 rcExit = SseComparePcmpestrmGenerate(pszDataFileFmtCmp, cTests);
10039 if (rcExit != RTEXITCODE_SUCCESS)
10040 return rcExit;
10041 }
10042
10043 return RTEXITCODE_SUCCESS;
10044#else
10045 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10046#endif
10047 }
10048
10049 /*
10050 * Dump tables.
10051 */
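    /* Note: like generation, dumping is only built in with TSTIEMAIMPL_WITH_GENERATOR;
       the *DumpAll helpers write the built-in test tables back out as gzipped binaries
       using the per-behaviour filename formats declared in each block below. */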
10052 if (enmMode == kModeDump)
10053 {
10054#ifdef TSTIEMAIMPL_WITH_GENERATOR
10055 if (fInt)
10056 {
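            /* Only the integer tables keep separate Intel- and AMD-suffixed expectation
               files, presumably because the undefined EFLAGS bits differ between the two
               vendors; the FPU data below reuses one file for all three behaviours. */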
10057 const char * const apszNameFmts[] =
10058 {
10059 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataInt-%s.bin.gz" : NULL,
10060 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Intel.bin.gz" : NULL,
10061 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataInt-%s-Amd.bin.gz" : NULL,
10062 };
10063 RTEXITCODE rcExit = BinU8DumpAll(apszNameFmts);
10064 if (rcExit == RTEXITCODE_SUCCESS)
10065 rcExit = BinU16DumpAll(apszNameFmts);
10066 if (rcExit == RTEXITCODE_SUCCESS)
10067 rcExit = BinU32DumpAll(apszNameFmts);
10068 if (rcExit == RTEXITCODE_SUCCESS)
10069 rcExit = BinU64DumpAll(apszNameFmts);
10070 if (rcExit == RTEXITCODE_SUCCESS)
10071 rcExit = ShiftDblDumpAll(apszNameFmts);
10072 if (rcExit == RTEXITCODE_SUCCESS)
10073 rcExit = UnaryDumpAll(apszNameFmts);
10074 if (rcExit == RTEXITCODE_SUCCESS)
10075 rcExit = ShiftDumpAll(apszNameFmts);
10076 if (rcExit == RTEXITCODE_SUCCESS)
10077 rcExit = MulDivDumpAll(apszNameFmts);
10078 if (rcExit != RTEXITCODE_SUCCESS)
10079 return rcExit;
10080 }
10081
10082 if (fFpuLdSt)
10083 {
10084 const char * const apszNameFmts[] =
10085 {
10086 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
10087 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
10088 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuLdSt-%s.bin.gz" : NULL,
10089 };
10090 RTEXITCODE rcExit = FpuLdConstDumpAll(apszNameFmts);
10091 if (rcExit == RTEXITCODE_SUCCESS)
10092 rcExit = FpuLdIntDumpAll(apszNameFmts);
10093 if (rcExit == RTEXITCODE_SUCCESS)
10094 rcExit = FpuLdD80DumpAll(apszNameFmts);
10095 if (rcExit == RTEXITCODE_SUCCESS)
10096 rcExit = FpuStIntDumpAll(apszNameFmts);
10097 if (rcExit == RTEXITCODE_SUCCESS)
10098 rcExit = FpuStD80DumpAll(apszNameFmts);
10099 if (rcExit == RTEXITCODE_SUCCESS)
10100 rcExit = FpuLdMemDumpAll(apszNameFmts);
10101 if (rcExit == RTEXITCODE_SUCCESS)
10102 rcExit = FpuStMemDumpAll(apszNameFmts);
10103 if (rcExit != RTEXITCODE_SUCCESS)
10104 return rcExit;
10105 }
10106
10107 if (fFpuBinary1)
10108 {
10109 const char * const apszNameFmts[] =
10110 {
10111 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
10112 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
10113 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuBinary1-%s.bin.gz" : NULL,
10114 };
10115 RTEXITCODE rcExit = FpuBinaryR80DumpAll(apszNameFmts);
10116 if (rcExit == RTEXITCODE_SUCCESS)
10117 rcExit = FpuBinaryFswR80DumpAll(apszNameFmts);
10118 if (rcExit == RTEXITCODE_SUCCESS)
10119 rcExit = FpuBinaryEflR80DumpAll(apszNameFmts);
10120 if (rcExit != RTEXITCODE_SUCCESS)
10121 return rcExit;
10122 }
10123
10124 if (fFpuBinary2)
10125 {
10126 const char * const apszNameFmts[] =
10127 {
10128 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
10129 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
10130 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuBinary2-%s.bin.gz" : NULL,
10131 };
10132 RTEXITCODE rcExit = FpuBinaryR64DumpAll(apszNameFmts);
10133 if (rcExit == RTEXITCODE_SUCCESS)
10134 rcExit = FpuBinaryR32DumpAll(apszNameFmts);
10135 if (rcExit == RTEXITCODE_SUCCESS)
10136 rcExit = FpuBinaryI32DumpAll(apszNameFmts);
10137 if (rcExit == RTEXITCODE_SUCCESS)
10138 rcExit = FpuBinaryI16DumpAll(apszNameFmts);
10139 if (rcExit == RTEXITCODE_SUCCESS)
10140 rcExit = FpuBinaryFswR64DumpAll(apszNameFmts);
10141 if (rcExit == RTEXITCODE_SUCCESS)
10142 rcExit = FpuBinaryFswR32DumpAll(apszNameFmts);
10143 if (rcExit == RTEXITCODE_SUCCESS)
10144 rcExit = FpuBinaryFswI32DumpAll(apszNameFmts);
10145 if (rcExit == RTEXITCODE_SUCCESS)
10146 rcExit = FpuBinaryFswI16DumpAll(apszNameFmts);
10147 if (rcExit != RTEXITCODE_SUCCESS)
10148 return rcExit;
10149 }
10150
10151 if (fFpuOther)
10152 {
10153 const char * const apszNameFmts[] =
10154 {
10155 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
10156 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
10157 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? "tstIEMAImplDataFpuOther-%s.bin.gz" : NULL,
10158 };
10159 RTEXITCODE rcExit = FpuUnaryR80DumpAll(apszNameFmts);
10160 if (rcExit == RTEXITCODE_SUCCESS)
10161 rcExit = FpuUnaryFswR80DumpAll(apszNameFmts);
10162 if (rcExit == RTEXITCODE_SUCCESS)
10163 rcExit = FpuUnaryTwoR80DumpAll(apszNameFmts);
10164 if (rcExit != RTEXITCODE_SUCCESS)
10165 return rcExit;
10166 }
10167
10168 return RTEXITCODE_SUCCESS;
10169#else
10170 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10171#endif
10172 }
10173
10174
10175 /*
10176     * Do testing. Currently disabled by default as the data needs to be checked
10177     * on both Intel and AMD systems first.
10178 */
10179 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
10180 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10181 if (enmMode == kModeTest)
10182 {
10183 RTTestBanner(g_hTest);
10184
10185 /* Allocate guarded memory for use in the tests. */
10186#define ALLOC_GUARDED_VAR(a_puVar) do { \
10187 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
10188 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
10189 } while (0)
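        /* One guarded variable per operand width used by the workers.  RTTestGuardedAlloc
           puts the allocation right up against an inaccessible guard page, so stray
           accesses outside the variable fault immediately; allocation failures are
           recorded with RTTestFailed rather than aborting the run. */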
10190 ALLOC_GUARDED_VAR(g_pu8);
10191 ALLOC_GUARDED_VAR(g_pu16);
10192 ALLOC_GUARDED_VAR(g_pu32);
10193 ALLOC_GUARDED_VAR(g_pu64);
10194 ALLOC_GUARDED_VAR(g_pu128);
10195 ALLOC_GUARDED_VAR(g_pu8Two);
10196 ALLOC_GUARDED_VAR(g_pu16Two);
10197 ALLOC_GUARDED_VAR(g_pu32Two);
10198 ALLOC_GUARDED_VAR(g_pu64Two);
10199 ALLOC_GUARDED_VAR(g_pu128Two);
10200 ALLOC_GUARDED_VAR(g_pfEfl);
10201 if (RTTestErrorCount(g_hTest) == 0)
10202 {
10203 if (fInt)
10204 {
10205 BinU8Test();
10206 BinU16Test();
10207 BinU32Test();
10208 BinU64Test();
10209 XchgTest();
10210 XaddTest();
10211 CmpXchgTest();
10212 CmpXchg8bTest();
10213 CmpXchg16bTest();
10214 ShiftDblTest();
10215 UnaryTest();
10216 ShiftTest();
10217 MulDivTest();
10218 BswapTest();
10219 }
10220
10221 if (fFpuLdSt)
10222 {
10223 FpuLoadConstTest();
10224 FpuLdMemTest();
10225 FpuLdIntTest();
10226 FpuLdD80Test();
10227 FpuStMemTest();
10228 FpuStIntTest();
10229 FpuStD80Test();
10230 }
10231
10232 if (fFpuBinary1)
10233 {
10234 FpuBinaryR80Test();
10235 FpuBinaryFswR80Test();
10236 FpuBinaryEflR80Test();
10237 }
10238
10239 if (fFpuBinary2)
10240 {
10241 FpuBinaryR64Test();
10242 FpuBinaryR32Test();
10243 FpuBinaryI32Test();
10244 FpuBinaryI16Test();
10245 FpuBinaryFswR64Test();
10246 FpuBinaryFswR32Test();
10247 FpuBinaryFswI32Test();
10248 FpuBinaryFswI16Test();
10249 }
10250
10251 if (fFpuOther)
10252 {
10253 FpuUnaryR80Test();
10254 FpuUnaryFswR80Test();
10255 FpuUnaryTwoR80Test();
10256 }
10257
10258 if (fSseFpBinary)
10259 {
10260 SseBinaryR32Test();
10261 SseBinaryR64Test();
10262 SseBinaryU128R32Test();
10263 SseBinaryU128R64Test();
10264
10265 SseBinaryI32R64Test();
10266 SseBinaryI64R64Test();
10267 SseBinaryI32R32Test();
10268 SseBinaryI64R32Test();
10269
10270 SseBinaryR64I32Test();
10271 SseBinaryR64I64Test();
10272 SseBinaryR32I32Test();
10273 SseBinaryR32I64Test();
10274 }
10275
10276 if (fSseFpOther)
10277 {
10278 SseCompareEflR32R32Test();
10279                SseCompareEflR64R64Test();
10281 SseCompareF2XmmR32Imm8Test();
10282 SseCompareF2XmmR64Imm8Test();
10283 SseConvertXmmI32R32Test();
10284 SseConvertXmmR32I32Test();
10285 SseConvertXmmI32R64Test();
10286 SseConvertXmmR64I32Test();
10287 SseConvertMmXmmTest();
10288 SseConvertXmmR32MmTest();
10289 SseConvertXmmR64MmTest();
10290 SseConvertMmI32XmmR32Test();
10291 }
10292
10293 if (fSsePcmpxstrx)
10294 {
10295 SseComparePcmpistriTest();
10296 SseComparePcmpistrmTest();
10297 SseComparePcmpestriTest();
10298 SseComparePcmpestrmTest();
10299 }
10300 }
10301 return RTTestSummaryAndDestroy(g_hTest);
10302 }
10303 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10304}
10305