VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 102781

Last change on this file since 102781 was 102551, checked in by vboxsync, 14 months ago

tstIEMAImpl: Added the ability to test specific fixed test inputs in addition to the generated ones. (Thought I had some buggy AImpl code, but problem was with the caller.) bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 462.8 KB
Line 
1/* $Id: tstIEMAImpl.cpp 102551 2023-12-08 16:40:43Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46#include <VBox/version.h>
47
48#include "tstIEMAImpl.h"
49
50
51/*********************************************************************************************************************************
52* Defined Constants And Macros *
53*********************************************************************************************************************************/
/** Subtest table entry with the extra value set to zero; see ENTRY_EX. */
#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
/**
 * Subtest table entry initializer.
 *
 * The initializer supplies, in order: the stringified name, iemAImpl_<name>,
 * NULL, g_aTests_<name>, &g_cTests_<name>, @a a_uExtra and
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.  This matches the leading members of the
 * structure declared by TYPEDEF_SUBTEST_TYPE; the fixed-test members are not
 * initialized explicitly here.
 */
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Like ENTRY_FIX_EX with the extra value set to zero. */
#define ENTRY_FIX(a_Name) ENTRY_FIX_EX(a_Name, 0)
/**
 * Subtest table entry that also references fixed test inputs.
 *
 * When TSTIEMAIMPL_WITH_GENERATOR is defined, the entry additionally supplies
 * RT_ELEMENTS(g_aFixedTests_<name>) and g_aFixedTests_<name>; otherwise it is
 * identical to ENTRY_EX.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_FIX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_FIX_EX(a_Name, a_uExtra) ENTRY_EX(a_Name, a_uExtra)
#endif
70
/** Like ENTRY_PFN_CAST_EX with the extra value set to zero. */
#define ENTRY_PFN_CAST(a_Name, a_pfnType) ENTRY_PFN_CAST_EX(a_Name, a_pfnType, 0)
/** Same layout as ENTRY_EX, but iemAImpl_<name> is cast to @a a_pfnType. */
#define ENTRY_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
    { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Like ENTRY_EX_BIN with the extra value set to zero. */
#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
/**
 * Same layout as ENTRY_EX, but the count pointer references g_cbTests_<name>
 * rather than g_cTests_<name> (presumably a byte size rather than an entry
 * count - TODO confirm against the test data declarations).
 */
#define ENTRY_EX_BIN(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
82
/** Like ENTRY_BIN_AVX_EX with the extra value set to zero. */
#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
/**
 * Entry referencing g_cbTests_<name>.  When IEM_WITHOUT_ASSEMBLY is defined
 * the function pointer is iemAImpl_<name>_fallback instead of iemAImpl_<name>.
 */
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif

/** Like ENTRY_BIN_SSE_OPT_EX with the extra value set to zero. */
#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
/**
 * Expands exactly like ENTRY_BIN_AVX_EX: iemAImpl_<name> normally, and
 * iemAImpl_<name>_fallback when IEM_WITHOUT_ASSEMBLY is defined.
 */
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif
108
109
/** Like ENTRY_INTEL_EX with the extra value set to zero. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
/**
 * Entry for the Intel flavour of a worker: the name gets an "_intel" suffix,
 * the function is iemAImpl_<name>_intel, the native function is
 * iemAImpl_<name>, and the flavour is IEMTARGETCPU_EFL_BEHAVIOR_INTEL.
 *
 * @note  @a a_fEflUndef is accepted but does not appear in the expansion.
 */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Like ENTRY_AMD_EX with the extra value set to zero. */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
/**
 * AMD counterpart of ENTRY_INTEL_EX: "_amd" suffix, iemAImpl_<name>_amd and
 * IEMTARGETCPU_EFL_BEHAVIOR_AMD.
 *
 * @note  @a a_fEflUndef is accepted but does not appear in the expansion.
 */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
121
/**
 * Declares a subtest descriptor structure type named @a a_TypeName.
 *
 * Members, in the order the ENTRY* initializers fill them: name string,
 * function under test (pfn), native function (pfnNative, NULL in most ENTRY*
 * macros), test array and pointer to its count, an extra 32-bit value, the
 * EFLAGS behaviour flavour index, and finally the fixed test count and array
 * (only supplied by ENTRY_FIX_EX in generator builds).
 *
 * @param   a_TypeName          Name of the struct/typedef to declare.
 * @param   a_TestType          Element type of the test arrays.
 * @param   a_FunctionPtrType   Type of the pfn and pfnNative members.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char *pszName; \
        a_FunctionPtrType pfn; \
        a_FunctionPtrType pfnNative; \
        a_TestType const *paTests; \
        uint32_t const *pcTests; \
        uint32_t uExtra; \
        uint8_t idxCpuEflFlavour; \
        uint16_t cFixedTests; \
        a_TestType const *paFixedTests; \
    } a_TypeName
135
/**
 * Evaluates to 2 when the subtest's idxCpuEflFlavour equals g_idxCpuEflFlavour
 * and it has a non-NULL pfnNative, otherwise to 1.
 */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
138
139
140/*********************************************************************************************************************************
141* Global Variables *
142*********************************************************************************************************************************/
/** Test handle passed to the RTTest* calls (RTTestSub, RTTestSkipped). */
static RTTEST g_hTest;
/** EFLAGS behaviour flavour compared against subtest entries by COUNT_VARIATIONS; initially Intel. */
static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Rand*Dst helpers return zero for test indexes below this value. */
static uint32_t g_cZeroDstTests = 2;
/** Rand*Src helpers return zero for test indexes below this value. */
static uint32_t g_cZeroSrcTests = 4;
#endif
/* Typed scratch pointers; they are assigned and used outside this part of the
   file (presumably operand buffers - TODO confirm). */
static uint8_t *g_pu8, *g_pu8Two;
static uint16_t *g_pu16, *g_pu16Two;
static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
static uint64_t *g_pu64, *g_pu64Two;
static RTUINT128U *g_pu128, *g_pu128Two;

/** Formatting buffers handed out round-robin by the GenFormat* helpers. */
static char g_aszBuf[32][256];
/** Running index into g_aszBuf; used modulo the number of buffers. */
static unsigned g_idxBuf = 0;

/** Number of valid entries in g_apszIncludeTestPatterns. */
static uint32_t g_cIncludeTestPatterns;
/** Number of valid entries in g_apszExcludeTestPatterns. */
static uint32_t g_cExcludeTestPatterns;
/** Include patterns consulted by IsTestEnabled. */
static const char *g_apszIncludeTestPatterns[64];
/** Exclude patterns consulted by IsTestEnabled (checked before the includes). */
static const char *g_apszExcludeTestPatterns[64];

/** Verbosity level; when above zero skipped subtests get an "excluded" reason. */
static unsigned g_cVerbosity = 0;
164
165
166/*********************************************************************************************************************************
167* Internal Functions *
168*********************************************************************************************************************************/
/* Forward declarations: these formatters are used by assertion messages in the
   random value helpers below; their definitions are not in this part of the file. */
static const char *FormatR80(PCRTFLOAT80U pr80);
static const char *FormatR64(PCRTFLOAT64U pr64);
static const char *FormatR32(PCRTFLOAT32U pr32);
172
173
174/*
175 * Random helpers.
176 */
177
178static uint32_t RandEFlags(void)
179{
180 uint32_t fEfl = RTRandU32();
181 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
182}
183
184#ifdef TSTIEMAIMPL_WITH_GENERATOR
185
186static uint8_t RandU8(void)
187{
188 return RTRandU32Ex(0, 0xff);
189}
190
191
192static uint16_t RandU16(void)
193{
194 return RTRandU32Ex(0, 0xffff);
195}
196
197
198static uint32_t RandU32(void)
199{
200 return RTRandU32();
201}
202
203#endif
204
205static uint64_t RandU64(void)
206{
207 return RTRandU64();
208}
209
210
211static RTUINT128U RandU128(void)
212{
213 RTUINT128U Ret;
214 Ret.s.Hi = RTRandU64();
215 Ret.s.Lo = RTRandU64();
216 return Ret;
217}
218
219#ifdef TSTIEMAIMPL_WITH_GENERATOR
220
221static uint8_t RandU8Dst(uint32_t iTest)
222{
223 if (iTest < g_cZeroDstTests)
224 return 0;
225 return RandU8();
226}
227
228
229static uint8_t RandU8Src(uint32_t iTest)
230{
231 if (iTest < g_cZeroSrcTests)
232 return 0;
233 return RandU8();
234}
235
236
237static uint16_t RandU16Dst(uint32_t iTest)
238{
239 if (iTest < g_cZeroDstTests)
240 return 0;
241 return RandU16();
242}
243
244
245static uint16_t RandU16Src(uint32_t iTest)
246{
247 if (iTest < g_cZeroSrcTests)
248 return 0;
249 return RandU16();
250}
251
252
253static uint32_t RandU32Dst(uint32_t iTest)
254{
255 if (iTest < g_cZeroDstTests)
256 return 0;
257 return RandU32();
258}
259
260
261static uint32_t RandU32Src(uint32_t iTest)
262{
263 if (iTest < g_cZeroSrcTests)
264 return 0;
265 return RandU32();
266}
267
268
269static uint64_t RandU64Dst(uint32_t iTest)
270{
271 if (iTest < g_cZeroDstTests)
272 return 0;
273 return RandU64();
274}
275
276
277static uint64_t RandU64Src(uint32_t iTest)
278{
279 if (iTest < g_cZeroSrcTests)
280 return 0;
281 return RandU64();
282}
283
284
285/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
286static int16_t RandI16Src2(uint32_t iTest)
287{
288 if (iTest < 18 * 4)
289 switch (iTest % 4)
290 {
291 case 0: return 0;
292 case 1: return INT16_MAX;
293 case 2: return INT16_MIN;
294 case 3: break;
295 }
296 return (int16_t)RandU16();
297}
298
299
300/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
301static int32_t RandI32Src2(uint32_t iTest)
302{
303 if (iTest < 18 * 4)
304 switch (iTest % 4)
305 {
306 case 0: return 0;
307 case 1: return INT32_MAX;
308 case 2: return INT32_MIN;
309 case 3: break;
310 }
311 return (int32_t)RandU32();
312}
313
314
315static int64_t RandI64Src(uint32_t iTest)
316{
317 RT_NOREF(iTest);
318 return (int64_t)RandU64();
319}
320
321
322static uint16_t RandFcw(void)
323{
324 return RandU16() & ~X86_FCW_ZERO_MASK;
325}
326
327
328static uint16_t RandFsw(void)
329{
330 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
331 return RandU16();
332}
333
334
335static uint32_t RandMxcsr(void)
336{
337 return RandU32() & ~X86_MXCSR_ZERO_MASK;
338}
339
340
341static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
342{
343 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
344 pr80->sj64.uFraction >>= cShift;
345 else
346 pr80->sj64.uFraction = (cShift % 19) + 1;
347}
348
349
350
/**
 * Produces a random 80-bit floating point value of the kind selected by bType.
 *
 * The value starts out as 80 random bits and is then adjusted according to
 * bType (only the low five bits are used):
 *      -  0: zero, 1: pseudo-infinity, 2: infinity, 3: indefinite
 *      -  4,5: denormals; 6,7: pseudo-denormals
 *      -  8,9: pseudo-NaNs
 *      -  10,11: quiet NaNs; 12,13: signalling NaNs
 *      -  14,15: unnormals
 *      -  16..25: normals, where 16 uses an all-ones fraction
 *      -  26..31: the random bits are returned unmodified
 * Odd types in the 4..15 range additionally run the fraction through
 * SafeR80FractionShift.
 *
 * @returns The generated value.
 * @param   bType       Value type selector, see above.
 * @param   cTarget     Bit width of the conversion target; the entry assertion
 *                      accepts 80 (16 for integer targets), 64, 32, and 59 for
 *                      integer targets.  Only affects the normals (16..25).
 * @param   fIntTarget  Whether the target is an integer; selects how the
 *                      exponent of normals is limited.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        /* The shift count comes from the still-random exponent, so this must precede the exponent write below. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* The random integer bit is moved into fraction bit 62 before being cleared. */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        /* Keep the exponent away from both zero and the maximum. */
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Exponent window of the 64-bit/32-bit target expressed in 80-bit biased exponents; full range for 80. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    /* NOTE(review): uExponent is the biased exponent here (clamped to >= RTFLOAT80U_EXP_BIAS
                       above), so this shift count looks out of range for a 64-bit shift - confirm intent. */
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    return r80;
}
489
490
491static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
492{
493 /*
494 * Make it more likely that we get a good selection of special values.
495 */
496 return RandR80Ex(RandU8(), cTarget, fIntTarget);
497
498}
499
500
501static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
502{
503 /* Make sure we cover all the basic types first before going for random selection: */
504 if (iTest <= 18)
505 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
506 return RandR80(cTarget, fIntTarget);
507}
508
509
510/**
511 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
512 * to a 0..17, covering all basic value types.
513 */
514static uint8_t RandR80Src12RemapType(uint8_t bType)
515{
516 switch (bType)
517 {
518 case 0: return 18; /* normal */
519 case 1: return 16; /* normal extreme rounding */
520 case 2: return 14; /* unnormal */
521 case 3: return 12; /* Signalling NaN */
522 case 4: return 10; /* Quiet NaN */
523 case 5: return 8; /* PseudoNaN */
524 case 6: return 6; /* Pseudo Denormal */
525 case 7: return 4; /* Denormal */
526 case 8: return 3; /* Indefinite */
527 case 9: return 2; /* Infinity */
528 case 10: return 1; /* Pseudo-Infinity */
529 case 11: return 0; /* Zero */
530 default: AssertFailedReturn(18);
531 }
532}
533
534
535/**
536 * This works in tandem with RandR80Src2 to make sure we cover all operand
537 * type mixes first before we venture into regular random testing.
538 *
539 * There are 11 basic variations, when we leave out the five odd ones using
540 * SafeR80FractionShift. Because of the special normalized value targetting at
541 * rounding, we make it an even 12. So 144 combinations for two operands.
542 */
543static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
544{
545 if (cPartnerBits == 80)
546 {
547 Assert(!fPartnerInt);
548 if (iTest < 12 * 12)
549 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
550 }
551 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
552 {
553 if (iTest < 12 * 10)
554 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
555 }
556 else if (iTest < 18 * 4 && fPartnerInt)
557 return RandR80Ex(iTest / 4);
558 return RandR80();
559}
560
561
562/** Partner to RandR80Src1. */
563static RTFLOAT80U RandR80Src2(uint32_t iTest)
564{
565 if (iTest < 12 * 12)
566 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
567 return RandR80();
568}
569
570
571static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
572{
573 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
574 pr64->s64.uFraction >>= cShift;
575 else
576 pr64->s64.uFraction = (cShift % 19) + 1;
577}
578
579
580static RTFLOAT64U RandR64Ex(uint8_t bType)
581{
582 RTFLOAT64U r64;
583 r64.u = RandU64();
584
585 /*
586 * Make it more likely that we get a good selection of special values.
587 * On average 6 out of 16 calls should return a special value.
588 */
589 bType &= 0xf;
590 if (bType == 0 || bType == 1)
591 {
592 /* 0 or Infinity. We only keep fSign here. */
593 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
594 r64.s.uFractionHigh = 0;
595 r64.s.uFractionLow = 0;
596 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
597 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
598 }
599 else if (bType == 2 || bType == 3)
600 {
601 /* Subnormals */
602 if (bType == 3)
603 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
604 else if (r64.s64.uFraction == 0)
605 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
606 r64.s64.uExponent = 0;
607 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
608 }
609 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
610 {
611 /* NaNs */
612 if (bType & 1)
613 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
614 else if (r64.s64.uFraction == 0)
615 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
616 r64.s64.uExponent = 0x7ff;
617 if (bType < 6)
618 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
619 else
620 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
621 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
622 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
623 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
624 }
625 else if (bType < 12)
626 {
627 /* Make sure we have lots of normalized values. */
628 if (r64.s.uExponent == 0)
629 r64.s.uExponent = 1;
630 else if (r64.s.uExponent == 0x7ff)
631 r64.s.uExponent = 0x7fe;
632 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
633 }
634 return r64;
635}
636
637
638static RTFLOAT64U RandR64Src(uint32_t iTest)
639{
640 if (iTest < 16)
641 return RandR64Ex(iTest);
642 return RandR64Ex(RandU8());
643}
644
645
646/** Pairing with a 80-bit floating point arg. */
647static RTFLOAT64U RandR64Src2(uint32_t iTest)
648{
649 if (iTest < 12 * 10)
650 return RandR64Ex(9 - iTest % 10); /* start with normal values */
651 return RandR64Ex(RandU8());
652}
653
654
655static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
656{
657 if (pr32->s.uFraction >= RT_BIT_32(cShift))
658 pr32->s.uFraction >>= cShift;
659 else
660 pr32->s.uFraction = (cShift % 19) + 1;
661}
662
663
664static RTFLOAT32U RandR32Ex(uint8_t bType)
665{
666 RTFLOAT32U r32;
667 r32.u = RandU32();
668
669 /*
670 * Make it more likely that we get a good selection of special values.
671 * On average 6 out of 16 calls should return a special value.
672 */
673 bType &= 0xf;
674 if (bType == 0 || bType == 1)
675 {
676 /* 0 or Infinity. We only keep fSign here. */
677 r32.s.uExponent = bType == 0 ? 0 : 0xff;
678 r32.s.uFraction = 0;
679 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
680 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
681 }
682 else if (bType == 2 || bType == 3)
683 {
684 /* Subnormals */
685 if (bType == 3)
686 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
687 else if (r32.s.uFraction == 0)
688 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
689 r32.s.uExponent = 0;
690 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
691 }
692 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
693 {
694 /* NaNs */
695 if (bType & 1)
696 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
697 else if (r32.s.uFraction == 0)
698 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
699 r32.s.uExponent = 0xff;
700 if (bType < 6)
701 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
702 else
703 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
704 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
705 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
706 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
707 }
708 else if (bType < 12)
709 {
710 /* Make sure we have lots of normalized values. */
711 if (r32.s.uExponent == 0)
712 r32.s.uExponent = 1;
713 else if (r32.s.uExponent == 0xff)
714 r32.s.uExponent = 0xfe;
715 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
716 }
717 return r32;
718}
719
720
721static RTFLOAT32U RandR32Src(uint32_t iTest)
722{
723 if (iTest < 16)
724 return RandR32Ex(iTest);
725 return RandR32Ex(RandU8());
726}
727
728
729/** Pairing with a 80-bit floating point arg. */
730static RTFLOAT32U RandR32Src2(uint32_t iTest)
731{
732 if (iTest < 12 * 10)
733 return RandR32Ex(9 - iTest % 10); /* start with normal values */
734 return RandR32Ex(RandU8());
735}
736
737
738static RTPBCD80U RandD80Src(uint32_t iTest)
739{
740 if (iTest < 3)
741 {
742 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
743 return d80Zero;
744 }
745 if (iTest < 5)
746 {
747 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
748 return d80Ind;
749 }
750
751 RTPBCD80U d80;
752 uint8_t b = RandU8();
753 d80.s.fSign = b & 1;
754
755 if ((iTest & 7) >= 6)
756 {
757 /* Illegal */
758 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
759 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
760 d80.s.abPairs[iPair] = RandU8();
761 }
762 else
763 {
764 /* Normal */
765 d80.s.uPad = 0;
766 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
767 {
768 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
769 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
770 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
771 }
772 }
773 return d80;
774}
775
776
777static const char *GenFormatR80(PCRTFLOAT80U plrd)
778{
779 if (RTFLOAT80U_IS_ZERO(plrd))
780 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
781 if (RTFLOAT80U_IS_INF(plrd))
782 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
783 if (RTFLOAT80U_IS_INDEFINITE(plrd))
784 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
785 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
786 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
787 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
788 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
789
790 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
791 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
792 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
793 return pszBuf;
794}
795
796static const char *GenFormatR64(PCRTFLOAT64U prd)
797{
798 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
799 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
800 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
801 return pszBuf;
802}
803
804
805static const char *GenFormatR32(PCRTFLOAT32U pr)
806{
807 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
808 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
809 return pszBuf;
810}
811
812
813static const char *GenFormatD80(PCRTPBCD80U pd80)
814{
815 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
816 size_t off;
817 if (pd80->s.uPad == 0)
818 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
819 else
820 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
821 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
822 while (iPair-- > 0)
823 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
824 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
825 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
826 pszBuf[off++] = ')';
827 pszBuf[off++] = '\0';
828 return pszBuf;
829}
830
831
832static const char *GenFormatI64(int64_t i64)
833{
834 if (i64 == INT64_MIN) /* This one is problematic */
835 return "INT64_MIN";
836 if (i64 == INT64_MAX)
837 return "INT64_MAX";
838 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
839 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
840 return pszBuf;
841}
842
#if 0 /* unused */
/** Pointer variant of GenFormatI64: formats the value @a pi64 points to.  Currently compiled out. */
static const char *GenFormatI64(int64_t const *pi64)
{
    return GenFormatI64(*pi64);
}
#endif
849
850static const char *GenFormatI32(int32_t i32)
851{
852 if (i32 == INT32_MIN) /* This one is problematic */
853 return "INT32_MIN";
854 if (i32 == INT32_MAX)
855 return "INT32_MAX";
856 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
857 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
858 return pszBuf;
859}
860
861
862const char *GenFormatI32(int32_t const *pi32)
863{
864 return GenFormatI32(*pi32);
865}
866
867
868const char *GenFormatI16(int16_t i16)
869{
870 if (i16 == INT16_MIN) /* This one is problematic */
871 return "INT16_MIN";
872 if (i16 == INT16_MAX)
873 return "INT16_MAX";
874 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
875 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
876 return pszBuf;
877}
878
879
880const char *GenFormatI16(int16_t const *pi16)
881{
882 return GenFormatI16(*pi16);
883}
884
885
886static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
887{
888 /* We want to tag the generated source code with the revision that produced it. */
889 static char s_szRev[] = "$Revision: 102551 $";
890 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
891 size_t cchRev = 0;
892 while (RT_C_IS_DIGIT(pszRev[cchRev]))
893 cchRev++;
894
895 RTStrmPrintf(pOut,
896 "/* $Id: tstIEMAImpl.cpp 102551 2023-12-08 16:40:43Z vboxsync $ */\n"
897 "/** @file\n"
898 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
899 " */\n"
900 "\n"
901 "/*\n"
902 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
903 " *\n"
904 " * This file is part of VirtualBox base platform packages, as\n"
905 " * available from https://www.virtualbox.org.\n"
906 " *\n"
907 " * This program is free software; you can redistribute it and/or\n"
908 " * modify it under the terms of the GNU General Public License\n"
909 " * as published by the Free Software Foundation, in version 3 of the\n"
910 " * License.\n"
911 " *\n"
912 " * This program is distributed in the hope that it will be useful, but\n"
913 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
914 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
915 " * General Public License for more details.\n"
916 " *\n"
917 " * You should have received a copy of the GNU General Public License\n"
918 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
919 " *\n"
920 " * SPDX-License-Identifier: GPL-3.0-only\n"
921 " */\n"
922 "\n"
923 "#include \"tstIEMAImpl.h\"\n"
924 "\n"
925 ,
926 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
927}
928
929
930static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
931{
932 PRTSTREAM pOut = NULL;
933 int rc = RTStrmOpen(pszFilename, "w", &pOut);
934 if (RT_SUCCESS(rc))
935 {
936 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
937 return pOut;
938 }
939 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
940 return NULL;
941}
942
943
944static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
945{
946 RTStrmPrintf(pOut,
947 "\n"
948 "/* end of file */\n");
949 int rc = RTStrmClose(pOut);
950 if (RT_SUCCESS(rc))
951 return rcExit;
952 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
953}
954
955
956static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
957{
958 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
959}
960
961
962static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
963{
964 RTStrmPrintf(pOut,
965 "};\n"
966 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
967 "\n",
968 pszName, pszName);
969}
970
971#endif /* TSTIEMAIMPL_WITH_GENERATOR */
972
973
974/*
975 * Test helpers.
976 */
977static bool IsTestEnabled(const char *pszName)
978{
979 /* Process excludes first: */
980 uint32_t i = g_cExcludeTestPatterns;
981 while (i-- > 0)
982 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
983 return false;
984
985 /* If no include patterns, everything is included: */
986 i = g_cIncludeTestPatterns;
987 if (!i)
988 return true;
989
990 /* Otherwise only tests in the include patters gets tested: */
991 while (i-- > 0)
992 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
993 return true;
994
995 return false;
996}
997
998
999static bool SubTestAndCheckIfEnabled(const char *pszName)
1000{
1001 RTTestSub(g_hTest, pszName);
1002 if (IsTestEnabled(pszName))
1003 return true;
1004 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1005 return false;
1006}
1007
1008
1009static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1010{
1011 if (fActual == fExpected)
1012 return "";
1013
1014 uint32_t const fXor = fActual ^ fExpected;
1015 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1016 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1017
1018 static struct
1019 {
1020 const char *pszName;
1021 uint32_t fFlag;
1022 } const s_aFlags[] =
1023 {
1024#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1025 EFL_ENTRY(CF),
1026 EFL_ENTRY(PF),
1027 EFL_ENTRY(AF),
1028 EFL_ENTRY(ZF),
1029 EFL_ENTRY(SF),
1030 EFL_ENTRY(TF),
1031 EFL_ENTRY(IF),
1032 EFL_ENTRY(DF),
1033 EFL_ENTRY(OF),
1034 EFL_ENTRY(IOPL),
1035 EFL_ENTRY(NT),
1036 EFL_ENTRY(RF),
1037 EFL_ENTRY(VM),
1038 EFL_ENTRY(AC),
1039 EFL_ENTRY(VIF),
1040 EFL_ENTRY(VIP),
1041 EFL_ENTRY(ID),
1042 };
1043 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1044 if (s_aFlags[i].fFlag & fXor)
1045 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1046 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1047 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1048 return pszBuf;
1049}
1050
1051
1052static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1053{
1054 if (fActual == fExpected)
1055 return "";
1056
1057 uint16_t const fXor = fActual ^ fExpected;
1058 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1059 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1060
1061 static struct
1062 {
1063 const char *pszName;
1064 uint32_t fFlag;
1065 } const s_aFlags[] =
1066 {
1067#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1068 FSW_ENTRY(IE),
1069 FSW_ENTRY(DE),
1070 FSW_ENTRY(ZE),
1071 FSW_ENTRY(OE),
1072 FSW_ENTRY(UE),
1073 FSW_ENTRY(PE),
1074 FSW_ENTRY(SF),
1075 FSW_ENTRY(ES),
1076 FSW_ENTRY(C0),
1077 FSW_ENTRY(C1),
1078 FSW_ENTRY(C2),
1079 FSW_ENTRY(C3),
1080 FSW_ENTRY(B),
1081 };
1082 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1083 if (s_aFlags[i].fFlag & fXor)
1084 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1085 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1086 if (fXor & X86_FSW_TOP_MASK)
1087 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1088 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1089#if 0 /* For debugging fprem & fprem1 */
1090 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1091 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1092#endif
1093 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1094 return pszBuf;
1095}
1096
1097
1098static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1099{
1100 if (fActual == fExpected)
1101 return "";
1102
1103 uint16_t const fXor = fActual ^ fExpected;
1104 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1105 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1106
1107 static struct
1108 {
1109 const char *pszName;
1110 uint32_t fFlag;
1111 } const s_aFlags[] =
1112 {
1113#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1114 MXCSR_ENTRY(IE),
1115 MXCSR_ENTRY(DE),
1116 MXCSR_ENTRY(ZE),
1117 MXCSR_ENTRY(OE),
1118 MXCSR_ENTRY(UE),
1119 MXCSR_ENTRY(PE),
1120
1121 MXCSR_ENTRY(IM),
1122 MXCSR_ENTRY(DM),
1123 MXCSR_ENTRY(ZM),
1124 MXCSR_ENTRY(OM),
1125 MXCSR_ENTRY(UM),
1126 MXCSR_ENTRY(PM),
1127
1128 MXCSR_ENTRY(DAZ),
1129 MXCSR_ENTRY(FZ),
1130#undef MXCSR_ENTRY
1131 };
1132 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1133 if (s_aFlags[i].fFlag & fXor)
1134 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1135 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1136 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1137 return pszBuf;
1138}
1139
1140
1141static const char *FormatFcw(uint16_t fFcw)
1142{
1143 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1144
1145 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1146 switch (fFcw & X86_FCW_PC_MASK)
1147 {
1148 case X86_FCW_PC_24: pszPC = "PC24"; break;
1149 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1150 case X86_FCW_PC_53: pszPC = "PC53"; break;
1151 case X86_FCW_PC_64: pszPC = "PC64"; break;
1152 }
1153
1154 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1155 switch (fFcw & X86_FCW_RC_MASK)
1156 {
1157 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1158 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1159 case X86_FCW_RC_UP: pszRC = "UP"; break;
1160 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1161 }
1162 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1163
1164 static struct
1165 {
1166 const char *pszName;
1167 uint32_t fFlag;
1168 } const s_aFlags[] =
1169 {
1170#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1171 FCW_ENTRY(IM),
1172 FCW_ENTRY(DM),
1173 FCW_ENTRY(ZM),
1174 FCW_ENTRY(OM),
1175 FCW_ENTRY(UM),
1176 FCW_ENTRY(PM),
1177 { "6M", 64 },
1178 };
1179 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1180 if (fFcw & s_aFlags[i].fFlag)
1181 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1182
1183 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1184 return pszBuf;
1185}
1186
1187
1188static const char *FormatMxcsr(uint32_t fMxcsr)
1189{
1190 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1191
1192 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1193 switch (fMxcsr & X86_MXCSR_RC_MASK)
1194 {
1195 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1196 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1197 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1198 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1199 }
1200
1201 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1202 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1203 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1204
1205 static struct
1206 {
1207 const char *pszName;
1208 uint32_t fFlag;
1209 } const s_aFlags[] =
1210 {
1211#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1212 MXCSR_ENTRY(IE),
1213 MXCSR_ENTRY(DE),
1214 MXCSR_ENTRY(ZE),
1215 MXCSR_ENTRY(OE),
1216 MXCSR_ENTRY(UE),
1217 MXCSR_ENTRY(PE),
1218
1219 MXCSR_ENTRY(IM),
1220 MXCSR_ENTRY(DM),
1221 MXCSR_ENTRY(ZM),
1222 MXCSR_ENTRY(OM),
1223 MXCSR_ENTRY(UM),
1224 MXCSR_ENTRY(PM),
1225 { "6M", 64 },
1226 };
1227 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1228 if (fMxcsr & s_aFlags[i].fFlag)
1229 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1230
1231 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1232 return pszBuf;
1233}
1234
1235
1236static const char *FormatR80(PCRTFLOAT80U pr80)
1237{
1238 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1239 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1240 return pszBuf;
1241}
1242
1243
1244static const char *FormatR64(PCRTFLOAT64U pr64)
1245{
1246 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1247 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1248 return pszBuf;
1249}
1250
1251
1252static const char *FormatR32(PCRTFLOAT32U pr32)
1253{
1254 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1255 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1256 return pszBuf;
1257}
1258
1259
1260static const char *FormatD80(PCRTPBCD80U pd80)
1261{
1262 /* There is only one indefinite endcoding (same as for 80-bit
1263 floating point), so get it out of the way first: */
1264 if (RTPBCD80U_IS_INDEFINITE(pd80))
1265 return "Ind";
1266
1267 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1268 size_t off = 0;
1269 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1270 unsigned cBadDigits = 0;
1271 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1272 while (iPair-- > 0)
1273 {
1274 static const char s_szDigits[] = "0123456789abcdef";
1275 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1276 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1277 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1278 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1279 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1280 }
1281 if (cBadDigits || pd80->s.uPad != 0)
1282 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1283 pszBuf[off] = '\0';
1284 return pszBuf;
1285}
1286
1287
#if 0
/**
 * Formats a signed 64-bit integer as hex using RTStrFormatU64 with the
 * RTSTR_F_SPECIAL and RTSTR_F_VALSIGNED flags.
 *
 * Currently compiled out (no users visible in this part of the file).
 *
 * @returns The text, stored in one of the rotating g_aszBuf buffers.
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1296
1297
1298static const char *FormatI32(int32_t const *piVal)
1299{
1300 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1301 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1302 return pszBuf;
1303}
1304
1305
1306static const char *FormatI16(int16_t const *piVal)
1307{
1308 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1309 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1310 return pszBuf;
1311}
1312
1313
1314static const char *FormatU128(PCRTUINT128U puVal)
1315{
1316 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1317 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1318 return pszBuf;
1319}
1320
1321
1322/*
1323 * Binary operations.
1324 */
/* Sub-test descriptor types for the 8, 16, 32 and 64-bit binary operations.
   Each line names the descriptor type, the test record type and the worker
   function pointer type.  TYPEDEF_SUBTEST_TYPE is defined earlier in the file;
   presumably it produces the structure with the pszName, pfn, pfnNative,
   paTests, pcTests, ... members used below - TODO confirm. */
TYPEDEF_SUBTEST_TYPE(BINU8_T,  BINU8_TEST_T,  PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1329
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines BinU<a_cBits>Generate(), the test data generator for the binary
 * operations in g_aBinU<a_cBits>.
 *
 * For each table entry the generator uses pfnNative if set and pfn otherwise.
 * Entries whose idxCpuEflFlavour is not IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are
 * skipped unless the flavour equals g_idxCpuEflFlavour, and their output goes
 * to @a pOutCpu instead of @a pOut.
 *
 * It first emits @a cTests records with random EFLAGS, destination and source
 * (the source is masked to a_cBits - 1 when the entry's uExtra is non-zero),
 * then one record per fixed test of the entry.  A fixed test with fEflIn set
 * to UINT32_MAX gets random input EFLAGS.  In both cases the output EFLAGS and
 * destination are produced by calling the worker, so the corresponding values
 * in the fixed test tables are not used.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR is not defined.
 *
 * @param   a_cBits     The operand width in bits (pasted into identifiers).
 * @param   a_Fmt       The format string for values of the operand type.
 * @param   a_TestType  The test record type.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                        : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
            Test.uMisc = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* fixed #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1382
/**
 * Instantiates the generator (GEN_BINARY_TESTS) and defines
 * BinU<a_cBits>Test(), which replays the recorded test data for every enabled
 * entry in @a a_aSubTests.
 *
 * Each record is first run with local destination and EFLAGS variables and
 * compared against the recorded outputs; a mismatch is reported with
 * RTTestFailed, including an EFlagsDiff breakdown and whether "eflags", "dst"
 * or "both" differed.  If that passes, the record is run a second time with
 * the operands placed in the g_pu<a_cBits> and g_pfEfl buffers and checked
 * again.
 *
 * The whole data set is run COUNT_VARIATIONS() times per entry; after the
 * first pass the worker is switched from pfn to pfnNative (failures from
 * later passes are tagged with "/n").
 *
 * @param   a_cBits     The operand width in bits (pasted into identifiers).
 * @param   a_uType     The operand type.
 * @param   a_Fmt       The format string for values of the operand type.
 * @param   a_TestType  The test record type.
 * @param   a_aSubTests The sub-test table to run.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1423
1424
1425/*
1426 * 8-bit binary operations.
1427 */
/** The 8-bit binary operation sub-tests, consumed by BinU8Test (and by
 *  BinU8Generate in generator builds).  cmp and test are added with a cast to
 *  the common worker function pointer type. */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
    ENTRY_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
};
/* Instantiates BinU8Test (and BinU8Generate in generator builds). */
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1448
1449
1450/*
1451 * 16-bit binary operations.
1452 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed inputs for add_u16, appended after the random tests by the generator.
 *  An input EFLAGS value of UINT32_MAX makes the generator pick random flags;
 *  the output columns are recomputed by the generator. */
static const BINU16_TEST_T g_aFixedTests_add_u16[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uMisc */
    { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
};
#endif
/** The 16-bit binary operation sub-tests, consumed by BinU16Test (and by
 *  BinU16Generate in generator builds).  The bit test entries pass 1 as the
 *  extra value, which makes the generator restrict the source operand to a
 *  valid bit index. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY_FIX(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
    ENTRY_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
    ENTRY_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
/* Instantiates BinU16Test (and BinU16Generate in generator builds). */
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1494
1495
1496/*
1497 * 32-bit binary operations.
1498 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed inputs for add_u32, appended after the random tests by the generator.
 *  An input EFLAGS value of UINT32_MAX makes the generator pick random flags;
 *  the output columns are recomputed by the generator. */
static const BINU32_TEST_T g_aFixedTests_add_u32[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uMisc */
    { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
};
#endif
/** The 32-bit binary operation sub-tests, consumed by BinU32Test (and by
 *  BinU32Generate in generator builds).  The bit test entries pass 1 as the
 *  extra value, which makes the generator restrict the source operand to a
 *  valid bit index. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY_FIX(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
    ENTRY_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
    ENTRY_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
/* Instantiates BinU32Test (and BinU32Generate in generator builds). */
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1539
1540
1541/*
1542 * 64-bit binary operations.
1543 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed inputs for add_u64, appended after the random tests by the generator.
 *  An input EFLAGS value of UINT32_MAX makes the generator pick random flags;
 *  the output columns are recomputed by the generator. */
static const BINU64_TEST_T g_aFixedTests_add_u64[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uMisc */
    { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
};
#endif
/** The 64-bit binary operation sub-tests, consumed by BinU64Test (and by
 *  BinU64Generate in generator builds).  The bit test entries pass 1 as the
 *  extra value, which makes the generator restrict the source operand to a
 *  valid bit index. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY_FIX(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
    ENTRY_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
    ENTRY_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
/* Instantiates BinU64Test (and BinU64Generate in generator builds). */
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1584
1585
1586/*
1587 * XCHG
1588 */
1589static void XchgTest(void)
1590{
1591 if (!SubTestAndCheckIfEnabled("xchg"))
1592 return;
1593 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1594 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1595 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1596 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1597
1598 static struct
1599 {
1600 uint8_t cb; uint64_t fMask;
1601 union
1602 {
1603 uintptr_t pfn;
1604 FNIEMAIMPLXCHGU8 *pfnU8;
1605 FNIEMAIMPLXCHGU16 *pfnU16;
1606 FNIEMAIMPLXCHGU32 *pfnU32;
1607 FNIEMAIMPLXCHGU64 *pfnU64;
1608 } u;
1609 }
1610 s_aXchgWorkers[] =
1611 {
1612 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1613 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1614 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1615 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1616 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1617 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1618 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1619 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1620 };
1621 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1622 {
1623 RTUINT64U uIn1, uIn2, uMem, uDst;
1624 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1625 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1626 if (uIn1.u == uIn2.u)
1627 uDst.u = uIn2.u = ~uIn2.u;
1628
1629 switch (s_aXchgWorkers[i].cb)
1630 {
1631 case 1:
1632 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1633 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1634 break;
1635 case 2:
1636 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1637 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1638 break;
1639 case 4:
1640 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1641 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1642 break;
1643 case 8:
1644 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1645 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1646 break;
1647 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1648 }
1649
1650 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1651 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1652 }
1653}
1654
1655
1656/*
1657 * XADD
1658 */
1659static void XaddTest(void)
1660{
1661#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1662 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1663 static struct \
1664 { \
1665 const char *pszName; \
1666 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1667 BINU ## a_cBits ## _TEST_T const *paTests; \
1668 uint32_t const *pcTests; \
1669 } const s_aFuncs[] = \
1670 { \
1671 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1672 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1673 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1674 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1675 }; \
1676 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1677 { \
1678 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1679 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1680 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1681 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1682 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1683 { \
1684 uint32_t fEfl = paTests[iTest].fEflIn; \
1685 a_Type uSrc = paTests[iTest].uSrcIn; \
1686 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1687 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1688 if ( fEfl != paTests[iTest].fEflOut \
1689 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1690 || uSrc != paTests[iTest].uDstIn) \
1691 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1692 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1693 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1694 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1695 } \
1696 } \
1697 } while(0)
1698 TEST_XADD(8, uint8_t, "%#04x");
1699 TEST_XADD(16, uint16_t, "%#06x");
1700 TEST_XADD(32, uint32_t, "%#010RX32");
1701 TEST_XADD(64, uint64_t, "%#010RX64");
1702}
1703
1704
1705/*
1706 * CMPXCHG
1707 */
1708
1709static void CmpXchgTest(void)
1710{
1711#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1712 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1713 static struct \
1714 { \
1715 const char *pszName; \
1716 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1717 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1718 BINU ## a_cBits ## _TEST_T const *paTests; \
1719 uint32_t const *pcTests; \
1720 } const s_aFuncs[] = \
1721 { \
1722 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1723 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1724 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1725 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1726 }; \
1727 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1728 { \
1729 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1730 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1731 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1732 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1733 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1734 { \
1735 /* as is (99% likely to be negative). */ \
1736 uint32_t fEfl = paTests[iTest].fEflIn; \
1737 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1738 a_Type uA = paTests[iTest].uDstIn; \
1739 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1740 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1741 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1742 if ( fEfl != paTests[iTest].fEflOut \
1743 || *g_pu ## a_cBits != uExpect \
1744 || uA != paTests[iTest].uSrcIn) \
1745 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1746 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1747 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1748 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1749 /* positive */ \
1750 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1751 uA = paTests[iTest].uDstIn; \
1752 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1753 fEfl = paTests[iTest].fEflIn; \
1754 uA = paTests[iTest].uDstIn; \
1755 *g_pu ## a_cBits = uA; \
1756 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1757 if ( fEfl != fEflExpect \
1758 || *g_pu ## a_cBits != uNew \
1759 || uA != paTests[iTest].uDstIn) \
1760 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1761 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1762 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1763 EFlagsDiff(fEfl, fEflExpect)); \
1764 } \
1765 } \
1766 } while(0)
1767 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1768 TEST_CMPXCHG(16, uint16_t, "%#06x");
1769 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1770#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1771 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1772#endif
1773}
1774
/**
 * Tests the CMPXCHG8B workers (plain and locked) with random values.
 *
 * Two rounds are run per worker, each consisting of a positive and a negative
 * test using freshly generated random old/new values and EFLAGS:
 *   - positive: memory equals the comparand, so memory must receive the new
 *     value, the comparand must be unchanged and ZF must be set (all other
 *     flags unchanged);
 *   - negative: memory holds the bitwise inverse of the comparand, so memory
 *     must be unchanged, the comparand must receive the memory value and ZF
 *     must be cleared (all other flags unchanged).
 * In both cases the new-value operand must not be modified.
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        /* Steps by two: iTest numbers the positive test, iTest + 1 the negative one. */
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);

            /* negative */
            uint64_t const uExpect = ~uOldValue; /* guaranteed to differ from the comparand */
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1831
1832static void CmpXchg16bTest(void)
1833{
1834 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1835 static struct
1836 {
1837 const char *pszName;
1838 FNIEMAIMPLCMPXCHG16B *pfn;
1839 } const s_aFuncs[] =
1840 {
1841 { "cmpxchg16b", iemAImpl_cmpxchg16b },
1842 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1843#if !defined(RT_ARCH_ARM64)
1844 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1845#endif
1846 };
1847 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1848 {
1849 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1850 continue;
1851#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1852 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1853 {
1854 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1855 continue;
1856 }
1857#endif
1858 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1859 {
1860 RTUINT128U const uOldValue = RandU128();
1861 RTUINT128U const uNewValue = RandU128();
1862
1863 /* positive test. */
1864 RTUINT128U uA, uB;
1865 uB = uNewValue;
1866 uA = uOldValue;
1867 *g_pu128 = uOldValue;
1868 uint32_t fEflIn = RandEFlags();
1869 uint32_t fEfl = fEflIn;
1870 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1871 if ( fEfl != (fEflIn | X86_EFL_ZF)
1872 || g_pu128->s.Lo != uNewValue.s.Lo
1873 || g_pu128->s.Hi != uNewValue.s.Hi
1874 || uA.s.Lo != uOldValue.s.Lo
1875 || uA.s.Hi != uOldValue.s.Hi)
1876 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1877 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1878 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1879 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1880 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1881 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1882 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1883 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1884
1885 /* negative */
1886 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1887 *g_pu128 = uExpect;
1888 uA = uOldValue;
1889 uB = uNewValue;
1890 fEfl = fEflIn = RandEFlags();
1891 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1892 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1893 || g_pu128->s.Lo != uExpect.s.Lo
1894 || g_pu128->s.Hi != uExpect.s.Hi
1895 || uA.s.Lo != uExpect.s.Lo
1896 || uA.s.Hi != uExpect.s.Hi)
1897 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1898 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1899 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1900 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1901 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1902 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1903 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1904 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1905 }
1906 }
1907}
1908
1909
1910/*
1911 * Double shifts.
1912 *
1913 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1914 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftDblU<a_cBits>Generate(), which writes @a cTests randomly
 * generated test entries to @a pOut for each sub-test in @a a_aSubTests whose
 * EFLAGS flavour is either native or matches g_idxCpuEflFlavour.  The expected
 * outputs are produced by running the sub-test's pfnNative worker.
 *
 * The shift count travels in uMisc.  It is cast to unsigned when printed so
 * the argument matches the %u conversion whatever width uMisc has in
 * @a a_TestType; the failure message in TEST_SHIFT_DBL applies the same cast.
 * The count is at most 255 (it comes from RandU8()), so the cast is lossless.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1944
1945#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1946TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1947\
1948static a_SubTestType const a_aSubTests[] = \
1949{ \
1950 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1951 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1952 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1953 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1954}; \
1955\
1956GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1957\
1958static void ShiftDblU ## a_cBits ## Test(void) \
1959{ \
1960 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1961 { \
1962 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
1963 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1964 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1965 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1966 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1967 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1968 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1969 { \
1970 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1971 { \
1972 uint32_t fEfl = paTests[iTest].fEflIn; \
1973 a_Type uDst = paTests[iTest].uDstIn; \
1974 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1975 if ( uDst != paTests[iTest].uDstOut \
1976 || fEfl != paTests[iTest].fEflOut) \
1977 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1978 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1979 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1980 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1981 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1982 else \
1983 { \
1984 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1985 *g_pfEfl = paTests[iTest].fEflIn; \
1986 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1987 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1988 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1989 } \
1990 } \
1991 pfn = a_aSubTests[iFn].pfnNative; \
1992 } \
1993 } \
1994}
1995TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1996TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1997TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1998
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the double shift test data generators for the 16, 32 and 64-bit
 * operand sizes, in that order.
 *
 * @param   pOut    The output stream handed to each generator.
 * @param   cTests  The test count handed to each generator.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGenerators[])(PRTSTREAM, uint32_t) =
    {
        ShiftDblU16Generate,
        ShiftDblU32Generate,
        ShiftDblU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2007
2008static void ShiftDblTest(void)
2009{
2010 ShiftDblU16Test();
2011 ShiftDblU32Test();
2012 ShiftDblU64Test();
2013}
2014
2015
2016/*
2017 * Unary operators.
2018 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
2020 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines UnaryU<a_cBits>Generate(), which emits @a cTests random test entries
 * to @a pOut for every entry of g_aUnaryU<a_cBits>.  uSrcIn and uMisc are
 * always written as zero; the expected output is obtained by running the
 * entry's pfn worker on the random input.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        a_SubTestType const * const pSubTest = &g_aUnaryU ## a_cBits[iFn]; \
        GenerateArrayStart(pOut, pSubTest->pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            a_TestType Test; \
            Test.uSrcIn  = 0; \
            Test.uMisc   = 0; \
            Test.fEflIn  = RandEFlags(); \
            Test.uDstIn  = RandU ## a_cBits(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            pSubTest->pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, pSubTest->pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
2047
2048#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2049TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2050static a_SubTestType const g_aUnaryU ## a_cBits [] = \
2051{ \
2052 ENTRY(inc_u ## a_cBits), \
2053 ENTRY(inc_u ## a_cBits ## _locked), \
2054 ENTRY(dec_u ## a_cBits), \
2055 ENTRY(dec_u ## a_cBits ## _locked), \
2056 ENTRY(not_u ## a_cBits), \
2057 ENTRY(not_u ## a_cBits ## _locked), \
2058 ENTRY(neg_u ## a_cBits), \
2059 ENTRY(neg_u ## a_cBits ## _locked), \
2060}; \
2061\
2062GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2063\
2064static void UnaryU ## a_cBits ## Test(void) \
2065{ \
2066 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2067 { \
2068 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
2069 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2070 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
2071 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2072 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2073 { \
2074 uint32_t fEfl = paTests[iTest].fEflIn; \
2075 a_Type uDst = paTests[iTest].uDstIn; \
2076 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2077 if ( uDst != paTests[iTest].uDstOut \
2078 || fEfl != paTests[iTest].fEflOut) \
2079 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2080 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2081 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2082 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2083 else \
2084 { \
2085 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2086 *g_pfEfl = paTests[iTest].fEflIn; \
2087 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2088 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2089 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2090 } \
2091 } \
2092 } \
2093}
2094TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2095TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2096TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2097TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2098
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the unary operator test data generators for the 8, 16, 32 and 64-bit
 * operand sizes, in that order.
 *
 * @param   pOut    The output stream handed to each generator.
 * @param   cTests  The test count handed to each generator.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGenerators[])(PRTSTREAM, uint32_t) =
    {
        UnaryU8Generate,
        UnaryU16Generate,
        UnaryU32Generate,
        UnaryU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2108
2109static void UnaryTest(void)
2110{
2111 UnaryU8Test();
2112 UnaryU16Test();
2113 UnaryU32Test();
2114 UnaryU64Test();
2115}
2116
2117
2118/*
2119 * Shifts.
2120 *
2121 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2122 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftU<a_cBits>Generate(), which writes test entries to @a pOut for
 * each sub-test in @a a_aSubTests whose EFLAGS flavour is either native or
 * matches g_idxCpuEflFlavour.
 *
 * Two entries are written per iteration: one with random input EFLAGS and a
 * second ("b") one reusing the same destination and shift count but with the
 * live EFLAGS bits inverted.  Expected outputs are produced by running the
 * sub-test's pfnNative worker.
 *
 * The shift count travels in uMisc.  It is cast to unsigned when printed so
 * the argument matches the %u conversion whatever width uMisc has in
 * @a a_TestType.  The count is at most 255 (it comes from RandU8()), so the
 * cast is lossless.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Second entry: same operands, inverted live EFLAGS bits. */ \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2159
2160#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2161TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2162static a_SubTestType const a_aSubTests[] = \
2163{ \
2164 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2165 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2166 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2167 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2168 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2169 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2170 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2171 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2172 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2173 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2174 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2175 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2176 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2177 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2178}; \
2179\
2180GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2181\
2182static void ShiftU ## a_cBits ## Test(void) \
2183{ \
2184 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2185 { \
2186 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2187 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2188 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2189 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2190 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2191 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2192 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2193 { \
2194 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2195 { \
2196 uint32_t fEfl = paTests[iTest].fEflIn; \
2197 a_Type uDst = paTests[iTest].uDstIn; \
2198 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2199 if ( uDst != paTests[iTest].uDstOut \
2200 || fEfl != paTests[iTest].fEflOut ) \
2201 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2202 iTest, iVar == 0 ? "" : "/n", \
2203 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2204 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2205 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2206 else \
2207 { \
2208 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2209 *g_pfEfl = paTests[iTest].fEflIn; \
2210 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2211 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2212 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2213 } \
2214 } \
2215 pfn = a_aSubTests[iFn].pfnNative; \
2216 } \
2217 } \
2218}
2219TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2220TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2221TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2222TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2223
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the shift/rotate test data generators for the 8, 16, 32 and 64-bit
 * operand sizes, in that order.
 *
 * @param   pOut    The output stream handed to each generator.
 * @param   cTests  The test count handed to each generator.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGenerators[])(PRTSTREAM, uint32_t) =
    {
        ShiftU8Generate,
        ShiftU16Generate,
        ShiftU32Generate,
        ShiftU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2233
2234static void ShiftTest(void)
2235{
2236 ShiftU8Test();
2237 ShiftU16Test();
2238 ShiftU32Test();
2239 ShiftU64Test();
2240}
2241
2242
2243/*
2244 * Multiplication and division.
2245 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2247 * Note! Currently ignoring undefined bits.
2248 */
2249
2250/* U8 */
2251TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2252static INT_MULDIV_U8_T const g_aMulDivU8[] =
2253{
2254 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2255 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2256 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2257 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2258 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2259 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2260 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2261 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2262 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2263 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2264};
2265
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes test data for the 8-bit multiplication and division workers.
 *
 * Entries of g_aMulDivU8 whose EFLAGS flavour is neither native nor equal to
 * g_idxCpuEflFlavour are skipped.  For the remaining ones @a cTests random
 * inputs are run through the pfnNative worker and written out together with
 * the resulting EFLAGS, destination value and return code.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test entries to generate per sub-test.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        INT_MULDIV_U8_T const * const pSubTest = &g_aMulDivU8[iFn];
        bool const fApplicable = pSubTest->idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
                              || pSubTest->idxCpuEflFlavour == g_idxCpuEflFlavour;
        if (!fApplicable)
            continue;

        GenerateArrayStart(pOut, pSubTest->pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn  = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn  = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn  = RandU8Src(iTest);
            Test.rc      = pSubTest->pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, pSubTest->pszName);
    }
}
#endif
2291
2292static void MulDivU8Test(void)
2293{
2294 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2295 {
2296 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2297 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2298 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2299 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2300 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2301 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2302 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2303 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2304 {
2305 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2306 {
2307 uint32_t fEfl = paTests[iTest].fEflIn;
2308 uint16_t uDst = paTests[iTest].uDstIn;
2309 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2310 if ( uDst != paTests[iTest].uDstOut
2311 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2312 || rc != paTests[iTest].rc)
2313 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2314 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2315 "%sexpected %#08x %#06RX16 %d%s\n",
2316 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2317 iVar ? " " : "", fEfl, uDst, rc,
2318 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2319 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2320 else
2321 {
2322 *g_pu16 = paTests[iTest].uDstIn;
2323 *g_pfEfl = paTests[iTest].fEflIn;
2324 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2325 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2326 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2327 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2328 }
2329 }
2330 pfn = g_aMulDivU8[iFn].pfnNative;
2331 }
2332 }
2333}
2334
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines MulDivU<a_cBits>Generate(), which writes @a cTests randomly
 * generated test entries to @a pOut for each sub-test in @a a_aSubTests whose
 * EFLAGS flavour is either native or matches g_idxCpuEflFlavour.  The expected
 * outputs (EFLAGS, both destination values and the return code) are produced
 * by running the sub-test's pfnNative worker.
 *
 * The generated function has internal linkage, like the other generator
 * functions in this file; it is only called from MulDivGenerate().
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDst1In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out  = Test.uDst1In; \
            Test.uDst2In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out  = Test.uDst2In; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.rc        = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2366
2367#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2368TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2369static a_SubTestType const a_aSubTests [] = \
2370{ \
2371 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2372 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2373 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2374 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2375 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2376 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2377 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2378 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2379}; \
2380\
2381GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2382\
2383static void MulDivU ## a_cBits ## Test(void) \
2384{ \
2385 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2386 { \
2387 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2388 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2389 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2390 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2391 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2392 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2393 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2394 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2395 { \
2396 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2397 { \
2398 uint32_t fEfl = paTests[iTest].fEflIn; \
2399 a_Type uDst1 = paTests[iTest].uDst1In; \
2400 a_Type uDst2 = paTests[iTest].uDst2In; \
2401 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2402 if ( uDst1 != paTests[iTest].uDst1Out \
2403 || uDst2 != paTests[iTest].uDst2Out \
2404 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2405 || rc != paTests[iTest].rc) \
2406 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2407 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2408 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2409 iTest, iVar == 0 ? "" : "/n", \
2410 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2411 fEfl, uDst1, uDst2, rc, \
2412 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2413 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2414 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2415 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2416 else \
2417 { \
2418 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2419 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2420 *g_pfEfl = paTests[iTest].fEflIn; \
2421 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2422 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2423 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2424 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2425 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2426 } \
2427 } \
2428 pfn = a_aSubTests[iFn].pfnNative; \
2429 } \
2430 } \
2431}
2432TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2433TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2434TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2435
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the multiplication/division test data generators for the 8, 16, 32 and
 * 64-bit operand sizes, in that order.
 *
 * @param   pOut    The output stream handed to each generator.
 * @param   cTests  The test count handed to each generator.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGenerators[])(PRTSTREAM, uint32_t) =
    {
        MulDivU8Generate,
        MulDivU16Generate,
        MulDivU32Generate,
        MulDivU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2445
2446static void MulDivTest(void)
2447{
2448 MulDivU8Test();
2449 MulDivU16Test();
2450 MulDivU32Test();
2451 MulDivU64Test();
2452}
2453
2454
2455/*
2456 * BSWAP
2457 */
2458static void BswapTest(void)
2459{
2460 if (SubTestAndCheckIfEnabled("bswap_u16"))
2461 {
2462 *g_pu32 = UINT32_C(0x12345678);
2463 iemAImpl_bswap_u16(g_pu32);
2464#if 0
2465 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2466#else
2467 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2468#endif
2469 *g_pu32 = UINT32_C(0xffff1122);
2470 iemAImpl_bswap_u16(g_pu32);
2471#if 0
2472 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2473#else
2474 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2475#endif
2476 }
2477
2478 if (SubTestAndCheckIfEnabled("bswap_u32"))
2479 {
2480 *g_pu32 = UINT32_C(0x12345678);
2481 iemAImpl_bswap_u32(g_pu32);
2482 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2483 }
2484
2485 if (SubTestAndCheckIfEnabled("bswap_u64"))
2486 {
2487 *g_pu64 = UINT64_C(0x0123456789abcdef);
2488 iemAImpl_bswap_u64(g_pu64);
2489 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2490 }
2491}
2492
2493
2494
2495/*********************************************************************************************************************************
2496* Floating point (x87 style) *
2497*********************************************************************************************************************************/
2498
2499/*
2500 * FPU constant loading.
2501 */
2502TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2503
2504static const FPU_LD_CONST_T g_aFpuLdConst[] =
2505{
2506 ENTRY(fld1),
2507 ENTRY(fldl2t),
2508 ENTRY(fldl2e),
2509 ENTRY(fldpi),
2510 ENTRY(fldlg2),
2511 ENTRY(fldln2),
2512 ENTRY(fldz),
2513};
2514
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes test data for the FPU constant loading workers to @a pOut.
 *
 * Entries are produced in groups of four: a random FCW/FSW pair is drawn per
 * group and the pfn worker is then run once for each of the four rounding
 * control values, so the number of entries per sub-test is @a cTests rounded
 * up to a multiple of four.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test entries to generate per sub-test.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        FPU_LD_CONST_T const * const pSubTest = &g_aFpuLdConst[iFn];
        GenerateArrayStart(pOut, pSubTest->pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            /* Same control/status words, all four rounding modes. */
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                pSubTest->pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, pSubTest->pszName);
    }
}
#endif
2541
2542static void FpuLoadConstTest(void)
2543{
2544 /*
2545 * Inputs:
2546 * - FSW: C0, C1, C2, C3
2547 * - FCW: Exception masks, Precision control, Rounding control.
2548 *
2549 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2550 */
2551 X86FXSTATE State;
2552 RT_ZERO(State);
2553 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2554 {
2555 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2556 continue;
2557
2558 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2559 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2560 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2561 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2562 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2563 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2564 {
2565 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2566 {
2567 State.FCW = paTests[iTest].fFcw;
2568 State.FSW = paTests[iTest].fFswIn;
2569 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2570 pfn(&State, &Res);
2571 if ( Res.FSW != paTests[iTest].fFswOut
2572 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2573 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2574 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2575 Res.FSW, FormatR80(&Res.r80Result),
2576 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2577 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2578 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2579 FormatFcw(paTests[iTest].fFcw) );
2580 }
2581 pfn = g_aFpuLdConst[iFn].pfnNative;
2582 }
2583 }
2584}
2585
2586
2587/*
2588 * Load floating point values from memory.
2589 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdR<a_cBits>Generate(), which writes test data for every entry
 * of @a a_aSubTests to @a pOut.
 *
 * For each of the @a cTests iterations a random FCW/FSW pair and an input
 * value are drawn, and the pfn worker is run once for each of the four
 * rounding control values, giving four entries per iteration.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        const char * const pszName = a_aSubTests[iFn].pszName; \
        GenerateArrayStart(pOut, pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            /* Same input and control/status words, all four rounding modes. */ \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2621
2622#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2623typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2624typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2625TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2626\
2627static const a_SubTestType a_aSubTests[] = \
2628{ \
2629 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2630}; \
2631GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2632\
2633static void FpuLdR ## a_cBits ## Test(void) \
2634{ \
2635 X86FXSTATE State; \
2636 RT_ZERO(State); \
2637 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2638 { \
2639 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2640 \
2641 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2642 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2643 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2644 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2645 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2646 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2647 { \
2648 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2649 { \
2650 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2651 State.FCW = paTests[iTest].fFcw; \
2652 State.FSW = paTests[iTest].fFswIn; \
2653 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2654 pfn(&State, &Res, &InVal); \
2655 if ( Res.FSW != paTests[iTest].fFswOut \
2656 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2657 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2658 "%s -> fsw=%#06x %s\n" \
2659 "%s expected %#06x %s%s%s (%s)\n", \
2660 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2661 FormatR ## a_cBits(&paTests[iTest].InVal), \
2662 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2663 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2664 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2665 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2666 FormatFcw(paTests[iTest].fFcw) ); \
2667 } \
2668 pfn = a_aSubTests[iFn].pfnNative; \
2669 } \
2670 } \
2671}
2672
2673TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2674TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2675TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2676
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the floating point load-from-memory test data for all source widths.
 *
 * The widths are handled in the fixed order 80, 64, 32 bits.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of random inputs, passed on to each width generator.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    /* 80-bit source. */
    FpuLdR80Generate(pOut, cTests);

    /* 64-bit source. */
    FpuLdR64Generate(pOut, cTests);

    /* 32-bit source. */
    FpuLdR32Generate(pOut, cTests);
}
#endif
2685
2686static void FpuLdMemTest(void)
2687{
2688 FpuLdR80Test();
2689 FpuLdR64Test();
2690 FpuLdR32Test();
2691}
2692
2693
2694/*
2695 * Load integer values from memory.
2696 */
/**
 * Test data generator for the integer load-from-memory workers.
 *
 * In generator builds GEN_FPU_LOAD_INT expands to FpuLdI<a_cBits>Generate(),
 * which writes one a_TestType array per entry in a_aSubTests to pOut.  Each of
 * the cTests random integers is run through the worker once per value of the
 * FCW rounding control field (four runs), and the integer is printed with the
 * a_szFmtIn format.
 *
 * In non-generator builds the macro expands to nothing.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
        { \
            /* Random draws happen in the order: FCW, FSW, source value. */ \
            FxState.FCW = RandFcw(); \
            FxState.FSW = RandFsw(); \
            a_iTypeIn iSrcVal = (a_iTypeIn)RandU ## a_cBits ## Src(idxTest); \
            \
            /* Same input and status word for all four rounding control values. */ \
            for (uint16_t idxRc = 0; idxRc < 4; idxRc++) \
            { \
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (idxRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FxState, &FpuRes, &iSrcVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             FxState.FCW, FxState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result), \
                             iSrcVal, idxTest, idxRc); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2727
2728#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
2729typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
2730typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
2731TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
2732\
2733static const a_SubTestType a_aSubTests[] = \
2734{ \
2735 ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
2736}; \
2737GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2738\
2739static void FpuLdI ## a_cBits ## Test(void) \
2740{ \
2741 X86FXSTATE State; \
2742 RT_ZERO(State); \
2743 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2744 { \
2745 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2746 \
2747 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2748 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2749 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2750 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2751 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2752 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2753 { \
2754 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2755 { \
2756 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
2757 State.FCW = paTests[iTest].fFcw; \
2758 State.FSW = paTests[iTest].fFswIn; \
2759 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2760 pfn(&State, &Res, &iInVal); \
2761 if ( Res.FSW != paTests[iTest].fFswOut \
2762 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2763 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
2764 "%s -> fsw=%#06x %s\n" \
2765 "%s expected %#06x %s%s%s (%s)\n", \
2766 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
2767 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2768 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2769 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2770 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2771 FormatFcw(paTests[iTest].fFcw) ); \
2772 } \
2773 pfn = a_aSubTests[iFn].pfnNative; \
2774 } \
2775 } \
2776}
2777
2778TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2779TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2780TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2781
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the integer load-from-memory test data for all integer widths.
 *
 * The widths are handled in the fixed order 64, 32, 16 bits.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of random inputs, passed on to each width generator.
 */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    /* 64-bit integers. */
    FpuLdI64Generate(pOut, cTests);

    /* 32-bit integers. */
    FpuLdI32Generate(pOut, cTests);

    /* 16-bit integers. */
    FpuLdI16Generate(pOut, cTests);
}
#endif
2790
2791static void FpuLdIntTest(void)
2792{
2793 FpuLdI64Test();
2794 FpuLdI32Test();
2795 FpuLdI16Test();
2796}
2797
2798
2799/*
2800 * Load binary coded decimal values from memory.
2801 */
/* Worker signature for the packed-BCD load: (FPU state, result, pointer to the BCD input). */
2802 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
/* Pointer to a packed-BCD load worker. */
2803 typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
/* Declares the sub-test descriptor type FPU_LD_D80_T for FPU_D80_IN_TEST_T rows. */
2804 TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2805
/* Sub-test table iterated by FpuLdD80Generate() and FpuLdD80Test(); it has a single entry. */
2806 static const FPU_LD_D80_T g_aFpuLdD80[] =
2807 {
2808 ENTRY(fld_r80_from_d80)
2809 };
2810
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the packed-BCD load test data (FPU_D80_IN_TEST_T rows).
 *
 * For every entry in g_aFpuLdD80 one array is emitted.  Each of the cTests
 * random BCD inputs is run through the worker once per value of the FCW
 * rounding control field (four runs).
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of random inputs per sub-test.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuLdD80); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[idxFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++)
        {
            /* Random draws happen in the order: FCW, FSW, source value. */
            FxState.FCW = RandFcw();
            FxState.FSW = RandFsw();
            RTPBCD80U SrcVal = RandD80Src(idxTest);

            /* Same input and status word for all four rounding control values. */
            for (uint16_t idxRc = 0; idxRc < 4; idxRc++)
            {
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (idxRc << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[idxFn].pfn(&FxState, &FpuRes, &SrcVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             FxState.FCW, FxState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result),
                             GenFormatD80(&SrcVal), idxTest, idxRc);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[idxFn].pszName);
    }
}
#endif
2839
2840static void FpuLdD80Test(void)
2841{
2842 X86FXSTATE State;
2843 RT_ZERO(State);
2844 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2845 {
2846 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2847 continue;
2848
2849 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2850 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2851 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2852 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2853 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2854 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2855 {
2856 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2857 {
2858 RTPBCD80U const InVal = paTests[iTest].InVal;
2859 State.FCW = paTests[iTest].fFcw;
2860 State.FSW = paTests[iTest].fFswIn;
2861 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2862 pfn(&State, &Res, &InVal);
2863 if ( Res.FSW != paTests[iTest].fFswOut
2864 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2865 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2866 "%s -> fsw=%#06x %s\n"
2867 "%s expected %#06x %s%s%s (%s)\n",
2868 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2869 FormatD80(&paTests[iTest].InVal),
2870 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2871 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2872 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2873 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2874 FormatFcw(paTests[iTest].fFcw) );
2875 }
2876 pfn = g_aFpuLdD80[iFn].pfnNative;
2877 }
2878 }
2879}
2880
2881
2882/*
2883 * Store floating point values to memory.
2884 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs FpuStR32Generate() runs after its random inputs. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS),
    /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS),
};

/** Hand-picked 80-bit inputs FpuStR64Generate() runs after its random inputs. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS),
    /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS),
    /* random example for this */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS),
};

/** Hand-picked 80-bit inputs FpuStR80Generate() runs after its random inputs. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    /* placeholder */
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),
};

/**
 * Test data generator for the floating point store-to-memory workers.
 *
 * Expands to FpuStR<a_cBits>Generate(), which writes one a_TestType array per
 * entry in a_aSubTests to pOut.  The inputs are cTests random values followed
 * by the entries of g_aFpuStR<a_cBits>Specials.  Every input is run with each
 * of the four rounding control values combined with each of the eight
 * settings of the three FCW mask bits starting at X86_FCW_OM_BIT.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE     FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            /* Random draws happen in the order: FCW, FSW, (random) source value. */ \
            uint16_t const   fFcwBase = RandFcw(); \
            FxState.FSW = RandFsw(); \
            RTFLOAT80U const SrcVal   = idxTest < cTests \
                                      ? RandR80Src(idxTest, a_cBits) \
                                      : g_aFpuStR ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t idxRc = 0; idxRc < 4; idxRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t fFswRes = 0; \
                    a_rdType DstVal; \
                    RT_ZERO(DstVal); \
                    /* Pre-fill the destination with 0xfe bytes before the call. */ \
                    memset(&DstVal, 0xfe, sizeof(DstVal)); \
                    FxState.FCW = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                | (idxRc << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) FxState.FCW ^= X86_FCW_MASK_ALL;*/ \
                    FxState.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[idxFn].pfn(&FxState, &fFswRes, &DstVal, &SrcVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FxState.FCW, FxState.FSW, fFswRes, GenFormatR80(&SrcVal), \
                                 GenFormatR ## a_cBits(&DstVal), idxTest, idxRc, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2948
2949#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
2950typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
2951 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
2952typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
2953TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
2954\
2955static const a_SubTestType a_aSubTests[] = \
2956{ \
2957 ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
2958}; \
2959GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2960\
2961static void FpuStR ## a_cBits ## Test(void) \
2962{ \
2963 X86FXSTATE State; \
2964 RT_ZERO(State); \
2965 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2966 { \
2967 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2968 \
2969 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2970 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2971 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2972 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2973 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2974 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2975 { \
2976 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2977 { \
2978 RTFLOAT80U const InVal = paTests[iTest].InVal; \
2979 uint16_t uFswOut = 0; \
2980 a_rdType OutVal; \
2981 RT_ZERO(OutVal); \
2982 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2983 State.FCW = paTests[iTest].fFcw; \
2984 State.FSW = paTests[iTest].fFswIn; \
2985 pfn(&State, &uFswOut, &OutVal, &InVal); \
2986 if ( uFswOut != paTests[iTest].fFswOut \
2987 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
2988 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2989 "%s -> fsw=%#06x %s\n" \
2990 "%s expected %#06x %s%s%s (%s)\n", \
2991 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2992 FormatR80(&paTests[iTest].InVal), \
2993 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
2994 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
2995 FswDiff(uFswOut, paTests[iTest].fFswOut), \
2996 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
2997 FormatFcw(paTests[iTest].fFcw) ); \
2998 } \
2999 pfn = a_aSubTests[iFn].pfnNative; \
3000 } \
3001 } \
3002}
3003
3004TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3005TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3006TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3007
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the floating point store-to-memory test data for all destination widths.
 *
 * The widths are handled in the fixed order 80, 64, 32 bits.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of random inputs, passed on to each width generator.
 */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    /* 80-bit destination. */
    FpuStR80Generate(pOut, cTests);

    /* 64-bit destination. */
    FpuStR64Generate(pOut, cTests);

    /* 32-bit destination. */
    FpuStR32Generate(pOut, cTests);
}
#endif
3016
3017static void FpuStMemTest(void)
3018{
3019 FpuStR80Test();
3020 FpuStR64Test();
3021 FpuStR32Test();
3022}
3023
3024
3025/*
3026 * Store integer values to memory or register.
3027 */
/* Sub-test descriptor types for the 16-, 32- and 64-bit integer store tests. */
3028 TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3029 TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3030 TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3031
/* 16-bit integer store sub-tests.  Unlike the 32- and 64-bit tables, fistt_r80_to_i16
   is listed twice, once through ENTRY_AMD and once through ENTRY_INTEL. */
3032 static const FPU_ST_I16_T g_aFpuStI16[] =
3033 {
3034 ENTRY(fist_r80_to_i16),
3035 ENTRY_AMD( fistt_r80_to_i16, 0),
3036 ENTRY_INTEL(fistt_r80_to_i16, 0),
3037 };
/* 32-bit integer store sub-tests. */
3038 static const FPU_ST_I32_T g_aFpuStI32[] =
3039 {
3040 ENTRY(fist_r80_to_i32),
3041 ENTRY(fistt_r80_to_i32),
3042 };
/* 64-bit integer store sub-tests. */
3043 static const FPU_ST_I64_T g_aFpuStI64[] =
3044 {
3045 ENTRY(fist_r80_to_i64),
3046 ENTRY(fistt_r80_to_i64),
3047 };
3048
3049#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked 80-bit inputs for the 16-bit integer store generator.  GEN_FPU_STORE_INT
   runs these after the random inputs (test indexes cTests and up). */
3050 static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
3051 {
3052 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
3053 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
3054 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3055 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3056 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3057 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3058 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3059 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3060 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3061 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3062 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3063 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3064 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3065 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3066 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
3067 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3068 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3069 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3070 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3071 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3072 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3073 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3074 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3075 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3076 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3077 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3078 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3079 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3080 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3081 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3082 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3083 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3084 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3085 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3086 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3087 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3088 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3089 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3090 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3091 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3092 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3093 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3094 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3095 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3096 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3097 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3098 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3099 };
/* Hand-picked 80-bit inputs for the 32-bit integer store generator, run after the random inputs. */
3100 static const RTFLOAT80U g_aFpuStI32Specials[] =
3101 {
3102 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3103 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3104 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3105 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3106 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3107 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3108 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3109 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3110 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3111 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3112 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3113 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3114 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3115 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3116 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3117 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3118 };
/* Hand-picked 80-bit inputs for the 64-bit integer store generator, run after the random inputs. */
3119 static const RTFLOAT80U g_aFpuStI64Specials[] =
3120 {
3121 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3122 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3123 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3124 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3125 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3126 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3127 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3128 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3129 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3130 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3131 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3132 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3133 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3134 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3135 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3136 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3137 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3138 };
3139
3140# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3141static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
3142{ \
3143 X86FXSTATE State; \
3144 RT_ZERO(State); \
3145 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3146 { \
3147 PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
3148 ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
3149 PRTSTREAM pOutFn = pOut; \
3150 if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
3151 { \
3152 if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
3153 continue; \
3154 pOutFn = pOutCpu; \
3155 } \
3156 \
3157 GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
3158 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
3159 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3160 { \
3161 uint16_t const fFcw = RandFcw(); \
3162 State.FSW = RandFsw(); \
3163 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
3164 : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
3165 \
3166 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3167 { \
3168 /* PC doesn't influence these, so leave as is. */ \
3169 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3170 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3171 { \
3172 uint16_t uFswOut = 0; \
3173 a_iType iOutVal = ~(a_iType)2; \
3174 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3175 | (iRounding << X86_FCW_RC_SHIFT); \
3176 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3177 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3178 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3179 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
3180 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
3181 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
3182 } \
3183 } \
3184 } \
3185 GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
3186 } \
3187}
3188#else
3189# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3190#endif
3191
3192#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3193GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3194\
3195static void FpuStI ## a_cBits ## Test(void) \
3196{ \
3197 X86FXSTATE State; \
3198 RT_ZERO(State); \
3199 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3200 { \
3201 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3202 \
3203 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3204 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3205 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3206 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3207 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3208 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3209 { \
3210 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3211 { \
3212 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3213 uint16_t uFswOut = 0; \
3214 a_iType iOutVal = ~(a_iType)2; \
3215 State.FCW = paTests[iTest].fFcw; \
3216 State.FSW = paTests[iTest].fFswIn; \
3217 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3218 if ( uFswOut != paTests[iTest].fFswOut \
3219 || iOutVal != paTests[iTest].iOutVal) \
3220 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3221 "%s -> fsw=%#06x " a_szFmt "\n" \
3222 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3223 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3224 FormatR80(&paTests[iTest].InVal), \
3225 iVar ? " " : "", uFswOut, iOutVal, \
3226 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3227 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3228 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3229 } \
3230 pfn = a_aSubTests[iFn].pfnNative; \
3231 } \
3232 } \
3233}
3234
3235//fistt_r80_to_i16 diffs for AMD, of course :-)
3236
3237TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3238TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3239TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3240
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the integer store test data for all integer widths.
 *
 * The widths are handled in the fixed order 64, 32, 16 bits.
 *
 * @param   pOut     First output stream, passed on to each width generator.
 * @param   pOutCpu  Second output stream, passed on to each width generator.
 * @param   cTests   Number of random inputs, passed on to each width generator.
 */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* 64-bit integers. */
    FpuStI64Generate(pOut, pOutCpu, cTests);

    /* 32-bit integers. */
    FpuStI32Generate(pOut, pOutCpu, cTests);

    /* 16-bit integers. */
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3249
3250static void FpuStIntTest(void)
3251{
3252 FpuStI64Test();
3253 FpuStI32Test();
3254 FpuStI16Test();
3255}
3256
3257
3258/*
3259 * Store as packed BCD value (memory).
3260 */
/* Worker signature for the packed-BCD store: (FPU state, output FSW, BCD destination, 80-bit input). */
3261 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
/* Pointer to a packed-BCD store worker. */
3262 typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
/* Declares the sub-test descriptor type FPU_ST_D80_T for FPU_ST_D80_TEST_T rows. */
3263 TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3264
/* Sub-test table iterated by FpuStD80Generate() and FpuStD80Test(); it has a single entry. */
3265 static const FPU_ST_D80_T g_aFpuStD80[] =
3266 {
3267 ENTRY(fst_r80_to_d80),
3268 };
3269
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the packed-BCD store test data (FPU_ST_D80_TEST_T rows).
 *
 * For every entry in g_aFpuStD80 one array is emitted.  The inputs are cTests
 * random values followed by a fixed list of boundary values.  Every input is
 * run with each of the four rounding control values combined with each of the
 * eight settings of the three FCW mask bits starting at X86_FCW_OM_BIT.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of random inputs per sub-test.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Fixed boundary inputs, run after the random ones. */
    static RTFLOAT80U const s_aBoundaryVals[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuStD80); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[idxFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t idxTest = 0; idxTest < cTests + RT_ELEMENTS(s_aBoundaryVals); idxTest++)
        {
            /* Random draws happen in the order: FCW, FSW, (random) source value. */
            uint16_t const   fFcwBase = RandFcw();
            FxState.FSW = RandFsw();
            RTFLOAT80U const SrcVal   = idxTest < cTests
                                      ? RandR80Src(idxTest, 59, true)
                                      : s_aBoundaryVals[idxTest - cTests];

            for (uint16_t idxRc = 0; idxRc < 4; idxRc++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  fFswRes = 0;
                    RTPBCD80U DstVal  = RTPBCD80U_INIT_ZERO(0);
                    FxState.FCW = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                                | (idxRc << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) FxState.FCW ^= X86_FCW_MASK_ALL;*/
                    FxState.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[idxFn].pfn(&FxState, &fFswRes, &DstVal, &SrcVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 FxState.FCW, FxState.FSW, fFswRes, GenFormatR80(&SrcVal),
                                 GenFormatD80(&DstVal), idxTest, idxRc, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[idxFn].pszName);
    }
}
#endif
3323
3324
3325static void FpuStD80Test(void)
3326{
3327 X86FXSTATE State;
3328 RT_ZERO(State);
3329 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3330 {
3331 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3332 continue;
3333
3334 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3335 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3336 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3337 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3338 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3339 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3340 {
3341 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3342 {
3343 RTFLOAT80U const InVal = paTests[iTest].InVal;
3344 uint16_t uFswOut = 0;
3345 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3346 State.FCW = paTests[iTest].fFcw;
3347 State.FSW = paTests[iTest].fFswIn;
3348 pfn(&State, &uFswOut, &OutVal, &InVal);
3349 if ( uFswOut != paTests[iTest].fFswOut
3350 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3351 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3352 "%s -> fsw=%#06x %s\n"
3353 "%s expected %#06x %s%s%s (%s)\n",
3354 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3355 FormatR80(&paTests[iTest].InVal),
3356 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3357 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3358 FswDiff(uFswOut, paTests[iTest].fFswOut),
3359 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3360 FormatFcw(paTests[iTest].fFcw) );
3361 }
3362 pfn = g_aFpuStD80[iFn].pfnNative;
3363 }
3364 }
3365}
3366
3367
3368
3369/*********************************************************************************************************************************
3370* x87 FPU Binary Operations *
3371*********************************************************************************************************************************/
3372
3373/*
3374 * Binary FPU operations on two 80-bit floating point values.
3375 */
/* Sub-test descriptor type for the r80-by-r80 workers, declared via TYPEDEF_SUBTEST_TYPE. */
3376TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
/* Hint passed as the extra ENTRY_EX argument for fprem/fprem1; the generator
   compares the entry's uExtra member against it to pick special inputs. */
3377enum { kFpuBinaryHint_fprem = 1, };
3378
/* The r80-by-r80 binary workers.  The generator and the test function both
   walk this table in order.  The trailing entries exist in separate AMD and
   Intel flavours. */
3379static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3380{
3381 ENTRY(fadd_r80_by_r80),
3382 ENTRY(fsub_r80_by_r80),
3383 ENTRY(fsubr_r80_by_r80),
3384 ENTRY(fmul_r80_by_r80),
3385 ENTRY(fdiv_r80_by_r80),
3386 ENTRY(fdivr_r80_by_r80),
3387 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3388 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3389 ENTRY(fscale_r80_by_r80),
3390 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3391 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3392 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3393 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3394 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3395 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3396};
3397
3398#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data arrays for the workers in g_aFpuBinaryR80.
 *
 * For each entry the native worker is used when one is present, otherwise the
 * plain pfn worker.  Entries whose idxCpuEflFlavour is not
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are written to pOutCpu, and only when the
 * flavour matches g_idxCpuEflFlavour; all other entries are written to pOut.
 *
 * Inputs are cTests random operand pairs followed by the s_aSpecials pairs.
 * Each pair is run for all four rounding modes and all four precision settings,
 * first with all exceptions masked, then with all unmasked, and then with
 * further mask combinations derived from the exceptions actually raised.
 *
 * @param   pOut        Output stream for the common arrays.
 * @param   pOutCpu     Output stream for the CPU flavour specific arrays.
 * @param   cTests      Number of random input pairs (raised to at least 192).
 */
3399static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3400{
3401 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3402
3403 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3404 {
3405 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3406 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3407 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3408 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3409 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3410 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3411 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3412 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3413 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3414 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3415 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3416 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3417 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3418 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3419 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3420 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3421 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3422 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3423 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3424 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3425 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3426 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3427 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3428 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3429 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3430 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3431 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3432 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3433 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3434 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3435 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3436 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3437 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3438 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3439 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3440 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3441 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3442 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3443 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3444 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3445 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
3446 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3447 /* fscale: Negative variants for the essentials of the above. */
3448 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: -max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3449 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3450 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: -max * 2^24577 */
3451 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3452 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: -min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3453 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3454 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: -min * 2^-24577 */
3455 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3456 /* fscale: Some fun with denormals and pseudo-denormals. */
3457 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^-4 */
3458 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3459 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^+1 */
3460 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3461 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: denormal * 2^+0 */
3462 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: tiny denormal * 2^-4 => underflow */
3463 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3464 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-denormal number * 2^+0. */
3465 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* negative pseudo-denormal number * 2^+0. */
3466 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^-4 */
3467 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3468 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+1 */
3469 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3470 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+2 */
3471 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3472 };
3473
3474 X86FXSTATE State;
3475 RT_ZERO(State);
/* Minimum number of random pairs where both operands are normal, and (for
   the fprem hinted entries) the minimum number of pairs in the target
   exponent range. */
3476 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3477 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3478 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3479 {
3480 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3481 PRTSTREAM pOutFn = pOut;
3482 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3483 {
/* Flavour specific entry: skip it unless it matches the selected flavour. */
3484 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3485 continue;
3486 pOutFn = pOutCpu;
3487 }
3488
3489 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3490 uint32_t iTestOutput = 0;
3491 uint32_t cNormalInputPairs = 0;
3492 uint32_t cTargetRangeInputs = 0;
3493 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3494 {
3495 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3496 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3497 bool fTargetRange = false;
3498 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3499 {
3500 cNormalInputPairs++;
/* Unsigned arithmetic: true when the exponent difference lies in the 64..576 range. */
3501 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3502 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3503 cTargetRangeInputs += fTargetRange = true;
3504 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3505 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3506 { /* The aim is two values with an exponent difference of at least 64 (picked from 64..512 here) so we can do the whole sequence. */
3507 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3508 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3509 cTargetRangeInputs += fTargetRange = true;
3510 }
3511 }
3512 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3513 {
/* Running short of normal pairs near the end of the random inputs: redo this index with fresh values. */
3514 iTest -= 1;
3515 continue;
3516 }
3517
3518 uint16_t const fFcwExtra = 0;
3519 uint16_t const fFcw = RandFcw();
3520 State.FSW = RandFsw();
3521
3522 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3523 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3524 {
/* Pass 1: all exceptions masked. */
3525 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3526 | (iRounding << X86_FCW_RC_SHIFT)
3527 | (iPrecision << X86_FCW_PC_SHIFT)
3528 | X86_FCW_MASK_ALL;
3529 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3530 pfn(&State, &ResM, &InVal1, &InVal2);
3531 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3532 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3533 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3534
/* Pass 2: all exceptions unmasked. */
3535 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3536 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3537 pfn(&State, &ResU, &InVal1, &InVal2);
3538 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3539 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3540 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3541
/* Pass 3: mask exactly the exceptions raised so far (X86_FSW_SF excluded). */
3542 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3543 if (fXcpt)
3544 {
3545 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3546 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3547 pfn(&State, &Res1, &InVal1, &InVal2);
3548 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3549 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3550 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
/* If that run raised an exception not yet in fXcpt, add it to the mask and run again. */
3551 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3552 {
3553 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3554 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3555 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3556 pfn(&State, &Res2, &InVal1, &InVal2);
3557 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3558 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3559 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3560 }
/* With more than one exception involved, also unmask each of them individually. */
3561 if (!RT_IS_POWER_OF_TWO(fXcpt))
3562 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3563 if (fUnmasked & fXcpt)
3564 {
3565 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3566 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3567 pfn(&State, &Res3, &InVal1, &InVal2);
3568 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3569 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3570 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3571 }
3572 }
3573
3574 /* If the values are in range and caused no exceptions, do the whole series of
3575 partial remainders till we get the non-partial one or run into an exception. */
3576 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3577 {
3578 IEMFPURESULT ResPrev = ResM;
/* Continue while C2 is set and no exception flag is raised, feeding the previous result back in; capped at 32 steps. */
3579 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3580 {
3581 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3582 State.FSW = ResPrev.FSW;
3583 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3584 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3585 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3586 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3587 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3588 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3589 ResPrev = ResSeq;
3590 }
3591 }
3592 }
3593 }
3594 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3595 }
3596}
3597#endif
3598
3599
3600static void FpuBinaryR80Test(void)
3601{
3602 X86FXSTATE State;
3603 RT_ZERO(State);
3604 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3605 {
3606 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3607 continue;
3608
3609 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3610 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3611 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3612 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3613 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3614 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3615 {
3616 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3617 {
3618 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3619 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3620 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3621 State.FCW = paTests[iTest].fFcw;
3622 State.FSW = paTests[iTest].fFswIn;
3623 pfn(&State, &Res, &InVal1, &InVal2);
3624 if ( Res.FSW != paTests[iTest].fFswOut
3625 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3626 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3627 "%s -> fsw=%#06x %s\n"
3628 "%s expected %#06x %s%s%s (%s)\n",
3629 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3630 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3631 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3632 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3633 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3634 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3635 FormatFcw(paTests[iTest].fFcw) );
3636 }
3637 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3638 }
3639 }
3640}
3641
3642
3643/*
3644 * Binary FPU operations on one 80-bit floating point value and either a 64-bit or 32-bit
 * floating point value or a 32-bit or 16-bit integer.
3645 */
/* Integer counterparts of the floating point IS_NORMAL checks.  The generator
   macros paste the second operand's type name onto "_IS_NORMAL", so integer
   operands need these; they always count as normal. */
3646#define int64_t_IS_NORMAL(a) 1
3647#define int32_t_IS_NORMAL(a) 1
3648#define int16_t_IS_NORMAL(a) 1
3649
3650#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed operand pairs that GEN_FPU_BINARY_SMALL appends after the random
   inputs (used for test indexes at or beyond cTests).  One table per second
   operand type; the table name is formed by pasting the type tag. */
/* r80 by r64 */
3651static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
3652{
3653 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3654 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3655};
/* r80 by r32 */
3656static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
3657{
3658 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3659 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3660};
/* r80 by i32 */
3661static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
3662{
3663 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3664};
/* r80 by i16 */
3665static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
3666{
3667 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3668};
3669
/*
 * Defines FpuBinary<a_UpBits>Generate(pOut, cTests), the test data generator
 * for the workers in a_aSubTests (80-bit first operand, a_Type2 second operand).
 *
 * Inputs are cTests random pairs (at least 160) followed by the entries of
 * s_aFpuBinary<a_UpBits>Specials.  Each pair is run for every rounding mode and
 * precision setting, once with no exceptions masked and once with all masked.
 *
 *  a_fIntType, a_cBits  - Passed on to RandR80Src1 when picking the first operand.
 *  a_LoBits             - Not used by the generator itself.
 *  a_UpBits             - Type tag pasted into the function, table and helper names.
 *  a_Type2              - C type of the second operand.
 *  a_aSubTests          - The sub-test table to generate data for.
 *  a_TestType           - Test entry type; its name is written to the output.
 *
 * Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing.
 */
3670# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3671static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3672{ \
3673 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
3674 \
3675 X86FXSTATE State; \
3676 RT_ZERO(State); \
3677 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
3678 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3679 { \
3680 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3681 uint32_t cNormalInputPairs = 0; \
3682 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
3683 { \
3684 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
3685 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
3686 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
3687 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
3688 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3689 cNormalInputPairs++; \
3690 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3691 { \
3692 iTest -= 1; /* too few normal pairs so far: redo this index with fresh values */ \
3693 continue; \
3694 } \
3695 \
3696 uint16_t const fFcw = RandFcw(); \
3697 State.FSW = RandFsw(); \
3698 \
3699 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3700 { \
3701 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
3702 { \
3703 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3704 { \
3705 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
3706 | (iRounding << X86_FCW_RC_SHIFT) \
3707 | (iPrecision << X86_FCW_PC_SHIFT) \
3708 | iMask; \
3709 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3710 a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
3711 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
3712 State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3713 GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
3714 } \
3715 } \
3716 } \
3717 } \
3718 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3719 } \
3720}
3721#else
3722# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3723#endif
3724
/*
 * Instantiates everything needed for one "r80 by smaller operand" family:
 * the sub-test type, the table of the six arithmetic workers (add, mul, sub,
 * subr, div, divr), the generator (via GEN_FPU_BINARY_SMALL) and the
 * FpuBinary<a_UpBits>Test function.
 *
 * The test function replays each recorded vector and compares the resulting
 * FSW and 80-bit value with the expected ones.  The first variation uses the
 * pfn worker, any further variation the pfnNative worker.
 *
 *  a_LoBits / a_UpBits  - Lower and upper case type tags pasted into names.
 *  a_I                  - Extra infix for the worker names ('i' for the integer forms).
 *  a_Type2              - C type of the second operand.
 *  Remaining arguments are forwarded to GEN_FPU_BINARY_SMALL / TYPEDEF_SUBTEST_TYPE.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
            continue; \
        \
        uint32_t const           cVectors    = *a_aSubTests[idxFn].pcTests; \
        a_TestType const * const paVectors   = a_aSubTests[idxFn].paTests; \
        uint32_t const           cVariations = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
        if (cVectors == 0) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t idxVar = 0; idxVar < cVariations; idxVar++) \
        { \
            /* Variation zero is the regular worker, everything after it the native one. */ \
            PFNIEMAIMPLFPU ## a_UpBits const pfnWorker = idxVar == 0 \
                                                       ? a_aSubTests[idxFn].pfn \
                                                       : a_aSubTests[idxFn].pfnNative; \
            for (uint32_t idxTest = 0; idxTest < cVectors; idxTest++) \
            { \
                a_TestType const * const pVector  = &paVectors[idxTest]; \
                RTFLOAT80U const         Operand1 = pVector->InVal1; \
                a_Type2 const            Operand2 = pVector->InVal2; \
                IEMFPURESULT             Result   = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FpuState.FCW = pVector->fFcw; \
                FpuState.FSW = pVector->fFswIn; \
                pfnWorker(&FpuState, &Result, &Operand1, &Operand2); \
                \
                /* Nothing to report when both the status word and the value match. */ \
                if (   Result.FSW == pVector->fFswOut \
                    && RTFLOAT80U_ARE_IDENTICAL(&Result.r80Result, &pVector->OutVal)) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x %s\n" \
                                      "%s expected %#06x %s%s%s (%s)\n", \
                             idxTest, idxVar ? "/n" : "", pVector->fFcw, pVector->fFswIn, \
                             FormatR80(&pVector->InVal1), Format ## a_UpBits(&pVector->InVal2), \
                             idxVar ? " " : "", Result.FSW, FormatR80(&Result.r80Result), \
                             idxVar ? " " : "", pVector->fFswOut, FormatR80(&pVector->OutVal), \
                             FswDiff(Result.FSW, pVector->fFswOut), \
                             !RTFLOAT80U_ARE_IDENTICAL(&Result.r80Result, &pVector->OutVal) ? " - val" : "", \
                             FormatFcw(pVector->fFcw) ); \
            } \
        } \
    } \
}
3780
/* Instantiations: r80 by r64, r80 by r32, r80 by i32 and r80 by i16.  The
   integer variants pass 1 as a_fIntType and 'i' as the worker name infix. */
3781TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
3782TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
3783TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
3784TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3785
3786
3787/*
3788 * Binary operations comparing an 80-bit floating point value with an 80-, 64- or 32-bit
 * floating point value or a 32- or 16-bit integer, only affecting FSW.
3789 */
3790#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed operand pairs that GEN_FPU_BINARY_FSW appends after the random inputs
   (used for test indexes at or beyond cTests).  One table per second operand
   type; the table name is formed by pasting the type tag. */
/* r80 by r80 */
3791static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3792{
3793 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3794 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3795};
/* r80 by r64 */
3796static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3797{
3798 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3799 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3800};
/* r80 by r32 */
3801static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3802{
3803 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3804 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3805};
/* r80 by i32 */
3806static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3807{
3808 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3809};
/* r80 by i16 */
3810static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3811{
3812 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3813};
3814
/*
 * Defines FpuBinaryFsw<a_UpBits>Generate(pOut, cTests), the test data generator
 * for the FSW-only workers in a_aSubTests.
 *
 * Inputs are cTests random pairs (at least 160) followed by the entries of
 * s_aFpuBinaryFsw<a_UpBits>Specials.  Each pair is run twice with the random
 * FCW: once with no exceptions masked and once with all masked.  Only the
 * resulting FSW is recorded.
 *
 *  a_fIntType, a_cBits  - Passed on to RandR80Src1 when picking the first operand.
 *  a_UpBits             - Type tag pasted into the function, table and helper names.
 *  a_Type2              - C type of the second operand.
 *  a_aSubTests          - The sub-test table to generate data for.
 *  a_TestType           - Test entry type; its name is written to the output.
 *
 * Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing.
 */
3815# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3816static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3817{ \
3818 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
3819 \
3820 X86FXSTATE State; \
3821 RT_ZERO(State); \
3822 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
3823 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3824 { \
3825 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3826 uint32_t cNormalInputPairs = 0; \
3827 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
3828 { \
3829 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
3830 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
3831 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
3832 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
3833 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3834 cNormalInputPairs++; \
3835 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3836 { \
3837 iTest -= 1; /* too few normal pairs so far: redo this index with fresh values */ \
3838 continue; \
3839 } \
3840 \
3841 uint16_t const fFcw = RandFcw(); \
3842 State.FSW = RandFsw(); \
3843 \
3844 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
3845 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3846 { \
3847 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
3848 uint16_t fFswOut = 0; \
3849 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
3850 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
3851 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3852 iTest, iMask ? 'c' : 'u'); \
3853 } \
3854 } \
3855 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3856 } \
3857}
3858#else
3859# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3860#endif
3861
/*
 * Instantiates everything needed for one family of FSW-only binary workers:
 * the sub-test type, the table (entries supplied through the variadic
 * arguments), the generator (via GEN_FPU_BINARY_FSW) and the
 * FpuBinaryFsw<a_UpBits>Test function.
 *
 * The test function replays each recorded vector and compares the resulting
 * FSW with the expected one.  The first variation uses the pfn worker, any
 * further variation the pfnNative worker.
 *
 *  a_UpBits     - Type tag pasted into the function, type and helper names.
 *  a_Type2      - C type of the second operand.
 *  ...          - The ENTRY() list making up the sub-test table.
 *  Remaining arguments are forwarded to GEN_FPU_BINARY_FSW / TYPEDEF_SUBTEST_TYPE.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
            continue; \
        \
        uint32_t const           cVectors    = *a_aSubTests[idxFn].pcTests; \
        a_TestType const * const paVectors   = a_aSubTests[idxFn].paTests; \
        uint32_t const           cVariations = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
        if (cVectors == 0) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t idxVar = 0; idxVar < cVariations; idxVar++) \
        { \
            /* Variation zero is the regular worker, everything after it the native one. */ \
            PFNIEMAIMPLFPU ## a_UpBits ## FSW const pfnWorker = idxVar == 0 \
                                                              ? a_aSubTests[idxFn].pfn \
                                                              : a_aSubTests[idxFn].pfnNative; \
            for (uint32_t idxTest = 0; idxTest < cVectors; idxTest++) \
            { \
                a_TestType const * const pVector    = &paVectors[idxTest]; \
                uint16_t                 fFswActual = 0; \
                RTFLOAT80U const         Operand1   = pVector->InVal1; \
                a_Type2 const            Operand2   = pVector->InVal2; \
                FpuState.FCW = pVector->fFcw; \
                FpuState.FSW = pVector->fFswIn; \
                pfnWorker(&FpuState, &fFswActual, &Operand1, &Operand2); \
                if (fFswActual == pVector->fFswOut) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x\n" \
                                      "%s expected %#06x %s (%s)\n", \
                             idxTest, idxVar ? "/n" : "", pVector->fFcw, pVector->fFswIn, \
                             FormatR80(&pVector->InVal1), Format ## a_UpBits(&pVector->InVal2), \
                             idxVar ? " " : "", fFswActual, \
                             idxVar ? " " : "", pVector->fFswOut, \
                             FswDiff(fFswActual, pVector->fFswOut), FormatFcw(pVector->fFcw) ); \
            } \
        } \
    } \
}
3909
/* Instantiations: the trailing ENTRY() arguments form each sub-test table.
   The FSW-only variants reuse the FPU_BINARY_*_TEST_T entry types. */
3910TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
3911TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
3912TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
3913TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
3914TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3915
3916
3917/*
3918 * Binary operations on 80-bit floating point values that affect only EFLAGS and possibly FSW.
3919 */
/* Sub-test descriptor type for the EFLAGS-returning r80-by-r80 workers, declared via TYPEDEF_SUBTEST_TYPE. */
3920TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
3921
/* The workers whose return value is recorded and checked as EFLAGS, alongside the output FSW. */
3922static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
3923{
3924 ENTRY(fcomi_r80_by_r80),
3925 ENTRY(fucomi_r80_by_r80),
3926};
3927
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed operand pairs appended after the random inputs of FpuBinaryEflR80Generate. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates the test data arrays for the workers in g_aFpuBinaryEflR80.
 *
 * Inputs are cTests random operand pairs followed by the pairs from
 * s_aFpuBinaryEflR80Specials.  Each pair is run twice with a random FCW: once
 * with no exceptions masked and once with all masked.  The resulting FSW and
 * the returned EFLAGS value are recorded.
 *
 * @param   pOut        The output stream.
 * @param   cTests      Number of random input pairs (raised to at least 160).
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    uint32_t const cWantedNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuBinaryEflR80); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[idxFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalPairsSeen = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            bool const       fRandomInput = iTest < cTests;
            RTFLOAT80U const InVal1       = fRandomInput ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2       = fRandomInput ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;

            if (!RTFLOAT80U_IS_NORMAL(&InVal1) || !RTFLOAT80U_IS_NORMAL(&InVal2))
            {
                /* Too few normal pairs this close to the end of the random inputs:
                   redo this index with fresh values. */
                if (   cNormalPairsSeen < cWantedNormalPairs
                    && iTest + cWantedNormalPairs >= cTests
                    && fRandomInput)
                {
                    iTest -= 1;
                    continue;
                }
            }
            else
                cNormalPairsSeen++;

            uint16_t const fFcwRandom = RandFcw();
            FpuState.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                FpuState.FCW = (fFcwRandom & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t       uFswResult = 0;
                uint32_t const fEflResult = g_aFpuBinaryEflR80[idxFn].pfn(&FpuState, &uFswResult, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             FpuState.FCW, FpuState.FSW, uFswResult, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflResult,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[idxFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3976
3977static void FpuBinaryEflR80Test(void)
3978{
3979 X86FXSTATE State;
3980 RT_ZERO(State);
3981 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3982 {
3983 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3984 continue;
3985
3986 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3987 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3988 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3989 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3990 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3991 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3992 {
3993 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3994 {
3995 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3996 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3997 State.FCW = paTests[iTest].fFcw;
3998 State.FSW = paTests[iTest].fFswIn;
3999 uint16_t uFswOut = 0;
4000 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4001 if ( uFswOut != paTests[iTest].fFswOut
4002 || fEflOut != paTests[iTest].fEflOut)
4003 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4004 "%s -> fsw=%#06x efl=%#08x\n"
4005 "%s expected %#06x %#08x %s%s (%s)\n",
4006 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4007 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4008 iVar ? " " : "", uFswOut, fEflOut,
4009 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4010 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4011 FormatFcw(paTests[iTest].fFcw));
4012 }
4013 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4014 }
4015 }
4016}
4017
4018
4019/*********************************************************************************************************************************
4020* x87 FPU Unary Operations *
4021*********************************************************************************************************************************/
4022
4023/*
4024 * Unary FPU operations on one 80-bit floating point value.
4025 *
4026 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
4027 * a rounding error or not.
4028 */
/* Declares the sub-test descriptor type FPU_UNARY_R80_T via the TYPEDEF_SUBTEST_TYPE macro,
   tying together the test record type and the worker function pointer type. */
4029TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
4030
/* Per-entry "kind" values; the code below compares an entry's uExtra member against them.
   kUnary_Rounding_F2xm1 is the only kind for which FpuUnaryR80MayHaveRoundingError can return true. */
4031enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
/* The unary r80 sub-tests, iterated by index (iFn) by both the generator and the test runner.
   NOTE(review): presumably the last macro argument of the *_EX entries ends up in uExtra - confirm
   against the ENTRY_EX / ENTRY_AMD_EX / ENTRY_INTEL_EX definitions. */
4032static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
4033{
4034 ENTRY_EX( fabs_r80, kUnary_Accurate),
4035 ENTRY_EX( fchs_r80, kUnary_Accurate),
4036 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
4037 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
4038 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
4039 ENTRY_EX( frndint_r80, kUnary_Accurate),
4040 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
4041 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
4042 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
4043 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
4044};
4045
4046#ifdef TSTIEMAIMPL_WITH_GENERATOR
4047
4048static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
4049{
4050 if ( enmKind == kUnary_Rounding_F2xm1
4051 && RTFLOAT80U_IS_NORMAL(pr80Val)
4052 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
4053 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4054 return true;
4055 return false;
4056}
4057
4058static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4059{
4060 static RTFLOAT80U const s_aSpecials[] =
4061 {
4062 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4063 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4064 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4065 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4066 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4067 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4068 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4069 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4070 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4071 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4072 };
4073 X86FXSTATE State;
4074 RT_ZERO(State);
4075 uint32_t cMinNormals = cTests / 4;
4076 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4077 {
4078 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4079 PRTSTREAM pOutFn = pOut;
4080 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4081 {
4082 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4083 continue;
4084 pOutFn = pOutCpu;
4085 }
4086
4087 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4088 uint32_t iTestOutput = 0;
4089 uint32_t cNormalInputs = 0;
4090 uint32_t cTargetRangeInputs = 0;
4091 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4092 {
4093 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4094 if (RTFLOAT80U_IS_NORMAL(&InVal))
4095 {
4096 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4097 {
4098 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4099 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4100 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4101 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4102 cTargetRangeInputs++;
4103 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4104 {
4105 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4106 cTargetRangeInputs++;
4107 }
4108 }
4109 cNormalInputs++;
4110 }
4111 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4112 {
4113 iTest -= 1;
4114 continue;
4115 }
4116
4117 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4118 uint16_t const fFcw = RandFcw();
4119 State.FSW = RandFsw();
4120
4121 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4122 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4123 {
4124 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4125 | (iRounding << X86_FCW_RC_SHIFT)
4126 | (iPrecision << X86_FCW_PC_SHIFT)
4127 | X86_FCW_MASK_ALL;
4128 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4129 pfn(&State, &ResM, &InVal);
4130 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4131 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4132 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4133
4134 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4135 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4136 pfn(&State, &ResU, &InVal);
4137 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4138 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4139 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4140
4141 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4142 if (fXcpt)
4143 {
4144 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4145 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4146 pfn(&State, &Res1, &InVal);
4147 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4148 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4149 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4150 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4151 {
4152 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4153 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4154 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4155 pfn(&State, &Res2, &InVal);
4156 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4157 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4158 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4159 }
4160 if (!RT_IS_POWER_OF_TWO(fXcpt))
4161 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4162 if (fUnmasked & fXcpt)
4163 {
4164 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4165 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4166 pfn(&State, &Res3, &InVal);
4167 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4168 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4169 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4170 }
4171 }
4172 }
4173 }
4174 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4175 }
4176}
4177#endif
4178
4179static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4180{
4181 if (fFcw1 == fFcw2)
4182 return true;
4183 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4184 {
4185 *pfRndErr = true;
4186 return true;
4187 }
4188 return false;
4189}
4190
4191static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4192{
4193 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4194 return true;
4195 if ( fRndErrOk
4196 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4197 {
4198 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4199 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4200 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4201 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4202 ||
4203 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4204 && pr80Val1->s.uMantissa == UINT64_MAX
4205 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4206 ||
4207 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4208 && pr80Val2->s.uMantissa == UINT64_MAX
4209 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4210 {
4211 *pfRndErr = true;
4212 return true;
4213 }
4214 }
4215 return false;
4216}
4217
4218
4219static void FpuUnaryR80Test(void)
4220{
4221 X86FXSTATE State;
4222 RT_ZERO(State);
4223 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4224 {
4225 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4226 continue;
4227
4228 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4229 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4230 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4231 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4232 uint32_t cRndErrs = 0;
4233 uint32_t cPossibleRndErrs = 0;
4234 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4235 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4236 {
4237 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4238 {
4239 RTFLOAT80U const InVal = paTests[iTest].InVal;
4240 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4241 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4242 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4243 State.FSW = paTests[iTest].fFswIn;
4244 pfn(&State, &Res, &InVal);
4245 bool fRndErr = false;
4246 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4247 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4248 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4249 "%s -> fsw=%#06x %s\n"
4250 "%s expected %#06x %s%s%s%s (%s)\n",
4251 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4252 FormatR80(&paTests[iTest].InVal),
4253 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4254 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4255 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4256 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4257 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4258 cRndErrs += fRndErr;
4259 cPossibleRndErrs += fRndErrOk;
4260 }
4261 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4262 }
4263 if (cPossibleRndErrs > 0)
4264 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4265 }
4266}
4267
4268
4269/*
4270 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4271 */
/* Declares the sub-test descriptor type FPU_UNARY_FSW_R80_T; it reuses the FPU_UNARY_R80_TEST_T
   record type with the FSW-only worker function pointer type. */
4272TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4273
/* The FSW-only unary sub-tests.  The generator treats an entry whose uExtra equals 1 as fxam.
   NOTE(review): presumably the second ENTRY_EX argument is what ends up in uExtra - confirm
   against the macro definition. */
4274static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4275{
4276 ENTRY(ftst_r80),
4277 ENTRY_EX(fxam_r80, 1),
4278};
4279
4280#ifdef TSTIEMAIMPL_WITH_GENERATOR
4281static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4282{
4283 static RTFLOAT80U const s_aSpecials[] =
4284 {
4285 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4286 };
4287
4288 X86FXSTATE State;
4289 RT_ZERO(State);
4290 uint32_t cMinNormals = cTests / 4;
4291 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4292 {
4293 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4294 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4295 PRTSTREAM pOutFn = pOut;
4296 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4297 {
4298 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4299 continue;
4300 pOutFn = pOutCpu;
4301 }
4302 State.FTW = 0;
4303
4304 GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4305 uint32_t cNormalInputs = 0;
4306 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4307 {
4308 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4309 if (RTFLOAT80U_IS_NORMAL(&InVal))
4310 cNormalInputs++;
4311 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4312 {
4313 iTest -= 1;
4314 continue;
4315 }
4316
4317 uint16_t const fFcw = RandFcw();
4318 State.FSW = RandFsw();
4319 if (!fIsFxam)
4320 {
4321 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4322 {
4323 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4324 {
4325 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4326 {
4327 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4328 | (iRounding << X86_FCW_RC_SHIFT)
4329 | (iPrecision << X86_FCW_PC_SHIFT)
4330 | iMask;
4331 uint16_t fFswOut = 0;
4332 pfn(&State, &fFswOut, &InVal);
4333 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4334 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4335 iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4336 }
4337 }
4338 }
4339 }
4340 else
4341 {
4342 uint16_t fFswOut = 0;
4343 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4344 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4345 State.FCW = fFcw;
4346 pfn(&State, &fFswOut, &InVal);
4347 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4348 fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4349 }
4350 }
4351 GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4352 }
4353}
4354#endif
4355
4356
4357static void FpuUnaryFswR80Test(void)
4358{
4359 X86FXSTATE State;
4360 RT_ZERO(State);
4361 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4362 {
4363 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4364 continue;
4365
4366 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4367 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4368 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4369 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4370 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4371 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4372 {
4373 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4374 {
4375 RTFLOAT80U const InVal = paTests[iTest].InVal;
4376 uint16_t fFswOut = 0;
4377 State.FSW = paTests[iTest].fFswIn;
4378 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4379 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4380 pfn(&State, &fFswOut, &InVal);
4381 if (fFswOut != paTests[iTest].fFswOut)
4382 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4383 "%s -> fsw=%#06x\n"
4384 "%s expected %#06x %s (%s%s)\n",
4385 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4386 FormatR80(&paTests[iTest].InVal),
4387 iVar ? " " : "", fFswOut,
4388 iVar ? " " : "", paTests[iTest].fFswOut,
4389 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4390 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4391 }
4392 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4393 }
4394 }
4395}
4396
4397/*
4398 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4399 */
/* Declares the sub-test descriptor type FPU_UNARY_TWO_R80_T for workers producing two r80 results. */
4400TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4401
/* The two-result unary sub-tests.  Row order matters: the generator's exponent window steering
   is skipped for index 0 (fxtract) and applied to every later row. */
4402static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4403{
4404 ENTRY(fxtract_r80_r80),
4405 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4406 ENTRY_INTEL(fptan_r80_r80, 0),
4407 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4408 ENTRY_INTEL(fsincos_r80_r80, 0),
4409};
4410
4411#ifdef TSTIEMAIMPL_WITH_GENERATOR
4412static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4413{
4414 static RTFLOAT80U const s_aSpecials[] =
4415 {
4416 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4417 };
4418
4419 X86FXSTATE State;
4420 RT_ZERO(State);
4421 uint32_t cMinNormals = cTests / 4;
4422 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4423 {
4424 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4425 PRTSTREAM pOutFn = pOut;
4426 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4427 {
4428 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4429 continue;
4430 pOutFn = pOutCpu;
4431 }
4432
4433 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4434 uint32_t iTestOutput = 0;
4435 uint32_t cNormalInputs = 0;
4436 uint32_t cTargetRangeInputs = 0;
4437 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4438 {
4439 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4440 if (RTFLOAT80U_IS_NORMAL(&InVal))
4441 {
4442 if (iFn != 0)
4443 {
4444 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4445 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4446 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4447 cTargetRangeInputs++;
4448 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4449 {
4450 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4451 cTargetRangeInputs++;
4452 }
4453 }
4454 cNormalInputs++;
4455 }
4456 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4457 {
4458 iTest -= 1;
4459 continue;
4460 }
4461
4462 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4463 uint16_t const fFcw = RandFcw();
4464 State.FSW = RandFsw();
4465
4466 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4467 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4468 {
4469 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4470 | (iRounding << X86_FCW_RC_SHIFT)
4471 | (iPrecision << X86_FCW_PC_SHIFT)
4472 | X86_FCW_MASK_ALL;
4473 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4474 pfn(&State, &ResM, &InVal);
4475 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4476 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4477 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4478
4479 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4480 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4481 pfn(&State, &ResU, &InVal);
4482 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4483 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4484 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4485
4486 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4487 if (fXcpt)
4488 {
4489 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4490 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4491 pfn(&State, &Res1, &InVal);
4492 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4493 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4494 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4495 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4496 {
4497 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4498 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4499 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4500 pfn(&State, &Res2, &InVal);
4501 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4502 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4503 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4504 }
4505 if (!RT_IS_POWER_OF_TWO(fXcpt))
4506 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4507 if (fUnmasked & fXcpt)
4508 {
4509 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4510 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4511 pfn(&State, &Res3, &InVal);
4512 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4513 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4514 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4515 }
4516 }
4517 }
4518 }
4519 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4520 }
4521}
4522#endif
4523
4524
4525static void FpuUnaryTwoR80Test(void)
4526{
4527 X86FXSTATE State;
4528 RT_ZERO(State);
4529 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4530 {
4531 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4532 continue;
4533
4534 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4535 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4536 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4537 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4538 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4539 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4540 {
4541 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4542 {
4543 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4544 RTFLOAT80U const InVal = paTests[iTest].InVal;
4545 State.FCW = paTests[iTest].fFcw;
4546 State.FSW = paTests[iTest].fFswIn;
4547 pfn(&State, &Res, &InVal);
4548 if ( Res.FSW != paTests[iTest].fFswOut
4549 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4550 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4551 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4552 "%s -> fsw=%#06x %s %s\n"
4553 "%s expected %#06x %s %s %s%s%s (%s)\n",
4554 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4555 FormatR80(&paTests[iTest].InVal),
4556 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4557 iVar ? " " : "", paTests[iTest].fFswOut,
4558 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4559 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4560 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4561 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4562 }
4563 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4564 }
4565 }
4566}
4567
4568
4569/*********************************************************************************************************************************
4570* SSE floating point Binary Operations *
4571*********************************************************************************************************************************/
4572
4573/*
4574 * Binary SSE operations on packed single precision floating point values.
4575 */
/* Declares the sub-test descriptor type SSE_BINARY_R32_T for the 128-bit SSE workers. */
4576TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4577
/* The packed single precision sub-tests.  The generator writes one data file per entry,
   formatting the file name from the entry's pszName. */
4578static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4579{
4580 ENTRY_BIN(addps_u128),
4581 ENTRY_BIN(mulps_u128),
4582 ENTRY_BIN(subps_u128),
4583 ENTRY_BIN(minps_u128),
4584 ENTRY_BIN(divps_u128),
4585 ENTRY_BIN(maxps_u128),
4586 ENTRY_BIN(haddps_u128),
4587 ENTRY_BIN(hsubps_u128),
4588 ENTRY_BIN(sqrtps_u128),
4589 ENTRY_BIN(addsubps_u128),
4590 ENTRY_BIN(cvtps2pd_u128),
4591};
4592
4593#ifdef TSTIEMAIMPL_WITH_GENERATOR
4594static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4595{
4596 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4597
4598 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4599 {
4600 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4601 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4602 /** @todo More specials. */
4603 };
4604
4605 X86FXSTATE State;
4606 RT_ZERO(State);
4607 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4608 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4609 {
4610 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4611
4612 PRTSTREAM pStrmOut = NULL;
4613 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4614 if (RT_FAILURE(rc))
4615 {
4616 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4617 return RTEXITCODE_FAILURE;
4618 }
4619
4620 uint32_t cNormalInputPairs = 0;
4621 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4622 {
4623 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4624
4625 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4626 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4627 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4628 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4629
4630 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4631 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4632 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4633 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4634
4635 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4636 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4637 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4638 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4639 cNormalInputPairs++;
4640 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4641 {
4642 iTest -= 1;
4643 continue;
4644 }
4645
4646 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4647 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4648 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4649 for (uint8_t iFz = 0; iFz < 2; iFz++)
4650 {
4651 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4652 | (iRounding << X86_MXCSR_RC_SHIFT)
4653 | (iDaz ? X86_MXCSR_DAZ : 0)
4654 | (iFz ? X86_MXCSR_FZ : 0)
4655 | X86_MXCSR_XCPT_MASK;
4656 IEMSSERESULT ResM; RT_ZERO(ResM);
4657 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4658 TestData.fMxcsrIn = State.MXCSR;
4659 TestData.fMxcsrOut = ResM.MXCSR;
4660 TestData.OutVal = ResM.uResult;
4661 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4662
4663 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4664 IEMSSERESULT ResU; RT_ZERO(ResU);
4665 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4666 TestData.fMxcsrIn = State.MXCSR;
4667 TestData.fMxcsrOut = ResU.MXCSR;
4668 TestData.OutVal = ResU.uResult;
4669 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4670
4671 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4672 if (fXcpt)
4673 {
4674 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4675 IEMSSERESULT Res1; RT_ZERO(Res1);
4676 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4677 TestData.fMxcsrIn = State.MXCSR;
4678 TestData.fMxcsrOut = Res1.MXCSR;
4679 TestData.OutVal = Res1.uResult;
4680 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4681
4682 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4683 {
4684 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4685 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4686 IEMSSERESULT Res2; RT_ZERO(Res2);
4687 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4688 TestData.fMxcsrIn = State.MXCSR;
4689 TestData.fMxcsrOut = Res2.MXCSR;
4690 TestData.OutVal = Res2.uResult;
4691 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4692 }
4693 if (!RT_IS_POWER_OF_TWO(fXcpt))
4694 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4695 if (fUnmasked & fXcpt)
4696 {
4697 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4698 IEMSSERESULT Res3; RT_ZERO(Res3);
4699 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4700 TestData.fMxcsrIn = State.MXCSR;
4701 TestData.fMxcsrOut = Res3.MXCSR;
4702 TestData.OutVal = Res3.uResult;
4703 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4704 }
4705 }
4706 }
4707 }
4708 rc = RTStrmClose(pStrmOut);
4709 if (RT_FAILURE(rc))
4710 {
4711 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4712 return RTEXITCODE_FAILURE;
4713 }
4714 }
4715
4716 return RTEXITCODE_SUCCESS;
4717}
4718#endif
4719
4720static void SseBinaryR32Test(void)
4721{
4722 X86FXSTATE State;
4723 RT_ZERO(State);
4724 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4725 {
4726 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4727 continue;
4728
4729 uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4730 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4731 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4732 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4733 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4734 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4735 {
4736 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4737 {
4738 IEMSSERESULT Res; RT_ZERO(Res);
4739
4740 State.MXCSR = paTests[iTest].fMxcsrIn;
4741 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4742 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4743 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4744 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4745 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4746 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4747 || !fValsIdentical)
4748 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4749 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4750 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4751 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4752 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4753 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4754 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4755 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4756 iVar ? " " : "", Res.MXCSR,
4757 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4758 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4759 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4760 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4761 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4762 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4763 !fValsIdentical ? " - val" : "",
4764 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4765 }
4766 pfn = g_aSseBinaryR32[iFn].pfnNative;
4767 }
4768 }
4769}
4770
4771
/*
 * Binary SSE operations on packed double precision floating point values.
 */
/** Sub-test entry type tying SSE_BINARY_TEST_T records to a
 *  PFNIEMAIMPLFPSSEF2U128 worker.  The code in this file accesses the
 *  pszName, pfn, pfnNative, paTests and pcTests members of such entries. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);

/** The workers iterated by SseBinaryR64Generate() and SseBinaryR64Test().
 *  NOTE(review): ENTRY_BIN is defined elsewhere; presumably it binds the
 *  named worker to its binary test data - confirm against the macro. */
static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
{
    ENTRY_BIN(addpd_u128),
    ENTRY_BIN(mulpd_u128),
    ENTRY_BIN(subpd_u128),
    ENTRY_BIN(minpd_u128),
    ENTRY_BIN(divpd_u128),
    ENTRY_BIN(maxpd_u128),
    ENTRY_BIN(haddpd_u128),
    ENTRY_BIN(hsubpd_u128),
    ENTRY_BIN(sqrtpd_u128),
    ENTRY_BIN(addsubpd_u128),
    ENTRY_BIN(cvtpd2ps_u128),
};
4791
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on the inputs in @a pTestData using the MXCSR currently
 * set in @a pState, fills in the MXCSR in/out and result members of the
 * record and appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The output stream the record is written to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; only MXCSR is set up by the caller.
 * @param   pTestData   The record holding the inputs; outputs are stored here.
 */
static uint32_t SseBinaryR64GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                           X86FXSTATE *pState, SSE_BINARY_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->InVal2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for all entries in g_aSseBinaryR64.
 *
 * For every input pair, records are produced for each combination of the four
 * rounding modes, DAZ and FZ, first with all X86_MXCSR_XCPT_MASK bits set and
 * then with them cleared.  If either run raised exception flags, additional
 * records are produced with varying mask/flag settings (see inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; the entry
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 */
static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Prefer the native worker for generating the reference data when there is one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.InVal2.ar64[0] = RandR64Src2(iTest);
                TestData.InVal2.ar64[1] = RandR64Src2(iTest);
            }
            else
            {
                /* Each lane takes its own special value (lane 1 used to be fed from index 0). */
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal2.ar64[0] = s_aSpecials[iTest - cTests].aVal2[0];
                TestData.InVal2.ar64[1] = s_aSpecials[iTest - cTests].aVal2[1];
            }

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all X86_MXCSR_XCPT_MASK bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: X86_MXCSR_XCPT_MASK bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, the observed flag bits OR'ed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryR64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Run 3 reported flags not seen before: add them and rerun with
                                   exactly the mask bits corresponding to the collected flags set. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryR64GenerateRecord(pStrmOut, pfn, &State, &TestData);
                            }
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                /* Several flags: leave each one in turn out of the mask. */
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryR64GenerateRecord(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4911
4912
4913static void SseBinaryR64Test(void)
4914{
4915 X86FXSTATE State;
4916 RT_ZERO(State);
4917 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4918 {
4919 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4920 continue;
4921
4922 uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4923 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4924 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4925 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4926 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4927 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4928 {
4929 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4930 {
4931 IEMSSERESULT Res; RT_ZERO(Res);
4932
4933 State.MXCSR = paTests[iTest].fMxcsrIn;
4934 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4935 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4936 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4937 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4938 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
4939 "%s -> mxcsr=%#08x %s'%s\n"
4940 "%s expected %#08x %s'%s%s%s (%s)\n",
4941 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4942 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
4943 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
4944 iVar ? " " : "", Res.MXCSR,
4945 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
4946 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4947 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
4948 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4949 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4950 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4951 ? " - val" : "",
4952 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4953 }
4954 pfn = g_aSseBinaryR64[iFn].pfnNative;
4955 }
4956 }
4957}
4958
4959
/*
 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
 */
/** Sub-test entry type tying SSE_BINARY_U128_R32_TEST_T records to a
 *  PFNIEMAIMPLFPSSEF2U128R32 worker.  The code in this file accesses the
 *  pszName, pfn, pfnNative, paTests and pcTests members of such entries. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/** The workers iterated by SseBinaryU128R32Generate() and SseBinaryU128R32Test().
 *  NOTE(review): ENTRY_BIN is defined elsewhere; presumably it binds the
 *  named worker to its binary test data - confirm against the macro. */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
4976
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on the inputs in @a pTestData using the MXCSR currently
 * set in @a pState, fills in the MXCSR in/out and result members of the
 * record and appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The output stream the record is written to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; only MXCSR is set up by the caller.
 * @param   pTestData   The record holding the inputs; outputs are stored here.
 */
static uint32_t SseBinaryU128R32GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R32 pfn,
                                               X86FXSTATE *pState, SSE_BINARY_U128_R32_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r32Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for all entries in g_aSseBinaryU128R32.
 *
 * For every input set, records are produced for each combination of the four
 * rounding modes, DAZ and FZ, first with all X86_MXCSR_XCPT_MASK bits set and
 * then with them cleared.  If either run raised exception flags, additional
 * records are produced with varying mask/flag settings (see inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; the entry
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryU128R32); idxFn++)
    {
        /* Prefer the native worker for generating the reference data when there is one. */
        PFNIEMAIMPLFPSSEF2U128R32 pfnWorker = g_aSseBinaryU128R32[idxFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryU128R32[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.ar32[0] = RandR32Src(iTest);
                TestData.InVal1.ar32[1] = RandR32Src(iTest);
                TestData.InVal1.ar32[2] = RandR32Src(iTest);
                TestData.InVal1.ar32[3] = RandR32Src(iTest);
                TestData.r32Val2        = RandR32Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar32[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar32[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal1.ar32[2] = s_aSpecials[iTest - cTests].aVal1[2];
                TestData.InVal1.ar32[3] = s_aSpecials[iTest - cTests].aVal1[3];
                TestData.r32Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all X86_MXCSR_XCPT_MASK bits set. */
                        FpuState.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                       | (iRounding << X86_MXCSR_RC_SHIFT)
                                       | (iDaz ? X86_MXCSR_DAZ : 0)
                                       | (iFz  ? X86_MXCSR_FZ  : 0)
                                       | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                        /* Run 2: X86_MXCSR_XCPT_MASK bits cleared. */
                        FpuState.MXCSR = FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, the observed flag bits OR'ed into the input MXCSR. */
                            FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Run 3 reported flags not seen before: add them and rerun with
                                   exactly the mask bits corresponding to the collected flags set. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);
                            }
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                /* Several flags: leave each one in turn out of the mask. */
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5100
5101static void SseBinaryU128R32Test(void)
5102{
5103 X86FXSTATE State;
5104 RT_ZERO(State);
5105 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5106 {
5107 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5108 continue;
5109
5110 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5111 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5112 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5113 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5114 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5115 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5116 {
5117 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5118 {
5119 IEMSSERESULT Res; RT_ZERO(Res);
5120
5121 State.MXCSR = paTests[iTest].fMxcsrIn;
5122 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5123 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5124 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5125 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5126 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5127 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5128 || !fValsIdentical)
5129 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5130 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5131 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5132 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5133 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5134 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5135 FormatR32(&paTests[iTest].r32Val2),
5136 iVar ? " " : "", Res.MXCSR,
5137 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5138 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5139 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5140 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5141 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5142 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5143 !fValsIdentical ? " - val" : "",
5144 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5145 }
5146 }
5147 }
5148}
5149
5150
/*
 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
 */
/** Sub-test entry type tying SSE_BINARY_U128_R64_TEST_T records to a
 *  PFNIEMAIMPLFPSSEF2U128R64 worker.  The code in this file accesses the
 *  pszName, pfn, pfnNative, paTests and pcTests members of such entries. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/** The workers iterated by SseBinaryU128R64Generate() and SseBinaryU128R64Test().
 *  NOTE(review): ENTRY_BIN is defined elsewhere; presumably it binds the
 *  named worker to its binary test data - confirm against the macro. */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
5167
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on the inputs in @a pTestData using the MXCSR currently
 * set in @a pState, fills in the MXCSR in/out and result members of the
 * record and appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The output stream the record is written to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; only MXCSR is set up by the caller.
 * @param   pTestData   The record holding the inputs; outputs are stored here.
 */
static uint32_t SseBinaryU128R64GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R64 pfn,
                                               X86FXSTATE *pState, SSE_BINARY_U128_R64_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r64Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for all entries in g_aSseBinaryU128R64.
 *
 * For every input set, records are produced for each combination of the four
 * rounding modes, DAZ and FZ, first with all X86_MXCSR_XCPT_MASK bits set and
 * then with them cleared.  If either run raised exception flags, additional
 * records are produced with varying mask/flag settings (see inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; the entry
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryU128R64); idxFn++)
    {
        /* Prefer the native worker for generating the reference data when there is one. */
        PFNIEMAIMPLFPSSEF2U128R64 pfnWorker = g_aSseBinaryU128R64[idxFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryU128R64[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all X86_MXCSR_XCPT_MASK bits set. */
                        FpuState.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                       | (iRounding << X86_MXCSR_RC_SHIFT)
                                       | (iDaz ? X86_MXCSR_DAZ : 0)
                                       | (iFz  ? X86_MXCSR_FZ  : 0)
                                       | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                        /* Run 2: X86_MXCSR_XCPT_MASK bits cleared. */
                        FpuState.MXCSR = FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, the observed flag bits OR'ed into the input MXCSR. */
                            FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Run 3 reported flags not seen before: add them and rerun with
                                   exactly the mask bits corresponding to the collected flags set. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);
                            }
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                /* Several flags: leave each one in turn out of the mask. */
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FpuState, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5285
5286
5287static void SseBinaryU128R64Test(void)
5288{
5289 X86FXSTATE State;
5290 RT_ZERO(State);
5291 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5292 {
5293 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5294 continue;
5295
5296 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5297 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5298 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5299 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5300 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5301 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5302 {
5303 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5304 {
5305 IEMSSERESULT Res; RT_ZERO(Res);
5306
5307 State.MXCSR = paTests[iTest].fMxcsrIn;
5308 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5309 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5310 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5311 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5312 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5313 "%s -> mxcsr=%#08x %s'%s\n"
5314 "%s expected %#08x %s'%s%s%s (%s)\n",
5315 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5316 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5317 FormatR64(&paTests[iTest].r64Val2),
5318 iVar ? " " : "", Res.MXCSR,
5319 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5320 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5321 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5322 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5323 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5324 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5325 ? " - val" : "",
5326 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5327 }
5328 }
5329 }
5330}
5331
5332
5333/*
5334 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
5335 */
/** Sub-test entry type tying SSE_BINARY_I32_R64_TEST_T records to a
 *  PFNIEMAIMPLSSEF2I32U64 worker.  The code in this file accesses the
 *  pszName, pfn, pfnNative, paTests and pcTests members of such entries. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);

/** The workers iterated by SseBinaryI32R64Generate() and SseBinaryI32R64Test().
 *  NOTE(review): ENTRY_BIN is defined elsewhere; presumably it binds the
 *  named worker to its binary test data - confirm against the macro. */
static const SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
{
    ENTRY_BIN(cvttsd2si_i32_r64),
    ENTRY_BIN(cvtsd2si_i32_r64),
};
5343
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on the input in @a pTestData using the MXCSR currently set
 * in @a pState, fills in the MXCSR in/out and result members of the record
 * and appends the record to @a pStrmOut.
 *
 * The output variables are zeroed before the call, mirroring what
 * SseBinaryI32R64Test() does, so the record never contains indeterminate
 * stack content.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The output stream the record is written to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; only MXCSR is set up by the caller.
 * @param   pTestData   The record holding the input; outputs are stored here.
 */
static uint32_t SseBinaryI32R64GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I32U64 pfn,
                                              X86FXSTATE *pState, SSE_BINARY_I32_R64_TEST_T *pTestData)
{
    uint32_t fMxcsrOut = 0;
    int32_t  i32Out    = 0;
    pfn(pState, &fMxcsrOut, &i32Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for all entries in g_aSseBinaryI32R64.
 *
 * For every input value, records are produced for each combination of the four
 * rounding modes, DAZ and FZ, first with all X86_MXCSR_XCPT_MASK bits set and
 * then with them cleared.  If either run raised exception flags, additional
 * records are produced with varying mask/flag settings (see inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; the entry
 *                          name is passed as its argument.
 * @param   cTests          Number of random input values; raised to at least 192.
 */
static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Prefer the native worker for generating the reference data when there is one. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a fresh random value. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all X86_MXCSR_XCPT_MASK bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: X86_MXCSR_XCPT_MASK bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, the observed flag bits OR'ed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI32R64GenerateRecord(pStrmOut, pfn, &State, &TestData);

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Run 3 reported flags not seen before: add them and rerun with
                                   exactly the mask bits corresponding to the collected flags set. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI32R64GenerateRecord(pStrmOut, pfn, &State, &TestData);
                            }
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                /* Several flags: leave each one in turn out of the mask. */
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI32R64GenerateRecord(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5458
5459
5460static void SseBinaryI32R64Test(void)
5461{
5462 X86FXSTATE State;
5463 RT_ZERO(State);
5464 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5465 {
5466 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R64[iFn].pszName))
5467 continue;
5468
5469 uint32_t const cTests = *g_aSseBinaryI32R64[iFn].pcTests;
5470 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5471 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5472 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5473 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5474 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5475 {
5476 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R64_TEST_T); iTest++)
5477 {
5478 uint32_t fMxcsr = 0;
5479 int32_t i32Dst = 0;
5480
5481 State.MXCSR = paTests[iTest].fMxcsrIn;
5482 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5483 if ( fMxcsr != paTests[iTest].fMxcsrOut
5484 || i32Dst != paTests[iTest].i32ValOut)
5485 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5486 "%s -> mxcsr=%#08x %RI32\n"
5487 "%s expected %#08x %RI32%s%s (%s)\n",
5488 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5489 FormatR64(&paTests[iTest].r64ValIn),
5490 iVar ? " " : "", fMxcsr, i32Dst,
5491 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5492 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5493 i32Dst != paTests[iTest].i32ValOut
5494 ? " - val" : "",
5495 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5496 }
5497 }
5498 }
5499}
5500
5501
5502/*
5503 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
5504 */
/** Sub-test entry type tying SSE_BINARY_I64_R64_TEST_T records to a
 *  PFNIEMAIMPLSSEF2I64U64 worker.  SseBinaryI64R64Generate() accesses the
 *  pszName, pfn and pfnNative members of such entries. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/** The workers iterated by SseBinaryI64R64Generate().
 *  NOTE(review): ENTRY_BIN is defined elsewhere; presumably it binds the
 *  named worker to its binary test data - confirm against the macro. */
static const SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
5512
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker with the MXCSR currently held in @a pState, stores the input
 * MXCSR plus both outputs in @a pTestData and appends the record to the stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state supplying the input MXCSR.
 * @param   pTestData   The record; r64ValIn must already be set.
 */
static uint32_t SseBinaryI64R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I64U64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I64_R64_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int64_t  i64Out;
    pfn(pState, &fMxcsrOut, &i64Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i64ValOut = i64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryI64R64 sub-tests.
 *
 * For every input value all rounding mode / DAZ / FZ combinations are run,
 * each with several exception mask configurations (see the inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs; raised to at
 *                          least 192.
 */
static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative
                                         ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest++)
        {
            SSE_BINARY_I64_R64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials after them. */
            if (iTest < cTests)
                TestData.r64ValIn = RandR64Src(iTest);
            else
                TestData.r64ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a new random value. */
                iTest--;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1     = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        uint32_t const fXcptFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Run 4: if run 3 produced flags not yet in fXcpt, add them and mask the lot. */
                        if ((fXcptFlags1 & fXcpt) != fXcptFlags1)
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Run 5+: with more than one flag, leave each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5627
5628
5629static void SseBinaryI64R64Test(void)
5630{
5631 X86FXSTATE State;
5632 RT_ZERO(State);
5633 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
5634 {
5635 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R64[iFn].pszName))
5636 continue;
5637
5638 uint32_t const cTests = *g_aSseBinaryI64R64[iFn].pcTests;
5639 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
5640 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
5641 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5642 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5643 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5644 {
5645 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R64_TEST_T); iTest++)
5646 {
5647 uint32_t fMxcsr = 0;
5648 int64_t i64Dst = 0;
5649
5650 State.MXCSR = paTests[iTest].fMxcsrIn;
5651 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
5652 if ( fMxcsr != paTests[iTest].fMxcsrOut
5653 || i64Dst != paTests[iTest].i64ValOut)
5654 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5655 "%s -> mxcsr=%#08x %RI64\n"
5656 "%s expected %#08x %RI64%s%s (%s)\n",
5657 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5658 FormatR64(&paTests[iTest].r64ValIn),
5659 iVar ? " " : "", fMxcsr, i64Dst,
5660 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5661 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5662 i64Dst != paTests[iTest].i64ValOut
5663 ? " - val" : "",
5664 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5665 }
5666 }
5667 }
5668}
5669
5670
5671/*
5672 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
5673 */
/* Declares the sub-test entry type SSE_BINARY_I32_R32_T for records of type
   SSE_BINARY_I32_R32_TEST_T and workers of type PFNIEMAIMPLSSEF2I32U32.  The
   code below accesses its pszName, pfn, pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);

/** The single-precision -> signed 32-bit integer conversion sub-tests
 *  (truncating and non-truncating variant). */
static const SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
{
    ENTRY_BIN(cvttss2si_i32_r32),
    ENTRY_BIN(cvtss2si_i32_r32),
};
5681
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker with the MXCSR currently held in @a pState, stores the input
 * MXCSR plus both outputs in @a pTestData and appends the record to the stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state supplying the input MXCSR.
 * @param   pTestData   The record; r32ValIn must already be set.
 */
static uint32_t SseBinaryI32R32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I32U32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I32_R32_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int32_t  i32Out;
    pfn(pState, &fMxcsrOut, &i32Out, &pTestData->r32ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryI32R32 sub-tests.
 *
 * For every input value all rounding mode / DAZ / FZ combinations are run,
 * each with several exception mask configurations (see the inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs; raised to at
 *                          least 192.
 */
static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative
                                         ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest++)
        {
            SSE_BINARY_I32_R32_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials after them. */
            if (iTest < cTests)
                TestData.r32ValIn = RandR32Src(iTest);
            else
                TestData.r32ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a new random value. */
                iTest--;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1     = SseBinaryI32R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        uint32_t const fXcptFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Run 4: if run 3 produced flags not yet in fXcpt, add them and mask the lot. */
                        if ((fXcptFlags1 & fXcpt) != fXcptFlags1)
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryI32R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Run 5+: with more than one flag, leave each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryI32R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5796
5797
5798static void SseBinaryI32R32Test(void)
5799{
5800 X86FXSTATE State;
5801 RT_ZERO(State);
5802 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
5803 {
5804 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R32[iFn].pszName))
5805 continue;
5806
5807 uint32_t const cTests = *g_aSseBinaryI32R32[iFn].pcTests;
5808 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
5809 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
5810 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
5811 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5812 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5813 {
5814 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R32_TEST_T); iTest++)
5815 {
5816 uint32_t fMxcsr = 0;
5817 int32_t i32Dst = 0;
5818
5819 State.MXCSR = paTests[iTest].fMxcsrIn;
5820 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
5821 if ( fMxcsr != paTests[iTest].fMxcsrOut
5822 || i32Dst != paTests[iTest].i32ValOut)
5823 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5824 "%s -> mxcsr=%#08x %RI32\n"
5825 "%s expected %#08x %RI32%s%s (%s)\n",
5826 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5827 FormatR32(&paTests[iTest].r32ValIn),
5828 iVar ? " " : "", fMxcsr, i32Dst,
5829 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5830 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5831 i32Dst != paTests[iTest].i32ValOut
5832 ? " - val" : "",
5833 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5834 }
5835 }
5836 }
5837}
5838
5839
5840/*
5841 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
5842 */
/* Declares the sub-test entry type SSE_BINARY_I64_R32_T for records of type
   SSE_BINARY_I64_R32_TEST_T and workers of type PFNIEMAIMPLSSEF2I64U32.  The
   code below accesses its pszName, pfn, pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);

/** The single-precision -> signed 64-bit integer conversion sub-tests
 *  (truncating and non-truncating variant). */
static const SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
{
    ENTRY_BIN(cvttss2si_i64_r32),
    ENTRY_BIN(cvtss2si_i64_r32),
};
5850
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker with the MXCSR currently held in @a pState, stores the input
 * MXCSR plus both outputs in @a pTestData and appends the record to the stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state supplying the input MXCSR.
 * @param   pTestData   The record; r32ValIn must already be set.
 */
static uint32_t SseBinaryI64R32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I64U32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I64_R32_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int64_t  i64Out;
    pfn(pState, &fMxcsrOut, &i64Out, &pTestData->r32ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i64ValOut = i64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryI64R32 sub-tests.
 *
 * For every input value all rounding mode / DAZ / FZ combinations are run,
 * each with several exception mask configurations (see the inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs; raised to at
 *                          least 192.
 */
static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative
                                         ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest++)
        {
            SSE_BINARY_I64_R32_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials after them. */
            if (iTest < cTests)
                TestData.r32ValIn = RandR32Src(iTest);
            else
                TestData.r32ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a new random value. */
                iTest--;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI64R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI64R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1     = SseBinaryI64R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        uint32_t const fXcptFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Run 4: if run 3 produced flags not yet in fXcpt, add them and mask the lot. */
                        if ((fXcptFlags1 & fXcpt) != fXcptFlags1)
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryI64R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Run 5+: with more than one flag, leave each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryI64R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5965
5966
5967static void SseBinaryI64R32Test(void)
5968{
5969 X86FXSTATE State;
5970 RT_ZERO(State);
5971 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
5972 {
5973 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R32[iFn].pszName))
5974 continue;
5975
5976 uint32_t const cTests = *g_aSseBinaryI64R32[iFn].pcTests;
5977 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
5978 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
5979 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
5980 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5981 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5982 {
5983 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R32_TEST_T); iTest++)
5984 {
5985 uint32_t fMxcsr = 0;
5986 int64_t i64Dst = 0;
5987
5988 State.MXCSR = paTests[iTest].fMxcsrIn;
5989 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
5990 if ( fMxcsr != paTests[iTest].fMxcsrOut
5991 || i64Dst != paTests[iTest].i64ValOut)
5992 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5993 "%s -> mxcsr=%#08x %RI64\n"
5994 "%s expected %#08x %RI64%s%s (%s)\n",
5995 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5996 FormatR32(&paTests[iTest].r32ValIn),
5997 iVar ? " " : "", fMxcsr, i64Dst,
5998 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5999 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6000 i64Dst != paTests[iTest].i64ValOut
6001 ? " - val" : "",
6002 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6003 }
6004 }
6005 }
6006}
6007
6008
6009/*
6010 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6011 */
/* Declares the sub-test entry type SSE_BINARY_R64_I32_T for records of type
   SSE_BINARY_R64_I32_TEST_T and workers of type PFNIEMAIMPLSSEF2R64I32.  The
   code below accesses its pszName, pfn, pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);

/** The signed 32-bit integer -> double-precision conversion sub-tests. */
static const SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i32)
};
6018
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker with the MXCSR currently held in @a pState, stores the input
 * MXCSR plus both outputs in @a pTestData and appends the record to the stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state supplying the input MXCSR.
 * @param   pTestData   The record; i32ValIn must already be set.
 */
static uint32_t SseBinaryR64I32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2R64I32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_R64_I32_TEST_T *pTestData)
{
    uint32_t   fMxcsrOut;
    RTFLOAT64U r64Out;
    pfn(pState, &fMxcsrOut, &r64Out, &pTestData->i32ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r64ValOut = r64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryR64I32 sub-tests.
 *
 * For every input value all rounding mode / DAZ / FZ combinations are run,
 * each with several exception mask configurations (see the inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs; raised to at
 *                          least 192.
 */
static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative
                                         ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest++)
        {
            SSE_BINARY_R64_I32_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials after them. */
            if (iTest < cTests)
                TestData.i32ValIn = RandI32Src2(iTest);
            else
                TestData.i32ValIn = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64I32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64I32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1     = SseBinaryR64I32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        uint32_t const fXcptFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Run 4: if run 3 produced flags not yet in fXcpt, add them and mask the lot. */
                        if ((fXcptFlags1 & fXcpt) != fXcptFlags1)
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryR64I32GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Run 5+: with more than one flag, leave each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryR64I32GenerateOne(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6124
6125
6126static void SseBinaryR64I32Test(void)
6127{
6128 X86FXSTATE State;
6129 RT_ZERO(State);
6130 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6131 {
6132 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I32[iFn].pszName))
6133 continue;
6134
6135 uint32_t const cTests = *g_aSseBinaryR64I32[iFn].pcTests;
6136 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6137 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6138 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6139 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6140 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6141 {
6142 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I32_TEST_T); iTest++)
6143 {
6144 uint32_t fMxcsr = 0;
6145 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6146
6147 State.MXCSR = paTests[iTest].fMxcsrIn;
6148 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6149 if ( fMxcsr != paTests[iTest].fMxcsrOut
6150 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6151 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6152 "%s -> mxcsr=%#08x %s\n"
6153 "%s expected %#08x %s%s%s (%s)\n",
6154 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6155 &paTests[iTest].i32ValIn,
6156 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6157 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6158 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6159 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6160 ? " - val" : "",
6161 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6162 }
6163 }
6164 }
6165}
6166
6167
6168/*
6169 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6170 */
/* Declares the sub-test entry type SSE_BINARY_R64_I64_T for records of type
   SSE_BINARY_R64_I64_TEST_T and workers of type PFNIEMAIMPLSSEF2R64I64.  The
   code below accesses its pszName, pfn, pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);

/** The signed 64-bit integer -> double-precision conversion sub-tests. */
static const SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i64),
};
6177
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker with the MXCSR currently held in @a pState, stores the input
 * MXCSR plus both outputs in @a pTestData and appends the record to the stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state supplying the input MXCSR.
 * @param   pTestData   The record; i64ValIn must already be set.
 */
static uint32_t SseBinaryR64I64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2R64I64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_R64_I64_TEST_T *pTestData)
{
    uint32_t   fMxcsrOut;
    RTFLOAT64U r64Out;
    pfn(pState, &fMxcsrOut, &r64Out, &pTestData->i64ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r64ValOut = r64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryR64I64 sub-tests.
 *
 * For every input value all rounding mode / DAZ / FZ combinations are run,
 * each with several exception mask configurations (see the inline comments).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs; raised to at
 *                          least 192.
 */
static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative
                                         ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest++)
        {
            SSE_BINARY_R64_I64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials after them. */
            if (iTest < cTests)
                TestData.i64ValIn = RandI64Src(iTest);
            else
                TestData.i64ValIn = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64I64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64I64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1     = SseBinaryR64I64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        uint32_t const fXcptFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Run 4: if run 3 produced flags not yet in fXcpt, add them and mask the lot. */
                        if ((fXcptFlags1 & fXcpt) != fXcptFlags1)
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryR64I64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Run 5+: with more than one flag, leave each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryR64I64GenerateOne(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6283
6284
6285static void SseBinaryR64I64Test(void)
6286{
6287 X86FXSTATE State;
6288 RT_ZERO(State);
6289 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6290 {
6291 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I64[iFn].pszName))
6292 continue;
6293
6294 uint32_t const cTests = *g_aSseBinaryR64I64[iFn].pcTests;
6295 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6296 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6297 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6298 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6299 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6300 {
6301 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I64_TEST_T); iTest++)
6302 {
6303 uint32_t fMxcsr = 0;
6304 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6305
6306 State.MXCSR = paTests[iTest].fMxcsrIn;
6307 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6308 if ( fMxcsr != paTests[iTest].fMxcsrOut
6309 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6310 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6311 "%s -> mxcsr=%#08x %s\n"
6312 "%s expected %#08x %s%s%s (%s)\n",
6313 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6314 &paTests[iTest].i64ValIn,
6315 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6316 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6317 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6318 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6319 ? " - val" : "",
6320 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6321 }
6322 }
6323 }
6324}
6325
6326
6327/*
6328 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6329 */
/* Declares the sub-test entry type SSE_BINARY_R32_I32_T for records of type
   SSE_BINARY_R32_I32_TEST_T and workers of type PFNIEMAIMPLSSEF2R32I32.  The
   code below accesses its pszName, pfn and pfnNative members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);

/** The signed 32-bit integer -> single-precision conversion sub-tests. */
static const SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
{
    ENTRY_BIN(cvtsi2ss_r32_i32),
};
6336
6337#ifdef TSTIEMAIMPL_WITH_GENERATOR
6338static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6339{
6340 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6341
6342 static int32_t const s_aSpecials[] =
6343 {
6344 INT32_MIN,
6345 INT32_MAX,
6346 /** @todo More specials. */
6347 };
6348
6349 X86FXSTATE State;
6350 RT_ZERO(State);
6351 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6352 {
6353 PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;
6354
6355 PRTSTREAM pStrmOut = NULL;
6356 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32I32[iFn].pszName);
6357 if (RT_FAILURE(rc))
6358 {
6359 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
6360 return RTEXITCODE_FAILURE;
6361 }
6362
6363 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6364 {
6365 SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);
6366
6367 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6368
6369 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6370 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6371 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6372 for (uint8_t iFz = 0; iFz < 2; iFz++)
6373 {
6374 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6375 | (iRounding << X86_MXCSR_RC_SHIFT)
6376 | (iDaz ? X86_MXCSR_DAZ : 0)
6377 | (iFz ? X86_MXCSR_FZ : 0)
6378 | X86_MXCSR_XCPT_MASK;
6379 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6380 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i32ValIn);
6381 TestData.fMxcsrIn = State.MXCSR;
6382 TestData.fMxcsrOut = fMxcsrM;
6383 TestData.r32ValOut = r32OutM;
6384 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6385
6386 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6387 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6388 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i32ValIn);
6389 TestData.fMxcsrIn = State.MXCSR;
6390 TestData.fMxcsrOut = fMxcsrU;
6391 TestData.r32ValOut = r32OutU;
6392 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6393
6394 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6395 if (fXcpt)
6396 {
6397 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6398 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6399 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i32ValIn);
6400 TestData.fMxcsrIn = State.MXCSR;
6401 TestData.fMxcsrOut = fMxcsr1;
6402 TestData.r32ValOut = r32Out1;
6403 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6404
6405 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6406 {
6407 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6408 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6409 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6410 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i32ValIn);
6411 TestData.fMxcsrIn = State.MXCSR;
6412 TestData.fMxcsrOut = fMxcsr2;
6413 TestData.r32ValOut = r32Out2;
6414 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6415 }
6416 if (!RT_IS_POWER_OF_TWO(fXcpt))
6417 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6418 if (fUnmasked & fXcpt)
6419 {
6420 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6421 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6422 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i32ValIn);
6423 TestData.fMxcsrIn = State.MXCSR;
6424 TestData.fMxcsrOut = fMxcsr3;
6425 TestData.r32ValOut = r32Out3;
6426 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6427 }
6428 }
6429 }
6430 }
6431 rc = RTStrmClose(pStrmOut);
6432 if (RT_FAILURE(rc))
6433 {
6434 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
6435 return RTEXITCODE_FAILURE;
6436 }
6437 }
6438
6439 return RTEXITCODE_SUCCESS;
6440}
6441#endif
6442
6443
6444static void SseBinaryR32I32Test(void)
6445{
6446 X86FXSTATE State;
6447 RT_ZERO(State);
6448 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6449 {
6450 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I32[iFn].pszName))
6451 continue;
6452
6453 uint32_t const cTests = *g_aSseBinaryR32I32[iFn].pcTests;
6454 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6455 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6456 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6457 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6458 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6459 {
6460 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I32_TEST_T); iTest++)
6461 {
6462 uint32_t fMxcsr = 0;
6463 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6464
6465 State.MXCSR = paTests[iTest].fMxcsrIn;
6466 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6467 if ( fMxcsr != paTests[iTest].fMxcsrOut
6468 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6469 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6470 "%s -> mxcsr=%#08x %RI32\n"
6471 "%s expected %#08x %RI32%s%s (%s)\n",
6472 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6473 &paTests[iTest].i32ValIn,
6474 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6475 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6476 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6477 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6478 ? " - val" : "",
6479 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6480 }
6481 }
6482 }
6483}
6484
6485
6486/*
6487 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6488 */
6489TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6490
6491static const SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6492{
6493 ENTRY_BIN(cvtsi2ss_r32_i64),
6494};
6495
6496#ifdef TSTIEMAIMPL_WITH_GENERATOR
6497static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6498{
6499 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6500
6501 static int64_t const s_aSpecials[] =
6502 {
6503 INT64_MIN,
6504 INT64_MAX
6505 /** @todo More specials. */
6506 };
6507
6508 X86FXSTATE State;
6509 RT_ZERO(State);
6510 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6511 {
6512 PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;
6513
6514 PRTSTREAM pStrmOut = NULL;
6515 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32I64[iFn].pszName);
6516 if (RT_FAILURE(rc))
6517 {
6518 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32I64[iFn].pszName, rc);
6519 return RTEXITCODE_FAILURE;
6520 }
6521
6522 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6523 {
6524 SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);
6525
6526 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6527
6528 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6529 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6530 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6531 for (uint8_t iFz = 0; iFz < 2; iFz++)
6532 {
6533 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6534 | (iRounding << X86_MXCSR_RC_SHIFT)
6535 | (iDaz ? X86_MXCSR_DAZ : 0)
6536 | (iFz ? X86_MXCSR_FZ : 0)
6537 | X86_MXCSR_XCPT_MASK;
6538 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6539 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i64ValIn);
6540 TestData.fMxcsrIn = State.MXCSR;
6541 TestData.fMxcsrOut = fMxcsrM;
6542 TestData.r32ValOut = r32OutM;
6543 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6544
6545 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6546 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6547 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i64ValIn);
6548 TestData.fMxcsrIn = State.MXCSR;
6549 TestData.fMxcsrOut = fMxcsrU;
6550 TestData.r32ValOut = r32OutU;
6551 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6552
6553 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6554 if (fXcpt)
6555 {
6556 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6557 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6558 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i64ValIn);
6559 TestData.fMxcsrIn = State.MXCSR;
6560 TestData.fMxcsrOut = fMxcsr1;
6561 TestData.r32ValOut = r32Out1;
6562 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6563
6564 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6565 {
6566 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6567 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6568 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6569 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i64ValIn);
6570 TestData.fMxcsrIn = State.MXCSR;
6571 TestData.fMxcsrOut = fMxcsr2;
6572 TestData.r32ValOut = r32Out2;
6573 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6574 }
6575 if (!RT_IS_POWER_OF_TWO(fXcpt))
6576 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6577 if (fUnmasked & fXcpt)
6578 {
6579 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6580 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6581 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i64ValIn);
6582 TestData.fMxcsrIn = State.MXCSR;
6583 TestData.fMxcsrOut = fMxcsr3;
6584 TestData.r32ValOut = r32Out3;
6585 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6586 }
6587 }
6588 }
6589 }
6590 rc = RTStrmClose(pStrmOut);
6591 if (RT_FAILURE(rc))
6592 {
6593 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32I64[iFn].pszName, rc);
6594 return RTEXITCODE_FAILURE;
6595 }
6596 }
6597
6598 return RTEXITCODE_SUCCESS;
6599}
6600#endif
6601
6602
6603static void SseBinaryR32I64Test(void)
6604{
6605 X86FXSTATE State;
6606 RT_ZERO(State);
6607 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6608 {
6609 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I64[iFn].pszName))
6610 continue;
6611
6612 uint32_t const cTests = *g_aSseBinaryR32I64[iFn].pcTests;
6613 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
6614 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
6615 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
6616 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6617 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6618 {
6619 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I64_TEST_T); iTest++)
6620 {
6621 uint32_t fMxcsr = 0;
6622 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6623
6624 State.MXCSR = paTests[iTest].fMxcsrIn;
6625 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
6626 if ( fMxcsr != paTests[iTest].fMxcsrOut
6627 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6628 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6629 "%s -> mxcsr=%#08x %RI32\n"
6630 "%s expected %#08x %RI32%s%s (%s)\n",
6631 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6632 &paTests[iTest].i64ValIn,
6633 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6634 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6635 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6636 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6637 ? " - val" : "",
6638 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6639 }
6640 }
6641 }
6642}
6643
6644
6645/*
6646 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
6647 */
6648TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6649
6650static const SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
6651{
6652 ENTRY_BIN(ucomiss_u128),
6653 ENTRY_BIN(comiss_u128),
6654 ENTRY_BIN_AVX(vucomiss_u128),
6655 ENTRY_BIN_AVX(vcomiss_u128),
6656};
6657
6658#ifdef TSTIEMAIMPL_WITH_GENERATOR
6659static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
6660{
6661 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6662
6663 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
6664 {
6665 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
6666 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
6667 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
6668 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
6669 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
6670 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
6671 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
6672 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
6673 /** @todo More specials. */
6674 };
6675
6676 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6677 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
6678 {
6679 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;
6680
6681 PRTSTREAM pStrmOut = NULL;
6682 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareEflR32R32[iFn].pszName);
6683 if (RT_FAILURE(rc))
6684 {
6685 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareEflR32R32[iFn].pszName, rc);
6686 return RTEXITCODE_FAILURE;
6687 }
6688
6689 uint32_t cNormalInputPairs = 0;
6690 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6691 {
6692 SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);
6693 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6694 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6695
6696 TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
6697 TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
6698
6699 ValIn1.ar32[0] = TestData.r32ValIn1;
6700 ValIn2.ar32[0] = TestData.r32ValIn2;
6701
6702 if ( RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
6703 && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
6704 cNormalInputPairs++;
6705 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6706 {
6707 iTest -= 1;
6708 continue;
6709 }
6710
6711 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6712 uint32_t const fEFlags = RandEFlags();
6713 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6714 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6715 for (uint8_t iFz = 0; iFz < 2; iFz++)
6716 {
6717 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6718 | (iRounding << X86_MXCSR_RC_SHIFT)
6719 | (iDaz ? X86_MXCSR_DAZ : 0)
6720 | (iFz ? X86_MXCSR_FZ : 0)
6721 | X86_MXCSR_XCPT_MASK;
6722 uint32_t fMxcsrM = fMxcsrIn;
6723 uint32_t fEFlagsM = fEFlags;
6724 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
6725 TestData.fMxcsrIn = fMxcsrIn;
6726 TestData.fMxcsrOut = fMxcsrM;
6727 TestData.fEflIn = fEFlags;
6728 TestData.fEflOut = fEFlagsM;
6729 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6730
6731 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
6732 uint32_t fMxcsrU = fMxcsrIn;
6733 uint32_t fEFlagsU = fEFlags;
6734 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
6735 TestData.fMxcsrIn = fMxcsrIn;
6736 TestData.fMxcsrOut = fMxcsrU;
6737 TestData.fEflIn = fEFlags;
6738 TestData.fEflOut = fEFlagsU;
6739 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6740
6741 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6742 if (fXcpt)
6743 {
6744 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6745 uint32_t fMxcsr1 = fMxcsrIn;
6746 uint32_t fEFlags1 = fEFlags;
6747 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
6748 TestData.fMxcsrIn = fMxcsrIn;
6749 TestData.fMxcsrOut = fMxcsr1;
6750 TestData.fEflIn = fEFlags;
6751 TestData.fEflOut = fEFlags1;
6752 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6753
6754 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6755 {
6756 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6757 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6758 uint32_t fMxcsr2 = fMxcsrIn;
6759 uint32_t fEFlags2 = fEFlags;
6760 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
6761 TestData.fMxcsrIn = fMxcsrIn;
6762 TestData.fMxcsrOut = fMxcsr2;
6763 TestData.fEflIn = fEFlags;
6764 TestData.fEflOut = fEFlags2;
6765 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6766 }
6767 if (!RT_IS_POWER_OF_TWO(fXcpt))
6768 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6769 if (fUnmasked & fXcpt)
6770 {
6771 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6772 uint32_t fMxcsr3 = fMxcsrIn;
6773 uint32_t fEFlags3 = fEFlags;
6774 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
6775 TestData.fMxcsrIn = fMxcsrIn;
6776 TestData.fMxcsrOut = fMxcsr3;
6777 TestData.fEflIn = fEFlags;
6778 TestData.fEflOut = fEFlags3;
6779 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6780 }
6781 }
6782 }
6783 }
6784 rc = RTStrmClose(pStrmOut);
6785 if (RT_FAILURE(rc))
6786 {
6787 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareEflR32R32[iFn].pszName, rc);
6788 return RTEXITCODE_FAILURE;
6789 }
6790 }
6791
6792 return RTEXITCODE_SUCCESS;
6793}
6794#endif
6795
6796static void SseCompareEflR32R32Test(void)
6797{
6798 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
6799 {
6800 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR32R32[iFn].pszName))
6801 continue;
6802
6803 uint32_t const cTests = *g_aSseCompareEflR32R32[iFn].pcTests;
6804 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
6805 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
6806 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
6807 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6808 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6809 {
6810 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R32_R32_TEST_T); iTest++)
6811 {
6812 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6813 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6814
6815 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
6816 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
6817 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6818 uint32_t fEFlags = paTests[iTest].fEflIn;
6819 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6820 if ( fMxcsr != paTests[iTest].fMxcsrOut
6821 || fEFlags != paTests[iTest].fEflOut)
6822 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6823 "%s -> mxcsr=%#08x %#08x\n"
6824 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6825 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6826 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
6827 iVar ? " " : "", fMxcsr, fEFlags,
6828 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6829 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6830 FormatMxcsr(paTests[iTest].fMxcsrIn),
6831 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6832 }
6833 }
6834 }
6835}
6836
6837
6838/*
6839 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
6840 */
6841TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6842
6843static const SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
6844{
6845 ENTRY_BIN(ucomisd_u128),
6846 ENTRY_BIN(comisd_u128),
6847 ENTRY_BIN_AVX(vucomisd_u128),
6848 ENTRY_BIN_AVX(vcomisd_u128)
6849};
6850
6851#ifdef TSTIEMAIMPL_WITH_GENERATOR
6852static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
6853{
6854 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6855
6856 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
6857 {
6858 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
6859 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
6860 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
6861 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
6862 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
6863 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
6864 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
6865 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
6866 /** @todo More specials. */
6867 };
6868
6869 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6870 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
6871 {
6872 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;
6873
6874 PRTSTREAM pStrmOut = NULL;
6875 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareEflR64R64[iFn].pszName);
6876 if (RT_FAILURE(rc))
6877 {
6878 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareEflR64R64[iFn].pszName, rc);
6879 return RTEXITCODE_FAILURE;
6880 }
6881
6882 uint32_t cNormalInputPairs = 0;
6883 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6884 {
6885 SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);
6886 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6887 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6888
6889 TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
6890 TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
6891
6892 ValIn1.ar64[0] = TestData.r64ValIn1;
6893 ValIn2.ar64[0] = TestData.r64ValIn2;
6894
6895 if ( RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
6896 && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
6897 cNormalInputPairs++;
6898 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6899 {
6900 iTest -= 1;
6901 continue;
6902 }
6903
6904 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6905 uint32_t const fEFlags = RandEFlags();
6906 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6907 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6908 for (uint8_t iFz = 0; iFz < 2; iFz++)
6909 {
6910 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6911 | (iRounding << X86_MXCSR_RC_SHIFT)
6912 | (iDaz ? X86_MXCSR_DAZ : 0)
6913 | (iFz ? X86_MXCSR_FZ : 0)
6914 | X86_MXCSR_XCPT_MASK;
6915 uint32_t fMxcsrM = fMxcsrIn;
6916 uint32_t fEFlagsM = fEFlags;
6917 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
6918 TestData.fMxcsrIn = fMxcsrIn;
6919 TestData.fMxcsrOut = fMxcsrM;
6920 TestData.fEflIn = fEFlags;
6921 TestData.fEflOut = fEFlagsM;
6922 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6923
6924 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
6925 uint32_t fMxcsrU = fMxcsrIn;
6926 uint32_t fEFlagsU = fEFlags;
6927 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
6928 TestData.fMxcsrIn = fMxcsrIn;
6929 TestData.fMxcsrOut = fMxcsrU;
6930 TestData.fEflIn = fEFlags;
6931 TestData.fEflOut = fEFlagsU;
6932 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6933
6934 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6935 if (fXcpt)
6936 {
6937 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6938 uint32_t fMxcsr1 = fMxcsrIn;
6939 uint32_t fEFlags1 = fEFlags;
6940 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
6941 TestData.fMxcsrIn = fMxcsrIn;
6942 TestData.fMxcsrOut = fMxcsr1;
6943 TestData.fEflIn = fEFlags;
6944 TestData.fEflOut = fEFlags1;
6945 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6946
6947 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6948 {
6949 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6950 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6951 uint32_t fMxcsr2 = fMxcsrIn;
6952 uint32_t fEFlags2 = fEFlags;
6953 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
6954 TestData.fMxcsrIn = fMxcsrIn;
6955 TestData.fMxcsrOut = fMxcsr2;
6956 TestData.fEflIn = fEFlags;
6957 TestData.fEflOut = fEFlags2;
6958 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6959 }
6960 if (!RT_IS_POWER_OF_TWO(fXcpt))
6961 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6962 if (fUnmasked & fXcpt)
6963 {
6964 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6965 uint32_t fMxcsr3 = fMxcsrIn;
6966 uint32_t fEFlags3 = fEFlags;
6967 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
6968 TestData.fMxcsrIn = fMxcsrIn;
6969 TestData.fMxcsrOut = fMxcsr3;
6970 TestData.fEflIn = fEFlags;
6971 TestData.fEflOut = fEFlags3;
6972 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6973 }
6974 }
6975 }
6976 }
6977 rc = RTStrmClose(pStrmOut);
6978 if (RT_FAILURE(rc))
6979 {
6980 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareEflR64R64[iFn].pszName, rc);
6981 return RTEXITCODE_FAILURE;
6982 }
6983 }
6984
6985 return RTEXITCODE_SUCCESS;
6986}
6987#endif
6988
6989static void SseCompareEflR64R64Test(void)
6990{
6991 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
6992 {
6993 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR64R64[iFn].pszName))
6994 continue;
6995
6996 uint32_t const cTests = *g_aSseCompareEflR64R64[iFn].pcTests;
6997 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
6998 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
6999 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7000 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7001 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7002 {
7003 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R64_R64_TEST_T); iTest++)
7004 {
7005 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7006 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7007
7008 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
7009 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
7010 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7011 uint32_t fEFlags = paTests[iTest].fEflIn;
7012 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7013 if ( fMxcsr != paTests[iTest].fMxcsrOut
7014 || fEFlags != paTests[iTest].fEflOut)
7015 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7016 "%s -> mxcsr=%#08x %#08x\n"
7017 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7018 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7019 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7020 iVar ? " " : "", fMxcsr, fEFlags,
7021 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7022 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7023 FormatMxcsr(paTests[iTest].fMxcsrIn),
7024 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7025 }
7026 }
7027 }
7028}
7029
7030
7031/*
7032 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7033 */
7034/** Maximum immediate to try to keep the testdata size under control (at least a little bit)- */
7035#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
7036
7037TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
7038
7039static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
7040{
7041 ENTRY_BIN(cmpps_u128),
7042 ENTRY_BIN(cmpss_u128)
7043};
7044
7045#ifdef TSTIEMAIMPL_WITH_GENERATOR
7046static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
7047{
7048 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7049
7050 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7051 {
7052 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7053 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7054 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7055 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7056 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7057 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7058 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7059 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7060 /** @todo More specials. */
7061 };
7062
7063 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7064 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7065 {
7066 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7067
7068 PRTSTREAM pStrmOut = NULL;
7069 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareF2XmmR32Imm8[iFn].pszName);
7070 if (RT_FAILURE(rc))
7071 {
7072 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareF2XmmR32Imm8[iFn].pszName, rc);
7073 return RTEXITCODE_FAILURE;
7074 }
7075
7076 uint32_t cNormalInputPairs = 0;
7077 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7078 {
7079 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7080
7081 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7082 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7083 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7084 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7085
7086 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7087 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7088 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7089 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7090
7091 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
7092 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
7093 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
7094 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
7095 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
7096 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
7097 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
7098 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
7099 cNormalInputPairs++;
7100 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7101 {
7102 iTest -= 1;
7103 continue;
7104 }
7105
7106 IEMMEDIAF2XMMSRC Src;
7107 Src.uSrc1 = TestData.InVal1;
7108 Src.uSrc2 = TestData.InVal2;
7109 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7110 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7111 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7112 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7113 for (uint8_t iFz = 0; iFz < 2; iFz++)
7114 {
7115 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7116 | (iRounding << X86_MXCSR_RC_SHIFT)
7117 | (iDaz ? X86_MXCSR_DAZ : 0)
7118 | (iFz ? X86_MXCSR_FZ : 0)
7119 | X86_MXCSR_XCPT_MASK;
7120 uint32_t fMxcsrM = fMxcsrIn;
7121 X86XMMREG ResM;
7122 pfn(&fMxcsrM, &ResM, &Src, bImm);
7123 TestData.fMxcsrIn = fMxcsrIn;
7124 TestData.fMxcsrOut = fMxcsrM;
7125 TestData.bImm = bImm;
7126 TestData.OutVal = ResM;
7127 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7128
7129 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7130 uint32_t fMxcsrU = fMxcsrIn;
7131 X86XMMREG ResU;
7132 pfn(&fMxcsrU, &ResU, &Src, bImm);
7133 TestData.fMxcsrIn = fMxcsrIn;
7134 TestData.fMxcsrOut = fMxcsrU;
7135 TestData.bImm = bImm;
7136 TestData.OutVal = ResU;
7137 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7138
7139 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7140 if (fXcpt)
7141 {
7142 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7143 uint32_t fMxcsr1 = fMxcsrIn;
7144 X86XMMREG Res1;
7145 pfn(&fMxcsr1, &Res1, &Src, bImm);
7146 TestData.fMxcsrIn = fMxcsrIn;
7147 TestData.fMxcsrOut = fMxcsr1;
7148 TestData.bImm = bImm;
7149 TestData.OutVal = Res1;
7150 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7151
7152 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7153 {
7154 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7155 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7156 uint32_t fMxcsr2 = fMxcsrIn;
7157 X86XMMREG Res2;
7158 pfn(&fMxcsr2, &Res2, &Src, bImm);
7159 TestData.fMxcsrIn = fMxcsrIn;
7160 TestData.fMxcsrOut = fMxcsr2;
7161 TestData.bImm = bImm;
7162 TestData.OutVal = Res2;
7163 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7164 }
7165 if (!RT_IS_POWER_OF_TWO(fXcpt))
7166 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7167 if (fUnmasked & fXcpt)
7168 {
7169 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7170 uint32_t fMxcsr3 = fMxcsrIn;
7171 X86XMMREG Res3;
7172 pfn(&fMxcsr3, &Res3, &Src, bImm);
7173 TestData.fMxcsrIn = fMxcsrIn;
7174 TestData.fMxcsrOut = fMxcsr3;
7175 TestData.bImm = bImm;
7176 TestData.OutVal = Res3;
7177 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7178 }
7179 }
7180 }
7181 }
7182 rc = RTStrmClose(pStrmOut);
7183 if (RT_FAILURE(rc))
7184 {
7185 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareF2XmmR32Imm8[iFn].pszName, rc);
7186 return RTEXITCODE_FAILURE;
7187 }
7188 }
7189
7190 return RTEXITCODE_SUCCESS;
7191}
7192#endif
7193
7194static void SseCompareF2XmmR32Imm8Test(void)
7195{
7196 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7197 {
7198 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR32Imm8[iFn].pszName))
7199 continue;
7200
7201 uint32_t const cTests = *g_aSseCompareF2XmmR32Imm8[iFn].pcTests;
7202 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7203 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7204 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7205 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7206 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7207 {
7208 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7209 {
7210 IEMMEDIAF2XMMSRC Src;
7211 X86XMMREG ValOut;
7212
7213 Src.uSrc1 = paTests[iTest].InVal1;
7214 Src.uSrc2 = paTests[iTest].InVal2;
7215 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7216 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7217 if ( fMxcsr != paTests[iTest].fMxcsrOut
7218 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7219 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7220 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7221 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7222 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7223 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7224 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7225 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7226 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7227 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7228 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7229 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7230 paTests[iTest].bImm,
7231 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7232 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7233 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7234 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7235 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7236 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7237 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7238 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7239 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7240 ? " - val" : "",
7241 FormatMxcsr(paTests[iTest].fMxcsrIn));
7242 }
7243 }
7244 }
7245}
7246
7247
7248/*
7249 * Compare SSE operations on packed and scalar double-precision floating point values - outputting a mask.
7250 */
/* Sub-test table for the double-precision compare helpers (cmppd, cmpsd).
   SseCompareF2XmmR64Imm8Generate and SseCompareF2XmmR64Imm8Test iterate over it
   and read each entry's pszName, pfn, pfnNative, pcTests and paTests members.
   NOTE(review): ENTRY_BIN is defined elsewhere; presumably it ties the named
   helper to its binary test data - confirm. */
7251 static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7252 {
7253 ENTRY_BIN(cmppd_u128),
7254 ENTRY_BIN(cmpsd_u128)
7255 };
7256
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the compare worker once and appends the resulting record to the
 * data file.
 *
 * @returns The MXCSR value left behind by the worker.
 * @param   pStrmOut    Stream the record is written to.
 * @param   pfn         Worker to invoke.
 * @param   pTestData   Record whose input values are already set; the MXCSR
 *                      in/out, imm8 and output value members are updated here.
 * @param   pSrc        The source operand pair passed to the worker.
 * @param   fMxcsrIn    MXCSR value the worker starts out with.
 * @param   bImm        The imm8 operand.
 */
static uint32_t sseCompareF2XmmR64Imm8GenRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRF2XMMIMM8 pfn,
                                                SSE_COMPARE_F2_XMM_IMM8_TEST_T *pTestData, IEMMEDIAF2XMMSRC *pSrc,
                                                uint32_t fMxcsrIn, uint8_t bImm)
{
    uint32_t  fMxcsrOut = fMxcsrIn;
    X86XMMREG Result;
    pfn(&fMxcsrOut, &Result, pSrc, bImm);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->bImm      = bImm;
    pTestData->OutVal    = Result;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the sub-tests in
 * g_aSseCompareF2XmmR64Imm8.
 *
 * For every sub-test a file named after pszDataFileFmt + sub-test name is
 * written.  Inputs are cTests random value sets followed by the fixed special
 * pairs; each input is run for every imm8 value, rounding mode and DAZ/FZ
 * combination, with several exception mask/flag setups per combination.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; takes the
 *                          sub-test name as its string argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192).
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        SSE_COMPARE_F2_XMM_IMM8_T const * const pSubTest = &g_aSseCompareF2XmmR64Imm8[iFn];

        /* Prefer the native worker when one is available. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = pSubTest->pfnNative ? pSubTest->pfnNative : pSubTest->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pSubTest->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        uint32_t iTest             = 0;
        while (iTest < cTotalInputs)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed special pairs afterwards. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.InVal2.ar64[0] = RandR64Src(iTest);
                TestData.InVal2.ar64[1] = RandR64Src(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].Val1;
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].Val1;
                TestData.InVal2.ar64[0] = s_aSpecials[iTest - cTests].Val2;
                TestData.InVal2.ar64[1] = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
                continue; /* Still short on all-normal inputs near the end of the random range: draw again for this slot. */

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* 1st run: all exception mask bits set. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz  ? X86_MXCSR_FZ  : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            uint32_t const fOutMasked = sseCompareF2XmmR64Imm8GenRecord(pStrmOut, pfn, &TestData, &Src,
                                                                                        fMxcsrIn, bImm);

                            /* 2nd run: all exception mask bits clear. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t const fOutUnmasked = sseCompareF2XmmR64Imm8GenRecord(pStrmOut, pfn, &TestData, &Src,
                                                                                          fMxcsrIn, bImm);

                            uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            /* 3rd run: mask bits clear, the flags seen so far OR'ed into the input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fOutFlagged = sseCompareF2XmmR64Imm8GenRecord(pStrmOut, pfn, &TestData, &Src,
                                                                                         fMxcsrIn, bImm);

                            /* 4th run (only if new flags showed up): mask exactly the flags collected. */
                            if (((fOutFlagged & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fOutFlagged & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                sseCompareF2XmmR64Imm8GenRecord(pStrmOut, pfn, &TestData, &Src, fMxcsrIn, bImm);
                            }

                            /* With more than one flag collected: leave each of them unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        sseCompareF2XmmR64Imm8GenRecord(pStrmOut, pfn, &TestData, &Src, fMxcsrIn, bImm);
                                    }
                        }

            iTest++;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7397
7398static void SseCompareF2XmmR64Imm8Test(void)
7399{
7400 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7401 {
7402 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR64Imm8[iFn].pszName))
7403 continue;
7404
7405 uint32_t const cTests = *g_aSseCompareF2XmmR64Imm8[iFn].pcTests;
7406 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7407 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7408 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7409 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7410 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7411 {
7412 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7413 {
7414 IEMMEDIAF2XMMSRC Src;
7415 X86XMMREG ValOut;
7416
7417 Src.uSrc1 = paTests[iTest].InVal1;
7418 Src.uSrc2 = paTests[iTest].InVal2;
7419 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7420 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7421 if ( fMxcsr != paTests[iTest].fMxcsrOut
7422 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7423 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7424 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7425 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7426 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7427 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7428 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7429 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7430 paTests[iTest].bImm,
7431 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7432 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7433 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7434 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7435 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7436 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7437 ? " - val" : "",
7438 FormatMxcsr(paTests[iTest].fMxcsrIn));
7439 }
7440 }
7441 }
7442}
7443
7444
7445/*
7446 * Convert SSE operations converting signed double-words to single-precision floating point values.
7447 */
/* NOTE(review): TYPEDEF_SUBTEST_TYPE is defined elsewhere; presumably it declares the
   sub-test descriptor type SSE_CONVERT_XMM_T for SSE_CONVERT_XMM_TEST_T records run
   through PFNIEMAIMPLFPSSEF2U128 workers - confirm against the macro.  The type is
   shared by all the g_aSseConvertXmm* tables that follow. */
7448 TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7449
/* Sub-test table for cvtdq2ps.  SseConvertXmmI32R32Generate and
   SseConvertXmmI32R32Test iterate over it and read each entry's pszName, pfn,
   pfnNative, pcTests and paTests members. */
7450 static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7451 {
7452 ENTRY_BIN(cvtdq2ps_u128)
7453 };
7454
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the conversion worker once with the MXCSR currently set in @a pState
 * and appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pStrmOut    Stream the record is written to.
 * @param   pfn         Worker to invoke.
 * @param   pState      FPU/SSE state handed to the worker; its MXCSR member is
 *                      read back after the call and stored as the record's
 *                      input MXCSR.
 * @param   pTestData   Record whose InVal is already set; the MXCSR in/out and
 *                      output value members are updated here.
 */
static uint32_t sseConvertXmmI32R32GenRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                             X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the sub-tests in
 * g_aSseConvertXmmI32R32.
 *
 * Inputs are cTests sets of four random 32-bit integers followed by the fixed
 * special values (replicated into all four lanes).  Each input is run for
 * every rounding mode and DAZ/FZ combination, with several exception
 * mask/flag setups per combination.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; takes the
 *                          sub-test name as its string argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192).
 */
static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        SSE_CONVERT_XMM_T const * const pSubTest = &g_aSseConvertXmmI32R32[iFn];

        /* Prefer the native worker when one is available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = pSubTest->pfnNative ? pSubTest->pfnNative : pSubTest->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pSubTest->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random values first, the fixed specials afterwards; lanes are filled in order 0..3. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ai32[iLane] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutMasked = sseConvertXmmI32R32GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 2nd run: all exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutUnmasked = sseConvertXmmI32R32GenRecord(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd run: mask bits clear, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fOutFlagged = sseConvertXmmI32R32GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 4th run (only if new flags showed up): mask exactly the flags collected. */
                        if (((fOutFlagged & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fOutFlagged & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertXmmI32R32GenRecord(pStrmOut, pfn, &State, &TestData);
                        }

                        /* With more than one flag collected: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertXmmI32R32GenRecord(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7567
7568static void SseConvertXmmI32R32Test(void)
7569{
7570 X86FXSTATE State;
7571 RT_ZERO(State);
7572
7573 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7574 {
7575 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R32[iFn].pszName))
7576 continue;
7577
7578 uint32_t const cTests = *g_aSseConvertXmmI32R32[iFn].pcTests;
7579 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7580 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7581 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7582 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7583 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7584 {
7585 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7586 {
7587 IEMSSERESULT Res; RT_ZERO(Res);
7588
7589 State.MXCSR = paTests[iTest].fMxcsrIn;
7590 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7591 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7592 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7593 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7594 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7595 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7596 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7597 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7598 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7599 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7600 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7601 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7602 iVar ? " " : "", Res.MXCSR,
7603 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7604 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7605 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7606 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7607 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7608 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7609 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7610 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7611 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7612 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7613 ? " - val" : "",
7614 FormatMxcsr(paTests[iTest].fMxcsrIn));
7615 }
7616 }
7617 }
7618}
7619
7620
7621/*
7622 * Convert SSE operations converting single-precision floating point values to signed double-words.
7623 */
/* Sub-test table for cvtps2dq and cvttps2dq.  SseConvertXmmR32I32Generate and
   SseConvertXmmR32I32Test iterate over it and read each entry's pszName, pfn,
   pfnNative, pcTests and paTests members.
   NOTE(review): ENTRY_BIN is defined elsewhere; presumably it ties the named
   helper to its binary test data - confirm. */
7624 static const SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7625 {
7626 ENTRY_BIN(cvtps2dq_u128),
7627 ENTRY_BIN(cvttps2dq_u128)
7628 };
7629
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the conversion worker once with the MXCSR currently set in @a pState
 * and appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pStrmOut    Stream the record is written to.
 * @param   pfn         Worker to invoke.
 * @param   pState      FPU/SSE state handed to the worker; its MXCSR member is
 *                      read back after the call and stored as the record's
 *                      input MXCSR.
 * @param   pTestData   Record whose InVal is already set; the MXCSR in/out and
 *                      output value members are updated here.
 */
static uint32_t sseConvertXmmR32I32GenRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                             X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the sub-tests in
 * g_aSseConvertXmmR32I32.
 *
 * Inputs are cTests sets of four random single-precision values followed by
 * the fixed special sets.  Each input is run for every rounding mode and
 * DAZ/FZ combination, with several exception mask/flag setups per combination.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; takes the
 *                          sub-test name as its string argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192).
 */
static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        SSE_CONVERT_XMM_T const * const pSubTest = &g_aSseConvertXmmR32I32[iFn];

        /* Prefer the native worker when one is available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = pSubTest->pfnNative ? pSubTest->pfnNative : pSubTest->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pSubTest->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        uint32_t iTest             = 0;
        while (iTest < cTotalInputs)
        {
            SSE_CONVERT_XMM_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random values first, the fixed specials afterwards; lanes are filled in order 0..3. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ar32[iLane] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[iLane];

            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
                continue; /* Still short on all-normal inputs near the end of the random range: draw again for this slot. */

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutMasked = sseConvertXmmR32I32GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 2nd run: all exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutUnmasked = sseConvertXmmR32I32GenRecord(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd run: mask bits clear, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fOutFlagged = sseConvertXmmR32I32GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 4th run (only if new flags showed up): mask exactly the flags collected. */
                        if (((fOutFlagged & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fOutFlagged & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertXmmR32I32GenRecord(pStrmOut, pfn, &State, &TestData);
                        }

                        /* With more than one flag collected: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertXmmR32I32GenRecord(pStrmOut, pfn, &State, &TestData);
                                }
                    }

            iTest++;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7753
7754static void SseConvertXmmR32I32Test(void)
7755{
7756 X86FXSTATE State;
7757 RT_ZERO(State);
7758
7759 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
7760 {
7761 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32I32[iFn].pszName))
7762 continue;
7763
7764 uint32_t const cTests = *g_aSseConvertXmmR32I32[iFn].pcTests;
7765 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
7766 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
7767 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
7768 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7769 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7770 {
7771 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7772 {
7773 IEMSSERESULT Res; RT_ZERO(Res);
7774
7775 State.MXCSR = paTests[iTest].fMxcsrIn;
7776 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7777 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7778 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7779 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7780 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7781 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7782 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
7783 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
7784 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
7785 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7786 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
7787 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
7788 iVar ? " " : "", Res.MXCSR,
7789 Res.uResult.ai32[0], Res.uResult.ai32[1],
7790 Res.uResult.ai32[2], Res.uResult.ai32[3],
7791 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7792 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
7793 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
7794 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7795 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7796 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7797 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7798 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7799 ? " - val" : "",
7800 FormatMxcsr(paTests[iTest].fMxcsrIn));
7801 }
7802 }
7803 }
7804}
7805
7806
7807/*
7808 * Convert SSE operations converting signed double-words to double-precision floating point values.
7809 */
/* Sub-test table for cvtdq2pd.  SseConvertXmmI32R64Generate and
   SseConvertXmmI32R64Test iterate over it and read each entry's pszName, pfn,
   pfnNative, pcTests and paTests members.
   NOTE(review): ENTRY_BIN is defined elsewhere; presumably it ties the named
   helper to its binary test data - confirm. */
7810 static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
7811 {
7812 ENTRY_BIN(cvtdq2pd_u128)
7813 };
7814
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the conversion worker once with the MXCSR currently set in @a pState
 * and appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pStrmOut    Stream the record is written to.
 * @param   pfn         Worker to invoke.
 * @param   pState      FPU/SSE state handed to the worker; its MXCSR member is
 *                      read back after the call and stored as the record's
 *                      input MXCSR.
 * @param   pTestData   Record whose InVal is already set; the MXCSR in/out and
 *                      output value members are updated here.
 */
static uint32_t sseConvertXmmI32R64GenRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                             X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the sub-tests in
 * g_aSseConvertXmmI32R64.
 *
 * Inputs are cTests sets of four random 32-bit integers followed by the fixed
 * special values (replicated into all four lanes).  Each input is run for
 * every rounding mode and DAZ/FZ combination, with several exception
 * mask/flag setups per combination.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; takes the
 *                          sub-test name as its string argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192).
 */
static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        SSE_CONVERT_XMM_T const * const pSubTest = &g_aSseConvertXmmI32R64[iFn];

        /* Prefer the native worker when one is available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = pSubTest->pfnNative ? pSubTest->pfnNative : pSubTest->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pSubTest->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random values first, the fixed specials afterwards; lanes are filled in order 0..3. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ai32[iLane] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutMasked = sseConvertXmmI32R64GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 2nd run: all exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fOutUnmasked = sseConvertXmmI32R64GenRecord(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd run: mask bits clear, the flags seen so far OR'ed into the input. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fOutFlagged = sseConvertXmmI32R64GenRecord(pStrmOut, pfn, &State, &TestData);

                        /* 4th run (only if new flags showed up): mask exactly the flags collected. */
                        if (((fOutFlagged & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fOutFlagged & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertXmmI32R64GenRecord(pStrmOut, pfn, &State, &TestData);
                        }

                        /* With more than one flag collected: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertXmmI32R64GenRecord(pStrmOut, pfn, &State, &TestData);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pSubTest->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7927
7928static void SseConvertXmmI32R64Test(void)
7929{
7930 X86FXSTATE State;
7931 RT_ZERO(State);
7932
7933 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
7934 {
7935 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R64[iFn].pszName))
7936 continue;
7937
7938 uint32_t const cTests = *g_aSseConvertXmmI32R64[iFn].pcTests;
7939 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
7940 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
7941 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
7942 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7943 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7944 {
7945 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7946 {
7947 IEMSSERESULT Res; RT_ZERO(Res);
7948
7949 State.MXCSR = paTests[iTest].fMxcsrIn;
7950 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7951 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7952 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7953 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7954 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7955 "%s -> mxcsr=%#08x %s'%s\n"
7956 "%s expected %#08x %s'%s%s%s (%s)\n",
7957 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7958 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7959 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7960 iVar ? " " : "", Res.MXCSR,
7961 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
7962 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7963 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
7964 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7965 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7966 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7967 ? " - val" : "",
7968 FormatMxcsr(paTests[iTest].fMxcsrIn));
7969 }
7970 }
7971 }
7972}
7973
7974
7975/*
7976 * Convert SSE operations converting double-precision floating point values to signed double-words.
7977 */
/** Sub-test table for the cvtpd2dq and cvttpd2dq helpers.  It is walked by
 * SseConvertXmmR64I32Generate (data file generation) and by
 * SseConvertXmmR64I32Test (replay), which use the pszName, pfn, pfnNative,
 * paTests and pcTests members of each entry.
 * NOTE(review): what ENTRY_BIN fills in besides the name is not visible
 * here - confirm against the macro definition. */
7978static const SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
7979{
7980 ENTRY_BIN(cvtpd2dq_u128),
7981 ENTRY_BIN(cvttpd2dq_u128)
7982};
7983
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs one conversion using the MXCSR currently loaded in @a pState and
 * appends the resulting test record to the data file.
 *
 * @returns The MXCSR value reported by the worker in its result structure.
 * @param   pfn         The conversion worker to invoke.
 * @param   pStrmOut    The output stream receiving the test records.
 * @param   pState      FPU/SSE state; the caller sets MXCSR before calling.
 * @param   pTestData   Test record with InVal filled in.  The fMxcsrIn,
 *                      fMxcsrOut and OutVal members are overwritten here.
 * @param   prcWrite    Sticky write status.  The first failure is kept and no
 *                      further records are written afterwards.
 */
static uint32_t SseConvertXmmR64I32GenerateOne(PFNIEMAIMPLFPSSEF2U128 pfn, PRTSTREAM pStrmOut, X86FXSTATE *pState,
                                               SSE_CONVERT_XMM_TEST_T *pTestData, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    if (RT_SUCCESS(*prcWrite))
        *prcWrite = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR64I32
 * sub-tests (cvtpd2dq, cvttpd2dq).
 *
 * Each input pair is run for every rounding mode / DAZ / FZ combination, first
 * with all X86_MXCSR_XCPT_MASK bits set and then with them cleared.  When
 * exception flags were raised, additional records are produced with those
 * flags fed back into the input MXCSR and with selected mask bits set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the s_aSpecials entries are appended.
 */
static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Prefer the native worker for generating reference data when there is one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative
                                         ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal pairs so far and the random slots are running
                   out: redo this slot (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR64I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        /* 2nd record: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR64I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: the raised flags are already set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmR64I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                            /* 4th record: only if that run raised flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmR64I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                            }

                            /* With several flags: one record per flag, with the mask
                               bits of all the other raised flags set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmR64I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                                    }
                        }
                    }

            if (RT_FAILURE(rcWrite))
                break; /* No point in producing more records for a broken file. */
        }

        if (RT_FAILURE(rcWrite))
        {
            RTStrmClose(pStrmOut);
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8103
8104static void SseConvertXmmR64I32Test(void)
8105{
8106 X86FXSTATE State;
8107 RT_ZERO(State);
8108
8109 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8110 {
8111 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64I32[iFn].pszName))
8112 continue;
8113
8114 uint32_t const cTests = *g_aSseConvertXmmR64I32[iFn].pcTests;
8115 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8116 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8117 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8118 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8119 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8120 {
8121 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8122 {
8123 IEMSSERESULT Res; RT_ZERO(Res);
8124
8125 State.MXCSR = paTests[iTest].fMxcsrIn;
8126 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8127 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8128 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8129 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8130 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8131 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8132 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8133 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8134 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8135 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8136 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8137 iVar ? " " : "", Res.MXCSR,
8138 Res.uResult.ai32[0], Res.uResult.ai32[1],
8139 Res.uResult.ai32[2], Res.uResult.ai32[3],
8140 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8141 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8142 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8143 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8144 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8145 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8146 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8147 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8148 ? " - val" : "",
8149 FormatMxcsr(paTests[iTest].fMxcsrIn));
8150 }
8151 }
8152 }
8153}
8154
8155
8156/*
8157 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8158 */
/* Sub-test descriptor type for workers of type PFNIEMAIMPLMXCSRU64U128 with
   SSE_CONVERT_MM_XMM_TEST_T test records.
   NOTE(review): roles inferred from the TYPEDEF_SUBTEST_TYPE argument order
   and the usage below - confirm against the macro definition. */
8159TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8160
/** Sub-test table for the cvtpd2pi and cvttpd2pi helpers; walked by
 * SseConvertMmXmmGenerate and SseConvertMmXmmTest. */
8161static const SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8162{
8163 ENTRY_BIN(cvtpd2pi_u128),
8164 ENTRY_BIN(cvttpd2pi_u128)
8165};
8166
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs one conversion with the given input MXCSR and appends the resulting
 * test record to the data file.
 *
 * @returns The MXCSR value as updated by the worker.
 * @param   pfn         The conversion worker to invoke.
 * @param   pStrmOut    The output stream receiving the test records.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   pTestData   Test record with InVal filled in.  The fMxcsrIn,
 *                      fMxcsrOut and OutVal members are overwritten here.
 * @param   prcWrite    Sticky write status.  The first failure is kept and no
 *                      further records are written afterwards.
 */
static uint32_t SseConvertMmXmmGenerateOne(PFNIEMAIMPLMXCSRU64U128 pfn, PRTSTREAM pStrmOut, uint32_t fMxcsrIn,
                                           SSE_CONVERT_MM_XMM_TEST_T *pTestData, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Res;
    pfn(&fMxcsrOut, &u64Res, &pTestData->InVal);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Res;
    if (RT_SUCCESS(*prcWrite))
        *prcWrite = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmXmm sub-tests
 * (cvtpd2pi, cvttpd2pi).
 *
 * Each input pair is run for every rounding mode / DAZ / FZ combination, first
 * with all X86_MXCSR_XCPT_MASK bits set and then with them cleared.  When
 * exception flags were raised, additional records are produced with those
 * flags fed back into the input MXCSR and with selected mask bits set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the s_aSpecials entries are appended.
 */
static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Prefer the native worker for generating reference data when there is one. */
        PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative
                                          ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal pairs so far and the random slots are running
                   out: redo this slot (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertMmXmmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        /* 2nd record: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertMmXmmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: the raised flags are already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertMmXmmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                            /* 4th record: only if that run raised flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertMmXmmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                            }

                            /* With several flags: one record per flag, with the mask
                               bits of all the other raised flags set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertMmXmmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                                    }
                        }
                    }

            if (RT_FAILURE(rcWrite))
                break; /* No point in producing more records for a broken file. */
        }

        if (RT_FAILURE(rcWrite))
        {
            RTStrmClose(pStrmOut);
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8289
8290static void SseConvertMmXmmTest(void)
8291{
8292 X86FXSTATE State;
8293 RT_ZERO(State);
8294
8295 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8296 {
8297 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmXmm[iFn].pszName))
8298 continue;
8299
8300 uint32_t const cTests = *g_aSseConvertMmXmm[iFn].pcTests;
8301 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8302 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8303 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8304 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8305 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8306 {
8307 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8308 {
8309 RTUINT64U ValOut;
8310 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8311 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8312 if ( fMxcsr != paTests[iTest].fMxcsrOut
8313 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8314 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8315 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8316 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8317 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8318 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8319 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8320 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8321 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8322 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8323 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8324 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8325 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8326 ? " - val" : "",
8327 FormatMxcsr(paTests[iTest].fMxcsrIn));
8328 }
8329 }
8330 }
8331}
8332
8333
8334/*
8335 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8336 */
/* Sub-test descriptor type for workers of type PFNIEMAIMPLMXCSRU128U64 with
   SSE_CONVERT_XMM_MM_TEST_T test records.
   NOTE(review): roles inferred from the TYPEDEF_SUBTEST_TYPE argument order
   and the usage below - confirm against the macro definition. */
8337TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8338
/** Sub-test table for the cvtpi2pd helper; walked by
 * SseConvertXmmR64MmGenerate and SseConvertXmmR64MmTest. */
8339static const SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8340{
8341 ENTRY_BIN(cvtpi2pd_u128)
8342};
8343
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs one conversion with the given input MXCSR and appends the resulting
 * test record to the data file.
 *
 * @returns The MXCSR value as updated by the worker.
 * @param   pfn         The conversion worker to invoke.
 * @param   pStrmOut    The output stream receiving the test records.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   pTestData   Test record with InVal filled in.  The worker stores
 *                      its result directly in OutVal; fMxcsrIn and fMxcsrOut
 *                      are overwritten here.
 * @param   prcWrite    Sticky write status.  The first failure is kept and no
 *                      further records are written afterwards.
 */
static uint32_t SseConvertXmmR64MmGenerateOne(PFNIEMAIMPLMXCSRU128U64 pfn, PRTSTREAM pStrmOut, uint32_t fMxcsrIn,
                                              SSE_CONVERT_XMM_MM_TEST_T *pTestData, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    if (RT_SUCCESS(*prcWrite))
        *prcWrite = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR64Mm
 * sub-tests (cvtpi2pd).
 *
 * Each input pair is run for every rounding mode / DAZ / FZ combination, first
 * with all X86_MXCSR_XCPT_MASK bits set and then with them cleared.  When
 * exception flags were raised, additional records are produced with those
 * flags fed back into the input MXCSR and with selected mask bits set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the s_aSpecials entries are appended.
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Prefer the native worker for generating reference data when there is one. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative
                                          ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR64MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        /* 2nd record: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR64MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: the raised flags are already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmR64MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                            /* 4th record: only if that run raised flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmR64MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                            }

                            /* With several flags: one record per flag, with the mask
                               bits of all the other raised flags set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmR64MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                                    }
                        }
                    }

            if (RT_FAILURE(rcWrite))
                break; /* No point in producing more records for a broken file. */
        }

        if (RT_FAILURE(rcWrite))
        {
            RTStrmClose(pStrmOut);
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8443
8444static void SseConvertXmmR64MmTest(void)
8445{
8446 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8447 {
8448 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64Mm[iFn].pszName))
8449 continue;
8450
8451 uint32_t const cTests = *g_aSseConvertXmmR64Mm[iFn].pcTests;
8452 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8453 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8454 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8455 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8456 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8457 {
8458 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8459 {
8460 X86XMMREG ValOut;
8461 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8462 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8463 if ( fMxcsr != paTests[iTest].fMxcsrOut
8464 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8465 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8466 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8467 "%s -> mxcsr=%#08x %s'%s\n"
8468 "%s expected %#08x %s'%s%s%s (%s)\n",
8469 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8470 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8471 iVar ? " " : "", fMxcsr,
8472 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8473 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8474 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8475 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8476 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8477 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8478 ? " - val" : "",
8479 FormatMxcsr(paTests[iTest].fMxcsrIn));
8480 }
8481 }
8482 }
8483}
8484
8485
8486/*
8487 * Convert SSE operations converting signed double-word values to single-precision floating-point values (probably only cvtpi2ps).
8488 */
/* Sub-test descriptor type for workers of type PFNIEMAIMPLMXCSRU128U64 with
   SSE_CONVERT_XMM_MM_TEST_T test records.
   NOTE(review): roles inferred from the TYPEDEF_SUBTEST_TYPE argument order
   and the usage below - confirm against the macro definition. */
8489TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8490
/** Sub-test table for the cvtpi2ps helper; walked by
 * SseConvertXmmR32MmGenerate and SseConvertXmmR32MmTest. */
8491static const SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
8492{
8493 ENTRY_BIN(cvtpi2ps_u128)
8494};
8495
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs one conversion with the given input MXCSR and appends the resulting
 * test record to the data file.
 *
 * @returns The MXCSR value as updated by the worker.
 * @param   pfn         The conversion worker to invoke.
 * @param   pStrmOut    The output stream receiving the test records.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   pTestData   Test record with InVal filled in.  The worker stores
 *                      its result directly in OutVal; fMxcsrIn and fMxcsrOut
 *                      are overwritten here.
 * @param   prcWrite    Sticky write status.  The first failure is kept and no
 *                      further records are written afterwards.
 */
static uint32_t SseConvertXmmR32MmGenerateOne(PFNIEMAIMPLMXCSRU128U64 pfn, PRTSTREAM pStrmOut, uint32_t fMxcsrIn,
                                              SSE_CONVERT_XMM_MM_TEST_T *pTestData, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    if (RT_SUCCESS(*prcWrite))
        *prcWrite = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR32Mm
 * sub-tests (cvtpi2ps).
 *
 * Each input pair is run for every rounding mode / DAZ / FZ combination, first
 * with all X86_MXCSR_XCPT_MASK bits set and then with them cleared.  When
 * exception flags were raised, additional records are produced with those
 * flags fed back into the input MXCSR and with selected mask bits set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the s_aSpecials entries are appended.
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Prefer the native worker for generating reference data when there is one. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative
                                          ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR32MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        /* 2nd record: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR32MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: the raised flags are already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmR32MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);

                            /* 4th record: only if that run raised flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmR32MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                            }

                            /* With several flags: one record per flag, with the mask
                               bits of all the other raised flags set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmR32MmGenerateOne(pfn, pStrmOut, fMxcsrIn, &TestData, &rcWrite);
                                    }
                        }
                    }

            if (RT_FAILURE(rcWrite))
                break; /* No point in producing more records for a broken file. */
        }

        if (RT_FAILURE(rcWrite))
        {
            RTStrmClose(pStrmOut);
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8595
8596static void SseConvertXmmR32MmTest(void)
8597{
8598 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8599 {
8600 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32Mm[iFn].pszName))
8601 continue;
8602
8603 uint32_t const cTests = *g_aSseConvertXmmR32Mm[iFn].pcTests;
8604 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8605 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8606 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8607 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8608 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8609 {
8610 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8611 {
8612 X86XMMREG ValOut;
8613 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8614 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8615 if ( fMxcsr != paTests[iTest].fMxcsrOut
8616 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8617 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8618 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8619 "%s -> mxcsr=%#08x %s'%s\n"
8620 "%s expected %#08x %s'%s%s%s (%s)\n",
8621 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8622 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8623 iVar ? " " : "", fMxcsr,
8624 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8625 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8626 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8627 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8628 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8629 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8630 ? " - val" : "",
8631 FormatMxcsr(paTests[iTest].fMxcsrIn));
8632 }
8633 }
8634 }
8635}
8636
8637
8638/*
8639 * Convert SSE operations converting single-precision floating point values to signed double-word values.
8640 */
/* Sub-test descriptor type for workers of type PFNIEMAIMPLMXCSRU64U64 with
   SSE_CONVERT_MM_R32_TEST_T test records.
   NOTE(review): roles inferred from the TYPEDEF_SUBTEST_TYPE argument order
   and the usage below - confirm against the macro definition. */
8641TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
8642
/** Sub-test table for the cvtps2pi and cvttps2pi helpers; walked by
 * SseConvertMmI32XmmR32Generate and SseConvertMmI32XmmR32Test. */
8643static const SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
8644{
8645 ENTRY_BIN(cvtps2pi_u128),
8646 ENTRY_BIN(cvttps2pi_u128)
8647};
8648
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs one conversion with the given input MXCSR and appends the resulting
 * test record to the data file.
 *
 * @returns The MXCSR value as updated by the worker.
 * @param   pfn         The conversion worker to invoke.
 * @param   pStrmOut    The output stream receiving the test records.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   u64Src      The two single-precision inputs packed into 64 bits.
 * @param   pTestData   Test record with ar32InVal filled in.  The fMxcsrIn,
 *                      fMxcsrOut and OutVal members are overwritten here.
 * @param   prcWrite    Sticky write status.  The first failure is kept and no
 *                      further records are written afterwards.
 */
static uint32_t SseConvertMmI32XmmR32GenerateOne(PFNIEMAIMPLMXCSRU64U64 pfn, PRTSTREAM pStrmOut, uint32_t fMxcsrIn,
                                                 uint64_t u64Src, SSE_CONVERT_MM_R32_TEST_T *pTestData, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Res;
    pfn(&fMxcsrOut, &u64Res, u64Src);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Res;
    if (RT_SUCCESS(*prcWrite))
        *prcWrite = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmI32XmmR32
 * sub-tests (cvtps2pi, cvttps2pi).
 *
 * Each input pair is run for every rounding mode / DAZ / FZ combination, first
 * with all X86_MXCSR_XCPT_MASK bits set and then with them cleared.  When
 * exception flags were raised, additional records are produced with those
 * flags fed back into the input MXCSR and with selected mask bits set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the s_aSpecials entries are appended.
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        /* Prefer the native worker for generating reference data when there is one. */
        PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative
                                         ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal pairs so far and the random slots are running
                   out: redo this slot (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            /* The worker takes both singles packed into one 64-bit value;
               RTFLOAT64U only serves as the packing container here. */
            RTFLOAT64U TestVal;
            TestVal.au32[0] = TestData.ar32InVal[0].u;
            TestVal.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertMmI32XmmR32GenerateOne(pfn, pStrmOut, fMxcsrIn, TestVal.u,
                                                                                  &TestData, &rcWrite);

                        /* 2nd record: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertMmI32XmmR32GenerateOne(pfn, pStrmOut, fMxcsrIn, TestVal.u,
                                                                                  &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: the raised flags are already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertMmI32XmmR32GenerateOne(pfn, pStrmOut, fMxcsrIn, TestVal.u,
                                                                                      &TestData, &rcWrite);

                            /* 4th record: only if that run raised flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertMmI32XmmR32GenerateOne(pfn, pStrmOut, fMxcsrIn, TestVal.u, &TestData, &rcWrite);
                            }

                            /* With several flags: one record per flag, with the mask
                               bits of all the other raised flags set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertMmI32XmmR32GenerateOne(pfn, pStrmOut, fMxcsrIn, TestVal.u,
                                                                         &TestData, &rcWrite);
                                    }
                        }
                    }

            if (RT_FAILURE(rcWrite))
                break; /* No point in producing more records for a broken file. */
        }

        if (RT_FAILURE(rcWrite))
        {
            RTStrmClose(pStrmOut);
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8775
8776static void SseConvertMmI32XmmR32Test(void)
8777{
8778 X86FXSTATE State;
8779 RT_ZERO(State);
8780
8781 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8782 {
8783 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmI32XmmR32[iFn].pszName))
8784 continue;
8785
8786 uint32_t const cTests = *g_aSseConvertMmI32XmmR32[iFn].pcTests;
8787 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
8788 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
8789 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
8790 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8791 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8792 {
8793 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8794 {
8795 RTUINT64U ValOut;
8796 RTUINT64U ValIn;
8797
8798 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
8799 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
8800
8801 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8802 pfn(&fMxcsr, &ValOut.u, ValIn.u);
8803 if ( fMxcsr != paTests[iTest].fMxcsrOut
8804 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8805 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8806 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8807 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8808 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8809 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8810 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
8811 iVar ? " " : "", fMxcsr,
8812 ValOut.ai32[0], ValOut.ai32[1],
8813 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8814 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8815 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8816 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8817 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8818 ? " - val" : "",
8819 FormatMxcsr(paTests[iTest].fMxcsrIn));
8820 }
8821 }
8822 }
8823}
8824
8825
8826/*
8827 * SSE 4.2 pcmpxstrx instructions.
8828 */
8829TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
8830
8831static const SSE_PCMPISTRI_T g_aSsePcmpistri[] =
8832{
8833 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
8834};
8835
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes binary test data for every entry in g_aSsePcmpistri.
 *
 * One output stream is opened per sub-test, its name being produced from
 * @a pszDataFileFmt and the sub-test name.  For each input pair (random ones
 * first, then the s_aSpecials entries) all 256 immediate byte values are run
 * twice: once with the pair as picked and once with the second operand set
 * equal to the first.  Every call produces one SSE_PCMPISTRI_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a stream could not be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; receives
 *                          the sub-test name as its argument.
 * @param   cTests          Requested number of random input pairs; raised to
 *                          at least 192.
 */
static RTEXITCODE SseComparePcmpistriGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
    {
        SSE_PCMPISTRI_T const * const      pEntry    = &g_aSsePcmpistri[iFn];
        /* Prefer the native worker whenever the entry provides one. */
        PFNIEMAIMPLPCMPISTRIU128IMM8 const pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRI_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials once those are used up. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const  fEFlagsIn = RandEFlags();
            IEMPCMPISTRXSRC TestVal;

            /* Pass 0: inputs as picked above.  Pass 1: both operands identical. */
            for (unsigned iPass = 0; iPass < 2; iPass++)
            {
                if (iPass != 0)
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                TestVal.uSrc1 = TestData.InVal1.uXmm;
                TestVal.uSrc2 = TestData.InVal2.uXmm;

                for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                {
                    uint32_t fEFlagsOut = fEFlagsIn;
                    pfnWorker(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                    TestData.fEFlagsIn  = fEFlagsIn;
                    TestData.fEFlagsOut = fEFlagsOut;
                    TestData.bImm       = (uint8_t)u16Imm;
                    RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8907
8908static void SseComparePcmpistriTest(void)
8909{
8910 X86FXSTATE State;
8911 RT_ZERO(State);
8912
8913 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
8914 {
8915 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistri[iFn].pszName))
8916 continue;
8917
8918 uint32_t const cTests = *g_aSsePcmpistri[iFn].pcTests;
8919 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
8920 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
8921 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
8922 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8923 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8924 {
8925 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8926 {
8927 IEMPCMPISTRXSRC TestVal;
8928 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
8929 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
8930
8931 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
8932 uint32_t u32EcxOut = 0;
8933 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
8934 if ( fEFlags != paTests[iTest].fEFlagsOut
8935 || u32EcxOut != paTests[iTest].u32EcxOut)
8936 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
8937 "%s -> efl=%#08x %RU32\n"
8938 "%s expected %#08x %RU32%s%s\n",
8939 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
8940 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
8941 iVar ? " " : "", fEFlags, u32EcxOut,
8942 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
8943 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
8944 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
8945 }
8946 }
8947 }
8948}
8949
8950
8951TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
8952
8953static const SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
8954{
8955 ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
8956};
8957
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes binary test data for every entry in g_aSsePcmpistrm.
 *
 * One output stream is opened per sub-test, its name being produced from
 * @a pszDataFileFmt and the sub-test name.  For each input pair (random ones
 * first, then the s_aSpecials entries) all 256 immediate byte values are run
 * twice: once with the pair as picked and once with the second operand set
 * equal to the first.  Every call produces one SSE_PCMPISTRM_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a stream could not be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; receives
 *                          the sub-test name as its argument.
 * @param   cTests          Requested number of random input pairs; raised to
 *                          at least 192.
 */
static RTEXITCODE SseComparePcmpistrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
    {
        SSE_PCMPISTRM_T const * const      pEntry    = &g_aSsePcmpistrm[iFn];
        /* Prefer the native worker whenever the entry provides one. */
        PFNIEMAIMPLPCMPISTRMU128IMM8 const pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRM_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the fixed specials once those are used up. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const  fEFlagsIn = RandEFlags();
            IEMPCMPISTRXSRC TestVal;

            /* Pass 0: inputs as picked above.  Pass 1: both operands identical. */
            for (unsigned iPass = 0; iPass < 2; iPass++)
            {
                if (iPass != 0)
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                TestVal.uSrc1 = TestData.InVal1.uXmm;
                TestVal.uSrc2 = TestData.InVal2.uXmm;

                for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                {
                    uint32_t fEFlagsOut = fEFlagsIn;
                    pfnWorker(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                    TestData.fEFlagsIn  = fEFlagsIn;
                    TestData.fEFlagsOut = fEFlagsOut;
                    TestData.bImm       = (uint8_t)u16Imm;
                    RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9029
9030static void SseComparePcmpistrmTest(void)
9031{
9032 X86FXSTATE State;
9033 RT_ZERO(State);
9034
9035 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9036 {
9037 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistrm[iFn].pszName))
9038 continue;
9039
9040 uint32_t const cTests = *g_aSsePcmpistrm[iFn].pcTests;
9041 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9042 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9043 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9044 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9045 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9046 {
9047 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9048 {
9049 IEMPCMPISTRXSRC TestVal;
9050 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9051 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9052
9053 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9054 RTUINT128U OutVal;
9055 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9056 if ( fEFlags != paTests[iTest].fEFlagsOut
9057 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9058 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9059 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9060 "%s -> efl=%#08x %s\n"
9061 "%s expected %#08x %s%s%s\n",
9062 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9063 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9064 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9065 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9066 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9067 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9068 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9069 }
9070 }
9071 }
9072}
9073
9074
9075TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9076
9077static const SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9078{
9079 ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9080};
9081
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes binary test data for every entry in g_aSsePcmpestri.
 *
 * One output stream is opened per sub-test, its name being produced from
 * @a pszDataFileFmt and the sub-test name.  For each input pair (random ones
 * first, then the s_aSpecials entries) and each RAX/RDX combination, all 256
 * immediate byte values are run twice: once with the pair as picked and once
 * with the second operand set equal to the first.  Every call produces one
 * SSE_PCMPESTRI_TEST_T record.
 *
 * The originally picked second operand is saved before the RAX/RDX loops and
 * restored for every combination.  Previously the "same input" pass overwrote
 * it for good, so only the first RAX/RDX combination ever saw two different
 * operands.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a stream could not be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; receives
 *                          the sub-test name as its argument.
 * @param   cTests          Requested number of random input pairs; raised to
 *                          at least 192.
 */
static RTEXITCODE SseComparePcmpestriGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
    {
        /* Prefer the native worker whenever the entry provides one. */
        PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestri[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the fixed specials once those are used up. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Remember the second operand; the "same input" pass below overwrites it. */
            RTUINT128U const uSrc2Picked = TestData.InVal2.uXmm;

            /* The step of 20 with an exclusive bound of 20 yields the values -20 and 0.
               NOTE(review): +20 is never generated - confirm whether '<= 20' was intended. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();

                    /* Pass 0: operands as picked.  Pass 1: both operands identical. */
                    for (unsigned iPass = 0; iPass < 2; iPass++)
                    {
                        if (iPass == 0)
                            TestData.InVal2.uXmm = uSrc2Picked;
                        else
                            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                        TestVal.uSrc1 = TestData.InVal1.uXmm;
                        TestVal.uSrc2 = TestData.InVal2.uXmm;

                        for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9162
9163static void SseComparePcmpestriTest(void)
9164{
9165 X86FXSTATE State;
9166 RT_ZERO(State);
9167
9168 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9169 {
9170 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestri[iFn].pszName))
9171 continue;
9172
9173 uint32_t const cTests = *g_aSsePcmpestri[iFn].pcTests;
9174 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9175 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9176 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9177 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9178 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9179 {
9180 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9181 {
9182 IEMPCMPESTRXSRC TestVal;
9183 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9184 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9185 TestVal.u64Rax = paTests[iTest].u64Rax;
9186 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9187
9188 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9189 uint32_t u32EcxOut = 0;
9190 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9191 if ( fEFlags != paTests[iTest].fEFlagsOut
9192 || u32EcxOut != paTests[iTest].u32EcxOut)
9193 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9194 "%s -> efl=%#08x %RU32\n"
9195 "%s expected %#08x %RU32%s%s\n",
9196 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9197 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9198 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9199 paTests[iTest].bImm,
9200 iVar ? " " : "", fEFlags, u32EcxOut,
9201 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9202 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9203 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9204 }
9205 }
9206 }
9207}
9208
9209
9210TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9211
9212static const SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9213{
9214 ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9215};
9216
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes binary test data for every entry in g_aSsePcmpestrm.
 *
 * One output stream is opened per sub-test, its name being produced from
 * @a pszDataFileFmt and the sub-test name.  For each input pair (random ones
 * first, then the s_aSpecials entries) and each RAX/RDX combination, all 256
 * immediate byte values are run twice: once with the pair as picked and once
 * with the second operand set equal to the first.  Every call produces one
 * SSE_PCMPESTRM_TEST_T record.
 *
 * The originally picked second operand is saved before the RAX/RDX loops and
 * restored for every combination.  Previously the "same input" pass overwrote
 * it for good, so only the first RAX/RDX combination ever saw two different
 * operands.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a stream could not be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; receives
 *                          the sub-test name as its argument.
 * @param   cTests          Requested number of random input pairs; raised to
 *                          at least 192.
 */
static RTEXITCODE SseComparePcmpestrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
    {
        /* Prefer the native worker whenever the entry provides one. */
        PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestrm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the fixed specials once those are used up. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Remember the second operand; the "same input" pass below overwrites it. */
            RTUINT128U const uSrc2Picked = TestData.InVal2.uXmm;

            /* The step of 20 with an exclusive bound of 20 yields the values -20 and 0.
               NOTE(review): +20 is never generated - confirm whether '<= 20' was intended. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();

                    /* Pass 0: operands as picked.  Pass 1: both operands identical. */
                    for (unsigned iPass = 0; iPass < 2; iPass++)
                    {
                        if (iPass == 0)
                            TestData.InVal2.uXmm = uSrc2Picked;
                        else
                            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                        TestVal.uSrc1 = TestData.InVal1.uXmm;
                        TestVal.uSrc2 = TestData.InVal2.uXmm;

                        for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9297
9298static void SseComparePcmpestrmTest(void)
9299{
9300 X86FXSTATE State;
9301 RT_ZERO(State);
9302
9303 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9304 {
9305 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestrm[iFn].pszName))
9306 continue;
9307
9308 uint32_t const cTests = *g_aSsePcmpestrm[iFn].pcTests;
9309 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9310 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9311 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9312 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9313 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9314 {
9315 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9316 {
9317 IEMPCMPESTRXSRC TestVal;
9318 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9319 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9320 TestVal.u64Rax = paTests[iTest].u64Rax;
9321 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9322
9323 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9324 RTUINT128U OutVal;
9325 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9326 if ( fEFlags != paTests[iTest].fEFlagsOut
9327 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9328 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9329 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9330 "%s -> efl=%#08x %s\n"
9331 "%s expected %#08x %s%s%s\n",
9332 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9333 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9334 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9335 paTests[iTest].bImm,
9336 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9337 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9338 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9339 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9340 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9341 }
9342 }
9343 }
9344}
9345
9346
9347
9348int main(int argc, char **argv)
9349{
9350 int rc = RTR3InitExe(argc, &argv, 0);
9351 if (RT_FAILURE(rc))
9352 return RTMsgInitFailure(rc);
9353
9354 /*
9355 * Determin the host CPU.
9356 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9357 */
9358#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9359 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9360 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9361 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9362#else
9363 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9364#endif
9365
9366 /*
9367 * Parse arguments.
9368 */
9369 enum { kModeNotSet, kModeTest, kModeGenerate }
9370 enmMode = kModeNotSet;
9371 bool fInt = true;
9372 bool fFpuLdSt = true;
9373 bool fFpuBinary1 = true;
9374 bool fFpuBinary2 = true;
9375 bool fFpuOther = true;
9376 bool fCpuData = true;
9377 bool fCommonData = true;
9378 bool fSseFpBinary = true;
9379 bool fSseFpOther = true;
9380 bool fSsePcmpxstrx = true;
9381 uint32_t const cDefaultTests = 96;
9382 uint32_t cTests = cDefaultTests;
9383 RTGETOPTDEF const s_aOptions[] =
9384 {
9385 // mode:
9386 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9387 { "--test", 't', RTGETOPT_REQ_NOTHING },
9388 // test selection (both)
9389 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9390 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9391 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9392 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9393 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9394 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9395 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9396 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9397 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9398 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9399 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9400 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9401 { "--include", 'I', RTGETOPT_REQ_STRING },
9402 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9403 // generation parameters
9404 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9405 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9406 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9407 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9408 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9409 };
9410
9411 RTGETOPTSTATE State;
9412 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9413 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9414
9415 RTGETOPTUNION ValueUnion;
9416 while ((rc = RTGetOpt(&State, &ValueUnion)))
9417 {
9418 switch (rc)
9419 {
9420 case 'g':
9421 enmMode = kModeGenerate;
9422 break;
9423 case 't':
9424 enmMode = kModeTest;
9425 break;
9426
9427 case 'a':
9428 fCpuData = true;
9429 fCommonData = true;
9430 fInt = true;
9431 fFpuLdSt = true;
9432 fFpuBinary1 = true;
9433 fFpuBinary2 = true;
9434 fFpuOther = true;
9435 fSseFpBinary = true;
9436 fSseFpOther = true;
9437 fSsePcmpxstrx = true;
9438 break;
9439 case 'z':
9440 fCpuData = false;
9441 fCommonData = false;
9442 fInt = false;
9443 fFpuLdSt = false;
9444 fFpuBinary1 = false;
9445 fFpuBinary2 = false;
9446 fFpuOther = false;
9447 fSseFpBinary = false;
9448 fSseFpOther = false;
9449 fSsePcmpxstrx = false;
9450 break;
9451
9452 case 'F':
9453 fFpuLdSt = true;
9454 break;
9455 case 'O':
9456 fFpuOther = true;
9457 break;
9458 case 'B':
9459 fFpuBinary1 = true;
9460 break;
9461 case 'P':
9462 fFpuBinary2 = true;
9463 break;
9464 case 'S':
9465 fSseFpBinary = true;
9466 break;
9467 case 'T':
9468 fSseFpOther = true;
9469 break;
9470 case 'C':
9471 fSsePcmpxstrx = true;
9472 break;
9473 case 'i':
9474 fInt = true;
9475 break;
9476
9477 case 'I':
9478 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
9479 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
9480 RT_ELEMENTS(g_apszIncludeTestPatterns));
9481 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
9482 break;
9483 case 'X':
9484 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
9485 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
9486 RT_ELEMENTS(g_apszExcludeTestPatterns));
9487 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
9488 break;
9489
9490 case 'm':
9491 fCommonData = true;
9492 break;
9493 case 'c':
9494 fCpuData = true;
9495 break;
9496 case 'n':
9497 cTests = ValueUnion.u32;
9498 break;
9499
9500 case 'q':
9501 g_cVerbosity = 0;
9502 break;
9503 case 'v':
9504 g_cVerbosity++;
9505 break;
9506
9507 case 'h':
9508 RTPrintf("usage: %s <-g|-t> [options]\n"
9509 "\n"
9510 "Mode:\n"
9511 " -g, --generate\n"
9512 " Generate test data.\n"
9513 " -t, --test\n"
9514 " Execute tests.\n"
9515 "\n"
9516 "Test selection (both modes):\n"
9517 " -a, --all\n"
9518 " Enable all tests and generated test data. (default)\n"
9519 " -z, --zap, --none\n"
9520 " Disable all tests and test data types.\n"
9521 " -i, --int\n"
9522 " Enable non-FPU tests.\n"
9523 " -F, --fpu-ld-st\n"
9524 " Enable FPU load and store tests.\n"
9525 " -B, --fpu-binary-1\n"
9526 " Enable FPU binary 80-bit FP tests.\n"
9527 " -P, --fpu-binary-2\n"
9528 " Enable FPU binary 64- and 32-bit FP tests.\n"
9529 " -O, --fpu-other\n"
9530 " Enable FPU binary 64- and 32-bit FP tests.\n"
9531 " -S, --sse-fp-binary\n"
9532 " Enable SSE binary 64- and 32-bit FP tests.\n"
9533 " -T, --sse-fp-other\n"
9534 " Enable misc SSE 64- and 32-bit FP tests.\n"
9535 " -C, --sse-pcmpxstrx\n"
9536 " Enable SSE pcmpxstrx tests.\n"
9537 " -I,--include=<test-patter>\n"
9538 " Enable tests matching the given pattern.\n"
9539 " -X,--exclude=<test-patter>\n"
9540 " Skip tests matching the given pattern (overrides --include).\n"
9541 "\n"
9542 "Generation:\n"
9543 " -m, --common\n"
9544 " Enable generating common test data.\n"
9545 " -c, --only-cpu\n"
9546 " Enable generating CPU specific test data.\n"
9547 " -n, --number-of-test <count>\n"
9548 " Number of tests to generate. Default: %u\n"
9549 "\n"
9550 "Other:\n"
9551 " -v, --verbose\n"
9552 " -q, --quiet\n"
9553 " Noise level. Default: --quiet\n"
9554 , argv[0], cDefaultTests);
9555 return RTEXITCODE_SUCCESS;
9556 default:
9557 return RTGetOptPrintError(rc, &ValueUnion);
9558 }
9559 }
9560
9561 /*
9562 * Generate data?
9563 */
9564 if (enmMode == kModeGenerate)
9565 {
9566#ifdef TSTIEMAIMPL_WITH_GENERATOR
9567 char szCpuDesc[256] = {0};
9568 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
9569 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
9570# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
9571 const char * const pszBitBucket = "NUL";
9572# else
9573 const char * const pszBitBucket = "/dev/null";
9574# endif
9575
9576 if (cTests == 0)
9577 cTests = cDefaultTests;
9578 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
9579 g_cZeroSrcTests = g_cZeroDstTests * 2;
9580
9581 if (fInt)
9582 {
9583 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
9584 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9585 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9586 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
9587 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9588 if (!pStrmData || !pStrmDataCpu)
9589 return RTEXITCODE_FAILURE;
9590
9591 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
9592 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
9593 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
9594 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
9595 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
9596 UnaryGenerate(pStrmData, cTests);
9597 ShiftGenerate(pStrmDataCpu, cTests);
9598 MulDivGenerate(pStrmDataCpu, cTests);
9599
9600 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9601 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9602 if (rcExit != RTEXITCODE_SUCCESS)
9603 return rcExit;
9604 }
9605
9606 if (fFpuLdSt)
9607 {
9608 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
9609 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9610 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9611 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
9612 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9613 if (!pStrmData || !pStrmDataCpu)
9614 return RTEXITCODE_FAILURE;
9615
9616 FpuLdConstGenerate(pStrmData, cTests);
9617 FpuLdIntGenerate(pStrmData, cTests);
9618 FpuLdD80Generate(pStrmData, cTests);
9619 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
9620 FpuStD80Generate(pStrmData, cTests);
9621 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9622 FpuLdMemGenerate(pStrmData, cTests2);
9623 FpuStMemGenerate(pStrmData, cTests2);
9624
9625 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9626 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9627 if (rcExit != RTEXITCODE_SUCCESS)
9628 return rcExit;
9629 }
9630
9631 if (fFpuBinary1)
9632 {
9633 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
9634 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9635 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9636 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
9637 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9638 if (!pStrmData || !pStrmDataCpu)
9639 return RTEXITCODE_FAILURE;
9640
9641 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9642 FpuBinaryFswR80Generate(pStrmData, cTests);
9643 FpuBinaryEflR80Generate(pStrmData, cTests);
9644
9645 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9646 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9647 if (rcExit != RTEXITCODE_SUCCESS)
9648 return rcExit;
9649 }
9650
9651 if (fFpuBinary2)
9652 {
9653 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
9654 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9655 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9656 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
9657 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9658 if (!pStrmData || !pStrmDataCpu)
9659 return RTEXITCODE_FAILURE;
9660
9661 FpuBinaryR64Generate(pStrmData, cTests);
9662 FpuBinaryR32Generate(pStrmData, cTests);
9663 FpuBinaryI32Generate(pStrmData, cTests);
9664 FpuBinaryI16Generate(pStrmData, cTests);
9665 FpuBinaryFswR64Generate(pStrmData, cTests);
9666 FpuBinaryFswR32Generate(pStrmData, cTests);
9667 FpuBinaryFswI32Generate(pStrmData, cTests);
9668 FpuBinaryFswI16Generate(pStrmData, cTests);
9669
9670 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9671 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9672 if (rcExit != RTEXITCODE_SUCCESS)
9673 return rcExit;
9674 }
9675
9676 if (fFpuOther)
9677 {
9678 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
9679 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9680 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9681 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
9682 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9683 if (!pStrmData || !pStrmDataCpu)
9684 return RTEXITCODE_FAILURE;
9685
9686 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9687 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
9688 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
9689
9690 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9691 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9692 if (rcExit != RTEXITCODE_SUCCESS)
9693 return rcExit;
9694 }
9695
9696 if (fSseFpBinary)
9697 {
9698 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
9699
9700 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9701 if (rcExit == RTEXITCODE_SUCCESS)
9702 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9703 if (rcExit == RTEXITCODE_SUCCESS)
9704 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9705 if (rcExit == RTEXITCODE_SUCCESS)
9706 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9707
9708 if (rcExit == RTEXITCODE_SUCCESS)
9709 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9710 if (rcExit == RTEXITCODE_SUCCESS)
9711 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9712 if (rcExit == RTEXITCODE_SUCCESS)
9713 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9714 if (rcExit == RTEXITCODE_SUCCESS)
9715 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9716
9717 if (rcExit == RTEXITCODE_SUCCESS)
9718 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9719 if (rcExit == RTEXITCODE_SUCCESS)
9720 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9721 if (rcExit == RTEXITCODE_SUCCESS)
9722 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9723 if (rcExit == RTEXITCODE_SUCCESS)
9724 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9725 if (rcExit != RTEXITCODE_SUCCESS)
9726 return rcExit;
9727 }
9728
9729 if (fSseFpOther)
9730 {
9731 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin" : pszBitBucket;
9732 const char *pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin" : pszBitBucket;
9733
9734 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
9735 if (rcExit == RTEXITCODE_SUCCESS)
9736 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
9737 if (rcExit == RTEXITCODE_SUCCESS)
9738 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
9739 if (rcExit == RTEXITCODE_SUCCESS)
9740 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
9741 if (rcExit == RTEXITCODE_SUCCESS)
9742 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
9743 if (rcExit == RTEXITCODE_SUCCESS)
9744 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
9745 if (rcExit == RTEXITCODE_SUCCESS)
9746 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
9747 if (rcExit == RTEXITCODE_SUCCESS)
9748 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
9749 if (rcExit == RTEXITCODE_SUCCESS)
9750 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
9751 if (rcExit == RTEXITCODE_SUCCESS)
9752 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
9753 if (rcExit == RTEXITCODE_SUCCESS)
9754 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
9755 if (rcExit == RTEXITCODE_SUCCESS)
9756 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
9757 if (rcExit != RTEXITCODE_SUCCESS)
9758 return rcExit;
9759 }
9760
9761 if (fSsePcmpxstrx)
9762 {
9763 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSsePcmpxstrx-%s.bin" : pszBitBucket;
9764
9765 RTEXITCODE rcExit = SseComparePcmpistriGenerate(pszDataFileFmtCmp, cTests);
9766 if (rcExit == RTEXITCODE_SUCCESS)
9767 rcExit = SseComparePcmpistrmGenerate(pszDataFileFmtCmp, cTests);
9768 if (rcExit == RTEXITCODE_SUCCESS)
9769 rcExit = SseComparePcmpestriGenerate(pszDataFileFmtCmp, cTests);
9770 if (rcExit == RTEXITCODE_SUCCESS)
9771 rcExit = SseComparePcmpestrmGenerate(pszDataFileFmtCmp, cTests);
9772 if (rcExit != RTEXITCODE_SUCCESS)
9773 return rcExit;
9774 }
9775
9776 return RTEXITCODE_SUCCESS;
9777#else
9778 return RTMsgErrorExitFailure("Test data generator not compiled in!");
9779#endif
9780 }
9781
9782 /*
9783 * Do testing. Currently disabled by default as data needs to be checked
9784 * on both Intel and AMD systems first.
9785 */
9786 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
9787 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9788 if (enmMode == kModeTest)
9789 {
9790 RTTestBanner(g_hTest);
9791
9792 /* Allocate guarded memory for use in the tests. */
9793#define ALLOC_GUARDED_VAR(a_puVar) do { \
9794 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
9795 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
9796 } while (0)
9797 ALLOC_GUARDED_VAR(g_pu8);
9798 ALLOC_GUARDED_VAR(g_pu16);
9799 ALLOC_GUARDED_VAR(g_pu32);
9800 ALLOC_GUARDED_VAR(g_pu64);
9801 ALLOC_GUARDED_VAR(g_pu128);
9802 ALLOC_GUARDED_VAR(g_pu8Two);
9803 ALLOC_GUARDED_VAR(g_pu16Two);
9804 ALLOC_GUARDED_VAR(g_pu32Two);
9805 ALLOC_GUARDED_VAR(g_pu64Two);
9806 ALLOC_GUARDED_VAR(g_pu128Two);
9807 ALLOC_GUARDED_VAR(g_pfEfl);
9808 if (RTTestErrorCount(g_hTest) == 0)
9809 {
9810 if (fInt)
9811 {
9812 BinU8Test();
9813 BinU16Test();
9814 BinU32Test();
9815 BinU64Test();
9816 XchgTest();
9817 XaddTest();
9818 CmpXchgTest();
9819 CmpXchg8bTest();
9820 CmpXchg16bTest();
9821 ShiftDblTest();
9822 UnaryTest();
9823 ShiftTest();
9824 MulDivTest();
9825 BswapTest();
9826 }
9827
9828 if (fFpuLdSt)
9829 {
9830 FpuLoadConstTest();
9831 FpuLdMemTest();
9832 FpuLdIntTest();
9833 FpuLdD80Test();
9834 FpuStMemTest();
9835 FpuStIntTest();
9836 FpuStD80Test();
9837 }
9838
9839 if (fFpuBinary1)
9840 {
9841 FpuBinaryR80Test();
9842 FpuBinaryFswR80Test();
9843 FpuBinaryEflR80Test();
9844 }
9845
9846 if (fFpuBinary2)
9847 {
9848 FpuBinaryR64Test();
9849 FpuBinaryR32Test();
9850 FpuBinaryI32Test();
9851 FpuBinaryI16Test();
9852 FpuBinaryFswR64Test();
9853 FpuBinaryFswR32Test();
9854 FpuBinaryFswI32Test();
9855 FpuBinaryFswI16Test();
9856 }
9857
9858 if (fFpuOther)
9859 {
9860 FpuUnaryR80Test();
9861 FpuUnaryFswR80Test();
9862 FpuUnaryTwoR80Test();
9863 }
9864
9865 if (fSseFpBinary)
9866 {
9867 SseBinaryR32Test();
9868 SseBinaryR64Test();
9869 SseBinaryU128R32Test();
9870 SseBinaryU128R64Test();
9871
9872 SseBinaryI32R64Test();
9873 SseBinaryI64R64Test();
9874 SseBinaryI32R32Test();
9875 SseBinaryI64R32Test();
9876
9877 SseBinaryR64I32Test();
9878 SseBinaryR64I64Test();
9879 SseBinaryR32I32Test();
9880 SseBinaryR32I64Test();
9881 }
9882
9883 if (fSseFpOther)
9884 {
9885 SseCompareEflR32R32Test();
9886 SseCompareEflR64R64Test();
9887 SseCompareEflR64R64Test();
9888 SseCompareF2XmmR32Imm8Test();
9889 SseCompareF2XmmR64Imm8Test();
9890 SseConvertXmmI32R32Test();
9891 SseConvertXmmR32I32Test();
9892 SseConvertXmmI32R64Test();
9893 SseConvertXmmR64I32Test();
9894 SseConvertMmXmmTest();
9895 SseConvertXmmR32MmTest();
9896 SseConvertXmmR64MmTest();
9897 SseConvertMmI32XmmR32Test();
9898 }
9899
9900 if (fSsePcmpxstrx)
9901 {
9902 SseComparePcmpistriTest();
9903 SseComparePcmpistrmTest();
9904 SseComparePcmpestriTest();
9905 SseComparePcmpestrmTest();
9906 }
9907 }
9908 return RTTestSummaryAndDestroy(g_hTest);
9909 }
9910 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
9911}
9912
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette