VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 100731

Last change on this file since 100731 was 99775, checked in by vboxsync, 19 months ago

*: Mark functions as static if not used outside of a given compilation unit. Enables the compiler to optimize inlining, reduces the symbol tables, exposes unused functions and in some rare cases exposes mismatches between function declarations and definitions, but most importantly reduces the number of parfait reports for the extern-function-no-forward-declaration category. This should not result in any functional changes, bugref:3409

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 460.0 KB
Line 
1/* $Id: tstIEMAImpl.cpp 99775 2023-05-12 12:21:58Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46#include <VBox/version.h>
47
48#include "tstIEMAImpl.h"
49
50
51/*********************************************************************************************************************************
52* Defined Constants And Macros *
53*********************************************************************************************************************************/
/** @def ENTRY
 * Subtest table initializer for iemAImpl_<a_Name> with the extra value set to zero. */
54#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
/** @def ENTRY_EX
 * Subtest table initializer.  In order it supplies: the stringified name, the
 * iemAImpl_<a_Name> function, NULL for the native variant, the g_aTests_<a_Name>
 * table, the address of g_cTests_<a_Name>, the caller's extra value and the
 * native EFLAGS flavour.  The order matches the members declared by
 * TYPEDEF_SUBTEST_TYPE. */
55#define ENTRY_EX(a_Name, a_uExtra) \
56 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
57 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
58 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
59
/** @def ENTRY_BIN
 * Same as ENTRY, except that the counter referenced is g_cbTests_<a_Name>
 * instead of g_cTests_<a_Name>.
 * NOTE(review): the 'cb' prefix suggests a byte count rather than an element
 * count - confirm against tstIEMAImpl.h. */
60#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
/** @def ENTRY_EX_BIN
 * ENTRY_BIN with a caller supplied extra value. */
61#define ENTRY_EX_BIN(a_Name, a_uExtra) \
62 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
63 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
64 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
65
/** @def ENTRY_BIN_AVX
 * Like ENTRY_BIN, but when IEM_WITHOUT_ASSEMBLY is defined the function under
 * test is iemAImpl_<a_Name>_fallback rather than iemAImpl_<a_Name>. */
66#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
67#ifndef IEM_WITHOUT_ASSEMBLY
68# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
69 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
70 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
71 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
72#else
73# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
74 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
75 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
76 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
77#endif
78
/** @def ENTRY_BIN_SSE_OPT
 * Expands exactly like ENTRY_BIN_AVX: iemAImpl_<a_Name> normally, and
 * iemAImpl_<a_Name>_fallback when IEM_WITHOUT_ASSEMBLY is defined. */
79#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
80#ifndef IEM_WITHOUT_ASSEMBLY
81# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
82 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
83 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
84 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
85#else
86# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
87 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
88 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
89 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
90#endif
91
92
/** @def ENTRY_INTEL
 * Subtest table initializer for the Intel flavoured variant of a helper, with
 * the extra value set to zero. */
93#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
/** @def ENTRY_INTEL_EX
 * The name gets an "_intel" suffix, the function under test is
 * iemAImpl_<a_Name>_intel, the native variant slot holds iemAImpl_<a_Name>,
 * the test data is g_aTests_<a_Name>_intel / g_cTests_<a_Name>_intel and the
 * flavour is IEMTARGETCPU_EFL_BEHAVIOR_INTEL.
 * Note that a_fEflUndef is accepted but does not appear in the expansion. */
94#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
95 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
96 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
97 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
98
/** @def ENTRY_AMD
 * Subtest table initializer for the AMD flavoured variant of a helper, with
 * the extra value set to zero. */
99#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
/** @def ENTRY_AMD_EX
 * AMD counterpart of ENTRY_INTEL_EX: "_amd" suffixes throughout and the
 * IEMTARGETCPU_EFL_BEHAVIOR_AMD flavour.  a_fEflUndef is likewise not used
 * in the expansion. */
100#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
101 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
102 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
103 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
104
/** @def TYPEDEF_SUBTEST_TYPE
 * Declares a subtest descriptor structure type named a_TypeName.
 *
 * The members are what the ENTRY* initializer macros fill in, in this order:
 * the subtest name, the function under test (pfn), a second function pointer
 * (pfnNative, NULL in the plain ENTRY* macros and the unsuffixed helper in the
 * _intel/_amd ones), the test data table, a pointer to the table's counter,
 * an extra value and the EFLAGS flavour index.
 *
 * @param a_TypeName        Name of the structure and typedef to create.
 * @param a_TestType        Element type of the test data table.
 * @param a_FunctionPtrType Type of the two function pointers.
 */
105#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
106 typedef struct a_TypeName \
107 { \
108 const char *pszName; \
109 a_FunctionPtrType pfn; \
110 a_FunctionPtrType pfnNative; \
111 a_TestType const *paTests; \
112 uint32_t const *pcTests; \
113 uint32_t uExtra; \
114 uint8_t idxCpuEflFlavour; \
115 } a_TypeName
116
/** @def COUNT_VARIATIONS
 * Evaluates to 2 when the subtest's EFLAGS flavour equals g_idxCpuEflFlavour
 * and it has a non-NULL pfnNative, otherwise to 1. */
117#define COUNT_VARIATIONS(a_SubTest) \
118 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
119
120
121/*********************************************************************************************************************************
122* Global Variables *
123*********************************************************************************************************************************/
124static RTTEST g_hTest;
125static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
126#ifdef TSTIEMAIMPL_WITH_GENERATOR
127static uint32_t g_cZeroDstTests = 2;
128static uint32_t g_cZeroSrcTests = 4;
129#endif
130static uint8_t *g_pu8, *g_pu8Two;
131static uint16_t *g_pu16, *g_pu16Two;
132static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
133static uint64_t *g_pu64, *g_pu64Two;
134static RTUINT128U *g_pu128, *g_pu128Two;
135
136static char g_aszBuf[32][256];
137static unsigned g_idxBuf = 0;
138
139static uint32_t g_cIncludeTestPatterns;
140static uint32_t g_cExcludeTestPatterns;
141static const char *g_apszIncludeTestPatterns[64];
142static const char *g_apszExcludeTestPatterns[64];
143
144static unsigned g_cVerbosity = 0;
145
146
147/*********************************************************************************************************************************
148* Internal Functions *
149*********************************************************************************************************************************/
/* Forward declarations of the floating point value formatters.  The random
   value generators further down use them in their assertion messages. */
150static const char *FormatR80(PCRTFLOAT80U pr80);
151static const char *FormatR64(PCRTFLOAT64U pr64);
152static const char *FormatR32(PCRTFLOAT32U pr32);
153
154
155/*
156 * Random helpers.
157 */
158
159static uint32_t RandEFlags(void)
160{
161 uint32_t fEfl = RTRandU32();
162 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
163}
164
165#ifdef TSTIEMAIMPL_WITH_GENERATOR
166
167static uint8_t RandU8(void)
168{
169 return RTRandU32Ex(0, 0xff);
170}
171
172
173static uint16_t RandU16(void)
174{
175 return RTRandU32Ex(0, 0xffff);
176}
177
178
179static uint32_t RandU32(void)
180{
181 return RTRandU32();
182}
183
184#endif
185
186static uint64_t RandU64(void)
187{
188 return RTRandU64();
189}
190
191
192static RTUINT128U RandU128(void)
193{
194 RTUINT128U Ret;
195 Ret.s.Hi = RTRandU64();
196 Ret.s.Lo = RTRandU64();
197 return Ret;
198}
199
200#ifdef TSTIEMAIMPL_WITH_GENERATOR
201
202static uint8_t RandU8Dst(uint32_t iTest)
203{
204 if (iTest < g_cZeroDstTests)
205 return 0;
206 return RandU8();
207}
208
209
210static uint8_t RandU8Src(uint32_t iTest)
211{
212 if (iTest < g_cZeroSrcTests)
213 return 0;
214 return RandU8();
215}
216
217
218static uint16_t RandU16Dst(uint32_t iTest)
219{
220 if (iTest < g_cZeroDstTests)
221 return 0;
222 return RandU16();
223}
224
225
226static uint16_t RandU16Src(uint32_t iTest)
227{
228 if (iTest < g_cZeroSrcTests)
229 return 0;
230 return RandU16();
231}
232
233
234static uint32_t RandU32Dst(uint32_t iTest)
235{
236 if (iTest < g_cZeroDstTests)
237 return 0;
238 return RandU32();
239}
240
241
242static uint32_t RandU32Src(uint32_t iTest)
243{
244 if (iTest < g_cZeroSrcTests)
245 return 0;
246 return RandU32();
247}
248
249
250static uint64_t RandU64Dst(uint32_t iTest)
251{
252 if (iTest < g_cZeroDstTests)
253 return 0;
254 return RandU64();
255}
256
257
258static uint64_t RandU64Src(uint32_t iTest)
259{
260 if (iTest < g_cZeroSrcTests)
261 return 0;
262 return RandU64();
263}
264
265
266/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
267static int16_t RandI16Src2(uint32_t iTest)
268{
269 if (iTest < 18 * 4)
270 switch (iTest % 4)
271 {
272 case 0: return 0;
273 case 1: return INT16_MAX;
274 case 2: return INT16_MIN;
275 case 3: break;
276 }
277 return (int16_t)RandU16();
278}
279
280
281/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
282static int32_t RandI32Src2(uint32_t iTest)
283{
284 if (iTest < 18 * 4)
285 switch (iTest % 4)
286 {
287 case 0: return 0;
288 case 1: return INT32_MAX;
289 case 2: return INT32_MIN;
290 case 3: break;
291 }
292 return (int32_t)RandU32();
293}
294
295
296static int64_t RandI64Src(uint32_t iTest)
297{
298 RT_NOREF(iTest);
299 return (int64_t)RandU64();
300}
301
302
303static uint16_t RandFcw(void)
304{
305 return RandU16() & ~X86_FCW_ZERO_MASK;
306}
307
308
309static uint16_t RandFsw(void)
310{
311 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
312 return RandU16();
313}
314
315
316static uint32_t RandMxcsr(void)
317{
318 return RandU32() & ~X86_MXCSR_ZERO_MASK;
319}
320
321
322static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
323{
324 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
325 pr80->sj64.uFraction >>= cShift;
326 else
327 pr80->sj64.uFraction = (cShift % 19) + 1;
328}
329
330
331
/**
 * Generates a random 80-bit floating point value of the class selected by
 * @a bType.
 *
 * Only the low five bits of @a bType are used:
 *      - 0: zero, 1: pseudo-infinity, 2: infinity, 3: indefinite,
 *      - 4-5: denormals, 6-7: pseudo denormals,
 *      - 8-9: pseudo NaNs,
 *      - 10-11: quiet NaNs, 12-13: signalling NaNs,
 *      - 14-15: unnormals,
 *      - 16-25: normals with the exponent clamped for the target; 16 also
 *        forces an all-ones fraction,
 *      - 26-31: the random bits are returned unmodified.
 * Odd values in the 4 thru 15 range additionally run the fraction through
 * SafeR80FractionShift.
 *
 * @returns The generated value.
 * @param   bType       The value class selector, see above.
 * @param   cTarget     Bit width of the format the value is destined for; the
 *                      assertion at the top lists the accepted widths.
 * @param   fIntTarget  Set if the target is an integer rather than a floating
 *                      point format.
 */
332static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
333{
334 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
335
/* Start out with 80 fully random bits; the branches below overwrite parts of them. */
336 RTFLOAT80U r80;
337 r80.au64[0] = RandU64();
338 r80.au16[4] = RandU16();
339
340 /*
341 * Adjust the random stuff according to bType.
342 */
343 bType &= 0x1f;
344 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
345 {
346 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
347 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
348 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
349 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
350 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
351 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
352 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
353 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
354 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
355 }
356 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
357 {
358 /* Denormals (4,5) and Pseudo denormals (6,7) */
/* The still random exponent doubles as the shift count before it gets zeroed below. */
359 if (bType & 1)
360 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
361 else if (r80.sj64.uFraction == 0 && bType < 6)
362 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
363 r80.sj64.uExponent = 0;
364 r80.sj64.fInteger = bType >= 6;
365 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
366 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
367 }
368 else if (bType == 8 || bType == 9)
369 {
370 /* Pseudo NaN. */
371 if (bType & 1)
372 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
373 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
374 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
375 r80.sj64.uExponent = 0x7fff;
/* The random integer bit is moved into fraction bit 62 before being cleared. */
376 if (r80.sj64.fInteger)
377 r80.sj64.uFraction |= RT_BIT_64(62);
378 else
379 r80.sj64.uFraction &= ~RT_BIT_64(62);
380 r80.sj64.fInteger = 0;
381 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
382 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
383 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
384 }
385 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
386 {
387 /* Quiet and signalling NaNs. */
388 if (bType & 1)
389 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
390 else if (r80.sj64.uFraction == 0)
391 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
392 r80.sj64.uExponent = 0x7fff;
393 if (bType < 12)
394 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
395 else
396 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
397 r80.sj64.fInteger = 1;
398 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
399 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
400 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
401 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
402 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
403 }
404 else if (bType == 14 || bType == 15)
405 {
406 /* Unnormals */
407 if (bType & 1)
408 SafeR80FractionShift(&r80, RandU8() % 62);
409 r80.sj64.fInteger = 0;
410 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
411 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
412 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
413 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
414 }
415 else if (bType < 26)
416 {
417 /* Make sure we have lots of normalized values. */
418 if (!fIntTarget)
419 {
/* Exponent window of the target format, expressed in 80-bit biased exponents. */
420 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
421 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
422 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
423 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
424 r80.sj64.fInteger = 1;
425 if (r80.sj64.uExponent <= uMinExp)
426 r80.sj64.uExponent = uMinExp + 1;
427 else if (r80.sj64.uExponent >= uMaxExp)
428 r80.sj64.uExponent = uMaxExp - 1;
429
430 if (bType == 16)
431 { /* All 1s is useful to testing rounding. Also try trigger special
432 behaviour by sometimes rounding out of range, while we're at it. */
433 r80.sj64.uFraction = RT_BIT_64(63) - 1;
434 uint8_t bExp = RandU8();
435 if ((bExp & 3) == 0)
436 r80.sj64.uExponent = uMaxExp - 1;
437 else if ((bExp & 3) == 1)
438 r80.sj64.uExponent = uMinExp + 1;
439 else if ((bExp & 3) == 2)
/* NOTE(review): with cTarget == 80 uMinExp is zero, so this unsigned subtraction wraps
   and yields a large exponent rather than a small one - confirm that is intended. */
440 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
441 }
442 }
443 else
444 {
445 /* integer target: */
446 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
447 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
448 r80.sj64.fInteger = 1;
449 if (r80.sj64.uExponent < uMinExp)
450 r80.sj64.uExponent = uMinExp;
451 else if (r80.sj64.uExponent > uMaxExp)
452 r80.sj64.uExponent = uMaxExp;
453
454 if (bType == 16)
455 { /* All 1s is useful to testing rounding. Also try trigger special
456 behaviour by sometimes rounding out of range, while we're at it. */
457 r80.sj64.uFraction = RT_BIT_64(63) - 1;
458 uint8_t bExp = RandU8();
459 if ((bExp & 3) == 0)
460 r80.sj64.uExponent = uMaxExp;
461 else if ((bExp & 3) == 1)
462 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
463 }
464 }
465
466 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
467 }
/* bType 26 thru 31 fall through all branches and return the raw random bits. */
468 return r80;
469}
470
471
472static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
473{
474 /*
475 * Make it more likely that we get a good selection of special values.
476 */
477 return RandR80Ex(RandU8(), cTarget, fIntTarget);
478
479}
480
481
482static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
483{
484 /* Make sure we cover all the basic types first before going for random selection: */
485 if (iTest <= 18)
486 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
487 return RandR80(cTarget, fIntTarget);
488}
489
490
491/**
492 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
493 * to a 0..17, covering all basic value types.
494 */
495static uint8_t RandR80Src12RemapType(uint8_t bType)
496{
497 switch (bType)
498 {
499 case 0: return 18; /* normal */
500 case 1: return 16; /* normal extreme rounding */
501 case 2: return 14; /* unnormal */
502 case 3: return 12; /* Signalling NaN */
503 case 4: return 10; /* Quiet NaN */
504 case 5: return 8; /* PseudoNaN */
505 case 6: return 6; /* Pseudo Denormal */
506 case 7: return 4; /* Denormal */
507 case 8: return 3; /* Indefinite */
508 case 9: return 2; /* Infinity */
509 case 10: return 1; /* Pseudo-Infinity */
510 case 11: return 0; /* Zero */
511 default: AssertFailedReturn(18);
512 }
513}
514
515
516/**
517 * This works in tandem with RandR80Src2 to make sure we cover all operand
518 * type mixes first before we venture into regular random testing.
519 *
520 * There are 11 basic variations, when we leave out the five odd ones using
521 * SafeR80FractionShift. Because of the special normalized value targetting at
522 * rounding, we make it an even 12. So 144 combinations for two operands.
523 */
524static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
525{
526 if (cPartnerBits == 80)
527 {
528 Assert(!fPartnerInt);
529 if (iTest < 12 * 12)
530 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
531 }
532 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
533 {
534 if (iTest < 12 * 10)
535 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
536 }
537 else if (iTest < 18 * 4 && fPartnerInt)
538 return RandR80Ex(iTest / 4);
539 return RandR80();
540}
541
542
543/** Partner to RandR80Src1. */
544static RTFLOAT80U RandR80Src2(uint32_t iTest)
545{
546 if (iTest < 12 * 12)
547 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
548 return RandR80();
549}
550
551
552static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
553{
554 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
555 pr64->s64.uFraction >>= cShift;
556 else
557 pr64->s64.uFraction = (cShift % 19) + 1;
558}
559
560
561static RTFLOAT64U RandR64Ex(uint8_t bType)
562{
563 RTFLOAT64U r64;
564 r64.u = RandU64();
565
566 /*
567 * Make it more likely that we get a good selection of special values.
568 * On average 6 out of 16 calls should return a special value.
569 */
570 bType &= 0xf;
571 if (bType == 0 || bType == 1)
572 {
573 /* 0 or Infinity. We only keep fSign here. */
574 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
575 r64.s.uFractionHigh = 0;
576 r64.s.uFractionLow = 0;
577 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
578 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
579 }
580 else if (bType == 2 || bType == 3)
581 {
582 /* Subnormals */
583 if (bType == 3)
584 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
585 else if (r64.s64.uFraction == 0)
586 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
587 r64.s64.uExponent = 0;
588 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
589 }
590 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
591 {
592 /* NaNs */
593 if (bType & 1)
594 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
595 else if (r64.s64.uFraction == 0)
596 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
597 r64.s64.uExponent = 0x7ff;
598 if (bType < 6)
599 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
600 else
601 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
602 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
603 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
604 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
605 }
606 else if (bType < 12)
607 {
608 /* Make sure we have lots of normalized values. */
609 if (r64.s.uExponent == 0)
610 r64.s.uExponent = 1;
611 else if (r64.s.uExponent == 0x7ff)
612 r64.s.uExponent = 0x7fe;
613 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
614 }
615 return r64;
616}
617
618
619static RTFLOAT64U RandR64Src(uint32_t iTest)
620{
621 if (iTest < 16)
622 return RandR64Ex(iTest);
623 return RandR64Ex(RandU8());
624}
625
626
627/** Pairing with a 80-bit floating point arg. */
628static RTFLOAT64U RandR64Src2(uint32_t iTest)
629{
630 if (iTest < 12 * 10)
631 return RandR64Ex(9 - iTest % 10); /* start with normal values */
632 return RandR64Ex(RandU8());
633}
634
635
636static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
637{
638 if (pr32->s.uFraction >= RT_BIT_32(cShift))
639 pr32->s.uFraction >>= cShift;
640 else
641 pr32->s.uFraction = (cShift % 19) + 1;
642}
643
644
645static RTFLOAT32U RandR32Ex(uint8_t bType)
646{
647 RTFLOAT32U r32;
648 r32.u = RandU32();
649
650 /*
651 * Make it more likely that we get a good selection of special values.
652 * On average 6 out of 16 calls should return a special value.
653 */
654 bType &= 0xf;
655 if (bType == 0 || bType == 1)
656 {
657 /* 0 or Infinity. We only keep fSign here. */
658 r32.s.uExponent = bType == 0 ? 0 : 0xff;
659 r32.s.uFraction = 0;
660 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
661 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
662 }
663 else if (bType == 2 || bType == 3)
664 {
665 /* Subnormals */
666 if (bType == 3)
667 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
668 else if (r32.s.uFraction == 0)
669 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
670 r32.s.uExponent = 0;
671 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
672 }
673 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
674 {
675 /* NaNs */
676 if (bType & 1)
677 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
678 else if (r32.s.uFraction == 0)
679 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
680 r32.s.uExponent = 0xff;
681 if (bType < 6)
682 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
683 else
684 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
685 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
686 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
687 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
688 }
689 else if (bType < 12)
690 {
691 /* Make sure we have lots of normalized values. */
692 if (r32.s.uExponent == 0)
693 r32.s.uExponent = 1;
694 else if (r32.s.uExponent == 0xff)
695 r32.s.uExponent = 0xfe;
696 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
697 }
698 return r32;
699}
700
701
702static RTFLOAT32U RandR32Src(uint32_t iTest)
703{
704 if (iTest < 16)
705 return RandR32Ex(iTest);
706 return RandR32Ex(RandU8());
707}
708
709
710/** Pairing with a 80-bit floating point arg. */
711static RTFLOAT32U RandR32Src2(uint32_t iTest)
712{
713 if (iTest < 12 * 10)
714 return RandR32Ex(9 - iTest % 10); /* start with normal values */
715 return RandR32Ex(RandU8());
716}
717
718
719static RTPBCD80U RandD80Src(uint32_t iTest)
720{
721 if (iTest < 3)
722 {
723 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
724 return d80Zero;
725 }
726 if (iTest < 5)
727 {
728 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
729 return d80Ind;
730 }
731
732 RTPBCD80U d80;
733 uint8_t b = RandU8();
734 d80.s.fSign = b & 1;
735
736 if ((iTest & 7) >= 6)
737 {
738 /* Illegal */
739 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
740 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
741 d80.s.abPairs[iPair] = RandU8();
742 }
743 else
744 {
745 /* Normal */
746 d80.s.uPad = 0;
747 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
748 {
749 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
750 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
751 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
752 }
753 }
754 return d80;
755}
756
757
758static const char *GenFormatR80(PCRTFLOAT80U plrd)
759{
760 if (RTFLOAT80U_IS_ZERO(plrd))
761 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
762 if (RTFLOAT80U_IS_INF(plrd))
763 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
764 if (RTFLOAT80U_IS_INDEFINITE(plrd))
765 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
766 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
767 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
768 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
769 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
770
771 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
772 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
773 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
774 return pszBuf;
775}
776
777static const char *GenFormatR64(PCRTFLOAT64U prd)
778{
779 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
780 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
781 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
782 return pszBuf;
783}
784
785
786static const char *GenFormatR32(PCRTFLOAT32U pr)
787{
788 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
789 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
790 return pszBuf;
791}
792
793
794static const char *GenFormatD80(PCRTPBCD80U pd80)
795{
796 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
797 size_t off;
798 if (pd80->s.uPad == 0)
799 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
800 else
801 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
802 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
803 while (iPair-- > 0)
804 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
805 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
806 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
807 pszBuf[off++] = ')';
808 pszBuf[off++] = '\0';
809 return pszBuf;
810}
811
812
813static const char *GenFormatI64(int64_t i64)
814{
815 if (i64 == INT64_MIN) /* This one is problematic */
816 return "INT64_MIN";
817 if (i64 == INT64_MAX)
818 return "INT64_MAX";
819 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
820 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
821 return pszBuf;
822}
823
#if 0 /* unused */
/**
 * Pointer taking overload of GenFormatI64; currently compiled out.
 *
 * @returns See the by-value GenFormatI64.
 * @param   pi64    Pointer to the value to format.
 */
static const char *GenFormatI64(int64_t const *pi64)
{
    int64_t const i64Value = *pi64;
    return GenFormatI64(i64Value);
}
#endif
830
831static const char *GenFormatI32(int32_t i32)
832{
833 if (i32 == INT32_MIN) /* This one is problematic */
834 return "INT32_MIN";
835 if (i32 == INT32_MAX)
836 return "INT32_MAX";
837 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
838 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
839 return pszBuf;
840}
841
842
843const char *GenFormatI32(int32_t const *pi32)
844{
845 return GenFormatI32(*pi32);
846}
847
848
849const char *GenFormatI16(int16_t i16)
850{
851 if (i16 == INT16_MIN) /* This one is problematic */
852 return "INT16_MIN";
853 if (i16 == INT16_MAX)
854 return "INT16_MAX";
855 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
856 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
857 return pszBuf;
858}
859
860
861const char *GenFormatI16(int16_t const *pi16)
862{
863 return GenFormatI16(*pi16);
864}
865
866
867static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
868{
869 /* We want to tag the generated source code with the revision that produced it. */
870 static char s_szRev[] = "$Revision: 99775 $";
871 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
872 size_t cchRev = 0;
873 while (RT_C_IS_DIGIT(pszRev[cchRev]))
874 cchRev++;
875
876 RTStrmPrintf(pOut,
877 "/* $Id: tstIEMAImpl.cpp 99775 2023-05-12 12:21:58Z vboxsync $ */\n"
878 "/** @file\n"
879 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
880 " */\n"
881 "\n"
882 "/*\n"
883 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
884 " *\n"
885 " * This file is part of VirtualBox base platform packages, as\n"
886 " * available from https://www.virtualbox.org.\n"
887 " *\n"
888 " * This program is free software; you can redistribute it and/or\n"
889 " * modify it under the terms of the GNU General Public License\n"
890 " * as published by the Free Software Foundation, in version 3 of the\n"
891 " * License.\n"
892 " *\n"
893 " * This program is distributed in the hope that it will be useful, but\n"
894 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
895 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
896 " * General Public License for more details.\n"
897 " *\n"
898 " * You should have received a copy of the GNU General Public License\n"
899 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
900 " *\n"
901 " * SPDX-License-Identifier: GPL-3.0-only\n"
902 " */\n"
903 "\n"
904 "#include \"tstIEMAImpl.h\"\n"
905 "\n"
906 ,
907 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
908}
909
910
911static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
912{
913 PRTSTREAM pOut = NULL;
914 int rc = RTStrmOpen(pszFilename, "w", &pOut);
915 if (RT_SUCCESS(rc))
916 {
917 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
918 return pOut;
919 }
920 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
921 return NULL;
922}
923
924
925static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
926{
927 RTStrmPrintf(pOut,
928 "\n"
929 "/* end of file */\n");
930 int rc = RTStrmClose(pOut);
931 if (RT_SUCCESS(rc))
932 return rcExit;
933 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
934}
935
936
937static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
938{
939 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
940}
941
942
943static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
944{
945 RTStrmPrintf(pOut,
946 "};\n"
947 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
948 "\n",
949 pszName, pszName);
950}
951
952#endif /* TSTIEMAIMPL_WITH_GENERATOR */
953
954
955/*
956 * Test helpers.
957 */
958static bool IsTestEnabled(const char *pszName)
959{
960 /* Process excludes first: */
961 uint32_t i = g_cExcludeTestPatterns;
962 while (i-- > 0)
963 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
964 return false;
965
966 /* If no include patterns, everything is included: */
967 i = g_cIncludeTestPatterns;
968 if (!i)
969 return true;
970
971 /* Otherwise only tests in the include patters gets tested: */
972 while (i-- > 0)
973 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
974 return true;
975
976 return false;
977}
978
979
980static bool SubTestAndCheckIfEnabled(const char *pszName)
981{
982 RTTestSub(g_hTest, pszName);
983 if (IsTestEnabled(pszName))
984 return true;
985 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
986 return false;
987}
988
989
990static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
991{
992 if (fActual == fExpected)
993 return "";
994
995 uint32_t const fXor = fActual ^ fExpected;
996 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
997 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
998
999 static struct
1000 {
1001 const char *pszName;
1002 uint32_t fFlag;
1003 } const s_aFlags[] =
1004 {
1005#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1006 EFL_ENTRY(CF),
1007 EFL_ENTRY(PF),
1008 EFL_ENTRY(AF),
1009 EFL_ENTRY(ZF),
1010 EFL_ENTRY(SF),
1011 EFL_ENTRY(TF),
1012 EFL_ENTRY(IF),
1013 EFL_ENTRY(DF),
1014 EFL_ENTRY(OF),
1015 EFL_ENTRY(IOPL),
1016 EFL_ENTRY(NT),
1017 EFL_ENTRY(RF),
1018 EFL_ENTRY(VM),
1019 EFL_ENTRY(AC),
1020 EFL_ENTRY(VIF),
1021 EFL_ENTRY(VIP),
1022 EFL_ENTRY(ID),
1023 };
1024 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1025 if (s_aFlags[i].fFlag & fXor)
1026 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1027 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1028 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1029 return pszBuf;
1030}
1031
1032
1033static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1034{
1035 if (fActual == fExpected)
1036 return "";
1037
1038 uint16_t const fXor = fActual ^ fExpected;
1039 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1040 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1041
1042 static struct
1043 {
1044 const char *pszName;
1045 uint32_t fFlag;
1046 } const s_aFlags[] =
1047 {
1048#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1049 FSW_ENTRY(IE),
1050 FSW_ENTRY(DE),
1051 FSW_ENTRY(ZE),
1052 FSW_ENTRY(OE),
1053 FSW_ENTRY(UE),
1054 FSW_ENTRY(PE),
1055 FSW_ENTRY(SF),
1056 FSW_ENTRY(ES),
1057 FSW_ENTRY(C0),
1058 FSW_ENTRY(C1),
1059 FSW_ENTRY(C2),
1060 FSW_ENTRY(C3),
1061 FSW_ENTRY(B),
1062 };
1063 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1064 if (s_aFlags[i].fFlag & fXor)
1065 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1066 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1067 if (fXor & X86_FSW_TOP_MASK)
1068 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1069 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1070#if 0 /* For debugging fprem & fprem1 */
1071 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1072 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1073#endif
1074 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1075 return pszBuf;
1076}
1077
1078
1079static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1080{
1081 if (fActual == fExpected)
1082 return "";
1083
1084 uint16_t const fXor = fActual ^ fExpected;
1085 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1086 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1087
1088 static struct
1089 {
1090 const char *pszName;
1091 uint32_t fFlag;
1092 } const s_aFlags[] =
1093 {
1094#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1095 MXCSR_ENTRY(IE),
1096 MXCSR_ENTRY(DE),
1097 MXCSR_ENTRY(ZE),
1098 MXCSR_ENTRY(OE),
1099 MXCSR_ENTRY(UE),
1100 MXCSR_ENTRY(PE),
1101
1102 MXCSR_ENTRY(IM),
1103 MXCSR_ENTRY(DM),
1104 MXCSR_ENTRY(ZM),
1105 MXCSR_ENTRY(OM),
1106 MXCSR_ENTRY(UM),
1107 MXCSR_ENTRY(PM),
1108
1109 MXCSR_ENTRY(DAZ),
1110 MXCSR_ENTRY(FZ),
1111#undef MXCSR_ENTRY
1112 };
1113 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1114 if (s_aFlags[i].fFlag & fXor)
1115 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1116 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1117 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1118 return pszBuf;
1119}
1120
1121
1122static const char *FormatFcw(uint16_t fFcw)
1123{
1124 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1125
1126 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1127 switch (fFcw & X86_FCW_PC_MASK)
1128 {
1129 case X86_FCW_PC_24: pszPC = "PC24"; break;
1130 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1131 case X86_FCW_PC_53: pszPC = "PC53"; break;
1132 case X86_FCW_PC_64: pszPC = "PC64"; break;
1133 }
1134
1135 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1136 switch (fFcw & X86_FCW_RC_MASK)
1137 {
1138 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1139 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1140 case X86_FCW_RC_UP: pszRC = "UP"; break;
1141 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1142 }
1143 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1144
1145 static struct
1146 {
1147 const char *pszName;
1148 uint32_t fFlag;
1149 } const s_aFlags[] =
1150 {
1151#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1152 FCW_ENTRY(IM),
1153 FCW_ENTRY(DM),
1154 FCW_ENTRY(ZM),
1155 FCW_ENTRY(OM),
1156 FCW_ENTRY(UM),
1157 FCW_ENTRY(PM),
1158 { "6M", 64 },
1159 };
1160 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1161 if (fFcw & s_aFlags[i].fFlag)
1162 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1163
1164 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1165 return pszBuf;
1166}
1167
1168
1169static const char *FormatMxcsr(uint32_t fMxcsr)
1170{
1171 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1172
1173 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1174 switch (fMxcsr & X86_MXCSR_RC_MASK)
1175 {
1176 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1177 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1178 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1179 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1180 }
1181
1182 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1183 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1184 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1185
1186 static struct
1187 {
1188 const char *pszName;
1189 uint32_t fFlag;
1190 } const s_aFlags[] =
1191 {
1192#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1193 MXCSR_ENTRY(IE),
1194 MXCSR_ENTRY(DE),
1195 MXCSR_ENTRY(ZE),
1196 MXCSR_ENTRY(OE),
1197 MXCSR_ENTRY(UE),
1198 MXCSR_ENTRY(PE),
1199
1200 MXCSR_ENTRY(IM),
1201 MXCSR_ENTRY(DM),
1202 MXCSR_ENTRY(ZM),
1203 MXCSR_ENTRY(OM),
1204 MXCSR_ENTRY(UM),
1205 MXCSR_ENTRY(PM),
1206 { "6M", 64 },
1207 };
1208 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1209 if (fMxcsr & s_aFlags[i].fFlag)
1210 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1211
1212 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1213 return pszBuf;
1214}
1215
1216
1217static const char *FormatR80(PCRTFLOAT80U pr80)
1218{
1219 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1220 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1221 return pszBuf;
1222}
1223
1224
1225static const char *FormatR64(PCRTFLOAT64U pr64)
1226{
1227 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1228 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1229 return pszBuf;
1230}
1231
1232
1233static const char *FormatR32(PCRTFLOAT32U pr32)
1234{
1235 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1236 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1237 return pszBuf;
1238}
1239
1240
1241static const char *FormatD80(PCRTPBCD80U pd80)
1242{
1243 /* There is only one indefinite endcoding (same as for 80-bit
1244 floating point), so get it out of the way first: */
1245 if (RTPBCD80U_IS_INDEFINITE(pd80))
1246 return "Ind";
1247
1248 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1249 size_t off = 0;
1250 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1251 unsigned cBadDigits = 0;
1252 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1253 while (iPair-- > 0)
1254 {
1255 static const char s_szDigits[] = "0123456789abcdef";
1256 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1257 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1258 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1259 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1260 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1261 }
1262 if (cBadDigits || pd80->s.uPad != 0)
1263 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1264 pszBuf[off] = '\0';
1265 return pszBuf;
1266}
1267
1268
1269#if 0
1270static const char *FormatI64(int64_t const *piVal)
1271{
1272 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1273 RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1274 return pszBuf;
1275}
1276#endif
1277
1278
1279static const char *FormatI32(int32_t const *piVal)
1280{
1281 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1282 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1283 return pszBuf;
1284}
1285
1286
1287static const char *FormatI16(int16_t const *piVal)
1288{
1289 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1290 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1291 return pszBuf;
1292}
1293
1294
1295static const char *FormatU128(PCRTUINT128U puVal)
1296{
1297 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1298 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1299 return pszBuf;
1300}
1301
1302
1303/*
1304 * Binary operations.
1305 */
1306TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1307TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1308TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1309TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1310
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines BinU<bits>Generate(), which writes a test data array for each entry
 * in g_aBinU<bits>.  Entries with the native EFLAGS flavour go to pOut,
 * entries whose flavour equals g_idxCpuEflFlavour go to pOutCpu, and all other
 * entries are skipped.  The native worker is preferred when present.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        /* Select the output stream from the EFLAGS flavour of the entry. */ \
        PRTSTREAM pOutFn; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
            pOutFn = pOut; \
        else if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour) \
            pOutFn = pOutCpu; \
        else \
            continue; \
        \
        /* Use the native worker if there is one. */ \
        PFNIEMAIMPLBINU ## a_cBits pfnWorker = g_aBinU ## a_cBits[iFn].pfnNative; \
        if (!pfnWorker) \
            pfnWorker = g_aBinU ## a_cBits[iFn].pfn; \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            /* A non-zero uExtra restricts the source to a bit index within the operand width. */ \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; \
            Test.uMisc     = 0; \
            pfnWorker(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
/* No generator: expands to nothing. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1349
/*
 * Instantiates the generator (if enabled) and defines BinU<bits>Test(), which
 * replays the recorded test data of every enabled entry in a_aSubTests.
 *
 * Each test is first run on local variables; if the result matches, it is run
 * once more via the g_pu<bits> / g_pfEfl pointers.  The first variation uses
 * the pfn worker, any further variation the pfnNative worker.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            PFNIEMAIMPLBINU ## a_cBits const pfn = iVar == 0 ? a_aSubTests[iFn].pfn : a_aSubTests[iFn].pfnNative; \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_uType  uDst = paTests[iTest].uDstIn; \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst == paTests[iTest].uDstOut \
                    && fEfl == paTests[iTest].fEflOut) \
                { \
                     /* Matches, so repeat the test via the global pointers. */ \
                     *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                     *g_pfEfl         = paTests[iTest].fEflIn; \
                     pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                     RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                     RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
                else \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
            } \
        } \
    } \
}
1390
1391
1392/*
1393 * 8-bit binary operations.
1394 */
1395static const BINU8_T g_aBinU8[] =
1396{
1397 ENTRY(add_u8),
1398 ENTRY(add_u8_locked),
1399 ENTRY(adc_u8),
1400 ENTRY(adc_u8_locked),
1401 ENTRY(sub_u8),
1402 ENTRY(sub_u8_locked),
1403 ENTRY(sbb_u8),
1404 ENTRY(sbb_u8_locked),
1405 ENTRY(or_u8),
1406 ENTRY(or_u8_locked),
1407 ENTRY(xor_u8),
1408 ENTRY(xor_u8_locked),
1409 ENTRY(and_u8),
1410 ENTRY(and_u8_locked),
1411 ENTRY(cmp_u8),
1412 ENTRY(test_u8),
1413};
1414TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1415
1416
1417/*
1418 * 16-bit binary operations.
1419 */
1420static const BINU16_T g_aBinU16[] =
1421{
1422 ENTRY(add_u16),
1423 ENTRY(add_u16_locked),
1424 ENTRY(adc_u16),
1425 ENTRY(adc_u16_locked),
1426 ENTRY(sub_u16),
1427 ENTRY(sub_u16_locked),
1428 ENTRY(sbb_u16),
1429 ENTRY(sbb_u16_locked),
1430 ENTRY(or_u16),
1431 ENTRY(or_u16_locked),
1432 ENTRY(xor_u16),
1433 ENTRY(xor_u16_locked),
1434 ENTRY(and_u16),
1435 ENTRY(and_u16_locked),
1436 ENTRY(cmp_u16),
1437 ENTRY(test_u16),
1438 ENTRY_EX(bt_u16, 1),
1439 ENTRY_EX(btc_u16, 1),
1440 ENTRY_EX(btc_u16_locked, 1),
1441 ENTRY_EX(btr_u16, 1),
1442 ENTRY_EX(btr_u16_locked, 1),
1443 ENTRY_EX(bts_u16, 1),
1444 ENTRY_EX(bts_u16_locked, 1),
1445 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1446 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1447 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1448 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1449 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1450 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1451 ENTRY(arpl),
1452};
1453TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1454
1455
1456/*
1457 * 32-bit binary operations.
1458 */
1459static const BINU32_T g_aBinU32[] =
1460{
1461 ENTRY(add_u32),
1462 ENTRY(add_u32_locked),
1463 ENTRY(adc_u32),
1464 ENTRY(adc_u32_locked),
1465 ENTRY(sub_u32),
1466 ENTRY(sub_u32_locked),
1467 ENTRY(sbb_u32),
1468 ENTRY(sbb_u32_locked),
1469 ENTRY(or_u32),
1470 ENTRY(or_u32_locked),
1471 ENTRY(xor_u32),
1472 ENTRY(xor_u32_locked),
1473 ENTRY(and_u32),
1474 ENTRY(and_u32_locked),
1475 ENTRY(cmp_u32),
1476 ENTRY(test_u32),
1477 ENTRY_EX(bt_u32, 1),
1478 ENTRY_EX(btc_u32, 1),
1479 ENTRY_EX(btc_u32_locked, 1),
1480 ENTRY_EX(btr_u32, 1),
1481 ENTRY_EX(btr_u32_locked, 1),
1482 ENTRY_EX(bts_u32, 1),
1483 ENTRY_EX(bts_u32_locked, 1),
1484 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1485 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1486 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1487 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1488 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1489 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1490};
1491TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1492
1493
1494/*
1495 * 64-bit binary operations.
1496 */
1497static const BINU64_T g_aBinU64[] =
1498{
1499 ENTRY(add_u64),
1500 ENTRY(add_u64_locked),
1501 ENTRY(adc_u64),
1502 ENTRY(adc_u64_locked),
1503 ENTRY(sub_u64),
1504 ENTRY(sub_u64_locked),
1505 ENTRY(sbb_u64),
1506 ENTRY(sbb_u64_locked),
1507 ENTRY(or_u64),
1508 ENTRY(or_u64_locked),
1509 ENTRY(xor_u64),
1510 ENTRY(xor_u64_locked),
1511 ENTRY(and_u64),
1512 ENTRY(and_u64_locked),
1513 ENTRY(cmp_u64),
1514 ENTRY(test_u64),
1515 ENTRY_EX(bt_u64, 1),
1516 ENTRY_EX(btc_u64, 1),
1517 ENTRY_EX(btc_u64_locked, 1),
1518 ENTRY_EX(btr_u64, 1),
1519 ENTRY_EX(btr_u64_locked, 1),
1520 ENTRY_EX(bts_u64, 1),
1521 ENTRY_EX(bts_u64_locked, 1),
1522 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1523 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1524 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1525 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1526 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1527 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1528};
1529TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1530
1531
1532/*
1533 * XCHG
1534 */
1535static void XchgTest(void)
1536{
1537 if (!SubTestAndCheckIfEnabled("xchg"))
1538 return;
1539 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1540 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1541 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1542 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1543
1544 static struct
1545 {
1546 uint8_t cb; uint64_t fMask;
1547 union
1548 {
1549 uintptr_t pfn;
1550 FNIEMAIMPLXCHGU8 *pfnU8;
1551 FNIEMAIMPLXCHGU16 *pfnU16;
1552 FNIEMAIMPLXCHGU32 *pfnU32;
1553 FNIEMAIMPLXCHGU64 *pfnU64;
1554 } u;
1555 }
1556 s_aXchgWorkers[] =
1557 {
1558 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1559 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1560 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1561 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1562 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1563 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1564 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1565 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1566 };
1567 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1568 {
1569 RTUINT64U uIn1, uIn2, uMem, uDst;
1570 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1571 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1572 if (uIn1.u == uIn2.u)
1573 uDst.u = uIn2.u = ~uIn2.u;
1574
1575 switch (s_aXchgWorkers[i].cb)
1576 {
1577 case 1:
1578 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1579 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1580 break;
1581 case 2:
1582 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1583 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1584 break;
1585 case 4:
1586 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1587 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1588 break;
1589 case 8:
1590 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1591 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1592 break;
1593 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1594 }
1595
1596 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1597 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1598 }
1599}
1600
1601
1602/*
1603 * XADD
1604 */
1605static void XaddTest(void)
1606{
1607#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1608 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1609 static struct \
1610 { \
1611 const char *pszName; \
1612 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1613 BINU ## a_cBits ## _TEST_T const *paTests; \
1614 uint32_t const *pcTests; \
1615 } const s_aFuncs[] = \
1616 { \
1617 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1618 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1619 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1620 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1621 }; \
1622 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1623 { \
1624 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1625 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1626 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1627 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1628 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1629 { \
1630 uint32_t fEfl = paTests[iTest].fEflIn; \
1631 a_Type uSrc = paTests[iTest].uSrcIn; \
1632 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1633 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1634 if ( fEfl != paTests[iTest].fEflOut \
1635 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1636 || uSrc != paTests[iTest].uDstIn) \
1637 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1638 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1639 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1640 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1641 } \
1642 } \
1643 } while(0)
1644 TEST_XADD(8, uint8_t, "%#04x");
1645 TEST_XADD(16, uint16_t, "%#06x");
1646 TEST_XADD(32, uint32_t, "%#010RX32");
1647 TEST_XADD(64, uint64_t, "%#010RX64");
1648}
1649
1650
1651/*
1652 * CMPXCHG
1653 */
1654
1655static void CmpXchgTest(void)
1656{
1657#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1658 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1659 static struct \
1660 { \
1661 const char *pszName; \
1662 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1663 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1664 BINU ## a_cBits ## _TEST_T const *paTests; \
1665 uint32_t const *pcTests; \
1666 } const s_aFuncs[] = \
1667 { \
1668 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1669 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1670 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1671 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1672 }; \
1673 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1674 { \
1675 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1676 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1677 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1678 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1679 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1680 { \
1681 /* as is (99% likely to be negative). */ \
1682 uint32_t fEfl = paTests[iTest].fEflIn; \
1683 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1684 a_Type uA = paTests[iTest].uDstIn; \
1685 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1686 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1687 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1688 if ( fEfl != paTests[iTest].fEflOut \
1689 || *g_pu ## a_cBits != uExpect \
1690 || uA != paTests[iTest].uSrcIn) \
1691 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1692 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1693 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1694 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1695 /* positive */ \
1696 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1697 uA = paTests[iTest].uDstIn; \
1698 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1699 fEfl = paTests[iTest].fEflIn; \
1700 uA = paTests[iTest].uDstIn; \
1701 *g_pu ## a_cBits = uA; \
1702 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1703 if ( fEfl != fEflExpect \
1704 || *g_pu ## a_cBits != uNew \
1705 || uA != paTests[iTest].uDstIn) \
1706 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1707 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1708 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1709 EFlagsDiff(fEfl, fEflExpect)); \
1710 } \
1711 } \
1712 } while(0)
1713 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1714 TEST_CMPXCHG(16, uint16_t, "%#06x");
1715 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1716#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1717 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1718#endif
1719}
1720
1721static void CmpXchg8bTest(void)
1722{
1723 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
1724 static struct
1725 {
1726 const char *pszName;
1727 FNIEMAIMPLCMPXCHG8B *pfn;
1728 } const s_aFuncs[] =
1729 {
1730 { "cmpxchg8b", iemAImpl_cmpxchg8b },
1731 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1732 };
1733 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1734 {
1735 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1736 continue;
1737 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1738 {
1739 uint64_t const uOldValue = RandU64();
1740 uint64_t const uNewValue = RandU64();
1741
1742 /* positive test. */
1743 RTUINT64U uA, uB;
1744 uB.u = uNewValue;
1745 uA.u = uOldValue;
1746 *g_pu64 = uOldValue;
1747 uint32_t fEflIn = RandEFlags();
1748 uint32_t fEfl = fEflIn;
1749 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1750 if ( fEfl != (fEflIn | X86_EFL_ZF)
1751 || *g_pu64 != uNewValue
1752 || uA.u != uOldValue)
1753 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1754 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1755 fEfl, *g_pu64, uA.u,
1756 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1757 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1758
1759 /* negative */
1760 uint64_t const uExpect = ~uOldValue;
1761 *g_pu64 = uExpect;
1762 uA.u = uOldValue;
1763 uB.u = uNewValue;
1764 fEfl = fEflIn = RandEFlags();
1765 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1766 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1767 || *g_pu64 != uExpect
1768 || uA.u != uExpect)
1769 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1770 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1771 fEfl, *g_pu64, uA.u,
1772 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1773 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1774 }
1775 }
1776}
1777
1778static void CmpXchg16bTest(void)
1779{
1780 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1781 static struct
1782 {
1783 const char *pszName;
1784 FNIEMAIMPLCMPXCHG16B *pfn;
1785 } const s_aFuncs[] =
1786 {
1787 { "cmpxchg16b", iemAImpl_cmpxchg16b },
1788 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1789#if !defined(RT_ARCH_ARM64)
1790 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1791#endif
1792 };
1793 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1794 {
1795 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1796 continue;
1797#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1798 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1799 {
1800 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1801 continue;
1802 }
1803#endif
1804 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1805 {
1806 RTUINT128U const uOldValue = RandU128();
1807 RTUINT128U const uNewValue = RandU128();
1808
1809 /* positive test. */
1810 RTUINT128U uA, uB;
1811 uB = uNewValue;
1812 uA = uOldValue;
1813 *g_pu128 = uOldValue;
1814 uint32_t fEflIn = RandEFlags();
1815 uint32_t fEfl = fEflIn;
1816 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1817 if ( fEfl != (fEflIn | X86_EFL_ZF)
1818 || g_pu128->s.Lo != uNewValue.s.Lo
1819 || g_pu128->s.Hi != uNewValue.s.Hi
1820 || uA.s.Lo != uOldValue.s.Lo
1821 || uA.s.Hi != uOldValue.s.Hi)
1822 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1823 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1824 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1825 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1826 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1827 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1828 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1829 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1830
1831 /* negative */
1832 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1833 *g_pu128 = uExpect;
1834 uA = uOldValue;
1835 uB = uNewValue;
1836 fEfl = fEflIn = RandEFlags();
1837 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1838 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1839 || g_pu128->s.Lo != uExpect.s.Lo
1840 || g_pu128->s.Hi != uExpect.s.Hi
1841 || uA.s.Lo != uExpect.s.Lo
1842 || uA.s.Hi != uExpect.s.Hi)
1843 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1844 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1845 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1846 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1847 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1848 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1849 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1850 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1851 }
1852 }
1853}
1854
1855
1856/*
1857 * Double shifts.
1858 *
1859 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1860 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines ShiftDblU<bits>Generate(), which writes a test data array for each
 * entry in a_aSubTests whose EFLAGS flavour is either native or equal to
 * g_idxCpuEflFlavour.  The shift count is stored in the uMisc field.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            || a_aSubTests[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour) \
        { \
            GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_TestType Test; \
                Test.fEflIn    = RandEFlags(); \
                Test.fEflOut   = Test.fEflIn; \
                Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
                Test.uDstOut   = Test.uDstIn; \
                Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
                /* The shift count needs to go way beyond the a_cBits limit. */ \
                Test.uMisc     = RandU8() & (a_cBits * 4 - 1); \
                a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
                RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                             Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
            } \
            GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
        } \
    } \
}
#else
/* No generator: expands to nothing. */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1890
1891#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1892TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1893\
1894static a_SubTestType const a_aSubTests[] = \
1895{ \
1896 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1897 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1898 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1899 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1900}; \
1901\
1902GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1903\
1904static void ShiftDblU ## a_cBits ## Test(void) \
1905{ \
1906 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1907 { \
1908 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
1909 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1910 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1911 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1912 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1913 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1914 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1915 { \
1916 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1917 { \
1918 uint32_t fEfl = paTests[iTest].fEflIn; \
1919 a_Type uDst = paTests[iTest].uDstIn; \
1920 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1921 if ( uDst != paTests[iTest].uDstOut \
1922 || fEfl != paTests[iTest].fEflOut) \
1923 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1924 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1925 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1926 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1927 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1928 else \
1929 { \
1930 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1931 *g_pfEfl = paTests[iTest].fEflIn; \
1932 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1933 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1934 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1935 } \
1936 } \
1937 pfn = a_aSubTests[iFn].pfnNative; \
1938 } \
1939 } \
1940}
1941TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1942TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1943TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1944
1945#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the double shift test data for the 16, 32 and 64-bit operand sizes.
 *
 * @param   pOut    Output stream, passed on to each per-size generator.
 * @param   cTests  Test count, passed on to each per-size generator.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
1952#endif
1953
/**
 * Runs the double shift tests for the 16, 32 and 64-bit operand sizes, in that order.
 */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
1960
1961
1962/*
1963 * Unary operators.
1964 *
1965 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1966 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Instantiates UnaryU<a_cBits>Generate(), which writes one test data array
 * per entry of g_aUnaryU<a_cBits> to the output stream.
 *
 * Each record holds random EFLAGS and destination input together with the
 * values the entry's pfn worker produced from them; the source and misc
 * columns are always written as zero.
 *
 * @param   a_cBits         Operand width in bits; selects RandU<a_cBits>().
 * @param   a_Type          Integer type of the operand (not used by the generator).
 * @param   a_Fmt           Format specifier for printing an operand.
 * @param   a_TestType      Test record type.
 * @param   a_SubTestType   Entry type of the g_aUnaryU<a_cBits> table.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        a_SubTestType const * const pSubTest = &g_aUnaryU ## a_cBits[iFn]; \
        GenerateArrayStart(pOut, pSubTest->pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            /* Inputs first (EFLAGS before the operand), outputs start out as copies. */ \
            Test.fEflIn = RandEFlags(); \
            Test.uDstIn = RandU ## a_cBits(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = 0; \
            pSubTest->pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, pSubTest->pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1993
1994#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1995TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1996static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1997{ \
1998 ENTRY(inc_u ## a_cBits), \
1999 ENTRY(inc_u ## a_cBits ## _locked), \
2000 ENTRY(dec_u ## a_cBits), \
2001 ENTRY(dec_u ## a_cBits ## _locked), \
2002 ENTRY(not_u ## a_cBits), \
2003 ENTRY(not_u ## a_cBits ## _locked), \
2004 ENTRY(neg_u ## a_cBits), \
2005 ENTRY(neg_u ## a_cBits ## _locked), \
2006}; \
2007\
2008GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2009\
2010static void UnaryU ## a_cBits ## Test(void) \
2011{ \
2012 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2013 { \
2014 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
2015 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2016 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
2017 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2018 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2019 { \
2020 uint32_t fEfl = paTests[iTest].fEflIn; \
2021 a_Type uDst = paTests[iTest].uDstIn; \
2022 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2023 if ( uDst != paTests[iTest].uDstOut \
2024 || fEfl != paTests[iTest].fEflOut) \
2025 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2026 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2027 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2028 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2029 else \
2030 { \
2031 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2032 *g_pfEfl = paTests[iTest].fEflIn; \
2033 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2034 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2035 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2036 } \
2037 } \
2038 } \
2039}
2040TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2041TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2042TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2043TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2044
2045#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the unary operator test data for the 8, 16, 32 and 64-bit operand sizes.
 *
 * @param   pOut    Output stream, passed on to each per-size generator.
 * @param   cTests  Test count, passed on to each per-size generator.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
2053#endif
2054
/**
 * Runs the unary operator tests for the 8, 16, 32 and 64-bit operand sizes, in that order.
 */
static void UnaryTest(void)
{
    UnaryU8Test();
    UnaryU16Test();
    UnaryU32Test();
    UnaryU64Test();
}
2062
2063
2064/*
2065 * Shifts.
2066 *
2067 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2068 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Instantiates ShiftU<a_cBits>Generate(), the test data generator for the
 * shift and rotate workers listed in @a a_aSubTests.
 *
 * For every table entry that is either flavour neutral or matches
 * g_idxCpuEflFlavour, two records are written per iteration: one with random
 * EFLAGS input and a second ("b") one that reuses the destination and shift
 * count but derives its EFLAGS input from the inverted first input.  The
 * outputs come from the entry's pfnNative worker.
 *
 * @param   a_cBits     Operand width in bits; selects RandU<a_cBits>Dst().
 * @param   a_Fmt       Format specifier for printing an operand.
 * @param   a_TestType  Test record type; the shift count is kept in uMisc.
 * @param   a_aSubTests Subtest table to generate data for.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        /* Skip entries for an EFLAGS flavour other than the selected one. */ \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            /* The type of uMisc depends on a_TestType, so cast it to match %-2u. */ \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Second record: same operand and count, EFLAGS input derived from the inverted first input. */ \
            Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2105
2106#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2107TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2108static a_SubTestType const a_aSubTests[] = \
2109{ \
2110 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2111 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2112 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2113 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2114 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2115 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2116 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2117 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2118 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2119 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2120 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2121 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2122 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2123 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2124}; \
2125\
2126GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2127\
2128static void ShiftU ## a_cBits ## Test(void) \
2129{ \
2130 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2131 { \
2132 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2133 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2134 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2135 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2136 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2137 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2138 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2139 { \
2140 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2141 { \
2142 uint32_t fEfl = paTests[iTest].fEflIn; \
2143 a_Type uDst = paTests[iTest].uDstIn; \
2144 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2145 if ( uDst != paTests[iTest].uDstOut \
2146 || fEfl != paTests[iTest].fEflOut ) \
2147 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2148 iTest, iVar == 0 ? "" : "/n", \
2149 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2150 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2151 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2152 else \
2153 { \
2154 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2155 *g_pfEfl = paTests[iTest].fEflIn; \
2156 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2157 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2158 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2159 } \
2160 } \
2161 pfn = a_aSubTests[iFn].pfnNative; \
2162 } \
2163 } \
2164}
2165TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2166TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2167TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2168TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2169
2170#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the shift/rotate test data for the 8, 16, 32 and 64-bit operand sizes.
 *
 * @param   pOut    Output stream, passed on to each per-size generator.
 * @param   cTests  Test count, passed on to each per-size generator.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
2178#endif
2179
/**
 * Runs the shift/rotate tests for the 8, 16, 32 and 64-bit operand sizes, in that order.
 */
static void ShiftTest(void)
{
    ShiftU8Test();
    ShiftU16Test();
    ShiftU32Test();
    ShiftU64Test();
}
2187
2188
2189/*
2190 * Multiplication and division.
2191 *
2192 * Note! The 8-bit functions has a different format, so we need to duplicate things.
2193 * Note! Currently ignoring undefined bits.
2194 */
2195
2196/* U8 */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/**
 * The 8-bit MUL, IMUL, DIV and IDIV subtests, each in an AMD and an Intel flavour.
 *
 * MulDivU8Test() reads each entry's uExtra member as the mask of EFLAGS bits
 * to ignore when comparing results; presumably that is the last argument given
 * here, with the first mask being the flags the instruction leaves undefined
 * (TODO confirm against the ENTRY_AMD_EX / ENTRY_INTEL_EX definitions).
 */
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2211
2212#ifdef TSTIEMAIMPL_WITH_GENERATOR
2213static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
2214{
2215 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2216 {
2217 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2218 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2219 continue;
2220 GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T"); \
2221 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2222 {
2223 MULDIVU8_TEST_T Test;
2224 Test.fEflIn = RandEFlags();
2225 Test.fEflOut = Test.fEflIn;
2226 Test.uDstIn = RandU16Dst(iTest);
2227 Test.uDstOut = Test.uDstIn;
2228 Test.uSrcIn = RandU8Src(iTest);
2229 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2230 RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
2231 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
2232 }
2233 GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
2234 }
2235}
2236#endif
2237
2238static void MulDivU8Test(void)
2239{
2240 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2241 {
2242 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2243 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2244 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2245 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2246 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2247 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2248 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2249 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2250 {
2251 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2252 {
2253 uint32_t fEfl = paTests[iTest].fEflIn;
2254 uint16_t uDst = paTests[iTest].uDstIn;
2255 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2256 if ( uDst != paTests[iTest].uDstOut
2257 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2258 || rc != paTests[iTest].rc)
2259 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2260 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2261 "%sexpected %#08x %#06RX16 %d%s\n",
2262 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2263 iVar ? " " : "", fEfl, uDst, rc,
2264 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2265 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2266 else
2267 {
2268 *g_pu16 = paTests[iTest].uDstIn;
2269 *g_pfEfl = paTests[iTest].fEflIn;
2270 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2271 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2272 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2273 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2274 }
2275 }
2276 pfn = g_aMulDivU8[iFn].pfnNative;
2277 }
2278 }
2279}
2280
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Instantiates MulDivU<a_cBits>Generate(), the test data generator for the
 * two-destination multiplication and division workers in @a a_aSubTests.
 *
 * Entries for an EFLAGS flavour other than the selected one are skipped.  For
 * the remaining ones, @a cTests records are produced by running pfnNative on
 * random input.  The generated function is static like the other generators
 * in this file.
 *
 * @param   a_cBits     Operand width in bits; selects the RandU<a_cBits>Dst/Src helpers.
 * @param   a_Fmt       Format specifier for printing an operand.
 * @param   a_TestType  Test record type.
 * @param   a_aSubTests Subtest table to generate data for.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2312
2313#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2314TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2315static a_SubTestType const a_aSubTests [] = \
2316{ \
2317 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2318 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2319 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2320 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2321 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2322 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2323 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2324 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2325}; \
2326\
2327GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2328\
2329static void MulDivU ## a_cBits ## Test(void) \
2330{ \
2331 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2332 { \
2333 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2334 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2335 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2336 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2337 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2338 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2339 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2340 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2341 { \
2342 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2343 { \
2344 uint32_t fEfl = paTests[iTest].fEflIn; \
2345 a_Type uDst1 = paTests[iTest].uDst1In; \
2346 a_Type uDst2 = paTests[iTest].uDst2In; \
2347 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2348 if ( uDst1 != paTests[iTest].uDst1Out \
2349 || uDst2 != paTests[iTest].uDst2Out \
2350 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2351 || rc != paTests[iTest].rc) \
2352 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2353 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2354 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2355 iTest, iVar == 0 ? "" : "/n", \
2356 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2357 fEfl, uDst1, uDst2, rc, \
2358 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2359 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2360 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2361 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2362 else \
2363 { \
2364 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2365 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2366 *g_pfEfl = paTests[iTest].fEflIn; \
2367 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2368 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2369 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2370 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2371 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2372 } \
2373 } \
2374 pfn = a_aSubTests[iFn].pfnNative; \
2375 } \
2376 } \
2377}
2378TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2379TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2380TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2381
2382#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the multiplication/division test data for the 8, 16, 32 and 64-bit operand sizes.
 *
 * @param   pOut    Output stream, passed on to each per-size generator.
 * @param   cTests  Test count, passed on to each per-size generator.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
2390#endif
2391
/**
 * Runs the multiplication/division tests for the 8, 16, 32 and 64-bit operand sizes, in that order.
 */
static void MulDivTest(void)
{
    MulDivU8Test();
    MulDivU16Test();
    MulDivU32Test();
    MulDivU64Test();
}
2399
2400
2401/*
2402 * BSWAP
2403 */
/**
 * Checks the three BSWAP workers against fixed input values.
 *
 * Each operand size is its own subtest and is only run when
 * SubTestAndCheckIfEnabled() accepts it.  All calls operate on the global
 * g_pu32 / g_pu64 buffers.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        /* The enabled checks expect the upper 16 bits to be kept and the lower
           16 bits to be zero; the disabled (#if 0) variants would instead
           expect the two low bytes to be swapped. */
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        /* Full byte reversal of the 32-bit value. */
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        /* Full byte reversal of the 64-bit value. */
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2438
2439
2440
2441/*********************************************************************************************************************************
2442* Floating point (x87 style) *
2443*********************************************************************************************************************************/
2444
2445/*
2446 * FPU constant loading.
2447 */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** The FPU constant loading subtests, iterated by FpuLdConstGenerate() and
 *  FpuLoadConstTest(). */
static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2460
2461#ifdef TSTIEMAIMPL_WITH_GENERATOR
2462static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2463{
2464 X86FXSTATE State;
2465 RT_ZERO(State);
2466 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2467 {
2468 GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2469 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2470 {
2471 State.FCW = RandFcw();
2472 State.FSW = RandFsw();
2473
2474 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2475 {
2476 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2477 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2478 g_aFpuLdConst[iFn].pfn(&State, &Res);
2479 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2480 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2481 }
2482 }
2483 GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2484 }
2485}
2486#endif
2487
2488static void FpuLoadConstTest(void)
2489{
2490 /*
2491 * Inputs:
2492 * - FSW: C0, C1, C2, C3
2493 * - FCW: Exception masks, Precision control, Rounding control.
2494 *
2495 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2496 */
2497 X86FXSTATE State;
2498 RT_ZERO(State);
2499 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2500 {
2501 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2502 continue;
2503
2504 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2505 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2506 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2507 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2508 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2509 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2510 {
2511 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2512 {
2513 State.FCW = paTests[iTest].fFcw;
2514 State.FSW = paTests[iTest].fFswIn;
2515 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2516 pfn(&State, &Res);
2517 if ( Res.FSW != paTests[iTest].fFswOut
2518 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2519 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2520 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2521 Res.FSW, FormatR80(&Res.r80Result),
2522 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2523 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2524 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2525 FormatFcw(paTests[iTest].fFcw) );
2526 }
2527 pfn = g_aFpuLdConst[iFn].pfnNative;
2528 }
2529 }
2530}
2531
2532
2533/*
2534 * Load floating point values from memory.
2535 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Instantiates FpuLdR<a_cBits>Generate(), the test data generator for the
 * workers in @a a_aSubTests that load a floating point value from memory.
 *
 * Each iteration draws a random FCW/FSW pair and input value and then emits
 * four records, one per rounding control value (0..3).
 *
 * @param   a_cBits     Width of the input value; selects RandR<a_cBits>Src()
 *                      and GenFormatR<a_cBits>().
 * @param   a_rdTypeIn  Type of the input value.
 * @param   a_aSubTests Subtest table to generate data for.
 * @param   a_TestType  Test record type.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        const char * const pszSubTest = a_aSubTests[iFn].pszName; \
        GenerateArrayStart(pOut, pszSubTest, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            /* Same random state and input, all four rounding modes. */ \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, pszSubTest); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2567
2568#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2569typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2570typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2571TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2572\
2573static const a_SubTestType a_aSubTests[] = \
2574{ \
2575 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2576}; \
2577GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2578\
2579static void FpuLdR ## a_cBits ## Test(void) \
2580{ \
2581 X86FXSTATE State; \
2582 RT_ZERO(State); \
2583 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2584 { \
2585 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2586 \
2587 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2588 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2589 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2590 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2591 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2592 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2593 { \
2594 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2595 { \
2596 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2597 State.FCW = paTests[iTest].fFcw; \
2598 State.FSW = paTests[iTest].fFswIn; \
2599 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2600 pfn(&State, &Res, &InVal); \
2601 if ( Res.FSW != paTests[iTest].fFswOut \
2602 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2603 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2604 "%s -> fsw=%#06x %s\n" \
2605 "%s expected %#06x %s%s%s (%s)\n", \
2606 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2607 FormatR ## a_cBits(&paTests[iTest].InVal), \
2608 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2609 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2610 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2611 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2612 FormatFcw(paTests[iTest].fFcw) ); \
2613 } \
2614 pfn = a_aSubTests[iFn].pfnNative; \
2615 } \
2616 } \
2617}
2618
2619TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2620TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2621TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2622
2623#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the floating point load test data for 80, 64 and 32-bit inputs.
 *
 * @param   pOut    Output stream, passed on to each per-width generator.
 * @param   cTests  Test count, passed on to each per-width generator.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
2630#endif
2631
/**
 * Runs the floating point load tests for 80, 64 and 32-bit inputs, in that order.
 */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
2638
2639
2640/*
2641 * Load integer values from memory.
2642 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines FpuLdI<a_cBits>Generate, the test data generator for loading an
 * a_cBits wide integer from memory.
 *
 * One array is written per a_aSubTests entry.  Each of the cTests inputs is
 * run once for every rounding control value (0..3), yielding four table rows
 * holding FCW, input FSW, output FSW, the 80-bit result and the input value.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
        { \
            /* Random control + status words first, then the integer input. */ \
            FpuState.FCW = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            a_iTypeIn iValueIn = (a_iTypeIn)RandU ## a_cBits ## Src(idxTest); \
            \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                /* Swap in the rounding control under test, keep the other FCW bits. */ \
                FpuState.FCW = (FpuState.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FpuState, &FpuRes, &iValueIn); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             FpuState.FCW, FpuState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result), \
                             iValueIn, idxTest, iRc); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
/* No generator in this build: expands to nothing. */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2673
/*
 * Instantiates everything needed to test loading an a_cBits wide integer
 * from memory (fild_r80_from_i<a_cBits>):
 *   - the worker function type and its pointer type,
 *   - the sub-test type and the one-entry table a_aSubTests,
 *   - the generator (via GEN_FPU_LOAD_INT),
 *   - FpuLdI<a_cBits>Test, which replays the recorded tests for every
 *     variation and reports any mismatch in the output FSW or the 80-bit
 *     result.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
        { \
            uint32_t const                      cEntries  = *a_aSubTests[idxFn].pcTests; \
            a_TestType const * const            paEntries = a_aSubTests[idxFn].paTests; \
            PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfnWorker = a_aSubTests[idxFn].pfn; \
            uint32_t const                      cVariants = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
            if (cEntries == 0) \
                RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t idxVar = 0; idxVar < cVariants; idxVar++) \
            { \
                for (uint32_t idxEntry = 0; idxEntry < cEntries; idxEntry++) \
                { \
                    a_TestType const * const pEntry = &paEntries[idxEntry]; \
                    a_iTypeIn const          iInVal = pEntry->iInVal; \
                    FpuState.FCW = pEntry->fFcw; \
                    FpuState.FSW = pEntry->fFswIn; \
                    IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                    pfnWorker(&FpuState, &Res, &iInVal); \
                    \
                    /* Nothing to report when both status word and value match. */ \
                    if (   Res.FSW == pEntry->fFswOut \
                        && RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &pEntry->rdResult)) \
                        continue; \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 idxEntry, idxVar ? "/n" : "", pEntry->fFcw, pEntry->fFswIn, pEntry->iInVal, \
                                 idxVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 idxVar ? " " : "", pEntry->fFswOut, FormatR80(&pEntry->rdResult), \
                                 FswDiff(Res.FSW, pEntry->fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &pEntry->rdResult) ? " - val" : "", \
                                 FormatFcw(pEntry->fFcw) ); \
                } \
                /* Every variation after the first runs the native worker. */ \
                pfnWorker = a_aSubTests[idxFn].pfnNative; \
            } \
        } \
    } \
}
2723
2724TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2725TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2726TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2727
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data for all integer load-from-memory widths.
 *
 * The generators run in 64-bit, 32-bit, 16-bit order.
 *
 * @param   pOut    Output stream handed to each generator.
 * @param   cTests  Test count handed to each generator.
 */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATOR(PRTSTREAM, uint32_t);
    static FNGENERATOR * const s_apfnGenerators[] =
    {
        FpuLdI64Generate,
        FpuLdI32Generate,
        FpuLdI16Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
2736
2737static void FpuLdIntTest(void)
2738{
2739 FpuLdI64Test();
2740 FpuLdI32Test();
2741 FpuLdI16Test();
2742}
2743
2744
2745/*
2746 * Load binary coded decimal values from memory.
2747 */
2748typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2749typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2750TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2751
2752static const FPU_LD_D80_T g_aFpuLdD80[] =
2753{
2754 ENTRY(fld_r80_from_d80)
2755};
2756
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data for loading packed BCD values.
 *
 * One FPU_D80_IN_TEST_T array is written per g_aFpuLdD80 entry.  Each input
 * is run with all four rounding control values, producing one row apiece.
 *
 * @param   pOut    Stream the table rows are written to.
 * @param   cTests  Number of input values to generate rows for.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuLdD80); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[idxFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++)
        {
            /* Random control + status words first, then the BCD input. */
            FpuState.FCW = RandFcw();
            FpuState.FSW = RandFsw();
            RTPBCD80U d80In = RandD80Src(idxTest);

            for (uint16_t iRc = 0; iRc < 4; iRc++)
            {
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Swap in the rounding control under test, keep the other FCW bits. */
                FpuState.FCW = (FpuState.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[idxFn].pfn(&FpuState, &FpuRes, &d80In);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             FpuState.FCW, FpuState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result),
                             GenFormatD80(&d80In), idxTest, iRc);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[idxFn].pszName);
    }
}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
2785
2786static void FpuLdD80Test(void)
2787{
2788 X86FXSTATE State;
2789 RT_ZERO(State);
2790 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2791 {
2792 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2793 continue;
2794
2795 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2796 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2797 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2798 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2799 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2800 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2801 {
2802 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2803 {
2804 RTPBCD80U const InVal = paTests[iTest].InVal;
2805 State.FCW = paTests[iTest].fFcw;
2806 State.FSW = paTests[iTest].fFswIn;
2807 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2808 pfn(&State, &Res, &InVal);
2809 if ( Res.FSW != paTests[iTest].fFswOut
2810 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2811 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2812 "%s -> fsw=%#06x %s\n"
2813 "%s expected %#06x %s%s%s (%s)\n",
2814 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2815 FormatD80(&paTests[iTest].InVal),
2816 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2817 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2818 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2819 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2820 FormatFcw(paTests[iTest].fFcw) );
2821 }
2822 pfn = g_aFpuLdD80[iFn].pfnNative;
2823 }
2824 }
2825}
2826
2827
/*
 * Store floating point values to memory.
 */
2831#ifdef TSTIEMAIMPL_WITH_GENERATOR
2832static const RTFLOAT80U g_aFpuStR32Specials[] =
2833{
2834 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2835 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2836 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2837 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2838};
2839static const RTFLOAT80U g_aFpuStR64Specials[] =
2840{
2841 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2842 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2843 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2844 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2845 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2846};
2847static const RTFLOAT80U g_aFpuStR80Specials[] =
2848{
2849 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2850};
/*
 * Defines FpuStR<a_cBits>Generate, the test data generator for storing an
 * 80-bit value as an a_cBits wide floating point value.
 *
 * The inputs are cTests random values followed by the entries of
 * g_aFpuStR<a_cBits>Specials.  Each input is run for all four rounding
 * control values and for all eight OM/UM/PM mask combinations, one table row
 * per run.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            /* Random inputs first, the hand picked specials last. */ \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Bits 1 thru 3 of iMask select OM, UM and PM; bit 0 is skipped (step of 2). */ \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    /* Pre-fill the output with 0xfe bytes so the worker's writes show up in the table. */ \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
2891#else
2892# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2893#endif
2894
/*
 * Instantiates everything needed to test storing an 80-bit value as an
 * a_cBits wide floating point value (fst_r80_to_r<a_cBits>):
 *   - the worker function type and its pointer type,
 *   - the sub-test type and the one-entry table a_aSubTests,
 *   - the generator (via GEN_FPU_STORE),
 *   - FpuStR<a_cBits>Test, which replays the recorded tests for every
 *     variation and reports any mismatch in the output FSW or stored value.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                /* Same 0xfe pre-fill as the generator, so untouched bytes compare equal. */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Every variation after the first runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2949
2950TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
2951TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
2952TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2953
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data for all floating point store-to-memory widths.
 *
 * The generators run in 80-bit, 64-bit, 32-bit order.
 *
 * @param   pOut    Output stream handed to each generator.
 * @param   cTests  Test count handed to each generator.
 */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATOR(PRTSTREAM, uint32_t);
    static FNGENERATOR * const s_apfnGenerators[] =
    {
        FpuStR80Generate,
        FpuStR64Generate,
        FpuStR32Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
2962
2963static void FpuStMemTest(void)
2964{
2965 FpuStR80Test();
2966 FpuStR64Test();
2967 FpuStR32Test();
2968}
2969
2970
2971/*
2972 * Store integer values to memory or register.
2973 */
2974TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2975TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2976TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2977
2978static const FPU_ST_I16_T g_aFpuStI16[] =
2979{
2980 ENTRY(fist_r80_to_i16),
2981 ENTRY_AMD( fistt_r80_to_i16, 0),
2982 ENTRY_INTEL(fistt_r80_to_i16, 0),
2983};
2984static const FPU_ST_I32_T g_aFpuStI32[] =
2985{
2986 ENTRY(fist_r80_to_i32),
2987 ENTRY(fistt_r80_to_i32),
2988};
2989static const FPU_ST_I64_T g_aFpuStI64[] =
2990{
2991 ENTRY(fist_r80_to_i64),
2992 ENTRY(fistt_r80_to_i64),
2993};
2994
2995#ifdef TSTIEMAIMPL_WITH_GENERATOR
2996static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
2997{
2998 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
2999 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
3000 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3001 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3002 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3003 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3004 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3005 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3006 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3007 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3008 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3009 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3010 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3011 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3012 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
3013 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3014 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3015 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3016 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3017 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3018 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3019 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3020 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3021 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3022 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3023 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3024 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3025 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3026 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3027 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3028 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3029 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3030 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3031 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3032 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3033 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3034 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3035 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3036 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3037 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3038 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3039 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3040 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3041 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3042 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3043 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3044 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3045};
3046static const RTFLOAT80U g_aFpuStI32Specials[] =
3047{
3048 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3049 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3050 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3051 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3052 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3053 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3054 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3055 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3056 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3057 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3058 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3059 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3060 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3061 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3062 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3063 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3064};
3065static const RTFLOAT80U g_aFpuStI64Specials[] =
3066{
3067 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3068 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3069 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3070 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3071 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3072 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3073 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3074 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3075 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3076 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3077 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3078 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3079 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3080 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3081 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3082 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3083 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3084};
3085
/*
 * Defines FpuStI<a_cBits>Generate, the test data generator for storing an
 * 80-bit value as an a_cBits wide integer.
 *
 * Entries with the native EFLAGS flavour go to pOut.  Entries whose flavour
 * equals g_idxCpuEflFlavour go to pOutCpu, and all other entries are skipped.
 * The inputs are cTests random values followed by the entries of
 * g_aFpuStI<a_cBits>Specials; each is run for all four rounding control
 * values and all eight OM/UM/PM mask combinations.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        /* Use the native worker when the entry has one, the regular one otherwise. */ \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfnWorker = a_aSubTests[idxFn].pfnNative \
                                                         ? a_aSubTests[idxFn].pfnNative : a_aSubTests[idxFn].pfn; \
        \
        /* Pick the output stream, or skip entries for another CPU flavour. */ \
        PRTSTREAM pStrm; \
        if (a_aSubTests[idxFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
            pStrm = pOut; \
        else if (a_aSubTests[idxFn].idxCpuEflFlavour == g_idxCpuEflFlavour) \
            pStrm = pOutCpu; \
        else \
            continue; \
        \
        GenerateArrayStart(pStrm, a_aSubTests[idxFn].pszName, #a_TestType); \
        uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            uint16_t const fFcwRand = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            /* Random inputs first, the hand picked specials last. */ \
            RTFLOAT80U const r80In = idxTest < cTests ? RandR80Src(idxTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Bits 1 thru 3 of iMaskSel select OM, UM and PM; bit 0 is skipped (step of 2). */ \
                for (uint16_t iMaskSel = 0; iMaskSel < 16; iMaskSel += 2 /*1*/) \
                { \
                    uint16_t fFswOut  = 0; \
                    a_iType  iStored  = ~(a_iType)2; \
                    FpuState.FCW = (fFcwRand & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                 | (iRc << X86_FCW_RC_SHIFT); \
                    /* Flipping all the mask bits for odd iMaskSel values is currently not done. */ \
                    FpuState.FCW |= (iMaskSel >> 1) << X86_FCW_OM_BIT; \
                    pfnWorker(&FpuState, &fFswOut, &iStored, &r80In); \
                    RTStrmPrintf(pStrm, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FpuState.FCW, FpuState.FSW, fFswOut, GenFormatR80(&r80In), \
                                 GenFormatI ## a_cBits(iStored), idxTest, iRc, iMaskSel); \
                } \
            } \
        } \
        GenerateArrayEnd(pStrm, a_aSubTests[idxFn].pszName); \
    } \
}
3134#else
3135# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3136#endif
3137
/*
 * Instantiates the generator (via GEN_FPU_STORE_INT) and FpuStI<a_cBits>Test
 * for storing an 80-bit value as an a_cBits wide integer.
 *
 * The test replays the recorded entries of every enabled a_aSubTests entry
 * for each variation and reports a failure when the output FSW or the stored
 * integer differs from the recorded one.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
        { \
            uint32_t const                    cEntries  = *a_aSubTests[idxFn].pcTests; \
            a_TestType const * const          paEntries = a_aSubTests[idxFn].paTests; \
            PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfnWorker = a_aSubTests[idxFn].pfn; \
            uint32_t const                    cVariants = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
            if (cEntries == 0) \
                RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t idxVar = 0; idxVar < cVariants; idxVar++) \
            { \
                for (uint32_t idxEntry = 0; idxEntry < cEntries; idxEntry++) \
                { \
                    a_TestType const * const pEntry  = &paEntries[idxEntry]; \
                    RTFLOAT80U const         InVal   = pEntry->InVal; \
                    uint16_t                 uFswOut = 0; \
                    a_iType                  iOutVal = ~(a_iType)2; \
                    FpuState.FCW = pEntry->fFcw; \
                    FpuState.FSW = pEntry->fFswIn; \
                    pfnWorker(&FpuState, &uFswOut, &iOutVal, &InVal); \
                    \
                    /* Nothing to report when both status word and value match. */ \
                    if (   uFswOut == pEntry->fFswOut \
                        && iOutVal == pEntry->iOutVal) \
                        continue; \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 idxEntry, idxVar ? "/n" : "", pEntry->fFcw, pEntry->fFswIn, \
                                 FormatR80(&pEntry->InVal), \
                                 idxVar ? " " : "", uFswOut, iOutVal, \
                                 idxVar ? " " : "", pEntry->fFswOut, pEntry->iOutVal, \
                                 FswDiff(uFswOut, pEntry->fFswOut), \
                                 iOutVal != pEntry->iOutVal ? " - val" : "", FormatFcw(pEntry->fFcw) ); \
                } \
                /* Every variation after the first runs the native worker. */ \
                pfnWorker = a_aSubTests[idxFn].pfnNative; \
            } \
        } \
    } \
}
3180
3181//fistt_r80_to_i16 diffs for AMD, of course :-)
3182
3183TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3184TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3185TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3186
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data for all integer store widths.
 *
 * The generators run in 64-bit, 32-bit, 16-bit order.
 *
 * @param   pOut     Common output stream handed to each generator.
 * @param   pOutCpu  CPU specific output stream handed to each generator.
 * @param   cTests   Test count handed to each generator.
 */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    typedef void FNGENERATOR(PRTSTREAM, PRTSTREAM, uint32_t);
    static FNGENERATOR * const s_apfnGenerators[] =
    {
        FpuStI64Generate,
        FpuStI32Generate,
        FpuStI16Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, pOutCpu, cTests);
}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
3195
3196static void FpuStIntTest(void)
3197{
3198 FpuStI64Test();
3199 FpuStI32Test();
3200 FpuStI16Test();
3201}
3202
3203
3204/*
3205 * Store as packed BCD value (memory).
3206 */
3207typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3208typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3209TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3210
3211static const FPU_ST_D80_T g_aFpuStD80[] =
3212{
3213 ENTRY(fst_r80_to_d80),
3214};
3215
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data for storing 80-bit values as packed BCD.
 *
 * The inputs are @a cTests random values followed by a fixed set of values
 * around the maximum and minimum.  Each input is run for all four rounding
 * control values and all eight OM/UM/PM mask combinations, one table row per
 * run.
 *
 * @param   pOut    Stream the table rows are written to.
 * @param   cTests  Number of random input values.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Boundary inputs, always as a positive/negative pair. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aFpuStD80); idxFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[idxFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t idxTest = 0; idxTest < cTests + RT_ELEMENTS(s_aSpecials); idxTest++)
        {
            uint16_t const fFcwRand = RandFcw();
            FpuState.FSW = RandFsw();
            /* Random inputs first, the boundary values last. */
            RTFLOAT80U const r80In = idxTest < cTests ? RandR80Src(idxTest, 59, true) : s_aSpecials[idxTest - cTests];

            for (uint16_t iRc = 0; iRc < 4; iRc++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                /* Bits 1 thru 3 of iMaskSel select OM, UM and PM; bit 0 is skipped (step of 2). */
                for (uint16_t iMaskSel = 0; iMaskSel < 16; iMaskSel += 2 /*1*/)
                {
                    uint16_t  fFswOut = 0;
                    RTPBCD80U d80Out  = RTPBCD80U_INIT_ZERO(0);
                    FpuState.FCW = (fFcwRand & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                                 | (iRc << X86_FCW_RC_SHIFT);
                    /* Flipping all the mask bits for odd iMaskSel values is currently not done. */
                    FpuState.FCW |= (iMaskSel >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[idxFn].pfn(&FpuState, &fFswOut, &d80Out, &r80In);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 FpuState.FCW, FpuState.FSW, fFswOut, GenFormatR80(&r80In),
                                 GenFormatD80(&d80Out), idxTest, iRc, iMaskSel);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[idxFn].pszName);
    }
}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
3269
3270
3271static void FpuStD80Test(void)
3272{
3273 X86FXSTATE State;
3274 RT_ZERO(State);
3275 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3276 {
3277 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3278 continue;
3279
3280 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3281 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3282 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3283 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3284 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3285 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3286 {
3287 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3288 {
3289 RTFLOAT80U const InVal = paTests[iTest].InVal;
3290 uint16_t uFswOut = 0;
3291 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3292 State.FCW = paTests[iTest].fFcw;
3293 State.FSW = paTests[iTest].fFswIn;
3294 pfn(&State, &uFswOut, &OutVal, &InVal);
3295 if ( uFswOut != paTests[iTest].fFswOut
3296 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3297 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3298 "%s -> fsw=%#06x %s\n"
3299 "%s expected %#06x %s%s%s (%s)\n",
3300 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3301 FormatR80(&paTests[iTest].InVal),
3302 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3303 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3304 FswDiff(uFswOut, paTests[iTest].fFswOut),
3305 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3306 FormatFcw(paTests[iTest].fFcw) );
3307 }
3308 pfn = g_aFpuStD80[iFn].pfnNative;
3309 }
3310 }
3311}
3312
3313
3314
3315/*********************************************************************************************************************************
3316* x87 FPU Binary Operations *
3317*********************************************************************************************************************************/
3318
3319/*
3320 * Binary FPU operations on two 80-bit floating point values.
3321 */
3322TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3323enum { kFpuBinaryHint_fprem = 1, };
3324
3325static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3326{
3327 ENTRY(fadd_r80_by_r80),
3328 ENTRY(fsub_r80_by_r80),
3329 ENTRY(fsubr_r80_by_r80),
3330 ENTRY(fmul_r80_by_r80),
3331 ENTRY(fdiv_r80_by_r80),
3332 ENTRY(fdivr_r80_by_r80),
3333 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3334 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3335 ENTRY(fscale_r80_by_r80),
3336 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3337 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3338 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3339 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3340 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3341 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3342};
3343
3344#ifdef TSTIEMAIMPL_WITH_GENERATOR
3345static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3346{
3347 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3348
3349 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3350 {
3351 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3352 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3353 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3354 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3355 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3356 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3357 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3358 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3359 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3360 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3361 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3362 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3363 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3364 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3365 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3366 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3367 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3368 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3369 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3370 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3371 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3372 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3373 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3374 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3375 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3376 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3377 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3378 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3379 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3380 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3381 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3382 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3383 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3384 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3385 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3386 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3387 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3388 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3389 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3390 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3391 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3392 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3393 /* fscale: Negative variants for the essentials of the above. */
3394 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3395 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3396 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3397 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3398 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3399 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3400 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3401 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3402 /* fscale: Some fun with denormals and pseudo-denormals. */
3403 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3404 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3405 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3406 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3407 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3408 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3409 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3410 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3411 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3412 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3413 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3414 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3415 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3416 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3417 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3418 };
3419
3420 X86FXSTATE State;
3421 RT_ZERO(State);
3422 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3423 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3424 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3425 {
3426 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3427 PRTSTREAM pOutFn = pOut;
3428 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3429 {
3430 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3431 continue;
3432 pOutFn = pOutCpu;
3433 }
3434
3435 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3436 uint32_t iTestOutput = 0;
3437 uint32_t cNormalInputPairs = 0;
3438 uint32_t cTargetRangeInputs = 0;
3439 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3440 {
3441 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3442 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3443 bool fTargetRange = false;
3444 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3445 {
3446 cNormalInputPairs++;
3447 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3448 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3449 cTargetRangeInputs += fTargetRange = true;
3450 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3451 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3452 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3453 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3454 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3455 cTargetRangeInputs += fTargetRange = true;
3456 }
3457 }
3458 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3459 {
3460 iTest -= 1;
3461 continue;
3462 }
3463
3464 uint16_t const fFcwExtra = 0;
3465 uint16_t const fFcw = RandFcw();
3466 State.FSW = RandFsw();
3467
3468 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3469 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3470 {
3471 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3472 | (iRounding << X86_FCW_RC_SHIFT)
3473 | (iPrecision << X86_FCW_PC_SHIFT)
3474 | X86_FCW_MASK_ALL;
3475 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3476 pfn(&State, &ResM, &InVal1, &InVal2);
3477 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3478 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3479 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3480
3481 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3482 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3483 pfn(&State, &ResU, &InVal1, &InVal2);
3484 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3485 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3486 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3487
3488 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3489 if (fXcpt)
3490 {
3491 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3492 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3493 pfn(&State, &Res1, &InVal1, &InVal2);
3494 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3495 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3496 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3497 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3498 {
3499 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3500 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3501 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3502 pfn(&State, &Res2, &InVal1, &InVal2);
3503 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3504 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3505 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3506 }
3507 if (!RT_IS_POWER_OF_TWO(fXcpt))
3508 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3509 if (fUnmasked & fXcpt)
3510 {
3511 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3512 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3513 pfn(&State, &Res3, &InVal1, &InVal2);
3514 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3515 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3516 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3517 }
3518 }
3519
3520 /* If the values are in range and caused no exceptions, do the whole series of
3521 partial reminders till we get the non-partial one or run into an exception. */
3522 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3523 {
3524 IEMFPURESULT ResPrev = ResM;
3525 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3526 {
3527 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3528 State.FSW = ResPrev.FSW;
3529 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3530 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3531 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3532 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3533 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3534 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3535 ResPrev = ResSeq;
3536 }
3537 }
3538 }
3539 }
3540 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3541 }
3542}
3543#endif
3544
3545
3546static void FpuBinaryR80Test(void)
3547{
3548 X86FXSTATE State;
3549 RT_ZERO(State);
3550 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3551 {
3552 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3553 continue;
3554
3555 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3556 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3557 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3558 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3559 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3560 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3561 {
3562 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3563 {
3564 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3565 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3566 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3567 State.FCW = paTests[iTest].fFcw;
3568 State.FSW = paTests[iTest].fFswIn;
3569 pfn(&State, &Res, &InVal1, &InVal2);
3570 if ( Res.FSW != paTests[iTest].fFswOut
3571 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3572 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3573 "%s -> fsw=%#06x %s\n"
3574 "%s expected %#06x %s%s%s (%s)\n",
3575 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3576 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3577 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3578 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3579 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3580 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3581 FormatFcw(paTests[iTest].fFcw) );
3582 }
3583 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3584 }
3585 }
3586}
3587
3588
3589/*
3590 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3591 */
/* Integer counterparts to the RTFLOATxxU_IS_NORMAL macros, for use via
   'a_Type2 ## _IS_NORMAL' in the generator macros: always true. */
#define int64_t_IS_NORMAL(a)    1
#define int32_t_IS_NORMAL(a)    1
#define int16_t_IS_NORMAL(a)    1
3595
3596#ifdef TSTIEMAIMPL_WITH_GENERATOR
3597static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
3598{
3599 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3600 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3601};
3602static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
3603{
3604 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3605 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3606};
3607static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
3608{
3609 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3610};
3611static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
3612{
3613 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3614};
3615
/*
 * Expands to FpuBinary<a_UpBits>Generate(), which writes one test data array
 * per entry in a_aSubTests.  Each input pair (random ones followed by the
 * s_aFpuBinary<a_UpBits>Specials entries) is run with every rounding and
 * precision control value, once with all exceptions unmasked and once with
 * all of them masked.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            bool const       fRandom = iTest < cTests; \
            RTFLOAT80U const InVal1  = fRandom \
                                     ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                     : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2  = fRandom \
                                     ? Rand ## a_UpBits ## Src2(iTest) \
                                     : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && fRandom) \
            { \
                /* Short on normal pairs, redo this index with new random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                    for (uint16_t fMask = 0; fMask <= X86_FCW_MASK_ALL; fMask += X86_FCW_MASK_ALL) \
                    { \
                        FpuState.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                     | (iRounding  << X86_FCW_RC_SHIFT) \
                                     | (iPrecision << X86_FCW_PC_SHIFT) \
                                     | fMask; \
                        IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&FpuState, &Result, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     FpuState.FCW, FpuState.FSW, Result.FSW, GenFormatR80(&InVal1), \
                                     GenFormat ## a_UpBits(&InVal2), GenFormatR80(&Result.r80Result), \
                                     iTest, iRounding, iPrecision, fMask ? 'c' : 'u'); \
                    } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3667#else
3668# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3669#endif
3670
/*
 * Instantiates everything needed for testing the add, mul, sub, subr, div and
 * divr workers taking an 80-bit value and a smaller second operand: the
 * sub-test type, the sub-test table, the generator (when enabled) and the
 * FpuBinary<a_UpBits>Test() function.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_,  a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_,  a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_,  a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_,  a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        a_SubTestType const * const pSubTest = &a_aSubTests[iFn]; \
        if (!SubTestAndCheckIfEnabled(pSubTest->pszName)) \
            continue; \
        \
        uint32_t const           cTests  = *pSubTest->pcTests; \
        a_TestType const * const paTests = pSubTest->paTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            /* First variation: regular worker.  Any further one: native worker. */ \
            PFNIEMAIMPLFPU ## a_UpBits const pfnWorker = iVar == 0 ? pSubTest->pfn : pSubTest->pfnNative; \
            char const * const               pszPad    = iVar ? " " : ""; \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_TestType const * const pTest = &paTests[iTest]; \
                RTFLOAT80U const InVal1 = pTest->InVal1; \
                a_Type2 const    InVal2 = pTest->InVal2; \
                IEMFPURESULT     Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FpuState.FCW = pTest->fFcw; \
                FpuState.FSW = pTest->fFswIn; \
                pfnWorker(&FpuState, &Result, &InVal1, &InVal2); \
                \
                if (   Result.FSW == pTest->fFswOut \
                    && RTFLOAT80U_ARE_IDENTICAL(&Result.r80Result, &pTest->OutVal)) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x %s\n" \
                                      "%s expected %#06x %s%s%s (%s)\n", \
                             iTest, iVar ? "/n" : "", pTest->fFcw, pTest->fFswIn, \
                             FormatR80(&pTest->InVal1), Format ## a_UpBits(&pTest->InVal2), \
                             pszPad, Result.FSW, FormatR80(&Result.r80Result), \
                             pszPad, pTest->fFswOut, FormatR80(&pTest->OutVal), \
                             FswDiff(Result.FSW, pTest->fFswOut), \
                             !RTFLOAT80U_ARE_IDENTICAL(&Result.r80Result, &pTest->OutVal) ? " - val" : "", \
                             FormatFcw(pTest->fFcw) ); \
            } \
        } \
    } \
}
3726
3727TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
3728TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
3729TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
3730TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3731
3732
3733/*
3734 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3735 */
3736#ifdef TSTIEMAIMPL_WITH_GENERATOR
3737static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3738{
3739 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3740 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3741};
3742static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3743{
3744 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3745 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3746};
3747static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3748{
3749 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3750 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3751};
3752static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3753{
3754 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3755};
3756static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3757{
3758 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3759};
3760
/*
 * Expands to FpuBinaryFsw<a_UpBits>Generate(), which writes one test data
 * array per entry in a_aSubTests.  Each input pair (random ones followed by
 * the s_aFpuBinaryFsw<a_UpBits>Specials entries) is run twice: with all
 * exceptions unmasked and with all of them masked.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            bool const       fRandom = iTest < cTests; \
            RTFLOAT80U const InVal1  = fRandom \
                                     ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2  = fRandom \
                                     ? Rand ## a_UpBits ## Src2(iTest) \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && fRandom) \
            { \
                /* Short on normal pairs, redo this index with new random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t fMask = 0; fMask <= X86_FCW_MASK_ALL; fMask += X86_FCW_MASK_ALL) \
            { \
                FpuState.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | fMask; \
                uint16_t fFswResult = 0; \
                a_aSubTests[iFn].pfn(&FpuState, &fFswResult, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             FpuState.FCW, FpuState.FSW, fFswResult, GenFormatR80(&InVal1), \
                             GenFormat ## a_UpBits(&InVal2), iTest, fMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3804#else
3805# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3806#endif
3807
/*
 * Instantiates everything needed for testing workers that only produce an
 * FSW value: the sub-test type, the sub-test table (entries given as the
 * variadic arguments), the generator (when enabled) and the
 * FpuBinaryFsw<a_UpBits>Test() function.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        a_SubTestType const * const pSubTest = &a_aSubTests[iFn]; \
        if (!SubTestAndCheckIfEnabled(pSubTest->pszName)) \
            continue; \
        \
        uint32_t const           cTests  = *pSubTest->pcTests; \
        a_TestType const * const paTests = pSubTest->paTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            /* First variation: regular worker.  Any further one: native worker. */ \
            PFNIEMAIMPLFPU ## a_UpBits ## FSW const pfnWorker = iVar == 0 ? pSubTest->pfn : pSubTest->pfnNative; \
            char const * const                      pszPad    = iVar ? " " : ""; \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_TestType const * const pTest = &paTests[iTest]; \
                uint16_t         fFswResult = 0; \
                RTFLOAT80U const InVal1     = pTest->InVal1; \
                a_Type2 const    InVal2     = pTest->InVal2; \
                FpuState.FCW = pTest->fFcw; \
                FpuState.FSW = pTest->fFswIn; \
                pfnWorker(&FpuState, &fFswResult, &InVal1, &InVal2); \
                \
                if (fFswResult == pTest->fFswOut) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x\n" \
                                      "%s expected %#06x %s (%s)\n", \
                             iTest, iVar ? "/n" : "", pTest->fFcw, pTest->fFswIn, \
                             FormatR80(&pTest->InVal1), Format ## a_UpBits(&pTest->InVal2), \
                             pszPad, fFswResult, \
                             pszPad, pTest->fFswOut, \
                             FswDiff(fFswResult, pTest->fFswOut), FormatFcw(pTest->fFcw) ); \
            } \
        } \
    } \
}
3855
3856TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
3857TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
3858TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
3859TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
3860TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3861
3862
/*
 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
 */
3866TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
3867
3868static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
3869{
3870 ENTRY(fcomi_r80_by_r80),
3871 ENTRY(fucomi_r80_by_r80),
3872};
3873
3874#ifdef TSTIEMAIMPL_WITH_GENERATOR
3875static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
3876{
3877 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3878 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3879};
3880
3881static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
3882{
3883 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
3884
3885 X86FXSTATE State;
3886 RT_ZERO(State);
3887 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3888 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3889 {
3890 GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
3891 uint32_t cNormalInputPairs = 0;
3892 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
3893 {
3894 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
3895 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
3896 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3897 cNormalInputPairs++;
3898 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3899 {
3900 iTest -= 1;
3901 continue;
3902 }
3903
3904 uint16_t const fFcw = RandFcw();
3905 State.FSW = RandFsw();
3906
3907 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
3908 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3909 {
3910 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
3911 uint16_t uFswOut = 0;
3912 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
3913 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
3914 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
3915 iTest, iMask ? 'c' : 'u');
3916 }
3917 }
3918 GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
3919 }
3920}
3921#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3922
3923static void FpuBinaryEflR80Test(void)
3924{
3925 X86FXSTATE State;
3926 RT_ZERO(State);
3927 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3928 {
3929 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3930 continue;
3931
3932 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3933 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3934 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3935 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3936 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3937 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3938 {
3939 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3940 {
3941 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3942 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3943 State.FCW = paTests[iTest].fFcw;
3944 State.FSW = paTests[iTest].fFswIn;
3945 uint16_t uFswOut = 0;
3946 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3947 if ( uFswOut != paTests[iTest].fFswOut
3948 || fEflOut != paTests[iTest].fEflOut)
3949 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3950 "%s -> fsw=%#06x efl=%#08x\n"
3951 "%s expected %#06x %#08x %s%s (%s)\n",
3952 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3953 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3954 iVar ? " " : "", uFswOut, fEflOut,
3955 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3956 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3957 FormatFcw(paTests[iTest].fFcw));
3958 }
3959 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3960 }
3961 }
3962}
3963
3964
3965/*********************************************************************************************************************************
3966* x87 FPU Unary Operations *
3967*********************************************************************************************************************************/
3968
3969/*
3970 * Unary FPU operations on one 80-bit floating point value.
3971 *
3972 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3973 * a rounding error or not.
3974 */
3975TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
3976
3977enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
3978static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
3979{
3980 ENTRY_EX( fabs_r80, kUnary_Accurate),
3981 ENTRY_EX( fchs_r80, kUnary_Accurate),
3982 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
3983 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
3984 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
3985 ENTRY_EX( frndint_r80, kUnary_Accurate),
3986 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
3987 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
3988 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
3989 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
3990};
3991
3992#ifdef TSTIEMAIMPL_WITH_GENERATOR
3993
3994static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3995{
3996 if ( enmKind == kUnary_Rounding_F2xm1
3997 && RTFLOAT80U_IS_NORMAL(pr80Val)
3998 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3999 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4000 return true;
4001 return false;
4002}
4003
4004static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4005{
4006 static RTFLOAT80U const s_aSpecials[] =
4007 {
4008 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4009 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4010 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4011 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4012 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4013 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4014 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4015 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4016 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4017 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4018 };
4019 X86FXSTATE State;
4020 RT_ZERO(State);
4021 uint32_t cMinNormals = cTests / 4;
4022 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4023 {
4024 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4025 PRTSTREAM pOutFn = pOut;
4026 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4027 {
4028 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4029 continue;
4030 pOutFn = pOutCpu;
4031 }
4032
4033 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4034 uint32_t iTestOutput = 0;
4035 uint32_t cNormalInputs = 0;
4036 uint32_t cTargetRangeInputs = 0;
4037 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4038 {
4039 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4040 if (RTFLOAT80U_IS_NORMAL(&InVal))
4041 {
4042 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4043 {
4044 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4045 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4046 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4047 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4048 cTargetRangeInputs++;
4049 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4050 {
4051 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4052 cTargetRangeInputs++;
4053 }
4054 }
4055 cNormalInputs++;
4056 }
4057 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4058 {
4059 iTest -= 1;
4060 continue;
4061 }
4062
4063 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4064 uint16_t const fFcw = RandFcw();
4065 State.FSW = RandFsw();
4066
4067 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4068 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4069 {
4070 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4071 | (iRounding << X86_FCW_RC_SHIFT)
4072 | (iPrecision << X86_FCW_PC_SHIFT)
4073 | X86_FCW_MASK_ALL;
4074 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4075 pfn(&State, &ResM, &InVal);
4076 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4077 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4078 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4079
4080 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4081 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4082 pfn(&State, &ResU, &InVal);
4083 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4084 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4085 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4086
4087 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4088 if (fXcpt)
4089 {
4090 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4091 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4092 pfn(&State, &Res1, &InVal);
4093 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4094 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4095 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4096 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4097 {
4098 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4099 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4100 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4101 pfn(&State, &Res2, &InVal);
4102 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4103 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4104 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4105 }
4106 if (!RT_IS_POWER_OF_TWO(fXcpt))
4107 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4108 if (fUnmasked & fXcpt)
4109 {
4110 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4111 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4112 pfn(&State, &Res3, &InVal);
4113 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4114 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4115 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4116 }
4117 }
4118 }
4119 }
4120 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4121 }
4122}
4123#endif
4124
4125static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4126{
4127 if (fFcw1 == fFcw2)
4128 return true;
4129 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4130 {
4131 *pfRndErr = true;
4132 return true;
4133 }
4134 return false;
4135}
4136
4137static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4138{
4139 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4140 return true;
4141 if ( fRndErrOk
4142 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4143 {
4144 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4145 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4146 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4147 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4148 ||
4149 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4150 && pr80Val1->s.uMantissa == UINT64_MAX
4151 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4152 ||
4153 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4154 && pr80Val2->s.uMantissa == UINT64_MAX
4155 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4156 {
4157 *pfRndErr = true;
4158 return true;
4159 }
4160 }
4161 return false;
4162}
4163
4164
4165static void FpuUnaryR80Test(void)
4166{
4167 X86FXSTATE State;
4168 RT_ZERO(State);
4169 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4170 {
4171 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4172 continue;
4173
4174 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4175 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4176 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4177 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4178 uint32_t cRndErrs = 0;
4179 uint32_t cPossibleRndErrs = 0;
4180 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4181 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4182 {
4183 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4184 {
4185 RTFLOAT80U const InVal = paTests[iTest].InVal;
4186 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4187 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4188 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4189 State.FSW = paTests[iTest].fFswIn;
4190 pfn(&State, &Res, &InVal);
4191 bool fRndErr = false;
4192 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4193 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4194 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4195 "%s -> fsw=%#06x %s\n"
4196 "%s expected %#06x %s%s%s%s (%s)\n",
4197 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4198 FormatR80(&paTests[iTest].InVal),
4199 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4200 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4201 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4202 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4203 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4204 cRndErrs += fRndErr;
4205 cPossibleRndErrs += fRndErrOk;
4206 }
4207 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4208 }
4209 if (cPossibleRndErrs > 0)
4210 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4211 }
4212}
4213
4214
4215/*
4216 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4217 */
4218TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4219
4220static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4221{
4222 ENTRY(ftst_r80),
4223 ENTRY_EX(fxam_r80, 1),
4224};
4225
4226#ifdef TSTIEMAIMPL_WITH_GENERATOR
4227static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4228{
4229 static RTFLOAT80U const s_aSpecials[] =
4230 {
4231 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4232 };
4233
4234 X86FXSTATE State;
4235 RT_ZERO(State);
4236 uint32_t cMinNormals = cTests / 4;
4237 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4238 {
4239 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4240 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4241 PRTSTREAM pOutFn = pOut;
4242 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4243 {
4244 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4245 continue;
4246 pOutFn = pOutCpu;
4247 }
4248 State.FTW = 0;
4249
4250 GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4251 uint32_t cNormalInputs = 0;
4252 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4253 {
4254 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4255 if (RTFLOAT80U_IS_NORMAL(&InVal))
4256 cNormalInputs++;
4257 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4258 {
4259 iTest -= 1;
4260 continue;
4261 }
4262
4263 uint16_t const fFcw = RandFcw();
4264 State.FSW = RandFsw();
4265 if (!fIsFxam)
4266 {
4267 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4268 {
4269 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4270 {
4271 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4272 {
4273 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4274 | (iRounding << X86_FCW_RC_SHIFT)
4275 | (iPrecision << X86_FCW_PC_SHIFT)
4276 | iMask;
4277 uint16_t fFswOut = 0;
4278 pfn(&State, &fFswOut, &InVal);
4279 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4280 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4281 iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4282 }
4283 }
4284 }
4285 }
4286 else
4287 {
4288 uint16_t fFswOut = 0;
4289 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4290 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4291 State.FCW = fFcw;
4292 pfn(&State, &fFswOut, &InVal);
4293 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4294 fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4295 }
4296 }
4297 GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4298 }
4299}
4300#endif
4301
4302
4303static void FpuUnaryFswR80Test(void)
4304{
4305 X86FXSTATE State;
4306 RT_ZERO(State);
4307 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4308 {
4309 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4310 continue;
4311
4312 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4313 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4314 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4315 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4316 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4317 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4318 {
4319 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4320 {
4321 RTFLOAT80U const InVal = paTests[iTest].InVal;
4322 uint16_t fFswOut = 0;
4323 State.FSW = paTests[iTest].fFswIn;
4324 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4325 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4326 pfn(&State, &fFswOut, &InVal);
4327 if (fFswOut != paTests[iTest].fFswOut)
4328 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4329 "%s -> fsw=%#06x\n"
4330 "%s expected %#06x %s (%s%s)\n",
4331 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4332 FormatR80(&paTests[iTest].InVal),
4333 iVar ? " " : "", fFswOut,
4334 iVar ? " " : "", paTests[iTest].fFswOut,
4335 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4336 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4337 }
4338 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4339 }
4340 }
4341}
4342
4343/*
4344 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4345 */
4346TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4347
4348static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4349{
4350 ENTRY(fxtract_r80_r80),
4351 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4352 ENTRY_INTEL(fptan_r80_r80, 0),
4353 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4354 ENTRY_INTEL(fsincos_r80_r80, 0),
4355};
4356
4357#ifdef TSTIEMAIMPL_WITH_GENERATOR
4358static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4359{
4360 static RTFLOAT80U const s_aSpecials[] =
4361 {
4362 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4363 };
4364
4365 X86FXSTATE State;
4366 RT_ZERO(State);
4367 uint32_t cMinNormals = cTests / 4;
4368 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4369 {
4370 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4371 PRTSTREAM pOutFn = pOut;
4372 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4373 {
4374 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4375 continue;
4376 pOutFn = pOutCpu;
4377 }
4378
4379 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4380 uint32_t iTestOutput = 0;
4381 uint32_t cNormalInputs = 0;
4382 uint32_t cTargetRangeInputs = 0;
4383 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4384 {
4385 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4386 if (RTFLOAT80U_IS_NORMAL(&InVal))
4387 {
4388 if (iFn != 0)
4389 {
4390 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4391 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4392 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4393 cTargetRangeInputs++;
4394 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4395 {
4396 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4397 cTargetRangeInputs++;
4398 }
4399 }
4400 cNormalInputs++;
4401 }
4402 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4403 {
4404 iTest -= 1;
4405 continue;
4406 }
4407
4408 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4409 uint16_t const fFcw = RandFcw();
4410 State.FSW = RandFsw();
4411
4412 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4413 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4414 {
4415 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4416 | (iRounding << X86_FCW_RC_SHIFT)
4417 | (iPrecision << X86_FCW_PC_SHIFT)
4418 | X86_FCW_MASK_ALL;
4419 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4420 pfn(&State, &ResM, &InVal);
4421 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4422 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4423 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4424
4425 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4426 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4427 pfn(&State, &ResU, &InVal);
4428 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4429 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4430 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4431
4432 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4433 if (fXcpt)
4434 {
4435 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4436 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4437 pfn(&State, &Res1, &InVal);
4438 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4439 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4440 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4441 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4442 {
4443 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4444 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4445 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4446 pfn(&State, &Res2, &InVal);
4447 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4448 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4449 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4450 }
4451 if (!RT_IS_POWER_OF_TWO(fXcpt))
4452 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4453 if (fUnmasked & fXcpt)
4454 {
4455 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4456 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4457 pfn(&State, &Res3, &InVal);
4458 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4459 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4460 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4461 }
4462 }
4463 }
4464 }
4465 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4466 }
4467}
4468#endif
4469
4470
4471static void FpuUnaryTwoR80Test(void)
4472{
4473 X86FXSTATE State;
4474 RT_ZERO(State);
4475 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4476 {
4477 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4478 continue;
4479
4480 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4481 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4482 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4483 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4484 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4485 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4486 {
4487 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4488 {
4489 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4490 RTFLOAT80U const InVal = paTests[iTest].InVal;
4491 State.FCW = paTests[iTest].fFcw;
4492 State.FSW = paTests[iTest].fFswIn;
4493 pfn(&State, &Res, &InVal);
4494 if ( Res.FSW != paTests[iTest].fFswOut
4495 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4496 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4497 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4498 "%s -> fsw=%#06x %s %s\n"
4499 "%s expected %#06x %s %s %s%s%s (%s)\n",
4500 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4501 FormatR80(&paTests[iTest].InVal),
4502 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4503 iVar ? " " : "", paTests[iTest].fFswOut,
4504 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4505 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4506 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4507 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4508 }
4509 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4510 }
4511 }
4512}
4513
4514
4515/*********************************************************************************************************************************
4516* SSE floating point Binary Operations *
4517*********************************************************************************************************************************/
4518
4519/*
4520 * Binary SSE operations on packed single precision floating point values.
4521 */
4522TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4523
4524static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4525{
4526 ENTRY_BIN(addps_u128),
4527 ENTRY_BIN(mulps_u128),
4528 ENTRY_BIN(subps_u128),
4529 ENTRY_BIN(minps_u128),
4530 ENTRY_BIN(divps_u128),
4531 ENTRY_BIN(maxps_u128),
4532 ENTRY_BIN(haddps_u128),
4533 ENTRY_BIN(hsubps_u128),
4534 ENTRY_BIN(sqrtps_u128),
4535 ENTRY_BIN(addsubps_u128),
4536 ENTRY_BIN(cvtps2pd_u128),
4537};
4538
4539#ifdef TSTIEMAIMPL_WITH_GENERATOR
4540static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4541{
4542 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4543
4544 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4545 {
4546 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4547 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4548 /** @todo More specials. */
4549 };
4550
4551 X86FXSTATE State;
4552 RT_ZERO(State);
4553 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4554 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4555 {
4556 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4557
4558 PRTSTREAM pStrmOut = NULL;
4559 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4560 if (RT_FAILURE(rc))
4561 {
4562 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4563 return RTEXITCODE_FAILURE;
4564 }
4565
4566 uint32_t cNormalInputPairs = 0;
4567 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4568 {
4569 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4570
4571 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4572 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4573 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4574 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4575
4576 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4577 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4578 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4579 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4580
4581 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4582 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4583 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4584 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4585 cNormalInputPairs++;
4586 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4587 {
4588 iTest -= 1;
4589 continue;
4590 }
4591
4592 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4593 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4594 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4595 for (uint8_t iFz = 0; iFz < 2; iFz++)
4596 {
4597 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4598 | (iRounding << X86_MXCSR_RC_SHIFT)
4599 | (iDaz ? X86_MXCSR_DAZ : 0)
4600 | (iFz ? X86_MXCSR_FZ : 0)
4601 | X86_MXCSR_XCPT_MASK;
4602 IEMSSERESULT ResM; RT_ZERO(ResM);
4603 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4604 TestData.fMxcsrIn = State.MXCSR;
4605 TestData.fMxcsrOut = ResM.MXCSR;
4606 TestData.OutVal = ResM.uResult;
4607 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4608
4609 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4610 IEMSSERESULT ResU; RT_ZERO(ResU);
4611 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4612 TestData.fMxcsrIn = State.MXCSR;
4613 TestData.fMxcsrOut = ResU.MXCSR;
4614 TestData.OutVal = ResU.uResult;
4615 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4616
4617 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4618 if (fXcpt)
4619 {
4620 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4621 IEMSSERESULT Res1; RT_ZERO(Res1);
4622 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4623 TestData.fMxcsrIn = State.MXCSR;
4624 TestData.fMxcsrOut = Res1.MXCSR;
4625 TestData.OutVal = Res1.uResult;
4626 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4627
4628 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4629 {
4630 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4631 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4632 IEMSSERESULT Res2; RT_ZERO(Res2);
4633 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4634 TestData.fMxcsrIn = State.MXCSR;
4635 TestData.fMxcsrOut = Res2.MXCSR;
4636 TestData.OutVal = Res2.uResult;
4637 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4638 }
4639 if (!RT_IS_POWER_OF_TWO(fXcpt))
4640 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4641 if (fUnmasked & fXcpt)
4642 {
4643 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4644 IEMSSERESULT Res3; RT_ZERO(Res3);
4645 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4646 TestData.fMxcsrIn = State.MXCSR;
4647 TestData.fMxcsrOut = Res3.MXCSR;
4648 TestData.OutVal = Res3.uResult;
4649 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4650 }
4651 }
4652 }
4653 }
4654 rc = RTStrmClose(pStrmOut);
4655 if (RT_FAILURE(rc))
4656 {
4657 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4658 return RTEXITCODE_FAILURE;
4659 }
4660 }
4661
4662 return RTEXITCODE_SUCCESS;
4663}
4664#endif
4665
4666static void SseBinaryR32Test(void)
4667{
4668 X86FXSTATE State;
4669 RT_ZERO(State);
4670 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4671 {
4672 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4673 continue;
4674
4675 uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4676 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4677 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4678 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4679 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4680 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4681 {
4682 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4683 {
4684 IEMSSERESULT Res; RT_ZERO(Res);
4685
4686 State.MXCSR = paTests[iTest].fMxcsrIn;
4687 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4688 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4689 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4690 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4691 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4692 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4693 || !fValsIdentical)
4694 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4695 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4696 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4697 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4698 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4699 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4700 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4701 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4702 iVar ? " " : "", Res.MXCSR,
4703 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4704 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4705 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4706 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4707 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4708 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4709 !fValsIdentical ? " - val" : "",
4710 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4711 }
4712 pfn = g_aSseBinaryR32[iFn].pfnNative;
4713 }
4714 }
4715}
4716
4717
4718/*
4719 * Binary SSE operations on packed double precision floating point values.
4720 */
4721TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4722
4723static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4724{
4725 ENTRY_BIN(addpd_u128),
4726 ENTRY_BIN(mulpd_u128),
4727 ENTRY_BIN(subpd_u128),
4728 ENTRY_BIN(minpd_u128),
4729 ENTRY_BIN(divpd_u128),
4730 ENTRY_BIN(maxpd_u128),
4731 ENTRY_BIN(haddpd_u128),
4732 ENTRY_BIN(hsubpd_u128),
4733 ENTRY_BIN(sqrtpd_u128),
4734 ENTRY_BIN(addsubpd_u128),
4735 ENTRY_BIN(cvtpd2ps_u128),
4736};
4737
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the output stream.
 *
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state; its MXCSR member is the input MXCSR.
 * @param   pTestData   The record; InVal1/InVal2 are the inputs, the MXCSR and
 *                      output fields are filled in here before writing.
 * @param   pRes        Where to return the worker result (zeroed before the call).
 */
static void SseBinaryR64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn, X86FXSTATE *pState,
                                    SSE_BINARY_TEST_T *pTestData, IEMSSERESULT *pRes)
{
    RT_ZERO(*pRes);
    pfn(pState, pRes, &pTestData->InVal1, &pTestData->InVal2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = pRes->MXCSR;
    pTestData->OutVal    = pRes->uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
}


/**
 * Generates the test data files for the workers in g_aSseBinaryR64.
 *
 * For every input pair the worker is run for all rounding mode / DAZ / FZ
 * combinations, first with all exceptions masked and then with none masked.
 * If any exception flag was raised, further records are produced with the
 * raised flags preset and with various subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Prefer the native worker when there is one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values.  Each lane of a special
               takes its own array element (the second lane used to read element 0). */
            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.InVal2.ar64[0] = RandR64Src2(iTest);
                TestData.InVal2.ar64[1] = RandR64Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal2.ar64[0] = s_aSpecials[iTest - cTests].aVal2[0];
                TestData.InVal2.ar64[1] = s_aSpecials[iTest - cTests].aVal2[1];
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM;
                        SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData, &ResM);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU;
                        SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData, &ResU);

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Again with the raised flag bits OR'ed into the input MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        IEMSSERESULT Res1;
                        SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res1);

                        /* New flags showed up: accumulate them and mask exactly the accumulated set. */
                        if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            IEMSSERESULT Res2;
                            SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res2);
                        }

                        /* Several flags: leave each one unmasked in turn while masking the others. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    IEMSSERESULT Res3;
                                    SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res3);
                                }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4857
4858
4859static void SseBinaryR64Test(void)
4860{
4861 X86FXSTATE State;
4862 RT_ZERO(State);
4863 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4864 {
4865 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4866 continue;
4867
4868 uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4869 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4870 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4871 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4872 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4873 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4874 {
4875 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4876 {
4877 IEMSSERESULT Res; RT_ZERO(Res);
4878
4879 State.MXCSR = paTests[iTest].fMxcsrIn;
4880 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4881 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4882 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4883 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4884 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
4885 "%s -> mxcsr=%#08x %s'%s\n"
4886 "%s expected %#08x %s'%s%s%s (%s)\n",
4887 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4888 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
4889 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
4890 iVar ? " " : "", Res.MXCSR,
4891 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
4892 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4893 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
4894 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4895 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4896 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4897 ? " - val" : "",
4898 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4899 }
4900 pfn = g_aSseBinaryR64[iFn].pfnNative;
4901 }
4902 }
4903}
4904
4905
/*
 * Binary SSE operations taking a 128-bit first operand and a single precision
 * floating point value as the second operand (xxxss xmm1, r/m32).
 */
/* Sub-test descriptor type: test records are SSE_BINARY_U128_R32_TEST_T, workers
   are PFNIEMAIMPLFPSSEF2U128R32.  The code below relies on its pszName, pfn,
   pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/** Workers handled by SseBinaryU128R32Generate() and SseBinaryU128R32Test();
 *  both walk the table in order. */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
4922
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the output stream.
 *
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state; its MXCSR member is the input MXCSR.
 * @param   pTestData   The record; InVal1/r32Val2 are the inputs, the MXCSR
 *                      and output fields are filled in here before writing.
 * @param   pRes        Where to return the worker result (zeroed before the call).
 */
static void SseBinaryU128R32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R32 pfn, X86FXSTATE *pState,
                                        SSE_BINARY_U128_R32_TEST_T *pTestData, IEMSSERESULT *pRes)
{
    RT_ZERO(*pRes);
    pfn(pState, pRes, &pTestData->InVal1, &pTestData->r32Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = pRes->MXCSR;
    pTestData->OutVal    = pRes->uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
}


/**
 * Generates the test data files for the workers in g_aSseBinaryU128R32.
 *
 * For every input set the worker is run for all rounding mode / DAZ / FZ
 * combinations, first with all exceptions masked and then with none masked.
 * If any exception flag was raised, further records are produced with the
 * raised flags preset and with various subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets (raised to at least 192).
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Prefer the native worker when there is one. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar32[0] = RandR32Src(iTest);
                TestData.InVal1.ar32[1] = RandR32Src(iTest);
                TestData.InVal1.ar32[2] = RandR32Src(iTest);
                TestData.InVal1.ar32[3] = RandR32Src(iTest);
                TestData.r32Val2        = RandR32Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar32[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar32[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal1.ar32[2] = s_aSpecials[iTest - cTests].aVal1[2];
                TestData.InVal1.ar32[3] = s_aSpecials[iTest - cTests].aVal1[3];
                TestData.r32Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM;
                        SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData, &ResM);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU;
                        SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData, &ResU);

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Again with the raised flag bits OR'ed into the input MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        IEMSSERESULT Res1;
                        SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData, &Res1);

                        /* New flags showed up: accumulate them and mask exactly the accumulated set. */
                        if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            IEMSSERESULT Res2;
                            SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData, &Res2);
                        }

                        /* Several flags: leave each one unmasked in turn while masking the others. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res3;
                                SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData, &Res3);
                            }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5046
5047static void SseBinaryU128R32Test(void)
5048{
5049 X86FXSTATE State;
5050 RT_ZERO(State);
5051 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5052 {
5053 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5054 continue;
5055
5056 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5057 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5058 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5059 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5060 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5061 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5062 {
5063 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5064 {
5065 IEMSSERESULT Res; RT_ZERO(Res);
5066
5067 State.MXCSR = paTests[iTest].fMxcsrIn;
5068 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5069 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5070 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5071 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5072 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5073 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5074 || !fValsIdentical)
5075 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5076 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5077 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5078 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5079 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5080 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5081 FormatR32(&paTests[iTest].r32Val2),
5082 iVar ? " " : "", Res.MXCSR,
5083 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5084 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5085 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5086 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5087 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5088 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5089 !fValsIdentical ? " - val" : "",
5090 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5091 }
5092 }
5093 }
5094}
5095
5096
/*
 * Binary SSE operations taking a 128-bit first operand and a double precision
 * floating point value as the second operand (xxxsd xmm1, r/m64).
 */
/* Sub-test descriptor type: test records are SSE_BINARY_U128_R64_TEST_T, workers
   are PFNIEMAIMPLFPSSEF2U128R64.  The code below relies on its pszName, pfn,
   pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/** Workers handled by SseBinaryU128R64Generate() and SseBinaryU128R64Test();
 *  both walk the table in order. */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
5113
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the output stream.
 *
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state; its MXCSR member is the input MXCSR.
 * @param   pTestData   The record; InVal1/r64Val2 are the inputs, the MXCSR
 *                      and output fields are filled in here before writing.
 * @param   pRes        Where to return the worker result (zeroed before the call).
 */
static void SseBinaryU128R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R64 pfn, X86FXSTATE *pState,
                                        SSE_BINARY_U128_R64_TEST_T *pTestData, IEMSSERESULT *pRes)
{
    RT_ZERO(*pRes);
    pfn(pState, pRes, &pTestData->InVal1, &pTestData->r64Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = pRes->MXCSR;
    pTestData->OutVal    = pRes->uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
}


/**
 * Generates the test data files for the workers in g_aSseBinaryU128R64.
 *
 * For every input set the worker is run for all rounding mode / DAZ / FZ
 * combinations, first with all exceptions masked and then with none masked.
 * If any exception flag was raised, further records are produced with the
 * raised flags preset and with various subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets (raised to at least 192).
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Prefer the native worker when there is one. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM;
                        SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData, &ResM);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU;
                        SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData, &ResU);

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Again with the raised flag bits OR'ed into the input MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        IEMSSERESULT Res1;
                        SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res1);

                        /* New flags showed up: accumulate them and mask exactly the accumulated set. */
                        if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            IEMSSERESULT Res2;
                            SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res2);
                        }

                        /* Several flags: leave each one unmasked in turn while masking the others. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res3;
                                SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData, &Res3);
                            }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5231
5232
5233static void SseBinaryU128R64Test(void)
5234{
5235 X86FXSTATE State;
5236 RT_ZERO(State);
5237 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5238 {
5239 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5240 continue;
5241
5242 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5243 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5244 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5245 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5246 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5247 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5248 {
5249 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5250 {
5251 IEMSSERESULT Res; RT_ZERO(Res);
5252
5253 State.MXCSR = paTests[iTest].fMxcsrIn;
5254 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5255 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5256 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5257 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5258 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5259 "%s -> mxcsr=%#08x %s'%s\n"
5260 "%s expected %#08x %s'%s%s%s (%s)\n",
5261 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5262 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5263 FormatR64(&paTests[iTest].r64Val2),
5264 iVar ? " " : "", Res.MXCSR,
5265 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5266 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5267 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5268 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5269 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5270 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5271 ? " - val" : "",
5272 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5273 }
5274 }
5275 }
5276}
5277
5278
/*
 * SSE operations converting a scalar double-precision floating point value to a signed double-word integer (cvttsd2si and friends).
 */
/* Sub-test descriptor type: test records are SSE_BINARY_I32_R64_TEST_T, workers
   are PFNIEMAIMPLSSEF2I32U64.  The code below relies on its pszName, pfn,
   pfnNative, paTests and pcTests members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);

/** Workers handled by SseBinaryI32R64Generate() and SseBinaryI32R64Test();
 *  both walk the table in order. */
static const SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
{
    ENTRY_BIN(cvttsd2si_i32_r64),
    ENTRY_BIN(cvtsd2si_i32_r64),
};
5289
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the output stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state; its MXCSR member is the input MXCSR.
 * @param   pTestData   The record; r64ValIn is the input, the MXCSR and
 *                      output fields are filled in here before writing.
 */
static uint32_t SseBinaryI32R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I32U64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I32_R64_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int32_t  i32Out;
    pfn(pState, &fMxcsrOut, &i32Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the test data files for the workers in g_aSseBinaryI32R64.
 *
 * For every input value the worker is run for all rounding mode / DAZ / FZ
 * combinations, first with all exceptions masked and then with none masked.
 * If any exception flag was raised, further records are produced with the
 * raised flags preset and with various subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input values (raised to at least 192).
 */
static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Prefer the native worker when there is one. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values. */
            if (iTest < cTests)
                TestData.r64ValIn = RandR64Src(iTest);
            else
                TestData.r64ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a fresh random value. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Again with the raised flag bits OR'ed into the input MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* New flags showed up: accumulate them and mask exactly the accumulated set. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Several flags: leave each one unmasked in turn while masking the others. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5404
5405
5406static void SseBinaryI32R64Test(void)
5407{
5408 X86FXSTATE State;
5409 RT_ZERO(State);
5410 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5411 {
5412 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R64[iFn].pszName))
5413 continue;
5414
5415 uint32_t const cTests = *g_aSseBinaryI32R64[iFn].pcTests;
5416 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5417 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5418 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5419 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5420 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5421 {
5422 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R64_TEST_T); iTest++)
5423 {
5424 uint32_t fMxcsr = 0;
5425 int32_t i32Dst = 0;
5426
5427 State.MXCSR = paTests[iTest].fMxcsrIn;
5428 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5429 if ( fMxcsr != paTests[iTest].fMxcsrOut
5430 || i32Dst != paTests[iTest].i32ValOut)
5431 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5432 "%s -> mxcsr=%#08x %RI32\n"
5433 "%s expected %#08x %RI32%s%s (%s)\n",
5434 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5435 FormatR64(&paTests[iTest].r64ValIn),
5436 iVar ? " " : "", fMxcsr, i32Dst,
5437 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5438 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5439 i32Dst != paTests[iTest].i32ValOut
5440 ? " - val" : "",
5441 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5442 }
5443 }
5444 }
5445}
5446
5447
/*
 * SSE operations converting a scalar double-precision floating point value to a signed quad-word integer (cvttsd2si and friends).
 */
/* Sub-test descriptor type: test records are SSE_BINARY_I64_R64_TEST_T, workers
   are PFNIEMAIMPLSSEF2I64U64.  The generator below relies on its pszName, pfn
   and pfnNative members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/** Workers handled by SseBinaryI64R64Generate(), which walks the table in order. */
static const SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
5458
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the output stream.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to invoke.
 * @param   pState      The FPU state; its MXCSR member is the input MXCSR.
 * @param   pTestData   The record; r64ValIn is the input, the MXCSR and
 *                      output fields are filled in here before writing.
 */
static uint32_t SseBinaryI64R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I64U64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I64_R64_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int64_t  i64Out;
    pfn(pState, &fMxcsrOut, &i64Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i64ValOut = i64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the test data files for the workers in g_aSseBinaryI64R64.
 *
 * For every input value the worker is run for all rounding mode / DAZ / FZ
 * combinations, first with all exceptions masked and then with none masked.
 * If any exception flag was raised, further records are produced with the
 * raised flags preset and with various subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input values (raised to at least 192).
 */
static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Prefer the native worker when there is one. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values. */
            if (iTest < cTests)
                TestData.r64ValIn = RandR64Src(iTest);
            else
                TestData.r64ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index with a fresh random value. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Again with the raised flag bits OR'ed into the input MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* New flags showed up: accumulate them and mask exactly the accumulated set. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                        }

                        /* Several flags: leave each one unmasked in turn while masking the others. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI64R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5573
5574
5575static void SseBinaryI64R64Test(void)
5576{
5577 X86FXSTATE State;
5578 RT_ZERO(State);
5579 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
5580 {
5581 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R64[iFn].pszName))
5582 continue;
5583
5584 uint32_t const cTests = *g_aSseBinaryI64R64[iFn].pcTests;
5585 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
5586 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
5587 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5588 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5589 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5590 {
5591 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R64_TEST_T); iTest++)
5592 {
5593 uint32_t fMxcsr = 0;
5594 int64_t i64Dst = 0;
5595
5596 State.MXCSR = paTests[iTest].fMxcsrIn;
5597 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
5598 if ( fMxcsr != paTests[iTest].fMxcsrOut
5599 || i64Dst != paTests[iTest].i64ValOut)
5600 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5601 "%s -> mxcsr=%#08x %RI64\n"
5602 "%s expected %#08x %RI64%s%s (%s)\n",
5603 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5604 FormatR64(&paTests[iTest].r64ValIn),
5605 iVar ? " " : "", fMxcsr, i64Dst,
5606 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5607 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5608 i64Dst != paTests[iTest].i64ValOut
5609 ? " - val" : "",
5610 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5611 }
5612 }
5613 }
5614}
5615
5616
5617/*
5618 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
5619 */
5620TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
5621
5622static const SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
5623{
5624 ENTRY_BIN(cvttss2si_i32_r32),
5625 ENTRY_BIN(cvtss2si_i32_r32),
5626};
5627
5628#ifdef TSTIEMAIMPL_WITH_GENERATOR
5629static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
5630{
5631 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5632
5633 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
5634 {
5635 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5636 /** @todo More specials. */
5637 };
5638
5639 X86FXSTATE State;
5640 RT_ZERO(State);
5641 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5642 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
5643 {
5644 PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;
5645
5646 PRTSTREAM pStrmOut = NULL;
5647 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName);
5648 if (RT_FAILURE(rc))
5649 {
5650 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
5651 return RTEXITCODE_FAILURE;
5652 }
5653
5654 uint32_t cNormalInputPairs = 0;
5655 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5656 {
5657 SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);
5658
5659 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
5660
5661 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
5662 cNormalInputPairs++;
5663 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5664 {
5665 iTest -= 1;
5666 continue;
5667 }
5668
5669 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5670 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5671 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5672 for (uint8_t iFz = 0; iFz < 2; iFz++)
5673 {
5674 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5675 | (iRounding << X86_MXCSR_RC_SHIFT)
5676 | (iDaz ? X86_MXCSR_DAZ : 0)
5677 | (iFz ? X86_MXCSR_FZ : 0)
5678 | X86_MXCSR_XCPT_MASK;
5679 uint32_t fMxcsrM; int32_t i32OutM;
5680 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r32ValIn.u);
5681 TestData.fMxcsrIn = State.MXCSR;
5682 TestData.fMxcsrOut = fMxcsrM;
5683 TestData.i32ValOut = i32OutM;
5684 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5685
5686 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5687 uint32_t fMxcsrU; int32_t i32OutU;
5688 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r32ValIn.u);
5689 TestData.fMxcsrIn = State.MXCSR;
5690 TestData.fMxcsrOut = fMxcsrU;
5691 TestData.i32ValOut = i32OutU;
5692 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5693
5694 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
5695 if (fXcpt)
5696 {
5697 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5698 uint32_t fMxcsr1; int32_t i32Out1;
5699 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r32ValIn.u);
5700 TestData.fMxcsrIn = State.MXCSR;
5701 TestData.fMxcsrOut = fMxcsr1;
5702 TestData.i32ValOut = i32Out1;
5703 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5704
5705 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
5706 {
5707 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
5708 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5709 uint32_t fMxcsr2; int32_t i32Out2;
5710 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r32ValIn.u);
5711 TestData.fMxcsrIn = State.MXCSR;
5712 TestData.fMxcsrOut = fMxcsr2;
5713 TestData.i32ValOut = i32Out2;
5714 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5715 }
5716 if (!RT_IS_POWER_OF_TWO(fXcpt))
5717 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5718 if (fUnmasked & fXcpt)
5719 {
5720 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5721 uint32_t fMxcsr3; int32_t i32Out3;
5722 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r32ValIn.u);
5723 TestData.fMxcsrIn = State.MXCSR;
5724 TestData.fMxcsrOut = fMxcsr3;
5725 TestData.i32ValOut = i32Out3;
5726 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5727 }
5728 }
5729 }
5730 }
5731 rc = RTStrmClose(pStrmOut);
5732 if (RT_FAILURE(rc))
5733 {
5734 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
5735 return RTEXITCODE_FAILURE;
5736 }
5737 }
5738
5739 return RTEXITCODE_SUCCESS;
5740}
5741#endif
5742
5743
5744static void SseBinaryI32R32Test(void)
5745{
5746 X86FXSTATE State;
5747 RT_ZERO(State);
5748 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
5749 {
5750 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R32[iFn].pszName))
5751 continue;
5752
5753 uint32_t const cTests = *g_aSseBinaryI32R32[iFn].pcTests;
5754 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
5755 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
5756 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
5757 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5758 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5759 {
5760 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R32_TEST_T); iTest++)
5761 {
5762 uint32_t fMxcsr = 0;
5763 int32_t i32Dst = 0;
5764
5765 State.MXCSR = paTests[iTest].fMxcsrIn;
5766 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
5767 if ( fMxcsr != paTests[iTest].fMxcsrOut
5768 || i32Dst != paTests[iTest].i32ValOut)
5769 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5770 "%s -> mxcsr=%#08x %RI32\n"
5771 "%s expected %#08x %RI32%s%s (%s)\n",
5772 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5773 FormatR32(&paTests[iTest].r32ValIn),
5774 iVar ? " " : "", fMxcsr, i32Dst,
5775 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5776 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5777 i32Dst != paTests[iTest].i32ValOut
5778 ? " - val" : "",
5779 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5780 }
5781 }
5782 }
5783}
5784
5785
5786/*
5787 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
5788 */
5789TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
5790
5791static const SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
5792{
5793 ENTRY_BIN(cvttss2si_i64_r32),
5794 ENTRY_BIN(cvtss2si_i64_r32),
5795};
5796
5797#ifdef TSTIEMAIMPL_WITH_GENERATOR
5798static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
5799{
5800 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5801
5802 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
5803 {
5804 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5805 /** @todo More specials. */
5806 };
5807
5808 X86FXSTATE State;
5809 RT_ZERO(State);
5810 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5811 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
5812 {
5813 PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
5814
5815 PRTSTREAM pStrmOut = NULL;
5816 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName);
5817 if (RT_FAILURE(rc))
5818 {
5819 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
5820 return RTEXITCODE_FAILURE;
5821 }
5822
5823 uint32_t cNormalInputPairs = 0;
5824 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5825 {
5826 SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
5827
5828 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
5829
5830 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
5831 cNormalInputPairs++;
5832 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5833 {
5834 iTest -= 1;
5835 continue;
5836 }
5837
5838 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5839 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5840 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5841 for (uint8_t iFz = 0; iFz < 2; iFz++)
5842 {
5843 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5844 | (iRounding << X86_MXCSR_RC_SHIFT)
5845 | (iDaz ? X86_MXCSR_DAZ : 0)
5846 | (iFz ? X86_MXCSR_FZ : 0)
5847 | X86_MXCSR_XCPT_MASK;
5848 uint32_t fMxcsrM; int64_t i64OutM;
5849 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r32ValIn.u);
5850 TestData.fMxcsrIn = State.MXCSR;
5851 TestData.fMxcsrOut = fMxcsrM;
5852 TestData.i64ValOut = i64OutM;
5853 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5854
5855 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5856 uint32_t fMxcsrU; int64_t i64OutU;
5857 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r32ValIn.u);
5858 TestData.fMxcsrIn = State.MXCSR;
5859 TestData.fMxcsrOut = fMxcsrU;
5860 TestData.i64ValOut = i64OutU;
5861 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5862
5863 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
5864 if (fXcpt)
5865 {
5866 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5867 uint32_t fMxcsr1; int64_t i64Out1;
5868 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r32ValIn.u);
5869 TestData.fMxcsrIn = State.MXCSR;
5870 TestData.fMxcsrOut = fMxcsr1;
5871 TestData.i64ValOut = i64Out1;
5872 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5873
5874 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
5875 {
5876 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
5877 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5878 uint32_t fMxcsr2; int64_t i64Out2;
5879 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r32ValIn.u);
5880 TestData.fMxcsrIn = State.MXCSR;
5881 TestData.fMxcsrOut = fMxcsr2;
5882 TestData.i64ValOut = i64Out2;
5883 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5884 }
5885 if (!RT_IS_POWER_OF_TWO(fXcpt))
5886 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5887 if (fUnmasked & fXcpt)
5888 {
5889 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5890 uint32_t fMxcsr3; int64_t i64Out3;
5891 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r32ValIn.u);
5892 TestData.fMxcsrIn = State.MXCSR;
5893 TestData.fMxcsrOut = fMxcsr3;
5894 TestData.i64ValOut = i64Out3;
5895 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
5896 }
5897 }
5898 }
5899 }
5900 rc = RTStrmClose(pStrmOut);
5901 if (RT_FAILURE(rc))
5902 {
5903 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
5904 return RTEXITCODE_FAILURE;
5905 }
5906 }
5907
5908 return RTEXITCODE_SUCCESS;
5909}
5910#endif
5911
5912
5913static void SseBinaryI64R32Test(void)
5914{
5915 X86FXSTATE State;
5916 RT_ZERO(State);
5917 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
5918 {
5919 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R32[iFn].pszName))
5920 continue;
5921
5922 uint32_t const cTests = *g_aSseBinaryI64R32[iFn].pcTests;
5923 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
5924 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
5925 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
5926 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5927 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5928 {
5929 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R32_TEST_T); iTest++)
5930 {
5931 uint32_t fMxcsr = 0;
5932 int64_t i64Dst = 0;
5933
5934 State.MXCSR = paTests[iTest].fMxcsrIn;
5935 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
5936 if ( fMxcsr != paTests[iTest].fMxcsrOut
5937 || i64Dst != paTests[iTest].i64ValOut)
5938 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5939 "%s -> mxcsr=%#08x %RI64\n"
5940 "%s expected %#08x %RI64%s%s (%s)\n",
5941 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5942 FormatR32(&paTests[iTest].r32ValIn),
5943 iVar ? " " : "", fMxcsr, i64Dst,
5944 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5945 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5946 i64Dst != paTests[iTest].i64ValOut
5947 ? " - val" : "",
5948 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5949 }
5950 }
5951 }
5952}
5953
5954
5955/*
5956 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
5957 */
5958TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
5959
5960static const SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
5961{
5962 ENTRY_BIN(cvtsi2sd_r64_i32)
5963};
5964
5965#ifdef TSTIEMAIMPL_WITH_GENERATOR
5966static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
5967{
5968 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5969
5970 static int32_t const s_aSpecials[] =
5971 {
5972 INT32_MIN,
5973 INT32_MAX,
5974 /** @todo More specials. */
5975 };
5976
5977 X86FXSTATE State;
5978 RT_ZERO(State);
5979 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
5980 {
5981 PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;
5982
5983 PRTSTREAM pStrmOut = NULL;
5984 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName);
5985 if (RT_FAILURE(rc))
5986 {
5987 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
5988 return RTEXITCODE_FAILURE;
5989 }
5990
5991 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5992 {
5993 SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);
5994
5995 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
5996
5997 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5998 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5999 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6000 for (uint8_t iFz = 0; iFz < 2; iFz++)
6001 {
6002 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6003 | (iRounding << X86_MXCSR_RC_SHIFT)
6004 | (iDaz ? X86_MXCSR_DAZ : 0)
6005 | (iFz ? X86_MXCSR_FZ : 0)
6006 | X86_MXCSR_XCPT_MASK;
6007 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6008 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i32ValIn);
6009 TestData.fMxcsrIn = State.MXCSR;
6010 TestData.fMxcsrOut = fMxcsrM;
6011 TestData.r64ValOut = r64OutM;
6012 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6013
6014 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6015 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6016 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i32ValIn);
6017 TestData.fMxcsrIn = State.MXCSR;
6018 TestData.fMxcsrOut = fMxcsrU;
6019 TestData.r64ValOut = r64OutU;
6020 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6021
6022 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6023 if (fXcpt)
6024 {
6025 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6026 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6027 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i32ValIn);
6028 TestData.fMxcsrIn = State.MXCSR;
6029 TestData.fMxcsrOut = fMxcsr1;
6030 TestData.r64ValOut = r64Out1;
6031 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6032
6033 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6034 {
6035 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6036 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6037 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6038 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i32ValIn);
6039 TestData.fMxcsrIn = State.MXCSR;
6040 TestData.fMxcsrOut = fMxcsr2;
6041 TestData.r64ValOut = r64Out2;
6042 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6043 }
6044 if (!RT_IS_POWER_OF_TWO(fXcpt))
6045 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6046 if (fUnmasked & fXcpt)
6047 {
6048 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6049 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6050 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i32ValIn);
6051 TestData.fMxcsrIn = State.MXCSR;
6052 TestData.fMxcsrOut = fMxcsr3;
6053 TestData.r64ValOut = r64Out3;
6054 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6055 }
6056 }
6057 }
6058 }
6059 rc = RTStrmClose(pStrmOut);
6060 if (RT_FAILURE(rc))
6061 {
6062 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
6063 return RTEXITCODE_FAILURE;
6064 }
6065 }
6066
6067 return RTEXITCODE_SUCCESS;
6068}
6069#endif
6070
6071
6072static void SseBinaryR64I32Test(void)
6073{
6074 X86FXSTATE State;
6075 RT_ZERO(State);
6076 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6077 {
6078 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I32[iFn].pszName))
6079 continue;
6080
6081 uint32_t const cTests = *g_aSseBinaryR64I32[iFn].pcTests;
6082 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6083 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6084 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6085 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6086 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6087 {
6088 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I32_TEST_T); iTest++)
6089 {
6090 uint32_t fMxcsr = 0;
6091 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6092
6093 State.MXCSR = paTests[iTest].fMxcsrIn;
6094 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6095 if ( fMxcsr != paTests[iTest].fMxcsrOut
6096 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6097 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6098 "%s -> mxcsr=%#08x %s\n"
6099 "%s expected %#08x %s%s%s (%s)\n",
6100 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6101 &paTests[iTest].i32ValIn,
6102 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6103 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6104 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6105 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6106 ? " - val" : "",
6107 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6108 }
6109 }
6110 }
6111}
6112
6113
6114/*
6115 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6116 */
6117TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
6118
6119static const SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
6120{
6121 ENTRY_BIN(cvtsi2sd_r64_i64),
6122};
6123
6124#ifdef TSTIEMAIMPL_WITH_GENERATOR
6125static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
6126{
6127 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6128
6129 static int64_t const s_aSpecials[] =
6130 {
6131 INT64_MIN,
6132 INT64_MAX
6133 /** @todo More specials. */
6134 };
6135
6136 X86FXSTATE State;
6137 RT_ZERO(State);
6138 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6139 {
6140 PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;
6141
6142 PRTSTREAM pStrmOut = NULL;
6143 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName);
6144 if (RT_FAILURE(rc))
6145 {
6146 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
6147 return RTEXITCODE_FAILURE;
6148 }
6149
6150 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6151 {
6152 SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);
6153
6154 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6155
6156 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6157 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6158 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6159 for (uint8_t iFz = 0; iFz < 2; iFz++)
6160 {
6161 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6162 | (iRounding << X86_MXCSR_RC_SHIFT)
6163 | (iDaz ? X86_MXCSR_DAZ : 0)
6164 | (iFz ? X86_MXCSR_FZ : 0)
6165 | X86_MXCSR_XCPT_MASK;
6166 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6167 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i64ValIn);
6168 TestData.fMxcsrIn = State.MXCSR;
6169 TestData.fMxcsrOut = fMxcsrM;
6170 TestData.r64ValOut = r64OutM;
6171 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6172
6173 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6174 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6175 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i64ValIn);
6176 TestData.fMxcsrIn = State.MXCSR;
6177 TestData.fMxcsrOut = fMxcsrU;
6178 TestData.r64ValOut = r64OutU;
6179 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6180
6181 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6182 if (fXcpt)
6183 {
6184 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6185 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6186 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i64ValIn);
6187 TestData.fMxcsrIn = State.MXCSR;
6188 TestData.fMxcsrOut = fMxcsr1;
6189 TestData.r64ValOut = r64Out1;
6190 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6191
6192 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6193 {
6194 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6195 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6196 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6197 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i64ValIn);
6198 TestData.fMxcsrIn = State.MXCSR;
6199 TestData.fMxcsrOut = fMxcsr2;
6200 TestData.r64ValOut = r64Out2;
6201 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6202 }
6203 if (!RT_IS_POWER_OF_TWO(fXcpt))
6204 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6205 if (fUnmasked & fXcpt)
6206 {
6207 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6208 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6209 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i64ValIn);
6210 TestData.fMxcsrIn = State.MXCSR;
6211 TestData.fMxcsrOut = fMxcsr3;
6212 TestData.r64ValOut = r64Out3;
6213 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6214 }
6215 }
6216 }
6217 }
6218 rc = RTStrmClose(pStrmOut);
6219 if (RT_FAILURE(rc))
6220 {
6221 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
6222 return RTEXITCODE_FAILURE;
6223 }
6224 }
6225
6226 return RTEXITCODE_SUCCESS;
6227}
6228#endif
6229
6230
6231static void SseBinaryR64I64Test(void)
6232{
6233 X86FXSTATE State;
6234 RT_ZERO(State);
6235 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6236 {
6237 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I64[iFn].pszName))
6238 continue;
6239
6240 uint32_t const cTests = *g_aSseBinaryR64I64[iFn].pcTests;
6241 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6242 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6243 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6244 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6245 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6246 {
6247 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I64_TEST_T); iTest++)
6248 {
6249 uint32_t fMxcsr = 0;
6250 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6251
6252 State.MXCSR = paTests[iTest].fMxcsrIn;
6253 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6254 if ( fMxcsr != paTests[iTest].fMxcsrOut
6255 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6256 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6257 "%s -> mxcsr=%#08x %s\n"
6258 "%s expected %#08x %s%s%s (%s)\n",
6259 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6260 &paTests[iTest].i64ValIn,
6261 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6262 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6263 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6264 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6265 ? " - val" : "",
6266 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6267 }
6268 }
6269 }
6270}
6271
6272
6273/*
6274 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6275 */
6276TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6277
6278static const SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6279{
6280 ENTRY_BIN(cvtsi2ss_r32_i32),
6281};
6282
6283#ifdef TSTIEMAIMPL_WITH_GENERATOR
6284static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
6285{
6286 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6287
6288 static int32_t const s_aSpecials[] =
6289 {
6290 INT32_MIN,
6291 INT32_MAX,
6292 /** @todo More specials. */
6293 };
6294
6295 X86FXSTATE State;
6296 RT_ZERO(State);
6297 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6298 {
6299 PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;
6300
6301 PRTSTREAM pStrmOut = NULL;
6302 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32I32[iFn].pszName);
6303 if (RT_FAILURE(rc))
6304 {
6305 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
6306 return RTEXITCODE_FAILURE;
6307 }
6308
6309 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6310 {
6311 SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);
6312
6313 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6314
6315 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6316 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6317 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6318 for (uint8_t iFz = 0; iFz < 2; iFz++)
6319 {
6320 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6321 | (iRounding << X86_MXCSR_RC_SHIFT)
6322 | (iDaz ? X86_MXCSR_DAZ : 0)
6323 | (iFz ? X86_MXCSR_FZ : 0)
6324 | X86_MXCSR_XCPT_MASK;
6325 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
6326 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i32ValIn);
6327 TestData.fMxcsrIn = State.MXCSR;
6328 TestData.fMxcsrOut = fMxcsrM;
6329 TestData.r32ValOut = r32OutM;
6330 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6331
6332 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6333 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
6334 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i32ValIn);
6335 TestData.fMxcsrIn = State.MXCSR;
6336 TestData.fMxcsrOut = fMxcsrU;
6337 TestData.r32ValOut = r32OutU;
6338 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6339
6340 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6341 if (fXcpt)
6342 {
6343 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6344 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
6345 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i32ValIn);
6346 TestData.fMxcsrIn = State.MXCSR;
6347 TestData.fMxcsrOut = fMxcsr1;
6348 TestData.r32ValOut = r32Out1;
6349 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6350
6351 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6352 {
6353 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6354 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6355 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
6356 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i32ValIn);
6357 TestData.fMxcsrIn = State.MXCSR;
6358 TestData.fMxcsrOut = fMxcsr2;
6359 TestData.r32ValOut = r32Out2;
6360 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6361 }
6362 if (!RT_IS_POWER_OF_TWO(fXcpt))
6363 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6364 if (fUnmasked & fXcpt)
6365 {
6366 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6367 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
6368 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i32ValIn);
6369 TestData.fMxcsrIn = State.MXCSR;
6370 TestData.fMxcsrOut = fMxcsr3;
6371 TestData.r32ValOut = r32Out3;
6372 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
6373 }
6374 }
6375 }
6376 }
6377 rc = RTStrmClose(pStrmOut);
6378 if (RT_FAILURE(rc))
6379 {
6380 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
6381 return RTEXITCODE_FAILURE;
6382 }
6383 }
6384
6385 return RTEXITCODE_SUCCESS;
6386}
6387#endif
6388
6389
6390static void SseBinaryR32I32Test(void)
6391{
6392 X86FXSTATE State;
6393 RT_ZERO(State);
6394 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6395 {
6396 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I32[iFn].pszName))
6397 continue;
6398
6399 uint32_t const cTests = *g_aSseBinaryR32I32[iFn].pcTests;
6400 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6401 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6402 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6403 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6404 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6405 {
6406 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I32_TEST_T); iTest++)
6407 {
6408 uint32_t fMxcsr = 0;
6409 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6410
6411 State.MXCSR = paTests[iTest].fMxcsrIn;
6412 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6413 if ( fMxcsr != paTests[iTest].fMxcsrOut
6414 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6415 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6416 "%s -> mxcsr=%#08x %RI32\n"
6417 "%s expected %#08x %RI32%s%s (%s)\n",
6418 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6419 &paTests[iTest].i32ValIn,
6420 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6421 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6422 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6423 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6424 ? " - val" : "",
6425 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6426 }
6427 }
6428 }
6429}
6430
6431
6432/*
6433 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6434 */
/* Sub-test descriptor type for this group; presumably pairs SSE_BINARY_R32_I64_TEST_T records with
   PFNIEMAIMPLSSEF2R32I64 workers (TYPEDEF_SUBTEST_TYPE is defined outside this section). */
6435 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6436
/* Workers iterated by SseBinaryR32I64Generate() and SseBinaryR32I64Test(). */
6437 static const SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6438 {
6439 ENTRY_BIN(cvtsi2ss_r32_i64),
6440 };
6441
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently held in @a pState and appends
 * the resulting record to the data file.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfnWorker   The worker to invoke.
 * @param   pState      The FPU state handed to the worker; its MXCSR is recorded as input.
 * @param   pTestData   The record to complete and write; i64ValIn must already be set.
 * @param   pStrmOut    The output stream.
 */
static uint32_t SseBinaryR32I64GenerateOne(PFNIEMAIMPLSSEF2R32I64 pfnWorker, X86FXSTATE *pState,
                                           SSE_BINARY_R32_I64_TEST_T *pTestData, PRTSTREAM pStrmOut)
{
    uint32_t   fMxcsrOut;
    RTFLOAT32U r32Out;
    pfnWorker(pState, &fMxcsrOut, &r32Out, &pTestData->i64ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r32ValOut = r32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the test data files for the workers in g_aSseBinaryR32I64.
 *
 * Every input value is run through all 16 rounding/DAZ/FZ combinations, first
 * with the X86_MXCSR_XCPT_MASK bits set, then cleared, and - when exception
 * flags were raised - with further flag/mask combinations derived from them.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the worker name.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    X86FXSTATE FpuState;
    RT_ZERO(FpuState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryR32I64); idxFn++)
    {
        PFNIEMAIMPLSSEF2R32I64 const pfnWorker = g_aSseBinaryR32I64[idxFn].pfnNative
                                               ? g_aSseBinaryR32I64[idxFn].pfnNative
                                               : g_aSseBinaryR32I64[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32I64[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32I64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        size_t const cInputs = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iInput = 0; iInput < cInputs; iInput += 1)
        {
            SSE_BINARY_R32_I64_TEST_T Record; RT_ZERO(Record);

            /* Random inputs first, the special values last. */
            if (iInput < cTests)
                Record.i64ValIn = RandI64Src(iInput);
            else
                Record.i64ValIn = s_aSpecials[iInput - cTests];

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;

            /* iCombo encodes rounding (bits 3:2), DAZ (bit 1) and FZ (bit 0), FZ varying fastest. */
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                uint32_t const iRounding = iCombo >> 2;
                bool const     fDaz      = (iCombo & 2) != 0;
                bool const     fFz       = (iCombo & 1) != 0;

                /* Pass 1: all X86_MXCSR_XCPT_MASK bits set. */
                FpuState.MXCSR = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                               | (iRounding << X86_MXCSR_RC_SHIFT)
                               | (fDaz ? X86_MXCSR_DAZ : 0)
                               | (fFz  ? X86_MXCSR_FZ  : 0)
                               | X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrM = SseBinaryR32I64GenerateOne(pfnWorker, &FpuState, &Record, pStrmOut);

                /* Pass 2: all X86_MXCSR_XCPT_MASK bits clear. */
                FpuState.MXCSR = FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrU = SseBinaryR32I64GenerateOne(pfnWorker, &FpuState, &Record, pStrmOut);

                uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Pass 3: the flags seen so far are OR'ed into the input MXCSR. */
                FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fMxcsr1 = SseBinaryR32I64GenerateOne(pfnWorker, &FpuState, &Record, pStrmOut);

                /* Pass 4: only when pass 3 raised flags not yet in fXcpt. */
                uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                if ((fFlags1 & fXcpt) != fFlags1)
                {
                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                    FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseBinaryR32I64GenerateOne(pfnWorker, &FpuState, &Record, pStrmOut);
                }

                /* Remaining passes: with several flags collected, leave out one of them at a time. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    FpuState.MXCSR = (FpuState.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                   | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseBinaryR32I64GenerateOne(pfnWorker, &FpuState, &Record, pStrmOut);
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32I64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6547
6548
6549static void SseBinaryR32I64Test(void)
6550{
6551 X86FXSTATE State;
6552 RT_ZERO(State);
6553 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6554 {
6555 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I64[iFn].pszName))
6556 continue;
6557
6558 uint32_t const cTests = *g_aSseBinaryR32I64[iFn].pcTests;
6559 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
6560 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
6561 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
6562 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6563 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6564 {
6565 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I64_TEST_T); iTest++)
6566 {
6567 uint32_t fMxcsr = 0;
6568 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6569
6570 State.MXCSR = paTests[iTest].fMxcsrIn;
6571 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
6572 if ( fMxcsr != paTests[iTest].fMxcsrOut
6573 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6574 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6575 "%s -> mxcsr=%#08x %RI32\n"
6576 "%s expected %#08x %RI32%s%s (%s)\n",
6577 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6578 &paTests[iTest].i64ValIn,
6579 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6580 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6581 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6582 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6583 ? " - val" : "",
6584 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6585 }
6586 }
6587 }
6588}
6589
6590
6591/*
6592 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
6593 */
/* Sub-test descriptor type for this group; presumably pairs SSE_COMPARE_EFL_R32_R32_TEST_T records with
   PFNIEMAIMPLF2EFLMXCSR128 workers (TYPEDEF_SUBTEST_TYPE is defined outside this section). */
6594 TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6595
/* Workers iterated by SseCompareEflR32R32Generate() and SseCompareEflR32R32Test(). */
6596 static const SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
6597 {
6598 ENTRY_BIN(ucomiss_u128),
6599 ENTRY_BIN(comiss_u128),
6600 ENTRY_BIN_AVX(vucomiss_u128),
6601 ENTRY_BIN_AVX(vcomiss_u128),
6602 };
6603
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the resulting record to the data file.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfnWorker   The worker to invoke.
 * @param   pStrmOut    The output stream.
 * @param   pTestData   The record to complete and write; the input values must already be set.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 * @param   fEFlagsIn   The EFLAGS value handed to the worker.
 * @param   pValIn1     First source operand.
 * @param   pValIn2     Second source operand.
 */
static uint32_t SseCompareEflR32R32GenerateOne(PFNIEMAIMPLF2EFLMXCSR128 pfnWorker, PRTSTREAM pStrmOut,
                                               SSE_COMPARE_EFL_R32_R32_TEST_T *pTestData,
                                               uint32_t fMxcsrIn, uint32_t fEFlagsIn,
                                               X86XMMREG *pValIn1, X86XMMREG *pValIn2)
{
    uint32_t fMxcsrOut  = fMxcsrIn;
    uint32_t fEFlagsOut = fEFlagsIn;
    pfnWorker(&fMxcsrOut, &fEFlagsOut, pValIn1, pValIn2);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->fEflIn    = fEFlagsIn;
    pTestData->fEflOut   = fEFlagsOut;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the test data files for the workers in g_aSseCompareEflR32R32.
 *
 * Random input pairs come first (retrying towards the end until a minimum
 * number of pairs with two normal values has been produced), followed by the
 * special pairs.  Each pair is run through all 16 rounding/DAZ/FZ combinations
 * with several exception flag/mask settings.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the worker name.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareEflR32R32); idxFn++)
    {
        PFNIEMAIMPLF2EFLMXCSR128 const pfnWorker = g_aSseCompareEflR32R32[idxFn].pfnNative
                                                 ? g_aSseCompareEflR32R32[idxFn].pfnNative
                                                 : g_aSseCompareEflR32R32[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareEflR32R32[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareEflR32R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t     cNormalInputPairs = 0;
        size_t const cInputs           = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iInput = 0; iInput < cInputs; iInput += 1)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T Record; RT_ZERO(Record);

            /* Random inputs first, the special pairs last. */
            if (iInput < cTests)
            {
                Record.r32ValIn1 = RandR32Src(iInput);
                Record.r32ValIn2 = RandR32Src(iInput);
            }
            else
            {
                Record.r32ValIn1 = s_aSpecials[iInput - cTests].Val1;
                Record.r32ValIn2 = s_aSpecials[iInput - cTests].Val2;
            }

            if (   RTFLOAT32U_IS_NORMAL(&Record.r32ValIn1)
                && RTFLOAT32U_IS_NORMAL(&Record.r32ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iInput + cMinNormalPairs >= cTests
                     && iInput < cTests)
            {
                /* Too few normal pairs so far: redo this index (the loop increment undoes the decrement). */
                iInput -= 1;
                continue;
            }

            /* Only the low element of each source register is populated. */
            X86XMMREG Src1; RT_ZERO(Src1);
            X86XMMREG Src2; RT_ZERO(Src2);
            Src1.ar32[0] = Record.r32ValIn1;
            Src2.ar32[0] = Record.r32ValIn2;

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlagsIn  = RandEFlags();

            /* iCombo encodes rounding (bits 3:2), DAZ (bit 1) and FZ (bit 0), FZ varying fastest. */
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                uint32_t const iRounding = iCombo >> 2;
                bool const     fDaz      = (iCombo & 2) != 0;
                bool const     fFz       = (iCombo & 1) != 0;

                /* Pass 1: all X86_MXCSR_XCPT_MASK bits set. */
                uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (fDaz ? X86_MXCSR_DAZ : 0)
                                  | (fFz  ? X86_MXCSR_FZ  : 0)
                                  | X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrM = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                /* Pass 2: all X86_MXCSR_XCPT_MASK bits clear. */
                fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrU = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Pass 3: the flags seen so far are OR'ed into the input MXCSR. */
                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fMxcsr1 = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                /* Pass 4: only when pass 3 raised flags not yet in fXcpt. */
                uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                if ((fFlags1 & fXcpt) != fFlags1)
                {
                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlagsIn, &Src1, &Src2);
                }

                /* Remaining passes: with several flags collected, leave out one of them at a time. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlagsIn, &Src1, &Src2);
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareEflR32R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6741
6742static void SseCompareEflR32R32Test(void)
6743{
6744 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
6745 {
6746 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR32R32[iFn].pszName))
6747 continue;
6748
6749 uint32_t const cTests = *g_aSseCompareEflR32R32[iFn].pcTests;
6750 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
6751 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
6752 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
6753 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6754 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6755 {
6756 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R32_R32_TEST_T); iTest++)
6757 {
6758 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6759 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6760
6761 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
6762 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
6763 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6764 uint32_t fEFlags = paTests[iTest].fEflIn;
6765 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6766 if ( fMxcsr != paTests[iTest].fMxcsrOut
6767 || fEFlags != paTests[iTest].fEflOut)
6768 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6769 "%s -> mxcsr=%#08x %#08x\n"
6770 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6771 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6772 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
6773 iVar ? " " : "", fMxcsr, fEFlags,
6774 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6775 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6776 FormatMxcsr(paTests[iTest].fMxcsrIn),
6777 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6778 }
6779 }
6780 }
6781}
6782
6783
6784/*
6785 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
6786 */
/* Sub-test descriptor type for this group; presumably pairs SSE_COMPARE_EFL_R64_R64_TEST_T records with
   PFNIEMAIMPLF2EFLMXCSR128 workers (TYPEDEF_SUBTEST_TYPE is defined outside this section). */
6787 TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6788
/* Workers iterated by SseCompareEflR64R64Generate() and SseCompareEflR64R64Test(). */
6789 static const SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
6790 {
6791 ENTRY_BIN(ucomisd_u128),
6792 ENTRY_BIN(comisd_u128),
6793 ENTRY_BIN_AVX(vucomisd_u128),
6794 ENTRY_BIN_AVX(vcomisd_u128)
6795 };
6796
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the resulting record to the data file.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfnWorker   The worker to invoke.
 * @param   pStrmOut    The output stream.
 * @param   pTestData   The record to complete and write; the input values must already be set.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 * @param   fEFlagsIn   The EFLAGS value handed to the worker.
 * @param   pValIn1     First source operand.
 * @param   pValIn2     Second source operand.
 */
static uint32_t SseCompareEflR64R64GenerateOne(PFNIEMAIMPLF2EFLMXCSR128 pfnWorker, PRTSTREAM pStrmOut,
                                               SSE_COMPARE_EFL_R64_R64_TEST_T *pTestData,
                                               uint32_t fMxcsrIn, uint32_t fEFlagsIn,
                                               X86XMMREG *pValIn1, X86XMMREG *pValIn2)
{
    uint32_t fMxcsrOut  = fMxcsrIn;
    uint32_t fEFlagsOut = fEFlagsIn;
    pfnWorker(&fMxcsrOut, &fEFlagsOut, pValIn1, pValIn2);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->fEflIn    = fEFlagsIn;
    pTestData->fEflOut   = fEFlagsOut;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the test data files for the workers in g_aSseCompareEflR64R64.
 *
 * Random input pairs come first (retrying towards the end until a minimum
 * number of pairs with two normal values has been produced), followed by the
 * special pairs.  Each pair is run through all 16 rounding/DAZ/FZ combinations
 * with several exception flag/mask settings.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the worker name.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareEflR64R64); idxFn++)
    {
        PFNIEMAIMPLF2EFLMXCSR128 const pfnWorker = g_aSseCompareEflR64R64[idxFn].pfnNative
                                                 ? g_aSseCompareEflR64R64[idxFn].pfnNative
                                                 : g_aSseCompareEflR64R64[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareEflR64R64[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareEflR64R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t     cNormalInputPairs = 0;
        size_t const cInputs           = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iInput = 0; iInput < cInputs; iInput += 1)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T Record; RT_ZERO(Record);

            /* Random inputs first, the special pairs last. */
            if (iInput < cTests)
            {
                Record.r64ValIn1 = RandR64Src(iInput);
                Record.r64ValIn2 = RandR64Src(iInput);
            }
            else
            {
                Record.r64ValIn1 = s_aSpecials[iInput - cTests].Val1;
                Record.r64ValIn2 = s_aSpecials[iInput - cTests].Val2;
            }

            if (   RTFLOAT64U_IS_NORMAL(&Record.r64ValIn1)
                && RTFLOAT64U_IS_NORMAL(&Record.r64ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iInput + cMinNormalPairs >= cTests
                     && iInput < cTests)
            {
                /* Too few normal pairs so far: redo this index (the loop increment undoes the decrement). */
                iInput -= 1;
                continue;
            }

            /* Only the low element of each source register is populated. */
            X86XMMREG Src1; RT_ZERO(Src1);
            X86XMMREG Src2; RT_ZERO(Src2);
            Src1.ar64[0] = Record.r64ValIn1;
            Src2.ar64[0] = Record.r64ValIn2;

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlagsIn  = RandEFlags();

            /* iCombo encodes rounding (bits 3:2), DAZ (bit 1) and FZ (bit 0), FZ varying fastest. */
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                uint32_t const iRounding = iCombo >> 2;
                bool const     fDaz      = (iCombo & 2) != 0;
                bool const     fFz       = (iCombo & 1) != 0;

                /* Pass 1: all X86_MXCSR_XCPT_MASK bits set. */
                uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (fDaz ? X86_MXCSR_DAZ : 0)
                                  | (fFz  ? X86_MXCSR_FZ  : 0)
                                  | X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrM = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                /* Pass 2: all X86_MXCSR_XCPT_MASK bits clear. */
                fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fMxcsrU = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Pass 3: the flags seen so far are OR'ed into the input MXCSR. */
                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fMxcsr1 = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                        fMxcsrIn, fEFlagsIn, &Src1, &Src2);

                /* Pass 4: only when pass 3 raised flags not yet in fXcpt. */
                uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                if ((fFlags1 & fXcpt) != fFlags1)
                {
                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlagsIn, &Src1, &Src2);
                }

                /* Remaining passes: with several flags collected, leave out one of them at a time. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlagsIn, &Src1, &Src2);
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareEflR64R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6934
6935static void SseCompareEflR64R64Test(void)
6936{
6937 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
6938 {
6939 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR64R64[iFn].pszName))
6940 continue;
6941
6942 uint32_t const cTests = *g_aSseCompareEflR64R64[iFn].pcTests;
6943 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
6944 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
6945 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
6946 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6947 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6948 {
6949 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R64_R64_TEST_T); iTest++)
6950 {
6951 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6952 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6953
6954 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
6955 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
6956 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6957 uint32_t fEFlags = paTests[iTest].fEflIn;
6958 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6959 if ( fMxcsr != paTests[iTest].fMxcsrOut
6960 || fEFlags != paTests[iTest].fEflOut)
6961 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6962 "%s -> mxcsr=%#08x %#08x\n"
6963 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6964 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6965 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
6966 iVar ? " " : "", fMxcsr, fEFlags,
6967 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6968 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6969 FormatMxcsr(paTests[iTest].fMxcsrIn),
6970 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6971 }
6972 }
6973 }
6974}
6975
6976
6977/*
6978 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
6979 */
6980 /** Maximum immediate value to test; limits the test data size (at least a little bit).
 * The generator below iterates bImm from 0 up to and including this value. */
6981 #define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
6982
/* Sub-test descriptor type for this group; presumably pairs SSE_COMPARE_F2_XMM_IMM8_TEST_T records with
   PFNIEMAIMPLMXCSRF2XMMIMM8 workers (TYPEDEF_SUBTEST_TYPE is defined outside this section). */
6983 TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
6984
/* Workers iterated by SseCompareF2XmmR32Imm8Generate() and SseCompareF2XmmR32Imm8Test(). */
6985 static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
6986 {
6987 ENTRY_BIN(cmpps_u128),
6988 ENTRY_BIN(cmpss_u128)
6989 };
6990
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the resulting record to the data file.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfnWorker   The worker to invoke.
 * @param   pStrmOut    The output stream.
 * @param   pTestData   The record to complete and write; InVal1/InVal2 must already be set.
 * @param   pSrc        The source operand pair handed to the worker.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 * @param   bImm        The immediate byte handed to the worker.
 */
static uint32_t SseCompareF2XmmR32Imm8GenerateOne(PFNIEMAIMPLMXCSRF2XMMIMM8 pfnWorker, PRTSTREAM pStrmOut,
                                                  SSE_COMPARE_F2_XMM_IMM8_TEST_T *pTestData,
                                                  IEMMEDIAF2XMMSRC *pSrc, uint32_t fMxcsrIn, uint8_t bImm)
{
    uint32_t  fMxcsrOut = fMxcsrIn;
    X86XMMREG Result;
    pfnWorker(&fMxcsrOut, &Result, pSrc, bImm);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->bImm      = bImm;
    pTestData->OutVal    = Result;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the test data files for the workers in g_aSseCompareF2XmmR32Imm8.
 *
 * Random inputs come first (retrying towards the end until a minimum number of
 * inputs consisting solely of normal values has been produced), followed by
 * the special pairs replicated into all four elements.  Each input is run for
 * every immediate up to SSE_COMPARE_F2_XMM_IMM8_MAX and all 16
 * rounding/DAZ/FZ combinations with several exception flag/mask settings.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the worker name.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); idxFn++)
    {
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfnWorker = g_aSseCompareF2XmmR32Imm8[idxFn].pfnNative
                                                  ? g_aSseCompareF2XmmR32Imm8[idxFn].pfnNative
                                                  : g_aSseCompareF2XmmR32Imm8[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareF2XmmR32Imm8[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareF2XmmR32Imm8[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t     cNormalInputPairs = 0;
        size_t const cInputs           = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iInput = 0; iInput < cInputs; iInput += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T Record; RT_ZERO(Record);

            /* Fill all four elements of the first operand, then all four of the second. */
            for (unsigned iElem = 0; iElem < 4; iElem++)
            {
                if (iInput < cTests)
                    Record.InVal1.ar32[iElem] = RandR32Src(iInput);
                else
                    Record.InVal1.ar32[iElem] = s_aSpecials[iInput - cTests].Val1;
            }
            for (unsigned iElem = 0; iElem < 4; iElem++)
            {
                if (iInput < cTests)
                    Record.InVal2.ar32[iElem] = RandR32Src(iInput);
                else
                    Record.InVal2.ar32[iElem] = s_aSpecials[iInput - cTests].Val2;
            }

            bool fAllNormal = true;
            for (unsigned iElem = 0; iElem < 4; iElem++)
                if (   !RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[iElem])
                    || !RTFLOAT32U_IS_NORMAL(&Record.InVal2.ar32[iElem]))
                    fAllNormal = false;

            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iInput + cMinNormalPairs >= cTests
                     && iInput < cTests)
            {
                /* Too few all-normal inputs so far: redo this index (the loop increment undoes the decrement). */
                iInput -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = Record.InVal1;
            Src.uSrc2 = Record.InVal2;
            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;

            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
            {
                /* iCombo encodes rounding (bits 3:2), DAZ (bit 1) and FZ (bit 0), FZ varying fastest. */
                for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
                {
                    uint32_t const iRounding = iCombo >> 2;
                    bool const     fDaz      = (iCombo & 2) != 0;
                    bool const     fFz       = (iCombo & 1) != 0;

                    /* Pass 1: all X86_MXCSR_XCPT_MASK bits set. */
                    uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                      | (iRounding << X86_MXCSR_RC_SHIFT)
                                      | (fDaz ? X86_MXCSR_DAZ : 0)
                                      | (fFz  ? X86_MXCSR_FZ  : 0)
                                      | X86_MXCSR_XCPT_MASK;
                    uint32_t const fMxcsrM = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                               &Src, fMxcsrIn, bImm);

                    /* Pass 2: all X86_MXCSR_XCPT_MASK bits clear. */
                    fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                    uint32_t const fMxcsrU = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                               &Src, fMxcsrIn, bImm);

                    uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                    if (!fXcpt)
                        continue;

                    /* Pass 3: the flags seen so far are OR'ed into the input MXCSR. */
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                    uint32_t const fMxcsr1 = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                               &Src, fMxcsrIn, bImm);

                    /* Pass 4: only when pass 3 raised flags not yet in fXcpt. */
                    uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                    if ((fFlags1 & fXcpt) != fFlags1)
                    {
                        fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                        SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record, &Src, fMxcsrIn, bImm);
                    }

                    /* Remaining passes: with several flags collected, leave out one of them at a time. */
                    if (RT_IS_POWER_OF_TWO(fXcpt))
                        continue;
                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                    {
                        if (!(fUnmasked & fXcpt))
                            continue;
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                        SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record, &Src, fMxcsrIn, bImm);
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareF2XmmR32Imm8[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7139
7140static void SseCompareF2XmmR32Imm8Test(void)
7141{
7142 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7143 {
7144 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR32Imm8[iFn].pszName))
7145 continue;
7146
7147 uint32_t const cTests = *g_aSseCompareF2XmmR32Imm8[iFn].pcTests;
7148 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7149 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7150 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7151 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7152 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7153 {
7154 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7155 {
7156 IEMMEDIAF2XMMSRC Src;
7157 X86XMMREG ValOut;
7158
7159 Src.uSrc1 = paTests[iTest].InVal1;
7160 Src.uSrc2 = paTests[iTest].InVal2;
7161 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7162 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7163 if ( fMxcsr != paTests[iTest].fMxcsrOut
7164 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7165 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7166 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7167 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7168 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7169 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7170 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7171 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7172 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7173 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7174 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7175 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7176 paTests[iTest].bImm,
7177 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7178 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7179 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7180 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7181 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7182 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7183 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7184 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7185 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7186 ? " - val" : "",
7187 FormatMxcsr(paTests[iTest].fMxcsrIn));
7188 }
7189 }
7190 }
7191}
7192
7193
7194/*
7195 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7196 */
/* Double-precision compare workers; the table reuses the SSE_COMPARE_F2_XMM_IMM8_T descriptor type
   that the single-precision table above is declared with. */
7197 static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7198 {
7199 ENTRY_BIN(cmppd_u128),
7200 ENTRY_BIN(cmpsd_u128)
7201 };
7202
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the workers listed in
 * g_aSseCompareF2XmmR64Imm8.
 *
 * Each input pair is combined with every immediate from 0 thru
 * SSE_COMPARE_F2_XMM_IMM8_MAX, all four rounding modes and both DAZ and FZ
 * settings.  Every combination is recorded once with all exceptions masked and
 * once with all exceptions unmasked.  If either run raised exception flags,
 * further records are produced with those flags preset on input and with
 * different exception mask settings.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Format string producing the data file name; the
 *                          sub-test name is passed as its only argument.
 * @param   cTests          Number of randomized input pairs, raised to at
 *                          least 192.  The entries of s_aSpecials are
 *                          generated in addition to these.
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        /* Use the native worker if the table entry has one, otherwise the regular one. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                            ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                            : g_aSseCompareF2XmmR64Imm8[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareF2XmmR64Imm8[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests rounds use generated values, the remaining ones the specials. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and the generated range is running out:
                   redo this index (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* 1st record: all exceptions masked. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz  ? X86_MXCSR_FZ  : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            uint32_t  fMxcsrM = fMxcsrIn;
                            X86XMMREG ResM;
                            pfn(&fMxcsrM, &ResM, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrM;
                            TestData.bImm      = bImm;
                            TestData.OutVal    = ResM;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* 2nd record: all exceptions unmasked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t  fMxcsrU = fMxcsrIn;
                            X86XMMREG ResU;
                            pfn(&fMxcsrU, &ResU, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrU;
                            TestData.bImm      = bImm;
                            TestData.OutVal    = ResU;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (fXcpt)
                            {
                                /* 3rd record: unmasked, with the raised flags already set on input. */
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                                uint32_t  fMxcsr1 = fMxcsrIn;
                                X86XMMREG Res1;
                                pfn(&fMxcsr1, &Res1, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr1;
                                TestData.bImm      = bImm;
                                TestData.OutVal    = Res1;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                                /* If that run raised flags not seen before, mask everything raised so far. */
                                if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                                {
                                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                    uint32_t  fMxcsr2 = fMxcsrIn;
                                    X86XMMREG Res2;
                                    pfn(&fMxcsr2, &Res2, &Src, bImm);
                                    TestData.fMxcsrIn  = fMxcsrIn;
                                    TestData.fMxcsrOut = fMxcsr2;
                                    TestData.bImm      = bImm;
                                    TestData.OutVal    = Res2;
                                    RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                }
                                /* Several flags raised: leave each of them unmasked in turn, masking the others. */
                                if (!RT_IS_POWER_OF_TWO(fXcpt))
                                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                        if (fUnmasked & fXcpt)
                                        {
                                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                            uint32_t  fMxcsr3 = fMxcsrIn;
                                            X86XMMREG Res3;
                                            pfn(&fMxcsr3, &Res3, &Src, bImm);
                                            TestData.fMxcsrIn  = fMxcsrIn;
                                            TestData.fMxcsrOut = fMxcsr3;
                                            TestData.bImm      = bImm;
                                            TestData.OutVal    = Res3;
                                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                        }
                            }
                        }
        }

        /* The individual RTStrmWrite statuses are not checked above, so pick up any
           write failure from the stream's error status before closing it.  The stream
           is closed regardless so the handle is not leaked. */
        int const rcWrite = RTStrmError(pStrmOut);
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7343
7344static void SseCompareF2XmmR64Imm8Test(void)
7345{
7346 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7347 {
7348 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR64Imm8[iFn].pszName))
7349 continue;
7350
7351 uint32_t const cTests = *g_aSseCompareF2XmmR64Imm8[iFn].pcTests;
7352 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7353 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7354 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7355 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7356 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7357 {
7358 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7359 {
7360 IEMMEDIAF2XMMSRC Src;
7361 X86XMMREG ValOut;
7362
7363 Src.uSrc1 = paTests[iTest].InVal1;
7364 Src.uSrc2 = paTests[iTest].InVal2;
7365 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7366 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7367 if ( fMxcsr != paTests[iTest].fMxcsrOut
7368 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7369 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7370 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7371 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7372 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7373 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7374 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7375 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7376 paTests[iTest].bImm,
7377 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7378 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7379 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7380 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7381 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7382 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7383 ? " - val" : "",
7384 FormatMxcsr(paTests[iTest].fMxcsrIn));
7385 }
7386 }
7387 }
7388}
7389
7390
7391/*
7392 * Convert SSE operations converting signed double-words to single-precision floating point values.
7393 */
7394TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7395
7396static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7397{
7398 ENTRY_BIN(cvtdq2ps_u128)
7399};
7400
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the workers listed in
 * g_aSseConvertXmmI32R32.
 *
 * Each input vector is combined with all four rounding modes and both DAZ and
 * FZ settings.  Every combination is recorded once with all exceptions masked
 * and once with all exceptions unmasked.  If either run raised exception
 * flags, further records are produced with those flags preset on input and
 * with different exception mask settings.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Format string producing the data file name; the
 *                          sub-test name is passed as its only argument.
 * @param   cTests          Number of randomized input vectors, raised to at
 *                          least 192.  The entries of s_aSpecials are
 *                          generated in addition to these.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Use the native worker if the table entry has one, otherwise the regular one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative
                                         ? g_aSseConvertXmmI32R32[iFn].pfnNative
                                         : g_aSseConvertXmmI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests rounds use generated values, the remaining ones put one
               special value into all four lanes. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: unmasked, with the raised flags already set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If that run raised flags not seen before, mask everything raised so far. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* Several flags raised: leave each of them unmasked in turn, masking the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }

        /* The individual RTStrmWrite statuses are not checked above, so pick up any
           write failure from the stream's error status before closing it.  The stream
           is closed regardless so the handle is not leaked. */
        int const rcWrite = RTStrmError(pStrmOut);
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7513
7514static void SseConvertXmmI32R32Test(void)
7515{
7516 X86FXSTATE State;
7517 RT_ZERO(State);
7518
7519 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7520 {
7521 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R32[iFn].pszName))
7522 continue;
7523
7524 uint32_t const cTests = *g_aSseConvertXmmI32R32[iFn].pcTests;
7525 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7526 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7527 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7528 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7529 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7530 {
7531 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7532 {
7533 IEMSSERESULT Res; RT_ZERO(Res);
7534
7535 State.MXCSR = paTests[iTest].fMxcsrIn;
7536 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7537 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7538 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7539 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7540 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7541 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7542 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7543 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7544 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7545 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7546 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7547 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7548 iVar ? " " : "", Res.MXCSR,
7549 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7550 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7551 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7552 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7553 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7554 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7555 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7556 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7557 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7558 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7559 ? " - val" : "",
7560 FormatMxcsr(paTests[iTest].fMxcsrIn));
7561 }
7562 }
7563 }
7564}
7565
7566
7567/*
7568 * Convert SSE operations converting single-precision floating point values to signed double-words.
7569 */
7570static const SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7571{
7572 ENTRY_BIN(cvtps2dq_u128),
7573 ENTRY_BIN(cvttps2dq_u128)
7574};
7575
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the workers listed in
 * g_aSseConvertXmmR32I32.
 *
 * Each input vector is combined with all four rounding modes and both DAZ and
 * FZ settings.  Every combination is recorded once with all exceptions masked
 * and once with all exceptions unmasked.  If either run raised exception
 * flags, further records are produced with those flags preset on input and
 * with different exception mask settings.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Format string producing the data file name; the
 *                          sub-test name is passed as its only argument.
 * @param   cTests          Number of randomized input vectors, raised to at
 *                          least 192.  The entries of s_aSpecials are
 *                          generated in addition to these.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Use the native worker if the table entry has one, otherwise the regular one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative
                                         ? g_aSseConvertXmmR32I32[iFn].pfnNative
                                         : g_aSseConvertXmmR32I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests rounds use generated values, the remaining ones the specials. */
            TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and the generated range is running out:
                   redo this index (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: unmasked, with the raised flags already set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If that run raised flags not seen before, mask everything raised so far. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* Several flags raised: leave each of them unmasked in turn, masking the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }

        /* The individual RTStrmWrite statuses are not checked above, so pick up any
           write failure from the stream's error status before closing it.  The stream
           is closed regardless so the handle is not leaked. */
        int const rcWrite = RTStrmError(pStrmOut);
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7699
7700static void SseConvertXmmR32I32Test(void)
7701{
7702 X86FXSTATE State;
7703 RT_ZERO(State);
7704
7705 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
7706 {
7707 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32I32[iFn].pszName))
7708 continue;
7709
7710 uint32_t const cTests = *g_aSseConvertXmmR32I32[iFn].pcTests;
7711 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
7712 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
7713 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
7714 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7715 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7716 {
7717 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7718 {
7719 IEMSSERESULT Res; RT_ZERO(Res);
7720
7721 State.MXCSR = paTests[iTest].fMxcsrIn;
7722 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7723 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7724 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7725 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7726 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7727 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7728 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
7729 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
7730 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
7731 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7732 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
7733 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
7734 iVar ? " " : "", Res.MXCSR,
7735 Res.uResult.ai32[0], Res.uResult.ai32[1],
7736 Res.uResult.ai32[2], Res.uResult.ai32[3],
7737 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7738 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
7739 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
7740 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7741 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7742 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7743 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7744 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7745 ? " - val" : "",
7746 FormatMxcsr(paTests[iTest].fMxcsrIn));
7747 }
7748 }
7749 }
7750}
7751
7752
7753/*
7754 * Convert SSE operations converting signed double-words to double-precision floating point values.
7755 */
7756static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
7757{
7758 ENTRY_BIN(cvtdq2pd_u128)
7759};
7760
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the workers listed in
 * g_aSseConvertXmmI32R64.
 *
 * Each input vector is combined with all four rounding modes and both DAZ and
 * FZ settings.  Every combination is recorded once with all exceptions masked
 * and once with all exceptions unmasked.  If either run raised exception
 * flags, further records are produced with those flags preset on input and
 * with different exception mask settings.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Format string producing the data file name; the
 *                          sub-test name is passed as its only argument.
 * @param   cTests          Number of randomized input vectors, raised to at
 *                          least 192.  The entries of s_aSpecials are
 *                          generated in addition to these.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Use the native worker if the table entry has one, otherwise the regular one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative
                                         ? g_aSseConvertXmmI32R64[iFn].pfnNative
                                         : g_aSseConvertXmmI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests rounds use generated values, the remaining ones put one
               special value into all four lanes. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: unmasked, with the raised flags already set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If that run raised flags not seen before, mask everything raised so far. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* Several flags raised: leave each of them unmasked in turn, masking the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }

        /* The individual RTStrmWrite statuses are not checked above, so pick up any
           write failure from the stream's error status before closing it.  The stream
           is closed regardless so the handle is not leaked. */
        int const rcWrite = RTStrmError(pStrmOut);
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7873
7874static void SseConvertXmmI32R64Test(void)
7875{
7876 X86FXSTATE State;
7877 RT_ZERO(State);
7878
7879 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
7880 {
7881 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R64[iFn].pszName))
7882 continue;
7883
7884 uint32_t const cTests = *g_aSseConvertXmmI32R64[iFn].pcTests;
7885 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
7886 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
7887 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
7888 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7889 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7890 {
7891 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7892 {
7893 IEMSSERESULT Res; RT_ZERO(Res);
7894
7895 State.MXCSR = paTests[iTest].fMxcsrIn;
7896 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7897 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7898 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7899 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7900 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7901 "%s -> mxcsr=%#08x %s'%s\n"
7902 "%s expected %#08x %s'%s%s%s (%s)\n",
7903 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7904 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7905 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7906 iVar ? " " : "", Res.MXCSR,
7907 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
7908 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7909 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
7910 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7911 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7912 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7913 ? " - val" : "",
7914 FormatMxcsr(paTests[iTest].fMxcsrIn));
7915 }
7916 }
7917 }
7918}
7919
7920
7921/*
7922 * Convert SSE operations converting double-precision floating point values to signed double-words.
7923 */
7924static const SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
7925{
7926 ENTRY_BIN(cvtpd2dq_u128),
7927 ENTRY_BIN(cvttpd2dq_u128)
7928};
7929
7930#ifdef TSTIEMAIMPL_WITH_GENERATOR
7931static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
7932{
7933 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7934
7935 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
7936 {
7937 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
7938 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
7939 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
7940 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
7941 /** @todo More specials. */
7942 };
7943
7944 X86FXSTATE State;
7945 RT_ZERO(State);
7946 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7947 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
7948 {
7949 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;
7950
7951 PRTSTREAM pStrmOut = NULL;
7952 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName);
7953 if (RT_FAILURE(rc))
7954 {
7955 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
7956 return RTEXITCODE_FAILURE;
7957 }
7958
7959 uint32_t cNormalInputPairs = 0;
7960 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7961 {
7962 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
7963
7964 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
7965 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
7966
7967 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
7968 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
7969 cNormalInputPairs++;
7970 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7971 {
7972 iTest -= 1;
7973 continue;
7974 }
7975
7976 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7977 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7978 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7979 for (uint8_t iFz = 0; iFz < 2; iFz++)
7980 {
7981 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7982 | (iRounding << X86_MXCSR_RC_SHIFT)
7983 | (iDaz ? X86_MXCSR_DAZ : 0)
7984 | (iFz ? X86_MXCSR_FZ : 0)
7985 | X86_MXCSR_XCPT_MASK;
7986 IEMSSERESULT ResM; RT_ZERO(ResM);
7987 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
7988 TestData.fMxcsrIn = State.MXCSR;
7989 TestData.fMxcsrOut = ResM.MXCSR;
7990 TestData.OutVal = ResM.uResult;
7991 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
7992
7993 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7994 IEMSSERESULT ResU; RT_ZERO(ResU);
7995 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
7996 TestData.fMxcsrIn = State.MXCSR;
7997 TestData.fMxcsrOut = ResU.MXCSR;
7998 TestData.OutVal = ResU.uResult;
7999 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8000
8001 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8002 if (fXcpt)
8003 {
8004 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8005 IEMSSERESULT Res1; RT_ZERO(Res1);
8006 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8007 TestData.fMxcsrIn = State.MXCSR;
8008 TestData.fMxcsrOut = Res1.MXCSR;
8009 TestData.OutVal = Res1.uResult;
8010 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8011
8012 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8013 {
8014 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8015 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8016 IEMSSERESULT Res2; RT_ZERO(Res2);
8017 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8018 TestData.fMxcsrIn = State.MXCSR;
8019 TestData.fMxcsrOut = Res2.MXCSR;
8020 TestData.OutVal = Res2.uResult;
8021 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8022 }
8023 if (!RT_IS_POWER_OF_TWO(fXcpt))
8024 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8025 if (fUnmasked & fXcpt)
8026 {
8027 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8028 IEMSSERESULT Res3; RT_ZERO(Res3);
8029 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8030 TestData.fMxcsrIn = State.MXCSR;
8031 TestData.fMxcsrOut = Res3.MXCSR;
8032 TestData.OutVal = Res3.uResult;
8033 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8034 }
8035 }
8036 }
8037 }
8038 rc = RTStrmClose(pStrmOut);
8039 if (RT_FAILURE(rc))
8040 {
8041 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
8042 return RTEXITCODE_FAILURE;
8043 }
8044 }
8045
8046 return RTEXITCODE_SUCCESS;
8047}
8048#endif
8049
8050static void SseConvertXmmR64I32Test(void)
8051{
8052 X86FXSTATE State;
8053 RT_ZERO(State);
8054
8055 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8056 {
8057 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64I32[iFn].pszName))
8058 continue;
8059
8060 uint32_t const cTests = *g_aSseConvertXmmR64I32[iFn].pcTests;
8061 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8062 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8063 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8064 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8065 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8066 {
8067 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8068 {
8069 IEMSSERESULT Res; RT_ZERO(Res);
8070
8071 State.MXCSR = paTests[iTest].fMxcsrIn;
8072 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8073 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8074 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8075 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8076 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8077 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8078 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8079 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8080 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8081 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8082 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8083 iVar ? " " : "", Res.MXCSR,
8084 Res.uResult.ai32[0], Res.uResult.ai32[1],
8085 Res.uResult.ai32[2], Res.uResult.ai32[3],
8086 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8087 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8088 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8089 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8090 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8091 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8092 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8093 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8094 ? " - val" : "",
8095 FormatMxcsr(paTests[iTest].fMxcsrIn));
8096 }
8097 }
8098 }
8099}
8100
8101
8102/*
8103 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8104 */
8105TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8106
8107static const SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8108{
8109 ENTRY_BIN(cvtpd2pi_u128),
8110 ENTRY_BIN(cvttpd2pi_u128)
8111};
8112
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on pTestData->InVal with @a fMxcsrIn, stores the input
 * MXCSR, the output MXCSR and the 64-bit result in @a pTestData and appends
 * the record to @a pStrmOut.
 *
 * @returns The MXCSR value left behind by @a pfn.
 * @param   pfn         The worker to execute.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to complete and write (InVal must be set).
 * @param   fMxcsrIn    The MXCSR value to feed to the worker.
 * @param   prcWrite    Sticky write status; only the first failure is kept.
 */
static uint32_t sseConvertMmXmmGenRecord(PFNIEMAIMPLMXCSRU64U128 pfn, PRTSTREAM pStrmOut,
                                         SSE_CONVERT_MM_XMM_TEST_T *pTestData, uint32_t fMxcsrIn, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Res    = 0; /* defined value even if the worker should skip the store */
    pfn(&fMxcsrOut, &u64Res, &pTestData->InVal);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Res;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmXmm workers.
 *
 * For every input pair all rounding mode / DAZ / FZ combinations are recorded
 * with all exceptions masked and unmasked, followed by additional exception
 * mask permutations when the worker raised any exception flag.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the
 *                          sub-test name as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running short on normal/normal pairs: redo this index with new input. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = sseConvertMmXmmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* 2nd record: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = sseConvertMmXmmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd record: mask bits clear, the flags seen so far preset on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = sseConvertMmXmmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* New flags showed up: record again with exactly the seen exceptions masked. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertMmXmmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                        }

                        /* Several exceptions seen: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertMmXmmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8235
8236static void SseConvertMmXmmTest(void)
8237{
8238 X86FXSTATE State;
8239 RT_ZERO(State);
8240
8241 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8242 {
8243 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmXmm[iFn].pszName))
8244 continue;
8245
8246 uint32_t const cTests = *g_aSseConvertMmXmm[iFn].pcTests;
8247 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8248 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8249 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8250 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8251 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8252 {
8253 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8254 {
8255 RTUINT64U ValOut;
8256 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8257 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8258 if ( fMxcsr != paTests[iTest].fMxcsrOut
8259 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8260 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8261 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8262 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8263 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8264 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8265 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8266 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8267 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8268 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8269 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8270 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8271 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8272 ? " - val" : "",
8273 FormatMxcsr(paTests[iTest].fMxcsrIn));
8274 }
8275 }
8276 }
8277}
8278
8279
8280/*
8281 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8282 */
8283TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8284
8285static const SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8286{
8287 ENTRY_BIN(cvtpi2pd_u128)
8288};
8289
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on pTestData->InVal with @a fMxcsrIn, letting it store its
 * result directly in pTestData->OutVal, records the input and output MXCSR and
 * appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value left behind by @a pfn.
 * @param   pfn         The worker to execute.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to complete and write (InVal must be set;
 *                      OutVal is passed to the worker as it currently is).
 * @param   fMxcsrIn    The MXCSR value to feed to the worker.
 * @param   prcWrite    Sticky write status; only the first failure is kept.
 */
static uint32_t sseConvertXmmR64MmGenRecord(PFNIEMAIMPLMXCSRU128U64 pfn, PRTSTREAM pStrmOut,
                                            SSE_CONVERT_XMM_MM_TEST_T *pTestData, uint32_t fMxcsrIn, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR64Mm workers.
 *
 * For every pair of signed 32-bit inputs all rounding mode / DAZ / FZ
 * combinations are recorded with all exceptions masked and unmasked, followed
 * by additional exception mask permutations when any exception flag was raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the
 *                          sub-test name as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = sseConvertXmmR64MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* 2nd record: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = sseConvertXmmR64MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd record: mask bits clear, the flags seen so far preset on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = sseConvertXmmR64MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* New flags showed up: record again with exactly the seen exceptions masked. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertXmmR64MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                        }

                        /* Several exceptions seen: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertXmmR64MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8389
8390static void SseConvertXmmR64MmTest(void)
8391{
8392 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8393 {
8394 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64Mm[iFn].pszName))
8395 continue;
8396
8397 uint32_t const cTests = *g_aSseConvertXmmR64Mm[iFn].pcTests;
8398 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8399 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8400 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8401 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8402 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8403 {
8404 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8405 {
8406 X86XMMREG ValOut;
8407 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8408 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8409 if ( fMxcsr != paTests[iTest].fMxcsrOut
8410 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8411 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8412 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8413 "%s -> mxcsr=%#08x %s'%s\n"
8414 "%s expected %#08x %s'%s%s%s (%s)\n",
8415 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8416 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8417 iVar ? " " : "", fMxcsr,
8418 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8419 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8420 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8421 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8422 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8423 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8424 ? " - val" : "",
8425 FormatMxcsr(paTests[iTest].fMxcsrIn));
8426 }
8427 }
8428 }
8429}
8430
8431
8432/*
8433 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8434 */
8435TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8436
8437static const SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
8438{
8439 ENTRY_BIN(cvtpi2ps_u128)
8440};
8441
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on pTestData->InVal with @a fMxcsrIn, letting it store its
 * result directly in pTestData->OutVal, records the input and output MXCSR and
 * appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value left behind by @a pfn.
 * @param   pfn         The worker to execute.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to complete and write (InVal must be set;
 *                      OutVal is passed to the worker as it currently is).
 * @param   fMxcsrIn    The MXCSR value to feed to the worker.
 * @param   prcWrite    Sticky write status; only the first failure is kept.
 */
static uint32_t sseConvertXmmR32MmGenRecord(PFNIEMAIMPLMXCSRU128U64 pfn, PRTSTREAM pStrmOut,
                                            SSE_CONVERT_XMM_MM_TEST_T *pTestData, uint32_t fMxcsrIn, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR32Mm workers.
 *
 * For every pair of signed 32-bit inputs all rounding mode / DAZ / FZ
 * combinations are recorded with all exceptions masked and unmasked, followed
 * by additional exception mask permutations when any exception flag was raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the
 *                          sub-test name as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = sseConvertXmmR32MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* 2nd record: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = sseConvertXmmR32MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd record: mask bits clear, the flags seen so far preset on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = sseConvertXmmR32MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);

                        /* New flags showed up: record again with exactly the seen exceptions masked. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertXmmR32MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                        }

                        /* Several exceptions seen: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertXmmR32MmGenRecord(pfn, pStrmOut, &TestData, fMxcsrIn, &rcWrite);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8541
8542static void SseConvertXmmR32MmTest(void)
8543{
8544 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8545 {
8546 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32Mm[iFn].pszName))
8547 continue;
8548
8549 uint32_t const cTests = *g_aSseConvertXmmR32Mm[iFn].pcTests;
8550 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8551 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8552 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8553 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8554 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8555 {
8556 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8557 {
8558 X86XMMREG ValOut;
8559 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8560 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8561 if ( fMxcsr != paTests[iTest].fMxcsrOut
8562 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8563 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8564 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8565 "%s -> mxcsr=%#08x %s'%s\n"
8566 "%s expected %#08x %s'%s%s%s (%s)\n",
8567 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8568 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8569 iVar ? " " : "", fMxcsr,
8570 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8571 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8572 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8573 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8574 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8575 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8576 ? " - val" : "",
8577 FormatMxcsr(paTests[iTest].fMxcsrIn));
8578 }
8579 }
8580 }
8581}
8582
8583
8584/*
8585 * Convert SSE operations converting single-precision floating point values to signed double-word values.
8586 */
8587TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
8588
8589static const SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
8590{
8591 ENTRY_BIN(cvtps2pi_u128),
8592 ENTRY_BIN(cvttps2pi_u128)
8593};
8594
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on @a u64Src with @a fMxcsrIn, stores the input MXCSR, the
 * output MXCSR and the 64-bit result in @a pTestData and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value left behind by @a pfn.
 * @param   pfn         The worker to execute.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to complete and write (ar32InVal must be set).
 * @param   u64Src      The two single-precision inputs packed into 64 bits.
 * @param   fMxcsrIn    The MXCSR value to feed to the worker.
 * @param   prcWrite    Sticky write status; only the first failure is kept.
 */
static uint32_t sseConvertMmI32XmmR32GenRecord(PFNIEMAIMPLMXCSRU64U64 pfn, PRTSTREAM pStrmOut,
                                               SSE_CONVERT_MM_R32_TEST_T *pTestData, uint64_t u64Src,
                                               uint32_t fMxcsrIn, int *prcWrite)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Res    = 0; /* defined value even if the worker should skip the store */
    pfn(&fMxcsrOut, &u64Res, u64Src);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Res;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmI32XmmR32 workers.
 *
 * For every pair of single-precision inputs all rounding mode / DAZ / FZ
 * combinations are recorded with all exceptions masked and unmasked, followed
 * by additional exception mask permutations when any exception flag was raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the
 *                          sub-test name as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running short on normal/normal pairs: redo this index with new input. */
                iTest -= 1;
                continue;
            }

            /* The worker takes both single-precision inputs packed into one 64-bit value. */
            RTFLOAT64U TestVal;
            TestVal.au32[0] = TestData.ar32InVal[0].u;
            TestVal.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = sseConvertMmI32XmmR32GenRecord(pfn, pStrmOut, &TestData, TestVal.u, fMxcsrIn, &rcWrite);

                        /* 2nd record: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = sseConvertMmI32XmmR32GenRecord(pfn, pStrmOut, &TestData, TestVal.u, fMxcsrIn, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3rd record: mask bits clear, the flags seen so far preset on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = sseConvertMmI32XmmR32GenRecord(pfn, pStrmOut, &TestData, TestVal.u, fMxcsrIn, &rcWrite);

                        /* New flags showed up: record again with exactly the seen exceptions masked. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            sseConvertMmI32XmmR32GenRecord(pfn, pStrmOut, &TestData, TestVal.u, fMxcsrIn, &rcWrite);
                        }

                        /* Several exceptions seen: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    sseConvertMmI32XmmR32GenRecord(pfn, pStrmOut, &TestData, TestVal.u, fMxcsrIn, &rcWrite);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8721
8722static void SseConvertMmI32XmmR32Test(void)
8723{
8724 X86FXSTATE State;
8725 RT_ZERO(State);
8726
8727 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8728 {
8729 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmI32XmmR32[iFn].pszName))
8730 continue;
8731
8732 uint32_t const cTests = *g_aSseConvertMmI32XmmR32[iFn].pcTests;
8733 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
8734 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
8735 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
8736 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8737 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8738 {
8739 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8740 {
8741 RTUINT64U ValOut;
8742 RTUINT64U ValIn;
8743
8744 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
8745 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
8746
8747 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8748 pfn(&fMxcsr, &ValOut.u, ValIn.u);
8749 if ( fMxcsr != paTests[iTest].fMxcsrOut
8750 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8751 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8752 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8753 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8754 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8755 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8756 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
8757 iVar ? " " : "", fMxcsr,
8758 ValOut.ai32[0], ValOut.ai32[1],
8759 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8760 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8761 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8762 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8763 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8764 ? " - val" : "",
8765 FormatMxcsr(paTests[iTest].fMxcsrIn));
8766 }
8767 }
8768 }
8769}
8770
8771
8772/*
8773 * SSE 4.2 pcmpxstrx instructions.
8774 */
8775TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
8776
8777static const SSE_PCMPISTRI_T g_aSsePcmpistri[] =
8778{
8779 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
8780};
8781
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn on the two inputs stored in @a pTestData for every 8-bit
 * immediate (0..255), each time starting from @a fEFlagsIn, and appends one
 * record per immediate to @a pStrmOut.
 *
 * @returns VINF_SUCCESS, or the status of the first failed write.
 * @param   pfn         The worker to execute.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to complete and write (InVal1 and InVal2
 *                      must be set).
 * @param   fEFlagsIn   The EFLAGS value handed to the worker on each call.
 */
static int ssePcmpistriGenAllImms(PFNIEMAIMPLPCMPISTRIU128IMM8 pfn, PRTSTREAM pStrmOut,
                                  SSE_PCMPISTRI_TEST_T *pTestData, uint32_t fEFlagsIn)
{
    IEMPCMPISTRXSRC TestVal;
    TestVal.uSrc1 = pTestData->InVal1.uXmm;
    TestVal.uSrc2 = pTestData->InVal2.uXmm;

    int rcRet = VINF_SUCCESS;
    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
    {
        uint32_t fEFlagsOut = fEFlagsIn;
        pfn(&pTestData->u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
        pTestData->fEFlagsIn  = fEFlagsIn;
        pTestData->fEFlagsOut = fEFlagsOut;
        pTestData->bImm       = (uint8_t)u16Imm;

        int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
        if (RT_FAILURE(rc) && RT_SUCCESS(rcRet))
            rcRet = rc;
    }
    return rcRet;
}

/**
 * Generates the binary test data files for the g_aSsePcmpistri workers.
 *
 * Each input pair is recorded for all 256 immediates, and then once more with
 * the second operand set equal to the first.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the
 *                          sub-test name as its argument.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 */
static RTEXITCODE SseComparePcmpistriGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* enforce a minimum of 192 random input pairs */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
    {
        PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpistri[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpistri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; RT_SUCCESS(rcWrite) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const fEFlagsIn = RandEFlags();
            rcWrite = ssePcmpistriGenAllImms(pfn, pStrmOut, &TestData, fEFlagsIn);

            /* Repeat the test with the input value being the same. */
            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
            int const rcWrite2 = ssePcmpistriGenAllImms(pfn, pStrmOut, &TestData, fEFlagsIn);
            if (RT_SUCCESS(rcWrite))
                rcWrite = rcWrite2;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSsePcmpistri[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpistri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8853
8854static void SseComparePcmpistriTest(void)
8855{
8856 X86FXSTATE State;
8857 RT_ZERO(State);
8858
8859 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
8860 {
8861 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistri[iFn].pszName))
8862 continue;
8863
8864 uint32_t const cTests = *g_aSsePcmpistri[iFn].pcTests;
8865 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
8866 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
8867 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
8868 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8869 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8870 {
8871 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8872 {
8873 IEMPCMPISTRXSRC TestVal;
8874 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
8875 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
8876
8877 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
8878 uint32_t u32EcxOut = 0;
8879 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
8880 if ( fEFlags != paTests[iTest].fEFlagsOut
8881 || u32EcxOut != paTests[iTest].u32EcxOut)
8882 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
8883 "%s -> efl=%#08x %RU32\n"
8884 "%s expected %#08x %RU32%s%s\n",
8885 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
8886 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
8887 iVar ? " " : "", fEFlags, u32EcxOut,
8888 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
8889 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
8890 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
8891 }
8892 }
8893 }
8894}
8895
8896
8897TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
8898
8899static const SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
8900{
8901 ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
8902};
8903
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data for the pcmpistrm sub-test(s).
 *
 * One data file is written per entry in g_aSsePcmpistrm; the file name is
 * produced from @a pszDataFileFmt with the sub-test name as the only format
 * argument.  Each input pair is run with all 256 immediate values, first as
 * picked and then once more with the 2nd operand set equal to the 1st, and
 * one SSE_PCMPISTRM_TEST_T record is written per call.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Data file name format string.
 * @param   cTests          Number of random input pairs, raised to 192 when
 *                          lower.  The entries of s_aSpecials are added on top.
 */
static RTEXITCODE SseComparePcmpistrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
    {
        /* Use pfnNative when the entry has one, otherwise fall back on pfn. */
        PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative
                                               ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpistrm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpistrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        /* All loops stop at the first failed write; rc is checked after them. */
        for (uint32_t iTest = 0; RT_SUCCESS(rc) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const fEFlagsIn = RandEFlags();

            /* Pass 0: the inputs as picked above.  Pass 1: the same value in both operands. */
            for (uint32_t iPass = 0; RT_SUCCESS(rc) && iPass < 2; iPass++)
            {
                if (iPass == 1)
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;

                IEMPCMPISTRXSRC TestVal;
                TestVal.uSrc1 = TestData.InVal1.uXmm;
                TestVal.uSrc2 = TestData.InVal2.uXmm;

                /* 16-bit counter so the loop can terminate after immediate 255. */
                for (uint16_t u16Imm = 0; RT_SUCCESS(rc) && u16Imm < 256; u16Imm++)
                {
                    uint32_t fEFlagsOut = fEFlagsIn;
                    pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                    TestData.fEFlagsIn  = fEFlagsIn;
                    TestData.fEFlagsOut = fEFlagsOut;
                    TestData.bImm       = (uint8_t)u16Imm;
                    rc = RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                }
            }
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSsePcmpistrm[iFn].pszName, rc);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpistrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8975
8976static void SseComparePcmpistrmTest(void)
8977{
8978 X86FXSTATE State;
8979 RT_ZERO(State);
8980
8981 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
8982 {
8983 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistrm[iFn].pszName))
8984 continue;
8985
8986 uint32_t const cTests = *g_aSsePcmpistrm[iFn].pcTests;
8987 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
8988 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
8989 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
8990 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8991 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8992 {
8993 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8994 {
8995 IEMPCMPISTRXSRC TestVal;
8996 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
8997 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
8998
8999 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9000 RTUINT128U OutVal;
9001 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9002 if ( fEFlags != paTests[iTest].fEFlagsOut
9003 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9004 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9005 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9006 "%s -> efl=%#08x %s\n"
9007 "%s expected %#08x %s%s%s\n",
9008 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9009 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9010 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9011 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9012 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9013 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9014 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9015 }
9016 }
9017 }
9018}
9019
9020
9021TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9022
9023static const SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9024{
9025 ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9026};
9027
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data for the pcmpestri sub-test(s).
 *
 * One data file is written per entry in g_aSsePcmpestri; the file name is
 * produced from @a pszDataFileFmt with the sub-test name as the only format
 * argument.  Each input pair is combined with RAX and RDX values of -20 and
 * 0, and every combination is run with all 256 immediate values, first with
 * the inputs as picked and then with the 2nd operand set equal to the 1st.
 * One SSE_PCMPESTRI_TEST_T record is written per call.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Data file name format string.
 * @param   cTests          Number of random input pairs, raised to 192 when
 *                          lower.  The entries of s_aSpecials are added on top.
 */
static RTEXITCODE SseComparePcmpestriGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
    {
        /* Use pfnNative when the entry has one, otherwise fall back on pfn. */
        PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative
                                               ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestri[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        /* All loops stop at the first failed write; rc is checked after them. */
        for (uint32_t iTest = 0; RT_SUCCESS(rc) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);

            /* Keep the picked inputs in locals: the "same input" pass below must not
               wipe out the 2nd operand for the remaining RAX/RDX combinations. */
            RTUINT128U const uSrc1 = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            RTUINT128U const uSrc2 = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
            TestData.InVal1.uXmm = uSrc1;

            /* NOTE(review): '< 20' with a step of 20 covers -20 and 0 only; +20 is
               never generated - confirm whether that is intended. */
            for (int64_t i64Rax = -20; RT_SUCCESS(rc) && i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; RT_SUCCESS(rc) && i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();

                    /* Pass 0: the inputs as picked above.  Pass 1: the same value in both operands. */
                    for (uint32_t iPass = 0; RT_SUCCESS(rc) && iPass < 2; iPass++)
                    {
                        TestData.InVal2.uXmm = iPass == 0 ? uSrc2 : uSrc1;

                        IEMPCMPESTRXSRC TestVal;
                        TestVal.uSrc1  = TestData.InVal1.uXmm;
                        TestVal.uSrc2  = TestData.InVal2.uXmm;
                        TestVal.u64Rax = TestData.u64Rax;
                        TestVal.u64Rdx = TestData.u64Rdx;

                        /* 16-bit counter so the loop can terminate after immediate 255. */
                        for (uint16_t u16Imm = 0; RT_SUCCESS(rc) && u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            rc = RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9108
9109static void SseComparePcmpestriTest(void)
9110{
9111 X86FXSTATE State;
9112 RT_ZERO(State);
9113
9114 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9115 {
9116 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestri[iFn].pszName))
9117 continue;
9118
9119 uint32_t const cTests = *g_aSsePcmpestri[iFn].pcTests;
9120 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9121 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9122 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9123 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9124 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9125 {
9126 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9127 {
9128 IEMPCMPESTRXSRC TestVal;
9129 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9130 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9131 TestVal.u64Rax = paTests[iTest].u64Rax;
9132 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9133
9134 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9135 uint32_t u32EcxOut = 0;
9136 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9137 if ( fEFlags != paTests[iTest].fEFlagsOut
9138 || u32EcxOut != paTests[iTest].u32EcxOut)
9139 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9140 "%s -> efl=%#08x %RU32\n"
9141 "%s expected %#08x %RU32%s%s\n",
9142 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9143 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9144 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9145 paTests[iTest].bImm,
9146 iVar ? " " : "", fEFlags, u32EcxOut,
9147 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9148 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9149 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9150 }
9151 }
9152 }
9153}
9154
9155
9156TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9157
9158static const SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9159{
9160 ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9161};
9162
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data for the pcmpestrm sub-test(s).
 *
 * One data file is written per entry in g_aSsePcmpestrm; the file name is
 * produced from @a pszDataFileFmt with the sub-test name as the only format
 * argument.  Each input pair is combined with RAX and RDX values of -20 and
 * 0, and every combination is run with all 256 immediate values, first with
 * the inputs as picked and then with the 2nd operand set equal to the 1st.
 * One SSE_PCMPESTRM_TEST_T record is written per call.
 *
 * @returns RTEXITCODE_SUCCESS on success, RTEXITCODE_FAILURE if a data file
 *          could not be opened, written or closed.
 * @param   pszDataFileFmt  Data file name format string.
 * @param   cTests          Number of random input pairs, raised to 192 when
 *                          lower.  The entries of s_aSpecials are added on top.
 */
static RTEXITCODE SseComparePcmpestrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
    {
        /* Use pfnNative when the entry has one, otherwise fall back on pfn. */
        PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative
                                               ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestrm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        /* All loops stop at the first failed write; rc is checked after them. */
        for (uint32_t iTest = 0; RT_SUCCESS(rc) && iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Keep the picked inputs in locals: the "same input" pass below must not
               wipe out the 2nd operand for the remaining RAX/RDX combinations. */
            RTUINT128U const uSrc1 = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            RTUINT128U const uSrc2 = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
            TestData.InVal1.uXmm = uSrc1;

            /* NOTE(review): '< 20' with a step of 20 covers -20 and 0 only; +20 is
               never generated - confirm whether that is intended. */
            for (int64_t i64Rax = -20; RT_SUCCESS(rc) && i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; RT_SUCCESS(rc) && i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();

                    /* Pass 0: the inputs as picked above.  Pass 1: the same value in both operands. */
                    for (uint32_t iPass = 0; RT_SUCCESS(rc) && iPass < 2; iPass++)
                    {
                        TestData.InVal2.uXmm = iPass == 0 ? uSrc2 : uSrc1;

                        IEMPCMPESTRXSRC TestVal;
                        TestVal.uSrc1  = TestData.InVal1.uXmm;
                        TestVal.uSrc2  = TestData.InVal2.uXmm;
                        TestVal.u64Rax = TestData.u64Rax;
                        TestVal.u64Rdx = TestData.u64Rdx;

                        /* 16-bit counter so the loop can terminate after immediate 255. */
                        for (uint16_t u16Imm = 0; RT_SUCCESS(rc) && u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            rc = RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9243
9244static void SseComparePcmpestrmTest(void)
9245{
9246 X86FXSTATE State;
9247 RT_ZERO(State);
9248
9249 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9250 {
9251 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestrm[iFn].pszName))
9252 continue;
9253
9254 uint32_t const cTests = *g_aSsePcmpestrm[iFn].pcTests;
9255 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9256 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9257 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9258 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9259 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9260 {
9261 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9262 {
9263 IEMPCMPESTRXSRC TestVal;
9264 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9265 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9266 TestVal.u64Rax = paTests[iTest].u64Rax;
9267 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9268
9269 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9270 RTUINT128U OutVal;
9271 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9272 if ( fEFlags != paTests[iTest].fEFlagsOut
9273 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9274 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9275 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9276 "%s -> efl=%#08x %s\n"
9277 "%s expected %#08x %s%s%s\n",
9278 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9279 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9280 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9281 paTests[iTest].bImm,
9282 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9283 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9284 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9285 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9286 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9287 }
9288 }
9289 }
9290}
9291
9292
9293
9294int main(int argc, char **argv)
9295{
9296 int rc = RTR3InitExe(argc, &argv, 0);
9297 if (RT_FAILURE(rc))
9298 return RTMsgInitFailure(rc);
9299
9300 /*
9301 * Determine the host CPU.
9302 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9303 */
9304#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9305 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9306 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9307 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9308#else
9309 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9310#endif
9311
9312 /*
9313 * Parse arguments.
9314 */
9315 enum { kModeNotSet, kModeTest, kModeGenerate }
9316 enmMode = kModeNotSet;
9317 bool fInt = true;
9318 bool fFpuLdSt = true;
9319 bool fFpuBinary1 = true;
9320 bool fFpuBinary2 = true;
9321 bool fFpuOther = true;
9322 bool fCpuData = true;
9323 bool fCommonData = true;
9324 bool fSseFpBinary = true;
9325 bool fSseFpOther = true;
9326 bool fSsePcmpxstrx = true;
9327 uint32_t const cDefaultTests = 96;
9328 uint32_t cTests = cDefaultTests;
9329 RTGETOPTDEF const s_aOptions[] =
9330 {
9331 // mode:
9332 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9333 { "--test", 't', RTGETOPT_REQ_NOTHING },
9334 // test selection (both)
9335 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9336 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9337 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9338 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9339 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9340 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9341 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9342 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9343 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9344 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9345 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9346 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9347 { "--include", 'I', RTGETOPT_REQ_STRING },
9348 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9349 // generation parameters
9350 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9351 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9352 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9353 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9354 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9355 };
9356
9357 RTGETOPTSTATE State;
9358 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9359 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9360
9361 RTGETOPTUNION ValueUnion;
9362 while ((rc = RTGetOpt(&State, &ValueUnion)))
9363 {
9364 switch (rc)
9365 {
9366 case 'g':
9367 enmMode = kModeGenerate;
9368 break;
9369 case 't':
9370 enmMode = kModeTest;
9371 break;
9372
9373 case 'a':
9374 fCpuData = true;
9375 fCommonData = true;
9376 fInt = true;
9377 fFpuLdSt = true;
9378 fFpuBinary1 = true;
9379 fFpuBinary2 = true;
9380 fFpuOther = true;
9381 fSseFpBinary = true;
9382 fSseFpOther = true;
9383 fSsePcmpxstrx = true;
9384 break;
9385 case 'z':
9386 fCpuData = false;
9387 fCommonData = false;
9388 fInt = false;
9389 fFpuLdSt = false;
9390 fFpuBinary1 = false;
9391 fFpuBinary2 = false;
9392 fFpuOther = false;
9393 fSseFpBinary = false;
9394 fSseFpOther = false;
9395 fSsePcmpxstrx = false;
9396 break;
9397
9398 case 'F':
9399 fFpuLdSt = true;
9400 break;
9401 case 'O':
9402 fFpuOther = true;
9403 break;
9404 case 'B':
9405 fFpuBinary1 = true;
9406 break;
9407 case 'P':
9408 fFpuBinary2 = true;
9409 break;
9410 case 'S':
9411 fSseFpBinary = true;
9412 break;
9413 case 'T':
9414 fSseFpOther = true;
9415 break;
9416 case 'C':
9417 fSsePcmpxstrx = true;
9418 break;
9419 case 'i':
9420 fInt = true;
9421 break;
9422
9423 case 'I':
9424 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
9425 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
9426 RT_ELEMENTS(g_apszIncludeTestPatterns));
9427 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
9428 break;
9429 case 'X':
9430 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
9431 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
9432 RT_ELEMENTS(g_apszExcludeTestPatterns));
9433 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
9434 break;
9435
9436 case 'm':
9437 fCommonData = true;
9438 break;
9439 case 'c':
9440 fCpuData = true;
9441 break;
9442 case 'n':
9443 cTests = ValueUnion.u32;
9444 break;
9445
9446 case 'q':
9447 g_cVerbosity = 0;
9448 break;
9449 case 'v':
9450 g_cVerbosity++;
9451 break;
9452
9453 case 'h':
9454 RTPrintf("usage: %s <-g|-t> [options]\n"
9455 "\n"
9456 "Mode:\n"
9457 " -g, --generate\n"
9458 " Generate test data.\n"
9459 " -t, --test\n"
9460 " Execute tests.\n"
9461 "\n"
9462 "Test selection (both modes):\n"
9463 " -a, --all\n"
9464 " Enable all tests and generated test data. (default)\n"
9465 " -z, --zap, --none\n"
9466 " Disable all tests and test data types.\n"
9467 " -i, --int\n"
9468 " Enable non-FPU tests.\n"
9469 " -F, --fpu-ld-st\n"
9470 " Enable FPU load and store tests.\n"
9471 " -B, --fpu-binary-1\n"
9472 " Enable FPU binary 80-bit FP tests.\n"
9473 " -P, --fpu-binary-2\n"
9474 " Enable FPU binary 64- and 32-bit FP tests.\n"
9475 " -O, --fpu-other\n"
9476 " Enable FPU binary 64- and 32-bit FP tests.\n"
9477 " -S, --sse-fp-binary\n"
9478 " Enable SSE binary 64- and 32-bit FP tests.\n"
9479 " -T, --sse-fp-other\n"
9480 " Enable misc SSE 64- and 32-bit FP tests.\n"
9481 " -C, --sse-pcmpxstrx\n"
9482 " Enable SSE pcmpxstrx tests.\n"
9483 " -I,--include=<test-patter>\n"
9484 " Enable tests matching the given pattern.\n"
9485 " -X,--exclude=<test-patter>\n"
9486 " Skip tests matching the given pattern (overrides --include).\n"
9487 "\n"
9488 "Generation:\n"
9489 " -m, --common\n"
9490 " Enable generating common test data.\n"
9491 " -c, --only-cpu\n"
9492 " Enable generating CPU specific test data.\n"
9493 " -n, --number-of-test <count>\n"
9494 " Number of tests to generate. Default: %u\n"
9495 "\n"
9496 "Other:\n"
9497 " -v, --verbose\n"
9498 " -q, --quiet\n"
9499 " Noise level. Default: --quiet\n"
9500 , argv[0], cDefaultTests);
9501 return RTEXITCODE_SUCCESS;
9502 default:
9503 return RTGetOptPrintError(rc, &ValueUnion);
9504 }
9505 }
9506
9507 /*
9508 * Generate data?
9509 */
9510 if (enmMode == kModeGenerate)
9511 {
9512#ifdef TSTIEMAIMPL_WITH_GENERATOR
9513 char szCpuDesc[256] = {0};
9514 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
9515 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
9516# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
9517 const char * const pszBitBucket = "NUL";
9518# else
9519 const char * const pszBitBucket = "/dev/null";
9520# endif
9521
9522 if (cTests == 0)
9523 cTests = cDefaultTests;
9524 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
9525 g_cZeroSrcTests = g_cZeroDstTests * 2;
9526
9527 if (fInt)
9528 {
9529 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
9530 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9531 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9532 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
9533 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9534 if (!pStrmData || !pStrmDataCpu)
9535 return RTEXITCODE_FAILURE;
9536
9537 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
9538 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
9539 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
9540 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
9541 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
9542 UnaryGenerate(pStrmData, cTests);
9543 ShiftGenerate(pStrmDataCpu, cTests);
9544 MulDivGenerate(pStrmDataCpu, cTests);
9545
9546 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9547 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9548 if (rcExit != RTEXITCODE_SUCCESS)
9549 return rcExit;
9550 }
9551
9552 if (fFpuLdSt)
9553 {
9554 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
9555 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9556 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9557 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
9558 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9559 if (!pStrmData || !pStrmDataCpu)
9560 return RTEXITCODE_FAILURE;
9561
9562 FpuLdConstGenerate(pStrmData, cTests);
9563 FpuLdIntGenerate(pStrmData, cTests);
9564 FpuLdD80Generate(pStrmData, cTests);
9565 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
9566 FpuStD80Generate(pStrmData, cTests);
9567 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9568 FpuLdMemGenerate(pStrmData, cTests2);
9569 FpuStMemGenerate(pStrmData, cTests2);
9570
9571 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9572 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9573 if (rcExit != RTEXITCODE_SUCCESS)
9574 return rcExit;
9575 }
9576
9577 if (fFpuBinary1)
9578 {
9579 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
9580 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9581 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9582 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
9583 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9584 if (!pStrmData || !pStrmDataCpu)
9585 return RTEXITCODE_FAILURE;
9586
9587 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9588 FpuBinaryFswR80Generate(pStrmData, cTests);
9589 FpuBinaryEflR80Generate(pStrmData, cTests);
9590
9591 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9592 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9593 if (rcExit != RTEXITCODE_SUCCESS)
9594 return rcExit;
9595 }
9596
9597 if (fFpuBinary2)
9598 {
9599 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
9600 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9601 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9602 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
9603 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9604 if (!pStrmData || !pStrmDataCpu)
9605 return RTEXITCODE_FAILURE;
9606
9607 FpuBinaryR64Generate(pStrmData, cTests);
9608 FpuBinaryR32Generate(pStrmData, cTests);
9609 FpuBinaryI32Generate(pStrmData, cTests);
9610 FpuBinaryI16Generate(pStrmData, cTests);
9611 FpuBinaryFswR64Generate(pStrmData, cTests);
9612 FpuBinaryFswR32Generate(pStrmData, cTests);
9613 FpuBinaryFswI32Generate(pStrmData, cTests);
9614 FpuBinaryFswI16Generate(pStrmData, cTests);
9615
9616 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9617 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9618 if (rcExit != RTEXITCODE_SUCCESS)
9619 return rcExit;
9620 }
9621
9622 if (fFpuOther)
9623 {
9624 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
9625 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9626 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9627 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
9628 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9629 if (!pStrmData || !pStrmDataCpu)
9630 return RTEXITCODE_FAILURE;
9631
9632 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9633 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
9634 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
9635
9636 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9637 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9638 if (rcExit != RTEXITCODE_SUCCESS)
9639 return rcExit;
9640 }
9641
9642 if (fSseFpBinary)
9643 {
9644 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
9645
9646 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9647 if (rcExit == RTEXITCODE_SUCCESS)
9648 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9649 if (rcExit == RTEXITCODE_SUCCESS)
9650 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9651 if (rcExit == RTEXITCODE_SUCCESS)
9652 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9653
9654 if (rcExit == RTEXITCODE_SUCCESS)
9655 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9656 if (rcExit == RTEXITCODE_SUCCESS)
9657 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9658 if (rcExit == RTEXITCODE_SUCCESS)
9659 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9660 if (rcExit == RTEXITCODE_SUCCESS)
9661 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9662
9663 if (rcExit == RTEXITCODE_SUCCESS)
9664 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9665 if (rcExit == RTEXITCODE_SUCCESS)
9666 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9667 if (rcExit == RTEXITCODE_SUCCESS)
9668 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9669 if (rcExit == RTEXITCODE_SUCCESS)
9670 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9671 if (rcExit != RTEXITCODE_SUCCESS)
9672 return rcExit;
9673 }
9674
9675 if (fSseFpOther)
9676 {
9677 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin" : pszBitBucket;
9678 const char *pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin" : pszBitBucket;
9679
9680 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
9681 if (rcExit == RTEXITCODE_SUCCESS)
9682 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
9683 if (rcExit == RTEXITCODE_SUCCESS)
9684 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
9685 if (rcExit == RTEXITCODE_SUCCESS)
9686 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
9687 if (rcExit == RTEXITCODE_SUCCESS)
9688 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
9689 if (rcExit == RTEXITCODE_SUCCESS)
9690 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
9691 if (rcExit == RTEXITCODE_SUCCESS)
9692 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
9693 if (rcExit == RTEXITCODE_SUCCESS)
9694 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
9695 if (rcExit == RTEXITCODE_SUCCESS)
9696 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
9697 if (rcExit == RTEXITCODE_SUCCESS)
9698 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
9699 if (rcExit == RTEXITCODE_SUCCESS)
9700 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
9701 if (rcExit == RTEXITCODE_SUCCESS)
9702 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
9703 if (rcExit != RTEXITCODE_SUCCESS)
9704 return rcExit;
9705 }
9706
9707 if (fSsePcmpxstrx)
9708 {
9709 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSsePcmpxstrx-%s.bin" : pszBitBucket;
9710
9711 RTEXITCODE rcExit = SseComparePcmpistriGenerate(pszDataFileFmtCmp, cTests);
9712 if (rcExit == RTEXITCODE_SUCCESS)
9713 rcExit = SseComparePcmpistrmGenerate(pszDataFileFmtCmp, cTests);
9714 if (rcExit == RTEXITCODE_SUCCESS)
9715 rcExit = SseComparePcmpestriGenerate(pszDataFileFmtCmp, cTests);
9716 if (rcExit == RTEXITCODE_SUCCESS)
9717 rcExit = SseComparePcmpestrmGenerate(pszDataFileFmtCmp, cTests);
9718 if (rcExit != RTEXITCODE_SUCCESS)
9719 return rcExit;
9720 }
9721
9722 return RTEXITCODE_SUCCESS;
9723#else
9724 return RTMsgErrorExitFailure("Test data generator not compiled in!");
9725#endif
9726 }
9727
9728 /*
9729 * Do testing.  Currently disabled by default as data needs to be checked
9730 * on both Intel and AMD systems first.
9731 */
9732 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
9733 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9734 if (enmMode == kModeTest)
9735 {
9736 RTTestBanner(g_hTest);
9737
9738 /* Allocate guarded memory for use in the tests. */
9739#define ALLOC_GUARDED_VAR(a_puVar) do { \
9740 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
9741 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
9742 } while (0)
9743 ALLOC_GUARDED_VAR(g_pu8);
9744 ALLOC_GUARDED_VAR(g_pu16);
9745 ALLOC_GUARDED_VAR(g_pu32);
9746 ALLOC_GUARDED_VAR(g_pu64);
9747 ALLOC_GUARDED_VAR(g_pu128);
9748 ALLOC_GUARDED_VAR(g_pu8Two);
9749 ALLOC_GUARDED_VAR(g_pu16Two);
9750 ALLOC_GUARDED_VAR(g_pu32Two);
9751 ALLOC_GUARDED_VAR(g_pu64Two);
9752 ALLOC_GUARDED_VAR(g_pu128Two);
9753 ALLOC_GUARDED_VAR(g_pfEfl);
9754 if (RTTestErrorCount(g_hTest) == 0)
9755 {
9756 if (fInt)
9757 {
9758 BinU8Test();
9759 BinU16Test();
9760 BinU32Test();
9761 BinU64Test();
9762 XchgTest();
9763 XaddTest();
9764 CmpXchgTest();
9765 CmpXchg8bTest();
9766 CmpXchg16bTest();
9767 ShiftDblTest();
9768 UnaryTest();
9769 ShiftTest();
9770 MulDivTest();
9771 BswapTest();
9772 }
9773
9774 if (fFpuLdSt)
9775 {
9776 FpuLoadConstTest();
9777 FpuLdMemTest();
9778 FpuLdIntTest();
9779 FpuLdD80Test();
9780 FpuStMemTest();
9781 FpuStIntTest();
9782 FpuStD80Test();
9783 }
9784
9785 if (fFpuBinary1)
9786 {
9787 FpuBinaryR80Test();
9788 FpuBinaryFswR80Test();
9789 FpuBinaryEflR80Test();
9790 }
9791
9792 if (fFpuBinary2)
9793 {
9794 FpuBinaryR64Test();
9795 FpuBinaryR32Test();
9796 FpuBinaryI32Test();
9797 FpuBinaryI16Test();
9798 FpuBinaryFswR64Test();
9799 FpuBinaryFswR32Test();
9800 FpuBinaryFswI32Test();
9801 FpuBinaryFswI16Test();
9802 }
9803
9804 if (fFpuOther)
9805 {
9806 FpuUnaryR80Test();
9807 FpuUnaryFswR80Test();
9808 FpuUnaryTwoR80Test();
9809 }
9810
9811 if (fSseFpBinary)
9812 {
9813 SseBinaryR32Test();
9814 SseBinaryR64Test();
9815 SseBinaryU128R32Test();
9816 SseBinaryU128R64Test();
9817
9818 SseBinaryI32R64Test();
9819 SseBinaryI64R64Test();
9820 SseBinaryI32R32Test();
9821 SseBinaryI64R32Test();
9822
9823 SseBinaryR64I32Test();
9824 SseBinaryR64I64Test();
9825 SseBinaryR32I32Test();
9826 SseBinaryR32I64Test();
9827 }
9828
9829 if (fSseFpOther)
9830 {
9831 SseCompareEflR32R32Test();
9832 SseCompareEflR64R64Test();
9833 SseCompareEflR64R64Test();
9834 SseCompareF2XmmR32Imm8Test();
9835 SseCompareF2XmmR64Imm8Test();
9836 SseConvertXmmI32R32Test();
9837 SseConvertXmmR32I32Test();
9838 SseConvertXmmI32R64Test();
9839 SseConvertXmmR64I32Test();
9840 SseConvertMmXmmTest();
9841 SseConvertXmmR32MmTest();
9842 SseConvertXmmR64MmTest();
9843 SseConvertMmI32XmmR32Test();
9844 }
9845
9846 if (fSsePcmpxstrx)
9847 {
9848 SseComparePcmpistriTest();
9849 SseComparePcmpistrmTest();
9850 SseComparePcmpestriTest();
9851 SseComparePcmpestrmTest();
9852 }
9853 }
9854 return RTTestSummaryAndDestroy(g_hTest);
9855 }
9856 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
9857}
9858
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette