VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 104208

Last change on this file since 104208 was 104208, checked in by vboxsync, 12 months ago

VMM/IEM: Refactoring assembly helpers to not pass eflags by reference but instead by value and return the updated value (via eax/w0) - fourth chunk: ARPL, ADCX, ADOX. bugref:10376

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 489.2 KB
Line 
1/* $Id: tstIEMAImpl.cpp 104208 2024-04-05 21:17:41Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/buildconfig.h>
38#include <iprt/ctype.h>
39#include <iprt/err.h>
40#include <iprt/getopt.h>
41#include <iprt/initterm.h>
42#include <iprt/file.h>
43#include <iprt/mem.h>
44#include <iprt/message.h>
45#include <iprt/mp.h>
46#include <iprt/rand.h>
47#include <iprt/stream.h>
48#include <iprt/string.h>
49#include <iprt/test.h>
50#include <iprt/time.h>
51#include <iprt/thread.h>
52#include <iprt/vfs.h>
53#include <iprt/zip.h>
54#include <VBox/version.h>
55
56#include "tstIEMAImpl.h"
57
58
59/*********************************************************************************************************************************
60* Defined Constants And Macros *
61*********************************************************************************************************************************/
/* Entry with fixed tests: generator builds include the fixed-test array,
   otherwise this degenerates to a plain ENTRY_BIN_EX. */
#define ENTRY_BIN_FIX(a_Name) ENTRY_BIN_FIX_EX(a_Name, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) ENTRY_BIN_EX(a_Name, a_uExtra)
#endif

/* Entry whose worker function pointer must be cast to the table's
   function pointer type. */
#define ENTRY_BIN_PFN_CAST(a_Name, a_pfnType) ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, 0)
#define ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
    { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/* Plain binary test table entry. */
#define ENTRY_BIN(a_Name) ENTRY_BIN_EX(a_Name, 0)
#define ENTRY_BIN_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/* AVX entry: uses the C fallback worker when assembly helpers are
   compiled out (IEM_WITHOUT_ASSEMBLY). */
#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif

/* SSE entry with optional assembly implementation; same fallback scheme
   as ENTRY_BIN_AVX_EX. */
#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif

/* Intel EFLAGS-flavoured entry: tests iemAImpl_xxx_intel with plain
   iemAImpl_xxx as the native reference worker. */
#define ENTRY_BIN_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/* AMD EFLAGS-flavoured entry: tests iemAImpl_xxx_amd with plain
   iemAImpl_xxx as the native reference worker. */
#define ENTRY_BIN_AMD(a_Name, a_fEflUndef) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/* Intel flavoured entry with fixed tests (generator builds only). */
#define ENTRY_BIN_FIX_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra)
#endif

/* AMD flavoured entry with fixed tests (generator builds only). */
#define ENTRY_BIN_FIX_AMD(a_Name, a_fEflUndef) ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra)
#endif
144
145
/** Declares a subtest table-entry struct type with the given type name, test
 *  record type and worker function pointer type.  The member layout must match
 *  the ENTRY_BIN_* initializer macros above. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char *pszName; \
        const a_FunctionPtrType pfn; \
        const a_FunctionPtrType pfnNative; \
        void const * const pvCompressedTests; \
        uint32_t const *pcbCompressedTests; \
        uint32_t const uExtra; \
        uint8_t const idxCpuEflFlavour; \
        uint16_t const cFixedTests; \
        a_TestType const * const paFixedTests; \
        a_TestType const *paTests; /**< The decompressed info. */ \
        uint32_t cTests; /**< The decompressed info. */ \
        IEMTESTENTRYINFO Info; \
    } a_TypeName

/** Number of variations to run for a subtest: 1, or 2 when a native worker is
 *  present and its EFLAGS flavour matches the host CPU's flavour. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
165
166
167/*********************************************************************************************************************************
168* Structures and Typedefs *
169*********************************************************************************************************************************/
/** Header at the start of a binary test-data file (128 bytes). */
typedef struct IEMBINARYHEADER
{
    char        szMagic[16];    /**< IEMBINARYHEADER_MAGIC (incl. terminator). */
    uint32_t    cbEntry;        /**< Size of one test entry, in bytes. */
    uint32_t    uSvnRev;        /**< SVN revision the data was generated by. */
    uint32_t    auUnused[6];    /**< Reserved/unused fields. */
    char        szCpuDesc[80];  /**< Description of the generating CPU. */
} IEMBINARYHEADER;
AssertCompileSize(IEMBINARYHEADER, 128);

                              // 01234567890123456
#define IEMBINARYHEADER_MAGIC "IEMAImpl Bin v1"
AssertCompile(sizeof(IEMBINARYHEADER_MAGIC) == 16);
183
184
/** Footer at the end of a binary test-data file (32 bytes), used to
 *  cross-check the entry size and entry count. */
typedef struct IEMBINARYFOOTER
{
    char        szMagic[24];    /**< IEMBINARYFOOTER_MAGIC (incl. terminator). */
    uint32_t    cbEntry;        /**< Size of one test entry, in bytes. */
    uint32_t    cEntries;       /**< Number of test entries in the file. */
} IEMBINARYFOOTER;
AssertCompileSize(IEMBINARYFOOTER, 32);
                               // 012345678901234567890123
#define IEMBINARYFOOTER_MAGIC "\nIEMAImpl Bin Footer v1"
AssertCompile(sizeof(IEMBINARYFOOTER_MAGIC) == 24);
195
196
/** Fixed part of TYPEDEF_SUBTEST_TYPE and friends. */
typedef struct IEMTESTENTRYINFO
{
    void       *pvUncompressed;   /**< The decompressed test data. */
    uint32_t    cbUncompressed;   /**< Size of the decompressed data, in bytes. */
    const char *pszCpuDesc;       /**< CPU description (from the binary header). */
    uint32_t    uSvnRev;          /**< Generator SVN revision (from the binary header). */
} IEMTESTENTRYINFO;
205
206
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** State for writing one compressed binary test-data output file. */
typedef struct IEMBINARYOUTPUT
{
    /** The output file. */
    RTVFSFILE       hVfsFile;
    /** The stream we write uncompressed binary test data to. */
    RTVFSIOSTREAM   hVfsUncompressed;
    /** The number of bytes written (ignoring write failures). */
    size_t          cbWritten;
    /** The entry size. */
    uint32_t        cbEntry;
    /** Write status. */
    int             rcWrite;
    /** Set if NULL (i.e. no real output; presumably a dummy sink — confirm at use site). */
    bool            fNull;
    /** Set if we wrote a header and should write a footer as well. */
    bool            fWroteHeader;
    /** Filename. */
    char            szFilename[94];
} IEMBINARYOUTPUT;
typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
229
230
231/*********************************************************************************************************************************
232* Global Variables *
233*********************************************************************************************************************************/
234static RTTEST g_hTest;
235static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
236#ifdef TSTIEMAIMPL_WITH_GENERATOR
237static uint32_t g_cZeroDstTests = 2;
238static uint32_t g_cZeroSrcTests = 4;
239#endif
240static uint8_t *g_pu8, *g_pu8Two;
241static uint16_t *g_pu16, *g_pu16Two;
242static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
243static uint64_t *g_pu64, *g_pu64Two;
244static RTUINT128U *g_pu128, *g_pu128Two;
245
246static char g_aszBuf[32][256];
247static unsigned g_idxBuf = 0;
248
249static uint32_t g_cIncludeTestPatterns;
250static uint32_t g_cExcludeTestPatterns;
251static const char *g_apszIncludeTestPatterns[64];
252static const char *g_apszExcludeTestPatterns[64];
253
254/** Higher value, means longer benchmarking. */
255static uint64_t g_cPicoSecBenchmark = 0;
256
257static unsigned g_cVerbosity = 0;
258static bool g_fVerboseSkipping = true;
259
260
261#ifdef TSTIEMAIMPL_WITH_GENERATOR
262/** The SVN revision (for use in the binary headers). */
263static uint32_t g_uSvnRev = 0;
264/** The CPU description (for use in the binary headers). */
265static char g_szCpuDesc[80] = "";
266#endif
267
268
269/*********************************************************************************************************************************
270* Internal Functions *
271*********************************************************************************************************************************/
272static const char *FormatR80(PCRTFLOAT80U pr80);
273static const char *FormatR64(PCRTFLOAT64U pr64);
274static const char *FormatR32(PCRTFLOAT32U pr32);
275
276
277/*
278 * Random helpers.
279 */
280
281static uint32_t RandEFlags(void)
282{
283 uint32_t fEfl = RTRandU32();
284 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
285}
286
287#ifdef TSTIEMAIMPL_WITH_GENERATOR
288
289static uint8_t RandU8(void)
290{
291 return RTRandU32Ex(0, 0xff);
292}
293
294
295static uint16_t RandU16(void)
296{
297 return RTRandU32Ex(0, 0xffff);
298}
299
300
301static uint32_t RandU32(void)
302{
303 return RTRandU32();
304}
305
306#endif
307
308static uint64_t RandU64(void)
309{
310 return RTRandU64();
311}
312
313
314static RTUINT128U RandU128(void)
315{
316 RTUINT128U Ret;
317 Ret.s.Hi = RTRandU64();
318 Ret.s.Lo = RTRandU64();
319 return Ret;
320}
321
322#ifdef TSTIEMAIMPL_WITH_GENERATOR
323
324static uint8_t RandU8Dst(uint32_t iTest)
325{
326 if (iTest < g_cZeroDstTests)
327 return 0;
328 return RandU8();
329}
330
331
332static uint8_t RandU8Src(uint32_t iTest)
333{
334 if (iTest < g_cZeroSrcTests)
335 return 0;
336 return RandU8();
337}
338
339
340static uint16_t RandU16Dst(uint32_t iTest)
341{
342 if (iTest < g_cZeroDstTests)
343 return 0;
344 return RandU16();
345}
346
347
348static uint16_t RandU16Src(uint32_t iTest)
349{
350 if (iTest < g_cZeroSrcTests)
351 return 0;
352 return RandU16();
353}
354
355
356static uint32_t RandU32Dst(uint32_t iTest)
357{
358 if (iTest < g_cZeroDstTests)
359 return 0;
360 return RandU32();
361}
362
363
364static uint32_t RandU32Src(uint32_t iTest)
365{
366 if (iTest < g_cZeroSrcTests)
367 return 0;
368 return RandU32();
369}
370
371
372static uint64_t RandU64Dst(uint32_t iTest)
373{
374 if (iTest < g_cZeroDstTests)
375 return 0;
376 return RandU64();
377}
378
379
380static uint64_t RandU64Src(uint32_t iTest)
381{
382 if (iTest < g_cZeroSrcTests)
383 return 0;
384 return RandU64();
385}
386
387
388/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
389static int16_t RandI16Src2(uint32_t iTest)
390{
391 if (iTest < 18 * 4)
392 switch (iTest % 4)
393 {
394 case 0: return 0;
395 case 1: return INT16_MAX;
396 case 2: return INT16_MIN;
397 case 3: break;
398 }
399 return (int16_t)RandU16();
400}
401
402
403/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
404static int32_t RandI32Src2(uint32_t iTest)
405{
406 if (iTest < 18 * 4)
407 switch (iTest % 4)
408 {
409 case 0: return 0;
410 case 1: return INT32_MAX;
411 case 2: return INT32_MIN;
412 case 3: break;
413 }
414 return (int32_t)RandU32();
415}
416
417
418static int64_t RandI64Src(uint32_t iTest)
419{
420 RT_NOREF(iTest);
421 return (int64_t)RandU64();
422}
423
424
425static uint16_t RandFcw(void)
426{
427 return RandU16() & ~X86_FCW_ZERO_MASK;
428}
429
430
431static uint16_t RandFsw(void)
432{
433 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
434 return RandU16();
435}
436
437
438static uint32_t RandMxcsr(void)
439{
440 return RandU32() & ~X86_MXCSR_ZERO_MASK;
441}
442
443
444static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
445{
446 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
447 pr80->sj64.uFraction >>= cShift;
448 else
449 pr80->sj64.uFraction = (cShift % 19) + 1;
450}
451
452
453
/**
 * Generates a random 80-bit floating point value of the class selected by
 * bType (masked to 0..31):
 *   0..3   zero / pseudo-infinity / infinity / indefinite,
 *   4..7   denormals and pseudo-denormals,
 *   8..9   pseudo NaNs,
 *   10..13 quiet and signalling NaNs,
 *   14..15 unnormals,
 *   16..25 normals (16 targets rounding extremes),
 *   26..31 leaves the raw random bits untouched.
 *
 * @param   bType       Value-class selector, see above.
 * @param   cTarget     Target width the value will be converted to (80/64/32,
 *                      or 16/59 for integer targets); used to clamp exponents.
 * @param   fIntTarget  Set if the target is an integer format.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the class adjustment below only rewrites
       the fields it needs, so the sign (and often fraction) stays random. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert(   bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62); /* odd variants: shrink the fraction */
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1); /* a denormal needs a non-zero fraction */
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6; /* integer bit set => pseudo-denormal */
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType <  6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Keep the top fraction bit consistent with the original integer bit
           so the value stays a NaN (not a pseudo-infinity) after clearing
           fInteger below. */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0; /* cleared integer bit is what makes it "pseudo" */
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType <  12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0; /* cleared integer bit with a normal exponent = unnormal */
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value stays representable (as a normal
               or subnormal) in the narrower target format. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: clamp the magnitude into the target's range. */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    return r80;
}
592
593
594static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
595{
596 /*
597 * Make it more likely that we get a good selection of special values.
598 */
599 return RandR80Ex(RandU8(), cTarget, fIntTarget);
600
601}
602
603
604static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
605{
606 /* Make sure we cover all the basic types first before going for random selection: */
607 if (iTest <= 18)
608 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
609 return RandR80(cTarget, fIntTarget);
610}
611
612
613/**
614 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
615 * to a 0..17, covering all basic value types.
616 */
617static uint8_t RandR80Src12RemapType(uint8_t bType)
618{
619 switch (bType)
620 {
621 case 0: return 18; /* normal */
622 case 1: return 16; /* normal extreme rounding */
623 case 2: return 14; /* unnormal */
624 case 3: return 12; /* Signalling NaN */
625 case 4: return 10; /* Quiet NaN */
626 case 5: return 8; /* PseudoNaN */
627 case 6: return 6; /* Pseudo Denormal */
628 case 7: return 4; /* Denormal */
629 case 8: return 3; /* Indefinite */
630 case 9: return 2; /* Infinity */
631 case 10: return 1; /* Pseudo-Infinity */
632 case 11: return 0; /* Zero */
633 default: AssertFailedReturn(18);
634 }
635}
636
637
/**
 * This works in tandem with RandR80Src2 to make sure we cover all operand
 * type mixes first before we venture into regular random testing.
 *
 * There are 11 basic variations, when we leave out the five odd ones using
 * SafeR80FractionShift.  Because of the special normalized value targetting at
 * rounding, we make it an even 12.  So 144 combinations for two operands.
 */
static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
{
    if (cPartnerBits == 80)
    {
        Assert(!fPartnerInt);
        /* 12x12 grid: this operand is the "outer" index, changing every 12 tests. */
        if (iTest < 12 * 12)
            return RandR80Ex(RandR80Src12RemapType(iTest / 12));
    }
    else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
    {
        /* 12x10 grid for 64/32-bit float partners. */
        if (iTest < 12 * 10)
            return RandR80Ex(RandR80Src12RemapType(iTest / 10));
    }
    else if (iTest < 18 * 4 && fPartnerInt)
        /* Integer partner: walk all 18 basic types, 4 tests each. */
        return RandR80Ex(iTest / 4);
    return RandR80();
}
663
664
665/** Partner to RandR80Src1. */
666static RTFLOAT80U RandR80Src2(uint32_t iTest)
667{
668 if (iTest < 12 * 12)
669 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
670 return RandR80();
671}
672
673
674static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
675{
676 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
677 pr64->s64.uFraction >>= cShift;
678 else
679 pr64->s64.uFraction = (cShift % 19) + 1;
680}
681
682
/**
 * Generates a random 64-bit floating point value of the class selected by
 * bType (masked to 0..15): 0/1 zero or infinity, 2/3 subnormals, 4..7 NaNs,
 * 8..11 normals, 12..15 leaves the raw random bits untouched.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    /* Start from fully random bits; the sign always stays random. */
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1); /* subnormal needs non-zero fraction */
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: 4,5 quiet; 6,7 signalling; odd variants shrink the fraction. */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType <  6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    return r64;
}
739
740
741static RTFLOAT64U RandR64Src(uint32_t iTest)
742{
743 if (iTest < 16)
744 return RandR64Ex(iTest);
745 return RandR64Ex(RandU8());
746}
747
748
749/** Pairing with a 80-bit floating point arg. */
750static RTFLOAT64U RandR64Src2(uint32_t iTest)
751{
752 if (iTest < 12 * 10)
753 return RandR64Ex(9 - iTest % 10); /* start with normal values */
754 return RandR64Ex(RandU8());
755}
756
757
758static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
759{
760 if (pr32->s.uFraction >= RT_BIT_32(cShift))
761 pr32->s.uFraction >>= cShift;
762 else
763 pr32->s.uFraction = (cShift % 19) + 1;
764}
765
766
/**
 * Generates a random 32-bit floating point value of the class selected by
 * bType (masked to 0..15): 0/1 zero or infinity, 2/3 subnormals, 4..7 NaNs,
 * 8..11 normals, 12..15 leaves the raw random bits untouched.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    /* Start from fully random bits; the sign always stays random. */
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1); /* subnormal needs non-zero fraction */
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: 4,5 quiet; 6,7 signalling; odd variants shrink the fraction. */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType <  6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    return r32;
}
822
823
824static RTFLOAT32U RandR32Src(uint32_t iTest)
825{
826 if (iTest < 16)
827 return RandR32Ex(iTest);
828 return RandR32Ex(RandU8());
829}
830
831
832/** Pairing with a 80-bit floating point arg. */
833static RTFLOAT32U RandR32Src2(uint32_t iTest)
834{
835 if (iTest < 12 * 10)
836 return RandR32Ex(9 - iTest % 10); /* start with normal values */
837 return RandR32Ex(RandU8());
838}
839
840
/**
 * Generates a random packed-BCD 80-bit test value: the first three tests are
 * zeros of alternating sign, the next two are the indefinite encoding, and
 * after that a mix of illegal encodings (random pair bytes, sometimes random
 * padding bits) and normal values with valid decimal digit pairs.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    /* Tests 0..2: zeros (sign alternates with iTest). */
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    /* Tests 3..4: the indefinite encoding. */
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: random pair bytes (nibbles may exceed 9); every 8th test
           additionally gets random padding bits. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: valid decimal digits (0..9) in every nibble, zero padding. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
878
879# if 0 /* unused */
880
/**
 * Formats an 80-bit float as a C initializer expression, preferring the named
 * RTFLOAT80U_INIT_* macros for the recognizable special values.
 * Returns a pointer into the rotating g_aszBuf ring (or a string literal).
 */
static const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* Only the canonical NaN payload (low bits == 1) maps to the macros. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
899
900static const char *GenFormatR64(PCRTFLOAT64U prd)
901{
902 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
903 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
904 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
905 return pszBuf;
906}
907
908
909static const char *GenFormatR32(PCRTFLOAT32U pr)
910{
911 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
912 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
913 return pszBuf;
914}
915
916
/**
 * Formats a packed-BCD 80-bit value as an RTPBCD80U_INIT_C()/_EX_C()
 * expression (the _EX_C variant when padding bits are set), listing the digit
 * pairs from the most significant pair down.  Returns a pointer into the
 * rotating g_aszBuf ring.
 */
static const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* Close the macro invocation and terminate the string by hand. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
934
935
936static const char *GenFormatI64(int64_t i64)
937{
938 if (i64 == INT64_MIN) /* This one is problematic */
939 return "INT64_MIN";
940 if (i64 == INT64_MAX)
941 return "INT64_MAX";
942 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
943 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
944 return pszBuf;
945}
946
947# if 0 /* unused */
948static const char *GenFormatI64(int64_t const *pi64)
949{
950 return GenFormatI64(*pi64);
951}
952# endif
953
954static const char *GenFormatI32(int32_t i32)
955{
956 if (i32 == INT32_MIN) /* This one is problematic */
957 return "INT32_MIN";
958 if (i32 == INT32_MAX)
959 return "INT32_MAX";
960 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
961 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
962 return pszBuf;
963}
964
965
/** Pointer overload forwarding to the by-value GenFormatI32(). */
const char *GenFormatI32(int32_t const *pi32)
{
    return GenFormatI32(*pi32);
}
970
971
972const char *GenFormatI16(int16_t i16)
973{
974 if (i16 == INT16_MIN) /* This one is problematic */
975 return "INT16_MIN";
976 if (i16 == INT16_MAX)
977 return "INT16_MAX";
978 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
979 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
980 return pszBuf;
981}
982
983
/** Pointer overload forwarding to the by-value GenFormatI16(). */
const char *GenFormatI16(int16_t const *pi16)
{
    return GenFormatI16(*pi16);
}
988
989
/**
 * Writes the standard file header for a generated test-data source file.
 *
 * @param   pOut        The output stream.
 * @param   pszCpuDesc  Description of the host CPU the data was generated on
 *                      (embedded in the file comment).
 * @param   pszCpuType  Optional CPU type tag for the file comment, NULL if none.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 104208 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev]))
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 104208 2024-04-05 21:17:41Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
                 " *\n"
                 " * This file is part of VirtualBox base platform packages, as\n"
                 " * available from https://www.virtualbox.org.\n"
                 " *\n"
                 " * This program is free software; you can redistribute it and/or\n"
                 " * modify it under the terms of the GNU General Public License\n"
                 " * as published by the Free Software Foundation, in version 3 of the\n"
                 " * License.\n"
                 " *\n"
                 " * This program is distributed in the hope that it will be useful, but\n"
                 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
                 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
                 " * General Public License for more details.\n"
                 " *\n"
                 " * You should have received a copy of the GNU General Public License\n"
                 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
                 " *\n"
                 " * SPDX-License-Identifier: GPL-3.0-only\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
1032
1033
1034static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
1035{
1036 PRTSTREAM pOut = NULL;
1037 int rc = RTStrmOpen(pszFilename, "w", &pOut);
1038 if (RT_SUCCESS(rc))
1039 {
1040 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
1041 return pOut;
1042 }
1043 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
1044 return NULL;
1045}
1046
1047
1048static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
1049{
1050 RTStrmPrintf(pOut,
1051 "\n"
1052 "/* end of file */\n");
1053 int rc = RTStrmClose(pOut);
1054 if (RT_SUCCESS(rc))
1055 return rcExit;
1056 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
1057}
1058
1059
/** Emits the opening of a test data array: '<type> const g_aTests_<name>[] = {'. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
1064
1065
/** Emits the closing brace of a test data array plus the matching
 *  g_cTests_<name> element-count variable. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
1074
1075# endif /* unused */
1076
1077static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1078{
1079 pBinOut->cbWritten += cbData; /* ignore errors - makes entry calculation simpler */
1080 if (RT_SUCCESS_NP(pBinOut->rcWrite))
1081 {
1082 pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /*fBlocking*/, NULL);
1083 if (RT_SUCCESS(pBinOut->rcWrite))
1084 return;
1085 RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1086 }
1087}
1088
/**
 * Opens a gzip-compressed binary test-data output file.
 *
 * A NULL @a pszFilenameFmt selects the /dev/null mode: the call succeeds but
 * all subsequent writes are discarded (rcWrite stays VERR_IGNORED).
 *
 * @returns true on success, false on failure (error already displayed and
 *          any partially created file deleted).
 * @param   pBinOut          The output state to initialize.
 * @param   pszFilenameFmt   Format string taking @a pszName, or NULL.
 * @param   pszName          The test name to insert into the filename.
 * @param   pInfoToPreserve  Header info from previously loaded data to carry
 *                           over, or NULL to use the current generator values.
 * @param   cbEntry          Size of a single test entry in bytes.
 */
static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char *pszFilenameFmt, const char *pszName,
                               IEMTESTENTRYINFO const *pInfoToPreserve, uint32_t cbEntry)
{
    pBinOut->cbEntry          = cbEntry;
    pBinOut->cbWritten        = 0;
    pBinOut->hVfsFile         = NIL_RTVFSFILE;
    pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
    if (pszFilenameFmt)
    {
        pBinOut->fNull = false;
        if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
        {
            RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
            pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
                                                   RTFILE_O_CREATE_REPLACE | RTFILE_O_WRITE | RTFILE_O_DENY_READWRITE,
                                                   &pBinOut->hVfsFile);
            if (RT_SUCCESS(pBinOut->rcWrite))
            {
                /* Layer a gzip compressor on top of the file stream; writes go
                   thru hVfsUncompressed and land compressed in hVfsFile. */
                RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
                if (hVfsIoFile != NIL_RTVFSIOSTREAM)
                {
                    pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /*fFlags*/, 9, &pBinOut->hVfsUncompressed);
                    RTVfsIoStrmRelease(hVfsIoFile);
                    if (RT_SUCCESS(pBinOut->rcWrite))
                    {
                        pBinOut->rcWrite     = VINF_SUCCESS;
                        pBinOut->fWroteHeader = false;

                        /* Write the header if applicable. */
                        if (   !pInfoToPreserve
                            || (pInfoToPreserve->uSvnRev != 0 && *pInfoToPreserve->pszCpuDesc))
                        {
                            IEMBINARYHEADER Hdr;
                            RT_ZERO(Hdr);
                            memcpy(Hdr.szMagic, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC));
                            Hdr.cbEntry = cbEntry;
                            Hdr.uSvnRev = pInfoToPreserve ? pInfoToPreserve->uSvnRev : g_uSvnRev;
                            RTStrCopy(Hdr.szCpuDesc, sizeof(Hdr.szCpuDesc),
                                      pInfoToPreserve ? pInfoToPreserve->pszCpuDesc : g_szCpuDesc);
                            GenerateBinaryWrite(pBinOut, &Hdr, sizeof(Hdr));
                            pBinOut->fWroteHeader = true;
                        }

                        return true;
                    }

                    RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
                }
                else
                {
                    RTMsgError("RTVfsFileToIoStream failed!");
                    pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
                }
                /* Clean up the partially created file on failure. */
                RTVfsFileRelease(pBinOut->hVfsFile);
                RTFileDelete(pBinOut->szFilename);
            }
            else
                RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
        }
        else
        {
            RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
            pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
        }
        return false;
    }
    RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
    pBinOut->rcWrite       = VERR_IGNORED;
    pBinOut->fNull         = true;
    pBinOut->fWroteHeader  = false;
    pBinOut->szFilename[0] = '\0';
    return true;
}
1162
/** Convenience wrapper around GenerateBinaryOpen() for a sub-test table
 *  entry; passes NULL header info since this is a fresh generation. */
# define GENERATE_BINARY_OPEN(a_pBinOut, a_papszNameFmts, a_Entry) \
        GenerateBinaryOpen((a_pBinOut), a_papszNameFmts[(a_Entry).idxCpuEflFlavour], (a_Entry).pszName, \
                           NULL /*pInfo*/, sizeof((a_Entry).paTests[0]))
1166
/**
 * Writes the binary footer (when a header was written), flushes and closes
 * the compressed output.
 *
 * @returns true if flushing, closing and all earlier writes succeeded.
 */
static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
{
    if (!pBinOut->fNull)
    {
        /* Write footer if we've written a header. */
        if (pBinOut->fWroteHeader)
        {
            IEMBINARYFOOTER Ftr;
            RT_ZERO(Ftr);
            memcpy(Ftr.szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC));
            Ftr.cbEntry  = pBinOut->cbEntry;
            /* Entry count derived from the byte tally (header excluded). */
            Ftr.cEntries = (uint32_t)((pBinOut->cbWritten - sizeof(IEMBINARYHEADER)) / pBinOut->cbEntry);
            Assert(Ftr.cEntries * pBinOut->cbEntry + sizeof(IEMBINARYHEADER) == pBinOut->cbWritten);
            GenerateBinaryWrite(pBinOut, &Ftr, sizeof(Ftr));
        }

        /* This is rather jovial about rcWrite. */
        int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
        RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
        pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
        if (RT_FAILURE(rc1))
            RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);

        int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
        RTVfsFileRelease(pBinOut->hVfsFile);
        pBinOut->hVfsFile = NIL_RTVFSFILE;
        if (RT_FAILURE(rc2))
            RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);

        return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
    }
    return true;
}
1200
/** Helper for DumpAll: emits a <name>DumpAll() function that decompresses each
 *  sub-test's data and re-writes it to per-test binary files, preserving the
 *  original header info (SVN revision + CPU description). */
# define DUMP_ALL_FN(a_FnBaseName, a_aSubTests) \
    static RTEXITCODE a_FnBaseName ## DumpAll(const char * const * papszNameFmts) \
    { \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
        { \
            AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
            IEMBINARYOUTPUT BinOut; \
            AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], \
                                            a_aSubTests[iFn].pszName, &a_aSubTests[iFn].Info, \
                                            sizeof(a_aSubTests[iFn].paTests[0])), \
                         RTEXITCODE_FAILURE); \
            /* Note! GenerateBinaryWrite takes a byte count, so the entry count \
                     must be scaled by the entry size here. */ \
            GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, \
                                a_aSubTests[iFn].cTests * sizeof(a_aSubTests[iFn].paTests[0])); \
            FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
            AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
        } \
        return RTEXITCODE_SUCCESS; \
    }
1219#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1220
1221
1222/*
1223 * Test helpers.
1224 */
1225static bool IsTestEnabled(const char *pszName)
1226{
1227 /* Process excludes first: */
1228 uint32_t i = g_cExcludeTestPatterns;
1229 while (i-- > 0)
1230 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1231 return false;
1232
1233 /* If no include patterns, everything is included: */
1234 i = g_cIncludeTestPatterns;
1235 if (!i)
1236 return true;
1237
1238 /* Otherwise only tests in the include patters gets tested: */
1239 while (i-- > 0)
1240 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1241 return true;
1242
1243 return false;
1244}
1245
1246
1247static bool SubTestAndCheckIfEnabled(const char *pszName)
1248{
1249 bool const fEnabled = IsTestEnabled(pszName);
1250 if (g_fVerboseSkipping || fEnabled)
1251 {
1252 RTTestSub(g_hTest, pszName);
1253 if (fEnabled)
1254 return true;
1255 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1256 }
1257 return false;
1258}
1259
1260
1261/** Decompresses test data before use as required. */
1262static int DecompressBinaryTest(void const *pvCompressed, uint32_t cbCompressed, size_t cbEntry, const char *pszWhat,
1263 void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1264{
1265 /* Don't do it again. */
1266 if (pInfo->pvUncompressed && *ppvTests)
1267 return VINF_SUCCESS;
1268
1269 /* Open a memory stream for the compressed binary data. */
1270 RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
1271 int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, pvCompressed, cbCompressed, &hVfsIos);
1272 RTTESTI_CHECK_RC_OK_RET(rc, rc);
1273
1274 /* Open a decompressed stream for it. */
1275 RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
1276 rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
1277 RTTESTI_CHECK_RC_OK(rc);
1278 if (RT_SUCCESS(rc))
1279 {
1280 /* Initial output buffer allocation. */
1281 size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
1282 uint8_t *pbDecompressed = (uint8_t *)RTMemAllocZ(cbDecompressedAlloc);
1283 if (pbDecompressed)
1284 {
1285 size_t off = 0;
1286 for (;;)
1287 {
1288 size_t cbRead = 0;
1289 rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /*fBlocking*/, &cbRead);
1290 if (RT_FAILURE(rc))
1291 break;
1292 if (rc == VINF_EOF && cbRead == 0)
1293 break;
1294 off += cbRead;
1295
1296 if (cbDecompressedAlloc < off + 256)
1297 {
1298 size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
1299 void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
1300 AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
1301 cbDecompressedAlloc = cbNew;
1302 pbDecompressed = (uint8_t *)pvNew;
1303 }
1304 }
1305 if (RT_SUCCESS(rc))
1306 {
1307 size_t const cbUncompressed = off;
1308
1309 /* Validate the header and footer if present and subtract them from 'off'. */
1310 IEMBINARYHEADER const *pHdr = NULL;
1311 if ( off >= sizeof(IEMTESTENTRYINFO)
1312 && memcmp(pbDecompressed, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC)) == 0)
1313 {
1314 pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1315 IEMBINARYFOOTER const *pFtr = (IEMBINARYFOOTER const *)&pbDecompressed[off - sizeof(IEMBINARYFOOTER)];
1316
1317 off -= sizeof(*pHdr) + sizeof(*pFtr);
1318 rc = VERR_IO_BAD_UNIT;
1319 if (pHdr->cbEntry != cbEntry)
1320 RTTestIFailed("Test entry size differs for '%s': %#x (header r%u), expected %#zx (uncompressed size %#zx)",
1321 pszWhat, pHdr->cbEntry, pHdr->uSvnRev, cbEntry, off + sizeof(*pHdr) + sizeof(*pFtr));
1322 else if (memcmp(pFtr->szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC)) != 0)
1323 RTTestIFailed("Wrong footer magic for '%s': %.*Rhxs\n", pszWhat, sizeof(pFtr->szMagic), pFtr->szMagic);
1324 else if (pFtr->cbEntry != cbEntry)
1325 RTTestIFailed("Wrong footer entry size for '%s': %#x, expected %#x\n", pszWhat, pFtr->cbEntry, cbEntry);
1326 else if (pFtr->cEntries != off / cbEntry)
1327 RTTestIFailed("Wrong footer entry count for '%s': %#x, expected %#x\n",
1328 pszWhat, pFtr->cEntries, off / cbEntry);
1329 else
1330 rc = VINF_SUCCESS;
1331 }
1332
1333 /* Validate the decompressed size wrt entry size. */
1334 if ((off % cbEntry) != 0 && RT_SUCCESS(rc))
1335 {
1336 RTTestIFailed("Uneven decompressed data size for '%s': %#zx vs entry size %#zx -> %#zx",
1337 pszWhat, off, cbEntry, off % cbEntry);
1338 rc = VERR_IO_BAD_LENGTH;
1339 }
1340
1341 if (RT_SUCCESS(rc))
1342 {
1343 /*
1344 * We're good.
1345 */
1346 /* Reallocate the block if it's way to big. */
1347 if (cbDecompressedAlloc - cbUncompressed > _512K)
1348 {
1349 void * const pvNew = RTMemRealloc(pbDecompressed, cbUncompressed);
1350 if (pvNew)
1351 {
1352 pbDecompressed = (uint8_t *)pvNew;
1353 if (pHdr)
1354 pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1355 }
1356 }
1357 RTMEM_MAY_LEAK(pbDecompressed);
1358
1359 /* Fill in the info and other return values. */
1360 pInfo->cbUncompressed = (uint32_t)cbUncompressed;
1361 pInfo->pvUncompressed = pbDecompressed;
1362 pInfo->pszCpuDesc = pHdr ? pHdr->szCpuDesc : NULL;
1363 pInfo->uSvnRev = pHdr ? pHdr->uSvnRev : 0;
1364 *pcTests = (uint32_t)(off / cbEntry);
1365 *ppvTests = pHdr ? (uint8_t *)(pHdr + 1) : pbDecompressed;
1366
1367 pbDecompressed = NULL;
1368 rc = VINF_SUCCESS;
1369 }
1370 }
1371 else
1372 RTTestIFailed("Failed to decompress binary stream '%s': %Rrc (off=%#zx, cbCompressed=%#x)",
1373 pszWhat, rc, off, cbCompressed);
1374 RTMemFree(pbDecompressed);
1375 }
1376 else
1377 {
1378 RTTestIFailed("Out of memory decompressing test data '%s'", pszWhat);
1379 rc = VERR_NO_MEMORY;
1380 }
1381 RTVfsIoStrmRelease(hVfsIosDecomp);
1382 }
1383 RTVfsIoStrmRelease(hVfsIos);
1384 return rc;
1385}
1386
/** Decompresses the test data of a sub-test table entry; evaluates to true on success. */
#define DECOMPRESS_TESTS(a_Entry) \
    RT_SUCCESS(DecompressBinaryTest((a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                    sizeof((a_Entry).paTests[0]), (a_Entry).pszName, \
                                    (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info))
1391
1392/** Frees the decompressed test data. */
1393static void FreeDecompressedTests(void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1394{
1395 RTMemFree(pInfo->pvUncompressed);
1396 pInfo->pvUncompressed = NULL;
1397 pInfo->cbUncompressed = 0;
1398 *ppvTests = NULL;
1399 *pcTests = 0;
1400}
1401
/** Frees the decompressed test data of a sub-test table entry. */
#define FREE_DECOMPRESSED_TESTS(a_Entry) \
    FreeDecompressedTests((void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1404
1405
1406/** Check if the test is enabled and decompresses test data. */
1407static int SubTestAndCheckIfEnabledAndDecompress(const char *pszName, void const *pvCompressed, uint32_t cbCompressed,
1408 size_t cbEntry, void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1409{
1410 if (SubTestAndCheckIfEnabled(pszName))
1411 {
1412 int const rc = DecompressBinaryTest(pvCompressed, cbCompressed, cbEntry, pszName, ppvTests, pcTests, pInfo);
1413 if (RT_SUCCESS(rc))
1414 return true;
1415 }
1416 return false;
1417}
1418
/** Starts the sub-test for a table entry and decompresses its data; true if it should run. */
#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
        SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, (a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                              sizeof((a_Entry).paTests[0]), \
                                              (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1423
1424
/**
 * Formats which EFLAGS bits differ between the actual and expected values.
 *
 * @returns Empty string when equal; otherwise " - <xor-mask>" followed by the
 *          names of the differing flags ("/NAME" when set in @a fActual,
 *          "/!NAME" when clear).
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
1466
1467
/**
 * Formats which x87 FSW bits differ between the actual and expected values,
 * including a TOP field mismatch ("/TOP<actual>!<expected>").
 *
 * @returns Empty string when equal; otherwise the formatted difference.
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
#if 0 /* For debugging fprem & fprem1 */
    cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
                       X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
#endif
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
1512
1513
1514static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1515{
1516 if (fActual == fExpected)
1517 return "";
1518
1519 uint16_t const fXor = fActual ^ fExpected;
1520 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1521 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1522
1523 static struct
1524 {
1525 const char *pszName;
1526 uint32_t fFlag;
1527 } const s_aFlags[] =
1528 {
1529#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1530 MXCSR_ENTRY(IE),
1531 MXCSR_ENTRY(DE),
1532 MXCSR_ENTRY(ZE),
1533 MXCSR_ENTRY(OE),
1534 MXCSR_ENTRY(UE),
1535 MXCSR_ENTRY(PE),
1536
1537 MXCSR_ENTRY(IM),
1538 MXCSR_ENTRY(DM),
1539 MXCSR_ENTRY(ZM),
1540 MXCSR_ENTRY(OM),
1541 MXCSR_ENTRY(UM),
1542 MXCSR_ENTRY(PM),
1543
1544 MXCSR_ENTRY(DAZ),
1545 MXCSR_ENTRY(FZ),
1546#undef MXCSR_ENTRY
1547 };
1548 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1549 if (s_aFlags[i].fFlag & fXor)
1550 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1551 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1552 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1553 return pszBuf;
1554}
1555
1556
/**
 * Formats an x87 FCW value for display: precision control, rounding control,
 * then the set mask bits by name.
 */
static const char *FormatFcw(uint16_t fFcw)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    const char *pszPC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_PC_MASK)
    {
        case X86_FCW_PC_24:     pszPC = "PC24"; break;
        case X86_FCW_PC_RSVD:   pszPC = "PCRSVD!"; break;
        case X86_FCW_PC_53:     pszPC = "PC53"; break;
        case X86_FCW_PC_64:     pszPC = "PC64"; break;
    }

    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_RC_MASK)
    {
        case X86_FCW_RC_NEAREST:    pszRC = "NEAR"; break;
        case X86_FCW_RC_DOWN:       pszRC = "DOWN"; break;
        case X86_FCW_RC_UP:         pszRC = "UP"; break;
        case X86_FCW_RC_ZERO:       pszRC = "ZERO"; break;
    }
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
        FCW_ENTRY(IM),
        FCW_ENTRY(DM),
        FCW_ENTRY(ZM),
        FCW_ENTRY(OM),
        FCW_ENTRY(UM),
        FCW_ENTRY(PM),
        { "6M", 64 },   /* bit 6 - reserved/unnamed, hence the literal value */
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fFcw & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
1602
1603
/**
 * Formats an MXCSR value for display: rounding control, DAZ/FZ, then the set
 * exception status and mask bits by name.
 */
static const char *FormatMxcsr(uint32_t fMxcsr)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fMxcsr & X86_MXCSR_RC_MASK)
    {
        case X86_MXCSR_RC_NEAREST:  pszRC = "NEAR"; break;
        case X86_MXCSR_RC_DOWN:     pszRC = "DOWN"; break;
        case X86_MXCSR_RC_UP:       pszRC = "UP"; break;
        case X86_MXCSR_RC_ZERO:     pszRC = "ZERO"; break;
    }

    const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
    const char *pszFZ  = fMxcsr & X86_MXCSR_FZ  ? " FZ"  : "";
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
        MXCSR_ENTRY(IE),
        MXCSR_ENTRY(DE),
        MXCSR_ENTRY(ZE),
        MXCSR_ENTRY(OE),
        MXCSR_ENTRY(UE),
        MXCSR_ENTRY(PE),

        MXCSR_ENTRY(IM),
        MXCSR_ENTRY(DM),
        MXCSR_ENTRY(ZM),
        MXCSR_ENTRY(OM),
        MXCSR_ENTRY(UM),
        MXCSR_ENTRY(PM),
        { "6M", 64 },   /* bit 6 - reserved/unnamed, hence the literal value */
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fMxcsr & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
1650
1651
1652static const char *FormatR80(PCRTFLOAT80U pr80)
1653{
1654 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1655 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1656 return pszBuf;
1657}
1658
1659
1660static const char *FormatR64(PCRTFLOAT64U pr64)
1661{
1662 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1663 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1664 return pszBuf;
1665}
1666
1667
1668static const char *FormatR32(PCRTFLOAT32U pr32)
1669{
1670 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1671 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1672 return pszBuf;
1673}
1674
1675
/** Formats a packed BCD value for display: sign, all digit pairs (most
 *  significant first), plus a trailing "[cBadDigits,uPad]" marker when
 *  non-decimal digits or non-zero pad bits are present. */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
    {
        static const char s_szDigits[] = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
1702
1703
1704#if 0
1705static const char *FormatI64(int64_t const *piVal)
1706{
1707 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1708 RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1709 return pszBuf;
1710}
1711#endif
1712
1713
1714static const char *FormatI32(int32_t const *piVal)
1715{
1716 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1717 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1718 return pszBuf;
1719}
1720
1721
1722static const char *FormatI16(int16_t const *piVal)
1723{
1724 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1725 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1726 return pszBuf;
1727}
1728
1729
1730static const char *FormatU128(PCRTUINT128U puVal)
1731{
1732 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1733 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1734 return pszBuf;
1735}
1736
1737
1738/*
1739 * Binary operations.
1740 */
1741TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1742TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1743TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1744TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1745
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits BinU<N>Generate(): for each applicable g_aBinU<N> entry, runs the
 *  (preferably native) worker over @a cTests random inputs plus the entry's
 *  fixed test cases and writes the results to binary output files.  Also
 *  instantiates the matching DumpAll function. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        IEMBINARYOUTPUT BinOut; \
        if (   g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            Test.fEflOut   = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                           : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
            Test.uDstIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
            Test.uMisc     = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
            Test.fEflOut   = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(BinU ## a_cBits, g_aBinU ## a_cBits)

#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1793
1794
1795/** Based on a quick probe run, guess how long to run the benchmark. */
1796static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1797{
1798 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1799 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1800 if (cIterations > _2G)
1801 return _2G;
1802 if (cIterations < _4K)
1803 return _4K;
1804 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1805}
1806
1807
/** Instantiates the generator, benchmark helper and test driver for one
 *  binary-operation width: BinU<N>Bench() times the worker on a single test
 *  entry, and BinU<N>Test() verifies every recorded test case (destination
 *  value and EFLAGS) for each sub-test and optionally benchmarks it. */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const  uDstIn = pEntry->uDstIn; \
    a_uType const  uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; /* loop body is unrolled 4x */ \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        a_uType uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_uType  uDst = paTests[iTest].uDstIn; \
                uint32_t fEfl = pfn(paTests[iTest].fEflIn, &uDst, paTests[iTest].uSrcIn); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    /* Re-run the op against the shared write buffer as well. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    fEfl = pfn(paTests[iTest].fEflIn, g_pu ## a_cBits, paTests[iTest].uSrcIn); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, fEfl == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest       = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun  = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
1885
1886
1887/*
1888 * 8-bit binary operations.
1889 */
/** Subtest table for the 8-bit binary operations, covering the plain and
 * locked (read-modify-write) variants.  CMP and TEST go through
 * ENTRY_BIN_PFN_CAST, presumably because their helper signatures differ from
 * the common PFNIEMAIMPLBINU8 type they are cast to -- confirm against the
 * ENTRY_BIN_PFN_CAST definition. */
static BINU8_T g_aBinU8[] =
{
    ENTRY_BIN(add_u8),
    ENTRY_BIN(add_u8_locked),
    ENTRY_BIN(adc_u8),
    ENTRY_BIN(adc_u8_locked),
    ENTRY_BIN(sub_u8),
    ENTRY_BIN(sub_u8_locked),
    ENTRY_BIN(sbb_u8),
    ENTRY_BIN(sbb_u8_locked),
    ENTRY_BIN(or_u8),
    ENTRY_BIN(or_u8_locked),
    ENTRY_BIN(xor_u8),
    ENTRY_BIN(xor_u8_locked),
    ENTRY_BIN(and_u8),
    ENTRY_BIN(and_u8_locked),
    ENTRY_BIN_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
    ENTRY_BIN_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
};
/* Instantiates BinU8Bench() and BinU8Test() for the table above. */
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1910
1911
1912/*
1913 * 16-bit binary operations.
1914 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed boundary case always included when generating add_u16 test data:
 *  1 + 0xffff with all eflags set on input (the sum wraps to zero). */
static const BINU16_TEST_T g_aFixedTests_add_u16[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
};
#endif
/** Subtest table for the 16-bit binary operations.
 * bsf/bsr and imul_two have separate AMD and Intel entries with a mask of
 * eflags (presumably the flags the two vendors leave in different states --
 * confirm against ENTRY_BIN_AMD/_INTEL).  The bit-test entries pass 1 as the
 * ENTRY_BIN_EX extra value.  ARPL is 16-bit only. */
static BINU16_T g_aBinU16[] =
{
    ENTRY_BIN_FIX(add_u16),  /* also uses g_aFixedTests_add_u16 in generator builds */
    ENTRY_BIN(add_u16_locked),
    ENTRY_BIN(adc_u16),
    ENTRY_BIN(adc_u16_locked),
    ENTRY_BIN(sub_u16),
    ENTRY_BIN(sub_u16_locked),
    ENTRY_BIN(sbb_u16),
    ENTRY_BIN(sbb_u16_locked),
    ENTRY_BIN(or_u16),
    ENTRY_BIN(or_u16_locked),
    ENTRY_BIN(xor_u16),
    ENTRY_BIN(xor_u16_locked),
    ENTRY_BIN(and_u16),
    ENTRY_BIN(and_u16_locked),
    ENTRY_BIN_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
    ENTRY_BIN_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
    ENTRY_BIN_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
    ENTRY_BIN_EX(btc_u16, 1),
    ENTRY_BIN_EX(btc_u16_locked, 1),
    ENTRY_BIN_EX(btr_u16, 1),
    ENTRY_BIN_EX(btr_u16_locked, 1),
    ENTRY_BIN_EX(bts_u16, 1),
    ENTRY_BIN_EX(bts_u16_locked, 1),
    ENTRY_BIN_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(arpl),
};
/* Instantiates BinU16Bench() and BinU16Test() for the table above. */
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1956
1957
1958/*
1959 * 32-bit binary operations.
1960 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed boundary case always included when generating add_u32 test data:
 *  1 + 0xffffffff with all eflags set on input (the sum wraps to zero). */
static const BINU32_TEST_T g_aFixedTests_add_u32[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
};
#endif
/** Subtest table for the 32-bit binary operations.
 * Mirrors g_aBinU16 (minus ARPL) and additionally covers ADCX/ADOX, which
 * first exist for 32-bit operands. */
static BINU32_T g_aBinU32[] =
{
    ENTRY_BIN_FIX(add_u32),  /* also uses g_aFixedTests_add_u32 in generator builds */
    ENTRY_BIN(add_u32_locked),
    ENTRY_BIN(adc_u32),
    ENTRY_BIN(adc_u32_locked),
    ENTRY_BIN(sub_u32),
    ENTRY_BIN(sub_u32_locked),
    ENTRY_BIN(sbb_u32),
    ENTRY_BIN(sbb_u32_locked),
    ENTRY_BIN(or_u32),
    ENTRY_BIN(or_u32_locked),
    ENTRY_BIN(xor_u32),
    ENTRY_BIN(xor_u32_locked),
    ENTRY_BIN(and_u32),
    ENTRY_BIN(and_u32_locked),
    ENTRY_BIN_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
    ENTRY_BIN_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
    ENTRY_BIN_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
    ENTRY_BIN_EX(btc_u32, 1),
    ENTRY_BIN_EX(btc_u32_locked, 1),
    ENTRY_BIN_EX(btr_u32, 1),
    ENTRY_BIN_EX(btr_u32_locked, 1),
    ENTRY_BIN_EX(bts_u32, 1),
    ENTRY_BIN_EX(bts_u32_locked, 1),
    ENTRY_BIN_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(adcx_u32),
    ENTRY_BIN(adox_u32),
};
/* Instantiates BinU32Bench() and BinU32Test() for the table above. */
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
2003
2004
2005/*
2006 * 64-bit binary operations.
2007 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed boundary case always included when generating add_u64 test data:
 *  1 + 0xffffffffffffffff with all eflags set on input (wraps to zero). */
static const BINU64_TEST_T g_aFixedTests_add_u64[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
};
#endif
/** Subtest table for the 64-bit binary operations.  Mirrors g_aBinU32. */
static BINU64_T g_aBinU64[] =
{
    ENTRY_BIN_FIX(add_u64),  /* also uses g_aFixedTests_add_u64 in generator builds */
    ENTRY_BIN(add_u64_locked),
    ENTRY_BIN(adc_u64),
    ENTRY_BIN(adc_u64_locked),
    ENTRY_BIN(sub_u64),
    ENTRY_BIN(sub_u64_locked),
    ENTRY_BIN(sbb_u64),
    ENTRY_BIN(sbb_u64_locked),
    ENTRY_BIN(or_u64),
    ENTRY_BIN(or_u64_locked),
    ENTRY_BIN(xor_u64),
    ENTRY_BIN(xor_u64_locked),
    ENTRY_BIN(and_u64),
    ENTRY_BIN(and_u64_locked),
    ENTRY_BIN_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
    ENTRY_BIN_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
    ENTRY_BIN_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
    ENTRY_BIN_EX(btc_u64, 1),
    ENTRY_BIN_EX(btc_u64_locked, 1),
    ENTRY_BIN_EX(btr_u64, 1),
    ENTRY_BIN_EX(btr_u64_locked, 1),
    ENTRY_BIN_EX(bts_u64, 1),
    ENTRY_BIN_EX(bts_u64_locked, 1),
    ENTRY_BIN_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(adcx_u64),
    ENTRY_BIN(adox_u64),
/** @todo popcnt */
/** @todo tzcnt */
/** @todo lzcnt */
};
/* Instantiates BinU64Bench() and BinU64Test() for the table above. */
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
2053
2054
2055/*
2056 * XCHG
2057 */
2058static void XchgTest(void)
2059{
2060 if (!SubTestAndCheckIfEnabled("xchg"))
2061 return;
2062 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
2063 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
2064 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
2065 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
2066
2067 static struct
2068 {
2069 uint8_t cb; uint64_t fMask;
2070 union
2071 {
2072 uintptr_t pfn;
2073 FNIEMAIMPLXCHGU8 *pfnU8;
2074 FNIEMAIMPLXCHGU16 *pfnU16;
2075 FNIEMAIMPLXCHGU32 *pfnU32;
2076 FNIEMAIMPLXCHGU64 *pfnU64;
2077 } u;
2078 }
2079 s_aXchgWorkers[] =
2080 {
2081 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
2082 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
2083 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
2084 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
2085 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
2086 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
2087 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
2088 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
2089 };
2090 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
2091 {
2092 RTUINT64U uIn1, uIn2, uMem, uDst;
2093 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2094 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2095 if (uIn1.u == uIn2.u)
2096 uDst.u = uIn2.u = ~uIn2.u;
2097
2098 switch (s_aXchgWorkers[i].cb)
2099 {
2100 case 1:
2101 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
2102 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
2103 break;
2104 case 2:
2105 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
2106 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
2107 break;
2108 case 4:
2109 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
2110 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
2111 break;
2112 case 8:
2113 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
2114 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
2115 break;
2116 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
2117 }
2118
2119 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
2120 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
2121 }
2122}
2123
2124
2125/*
2126 * XADD
2127 */
2128static void XaddTest(void)
2129{
2130#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
2131 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
2132 static struct \
2133 { \
2134 const char * const pszName; \
2135 FNIEMAIMPLXADDU ## a_cBits * const pfn; \
2136 void const * const pvCompressedTests; \
2137 uint32_t const * const pcbCompressedTests; \
2138 BINU ## a_cBits ## _TEST_T const *paTests; \
2139 uint32_t cTests; \
2140 IEMTESTENTRYINFO Info; \
2141 } s_aFuncs[] = \
2142 { \
2143 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
2144 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2145 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
2146 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2147 }; \
2148 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2149 { \
2150 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2151 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2152 uint32_t const cTests = s_aFuncs[iFn].cTests; \
2153 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2154 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2155 { \
2156 uint32_t fEfl = paTests[iTest].fEflIn; \
2157 a_Type uSrc = paTests[iTest].uSrcIn; \
2158 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2159 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
2160 if ( fEfl != paTests[iTest].fEflOut \
2161 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
2162 || uSrc != paTests[iTest].uDstIn) \
2163 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2164 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
2165 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
2166 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2167 } \
2168 FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2169 } \
2170 } while(0)
2171 TEST_XADD(8, uint8_t, "%#04x");
2172 TEST_XADD(16, uint16_t, "%#06x");
2173 TEST_XADD(32, uint32_t, "%#010RX32");
2174 TEST_XADD(64, uint64_t, "%#010RX64");
2175}
2176
2177
2178/*
2179 * CMPXCHG
2180 */
2181
/**
 * Tests the cmpxchg_uXX and cmpxchg_uXX_locked assembly workers.
 *
 * Reuses the binary 'cmp' test data.  For each record two rounds are run:
 *  1. "as is": comparand = uDstIn vs memory = uSrcIn (almost always unequal),
 *     so eflags must match the stored cmp results and memory is only replaced
 *     when the values happen to be equal.
 *  2. "positive": comparand equals memory, so the exchange must happen and the
 *     expected eflags are recomputed on the fly via the sub_uXX worker
 *     (CMPXCHG sets flags like a CMP/SUB of dst and comparand).
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
    static struct \
    { \
        const char * const pszName; \
        FNIEMAIMPLCMPXCHGU ## a_cBits * const pfn; \
        PFNIEMAIMPLBINU ## a_cBits const pfnSub; /* used to compute expected eflags in the positive round */ \
        void const * const pvCompressedTests; \
        uint32_t const * const pcbCompressedTests; \
        BINU ## a_cBits ## _TEST_T const *paTests; \
        uint32_t cTests; \
        IEMTESTENTRYINFO Info; \
    } s_aFuncs[] = \
    { \
        { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
          g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
        { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
          g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
    }; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
        BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
        uint32_t const cTests = s_aFuncs[iFn].cTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            /* as is (99% likely to be negative). */ \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
            a_Type uA = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
            a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl != paTests[iTest].fEflOut \
                || *g_pu ## a_cBits != uExpect \
                || uA != paTests[iTest].uSrcIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            /* positive */ \
            uA = paTests[iTest].uDstIn; \
            uint32_t fEflExpect = s_aFuncs[iFn].pfnSub(paTests[iTest].fEflIn, &uA, uA); \
            fEfl = paTests[iTest].fEflIn; \
            uA = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits = uA; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl != fEflExpect \
                || *g_pu ## a_cBits != uNew \
                || uA != paTests[iTest].uDstIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                             EFlagsDiff(fEfl, fEflExpect)); \
        } \
        FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
    } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
2250
2251
/** Function type of the cmpxchg8b assembly workers (memory, comparand
 *  EDX:EAX, replacement ECX:EBX, eflags in/out). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));

/**
 * Benchmark worker: invokes @a pfn repeatedly with the given fixed inputs and
 * returns the elapsed time in nanoseconds.
 *
 * The body is manually unrolled four times (with cIterations scaled down to
 * match) to reduce loop overhead in the measurement; all inputs are
 * re-initialized before every call because the worker modifies them.
 */
static uint64_t CmpXchg8bBench(uint32_t cIterations, FNIEMAIMPLCMPXCHG8B *pfn, uint64_t const uDstValue,
                               uint64_t const uOldValue, uint64_t const uNewValue, uint32_t const fEflIn)
{
    cIterations /= 4;
    RTThreadYield();
    uint64_t const nsStart = RTTimeNanoTS();
    for (uint32_t i = 0; i < cIterations; i++)
    {
        RTUINT64U uA, uB;
        uint32_t fEfl = fEflIn;
        uint64_t uDst = uDstValue;
        uB.u = uNewValue;
        uA.u = uOldValue;
        pfn(&uDst, &uA, &uB, &fEfl);

        fEfl = fEflIn;
        uDst = uDstValue;
        uB.u = uNewValue;
        uA.u = uOldValue;
        pfn(&uDst, &uA, &uB, &fEfl);

        fEfl = fEflIn;
        uDst = uDstValue;
        uB.u = uNewValue;
        uA.u = uOldValue;
        pfn(&uDst, &uA, &uB, &fEfl);

        fEfl = fEflIn;
        uDst = uDstValue;
        uB.u = uNewValue;
        uA.u = uOldValue;
        pfn(&uDst, &uA, &uB, &fEfl);
    }
    return RTTimeNanoTS() - nsStart;
}
2289
/**
 * Tests the cmpxchg8b and cmpxchg8b_locked assembly workers.
 *
 * Uses random values rather than stored test data.  For each round it checks:
 *  - positive: memory equals the comparand, so memory must be replaced by the
 *    new value, the comparand must be unchanged, and ZF must be set;
 *  - negative: memory differs (~uOldValue), so memory must be unchanged, the
 *    comparand must receive the memory value, and ZF must be cleared.
 * On the second iteration both paths are optionally benchmarked via
 * CmpXchg8bBench() when --benchmark is active.
 */
static void CmpXchg8bTest(void)
{
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF) /* success must set ZF, other flags unchanged */
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value operand must never be modified */

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF) /* mismatch must clear ZF and load memory into the comparand */
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);

            if (iTest == 2 && g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
            {
                uint32_t cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
                                                                               uOldValue, uOldValue, uNewValue, fEflIn));
                uint64_t cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, uOldValue, uOldValue, uNewValue, fEflIn);
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
                             "%s-positive", s_aFuncs[iFn].pszName);

                cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
                                                                     ~uOldValue, uOldValue, uNewValue, fEflIn));
                cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, ~uOldValue, uOldValue, uNewValue, fEflIn);
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
                             "%s-negative", s_aFuncs[iFn].pszName);
            }
        }
    }
}
2360
/**
 * Tests the cmpxchg16b workers (plain, locked and, on non-ARM64 hosts, the
 * C fallback implementation).
 *
 * Same positive/negative scheme as CmpXchg8bTest, only with 128-bit operands.
 * On AMD64 the subtest is skipped when the host CPU lacks the CMPXCHG16B
 * feature (CPUID.01h:ECX bit 13), since the assembly workers would fault.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b", iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF) /* success: ZF set, memory replaced, comparand untouched */
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF) /* mismatch: ZF clear, comparand loaded from memory */
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
2437
2438
2439/*
2440 * Double shifts.
2441 *
2442 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2443 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Emits ShiftDblUxxGenerate(), which produces binary test data for the double
 * shift (shld/shrd) workers, plus the matching dump-all function.  Only table
 * entries whose eflags flavour is native or matches the selected CPU flavour
 * are generated.  The random shift count is masked to 4*cBits-1 so values
 * well beyond the operand width get covered too.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftDblU ## a_cBits, a_aSubTests)

#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2476
/*
 * Instantiates the subtest table (shld/shrd, AMD and Intel eflags flavours),
 * the ShiftDblUxxBench() worker (4x unrolled timing loop) and the
 * ShiftDblUxxTest() driver for one operand width.  The driver replays stored
 * test records, re-runs passing records via the global g_puXX/g_pfEfl buffers,
 * optionally benchmarks, and then advances to the native variation (pfnNative)
 * when the entry provides one.  The shift count lives in the uMisc field of
 * the BINUxx_TEST_T records.
 */
#define TEST_SHIFT_DBL(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_BIN_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_BIN_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_BIN_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static uint64_t ShiftDblU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const  uDstIn = pEntry->uDstIn; \
    a_uType const  uSrcIn = pEntry->uSrcIn; \
    a_uType const  cShift = pEntry->uMisc; \
    cIterations /= 4; /* body is unrolled 4 times below */ \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType  uBenchDst = uDstIn;  \
        pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl);  \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn;  \
        pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl);  \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn;  \
        pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl);  \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn;  \
        pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl);  \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        uint32_t const                  cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest       = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, ShiftDblU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun  = ShiftDblU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16",  BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2574
2575#ifdef TSTIEMAIMPL_WITH_GENERATOR
2576static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
2577{
2578 RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
2579 if (rcExit == RTEXITCODE_SUCCESS)
2580 rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
2581 if (rcExit == RTEXITCODE_SUCCESS)
2582 rcExit = ShiftDblU64Generate(cTests, papszNameFmts);
2583 return rcExit;
2584}
2585
2586static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
2587{
2588 RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
2589 if (rcExit == RTEXITCODE_SUCCESS)
2590 rcExit = ShiftDblU32DumpAll(papszNameFmts);
2591 if (rcExit == RTEXITCODE_SUCCESS)
2592 rcExit = ShiftDblU64DumpAll(papszNameFmts);
2593 return rcExit;
2594}
2595#endif
2596
/** Runs the double shift (shld/shrd) subtests for all operand widths. */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
2603
2604
2605/*
2606 * Unary operators.
2607 *
2608 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
2609 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Emits UnaryUxxGenerate(), which produces binary test data for the unary
 * workers (inc/dec/not/neg) of one operand width, plus the matching dump-all
 * function.  The BINUxx_TEST_T record layout is reused with uSrcIn and uMisc
 * zeroed since unary operations take no source operand.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits(); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(UnaryU ## a_cBits, g_aUnaryU ## a_cBits)
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
2638
/**
 * Instantiates the sub-test table, benchmark helper and test driver for the
 * unary GRP3/GRP5-style operations (INC, DEC, NOT, NEG, plus their locked
 * variants) at the given operand width.
 *
 * The generated UnaryU\<cBits\>Test() replays pre-generated binary test data
 * (input EFLAGS + destination) against each helper and verifies both the
 * result and the output EFLAGS; it runs each case twice, the second time via
 * the page-boundary globals g_puXX/g_pfEfl to catch alignment/paging issues.
 */
#define TEST_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(inc_u ## a_cBits), \
    ENTRY_BIN(inc_u ## a_cBits ## _locked), \
    ENTRY_BIN(dec_u ## a_cBits), \
    ENTRY_BIN(dec_u ## a_cBits ## _locked), \
    ENTRY_BIN(not_u ## a_cBits), \
    ENTRY_BIN(not_u ## a_cBits ## _locked), \
    ENTRY_BIN(neg_u ## a_cBits), \
    ENTRY_BIN(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType) \
\
static uint64_t UnaryU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLUNARYU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDstIn = pEntry->uDstIn; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        PFNIEMAIMPLUNARYU ## a_cBits const pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_uType uDst = paTests[iTest].uDstIn; \
            pfn(&uDst, &fEfl); \
            if (   uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl = paTests[iTest].fEflIn; \
                pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
        \
        if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
        { \
            uint32_t const iTest = cTests / 2; \
            uint32_t const cIterations = EstimateIterations(_64K, UnaryU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
            uint64_t const cNsRealRun = UnaryU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
            RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, "%s", a_aSubTests[iFn].pszName); \
        } \
        \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate the unary test machinery for all four operand widths. */
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T, g_aUnaryU8)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T, g_aUnaryU16)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T, g_aUnaryU32)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T, g_aUnaryU64)
2729
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates binary test data for all unary operation widths, stopping at the
 *  first failure and returning its exit code. */
static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnGen[])(uint32_t, const char * const *) =
    {
        UnaryU8Generate, UnaryU16Generate, UnaryU32Generate, UnaryU64Generate
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnGen[i](cTests, papszNameFmts);
    return rcExit;
}

/** Dumps the unary test data for all widths, stopping at the first failure. */
static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnDump[])(const char * const *) =
    {
        UnaryU8DumpAll, UnaryU16DumpAll, UnaryU32DumpAll, UnaryU64DumpAll
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnDump) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnDump[i](papszNameFmts);
    return rcExit;
}
#endif
2755
2756static void UnaryTest(void)
2757{
2758 UnaryU8Test();
2759 UnaryU16Test();
2760 UnaryU32Test();
2761 UnaryU64Test();
2762}
2763
2764
2765/*
2766 * Shifts.
2767 *
2768 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2769 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the shift/rotate test data generator for one operand width.
 * Each iteration writes two records: one with random EFLAGS and one with the
 * live flags inverted, so both carry states get covered for RCL/RCR.
 * The shift count is stored in uMisc and deliberately ranges up to 4x the
 * operand width to exercise the masking behaviour; uSrcIn is unused.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            \
            Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftU ## a_cBits, a_aSubTests)
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2807
/**
 * Instantiates the sub-test table, benchmark helper and test driver for the
 * shift/rotate group (ROL, ROR, RCL, RCR, SHL, SHR, SAR) at the given width.
 *
 * Both AMD and Intel EFLAGS-flavour variants are listed per instruction; the
 * iVar loop first runs the selected flavour and then the native variant
 * (pfnNative).  Each passing case is re-run via the page-boundary globals.
 */
#define TEST_SHIFT(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN_AMD(  rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD(  ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD(  rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_AMD(  shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_AMD(  sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static uint64_t ShiftU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDstIn = pEntry->uDstIn; \
    a_uType const cShift = pEntry->uMisc; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType uBenchDst = uDstIn; \
        pfn(&uBenchDst, cShift, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, cShift, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, cShift, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, cShift, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, ShiftU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun = ShiftU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate the shift/rotate test machinery for all four operand widths. */
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2914
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates binary test data for all shift/rotate widths, stopping at the
 *  first failure and returning its exit code. */
static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnGen[])(uint32_t, const char * const *) =
    {
        ShiftU8Generate, ShiftU16Generate, ShiftU32Generate, ShiftU64Generate
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnGen[i](cTests, papszNameFmts);
    return rcExit;
}

/** Dumps the shift/rotate test data for all widths, stopping at the first failure. */
static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnDump[])(const char * const *) =
    {
        ShiftU8DumpAll, ShiftU16DumpAll, ShiftU32DumpAll, ShiftU64DumpAll
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnDump) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnDump[i](papszNameFmts);
    return rcExit;
}
#endif
2940
2941static void ShiftTest(void)
2942{
2943 ShiftU8Test();
2944 ShiftU16Test();
2945 ShiftU32Test();
2946 ShiftU64Test();
2947}
2948
2949
2950/*
2951 * Multiplication and division.
2952 *
2953 * Note! The 8-bit functions has a different format, so we need to duplicate things.
2954 * Note! Currently ignoring undefined bits.
2955 */
2956
2957/* U8 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked idiv_u8 boundary cases (#DE overflow edges around the
 *  signed 8-bit quotient range) appended after the random test data. */
static const MULDIVU8_TEST_T g_aFixedTests_idiv_u8[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x8000, 0, 0xc7, -1 }, /* -32768 / -57 = #DE (574.8771929824...) */
    { UINT32_MAX, 0, 0x8000, 0, 0xdd, -128 }, /* -32768 / -35 = #DE (936.2285714285...) */
    { UINT32_MAX, 0, 0x7f00, 0, 0x7f, -1 }, /* 0x7f00 / 0x7f = #DE (0x100) */
    { UINT32_MAX, 0, 0x3f80, 0, 0x7f, -1 }, /* 0x3F80 / 0x7f = #DE (0x80) */
    { UINT32_MAX, 0, 0x3f7f, 0, 0x7f, 0 }, /* 0x3F7F / 0x7f = 127.992125984... */
    { UINT32_MAX, 0, 0xc000, 0, 0x80, -1 }, /* -16384 / -128 = #DE (0x80) */
    { UINT32_MAX, 0, 0xc001, 0, 0x80, 0 }, /* -16383 / -128 = 127.9921875 */
};
#endif
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/** 8-bit multiply/divide sub-tests; uExtra holds the EFLAGS bits to ignore
 *  when comparing (the undefined bits for each instruction/vendor flavour). */
static INT_MULDIV_U8_T g_aMulDivU8[] =
{
    ENTRY_BIN_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                     X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_BIN_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_BIN_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                     X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_BIN_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_BIN_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_FIX_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_FIX_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2985
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(MulDivU8, g_aMulDivU8)
/**
 * Generates binary test data for the 8-bit multiply/divide helpers.
 *
 * Writes @a cTests random records per sub-test followed by the fixed boundary
 * cases (cFixedTests).  For fixed entries with rc of 0 or -1 the recorded rc
 * is forced to that value instead of what the native helper returned; -128
 * means "use the actual result".
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random test records per sub-test.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        /* Only generate for the native flavour or the flavour matching the host CPU. */
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        IEMBINARYOUTPUT BinOut; /* (stray macro-style '\' line continuations removed) */
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aMulDivU8[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn = RandU8Src(iTest);
            Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
        }
        for (uint32_t iTest = 0; iTest < g_aMulDivU8[iFn].cFixedTests; iTest++)
        {
            MULDIVU8_TEST_T Test;
            /* UINT32_MAX in the fixed table means "pick random input flags". */
            Test.fEflIn = g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags()
                        : g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn;
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn = g_aMulDivU8[iFn].paFixedTests[iTest].uDstIn;
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn = g_aMulDivU8[iFn].paFixedTests[iTest].uSrcIn;
            Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            if (g_aMulDivU8[iFn].paFixedTests[iTest].rc == 0 || g_aMulDivU8[iFn].paFixedTests[iTest].rc == -1)
                Test.rc = g_aMulDivU8[iFn].paFixedTests[iTest].rc;
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
#endif
3027
/**
 * Benchmarks a single 8-bit multiply/divide helper.
 *
 * The body is deliberately unrolled four times per iteration (and cIterations
 * divided by 4) so the loop overhead is amortized; inputs are reset before
 * every call so each invocation sees identical state.
 *
 * @returns Elapsed nanoseconds for the whole run.
 * @param   cIterations Total number of helper invocations to perform.
 * @param   pfn         The helper to benchmark.
 * @param   pEntry      Test record supplying the input values.
 */
static uint64_t MulDivU8Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU8 pfn, MULDIVU8_TEST_T const *pEntry)
{
    uint32_t const fEflIn = pEntry->fEflIn;
    uint16_t const uDstIn = pEntry->uDstIn;
    uint8_t const uSrcIn = pEntry->uSrcIn;
    cIterations /= 4;
    RTThreadYield();
    uint64_t const nsStart = RTTimeNanoTS();
    for (uint32_t i = 0; i < cIterations; i++)
    {
        uint32_t fBenchEfl = fEflIn;
        uint16_t uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);
    }
    return RTTimeNanoTS() - nsStart;
}
3056
3057static void MulDivU8Test(void)
3058{
3059 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
3060 {
3061 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
3062 continue;
3063 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
3064 uint32_t const cTests = g_aMulDivU8[iFn].cTests;
3065 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
3066 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
3067 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
3068 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3069 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3070 {
3071 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
3072 {
3073 uint32_t fEfl = paTests[iTest].fEflIn;
3074 uint16_t uDst = paTests[iTest].uDstIn;
3075 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
3076 if ( uDst != paTests[iTest].uDstOut
3077 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
3078 || rc != paTests[iTest].rc)
3079 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
3080 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
3081 "%sexpected %#08x %#06RX16 %d%s\n",
3082 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
3083 iVar ? " " : "", fEfl, uDst, rc,
3084 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
3085 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
3086 else
3087 {
3088 *g_pu16 = paTests[iTest].uDstIn;
3089 *g_pfEfl = paTests[iTest].fEflIn;
3090 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
3091 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
3092 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
3093 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
3094 }
3095 }
3096
3097 /* Benchmark if all succeeded. */
3098 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
3099 {
3100 uint32_t const iTest = cTests / 2;
3101 uint32_t const cIterations = EstimateIterations(_64K, MulDivU8Bench(_64K, pfn, &paTests[iTest]));
3102 uint64_t const cNsRealRun = MulDivU8Bench(cIterations, pfn, &paTests[iTest]);
3103 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
3104 "%s%s", g_aMulDivU8[iFn].pszName, iVar ? "-native" : "");
3105 }
3106
3107 /* Next variation is native. */
3108 pfn = g_aMulDivU8[iFn].pfnNative;
3109 }
3110 FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]);
3111 }
3112}
3113
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed idiv_u16 boundary cases around the signed 16-bit quotient limits. */
static const MULDIVU16_TEST_T g_aFixedTests_idiv_u16[] =
{
    /*                              low          high */
    /* --- eflags ---, -- uDst1 --, -- uDst2 --, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x0000, 0, 0x8000, 0, 0xc004, -1 }, /* -2147483648 /-16380 = #DE (131104.00781...) */
    { UINT32_MAX, 0, 0xffff, 0, 0x7fff, 0, 0x7fff, -1 }, /* 2147483647 / 32767 = #DE (65538.000030...) */
    { UINT32_MAX, 0, 0x8000, 0, 0x3fff, 0, 0x7fff, -1 }, /* 0x3fff8000 / 0x7fff = #DE (0x8000) */
    { UINT32_MAX, 0, 0x7fff, 0, 0x3fff, 0, 0x7fff, 0 }, /* 0x3fff7fff / 0x7fff = 32767.99996948... */
    { UINT32_MAX, 0, 0x0000, 0, 0xc000, 0, 0x8000, -1 }, /* -1073741824 / -32768 = #DE (0x8000) */
    { UINT32_MAX, 0, 0x0001, 0, 0xc000, 0, 0x8000, 0 }, /* -1073741823 / -32768 = 32767.999969482421875 */
};

/** Fixed idiv_u32 boundary cases (same pattern as the 16-bit table). */
static const MULDIVU32_TEST_T g_aFixedTests_idiv_u32[] =
{
    /*                                  low              high */
    /* --- eflags ---, ---- uDst1 ----, ---- uDst2 ----, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x00000000, 0, 0x80000000, 0, 0xc0000004, -1 },
    { UINT32_MAX, 0, 0xffffffff, 0, 0x7fffffff, 0, 0x7fffffff, -1 },
    { UINT32_MAX, 0, 0x80000000, 0, 0x3fffffff, 0, 0x7fffffff, -1 },
    { UINT32_MAX, 0, 0x7fffffff, 0, 0x3fffffff, 0, 0x7fffffff, 0 },
    { UINT32_MAX, 0, 0x00000000, 0, 0xc0000000, 0, 0x80000000, -1 },
    { UINT32_MAX, 0, 0x00000001, 0, 0xc0000000, 0, 0x80000000, 0 },
};

/** Fixed idiv_u64 boundary cases (same pattern as the 16-bit table). */
static const MULDIVU64_TEST_T g_aFixedTests_idiv_u64[] =
{
    /*                                          low                      high */
    /* --- eflags ---, -------- uDst1 --------, -------- uDst2 --------, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x0000000000000000, 0, 0x8000000000000000, 0, 0xc000000000000004, -1 },
    { UINT32_MAX, 0, 0xffffffffffffffff, 0, 0x7fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
    { UINT32_MAX, 0, 0x8000000000000000, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
    { UINT32_MAX, 0, 0x7fffffffffffffff, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, 0 },
    { UINT32_MAX, 0, 0x0000000000000000, 0, 0xc000000000000000, 0, 0x8000000000000000, -1 },
    { UINT32_MAX, 0, 0x0000000000000001, 0, 0xc000000000000000, 0, 0x8000000000000000, 0 },
};

/**
 * Emits the multiply/divide test data generator for one operand width:
 * random records followed by the fixed boundary cases, with the same
 * rc-override convention as MulDivU8Generate (0/-1 forced, -128 = actual).
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
DUMP_ALL_FN(MulDivU ## a_cBits, a_aSubTests) \
static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        a_TestType Test; \
        RT_ZERO(Test); /* 64-bit variant contains alignment padding */ \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        for (uint32_t iTest = 0; iTest < a_aSubTests[iFn].cFixedTests; iTest++ ) \
        { \
            Test.fEflIn = a_aSubTests[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                        : a_aSubTests[iFn].paFixedTests[iTest].fEflIn; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = a_aSubTests[iFn].paFixedTests[iTest].uDst1In; \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = a_aSubTests[iFn].paFixedTests[iTest].uDst2In; \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = a_aSubTests[iFn].paFixedTests[iTest].uSrcIn; \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            if (a_aSubTests[iFn].paFixedTests[iTest].rc == 0 || a_aSubTests[iFn].paFixedTests[iTest].rc == -1) \
                Test.rc = a_aSubTests[iFn].paFixedTests[iTest].rc; \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
3201
/**
 * Instantiates the sub-test table, benchmark helper and test driver for the
 * two-destination multiply/divide helpers (MUL, IMUL, DIV, IDIV) at the given
 * operand width (16/32/64-bit; the 8-bit form differs and is hand-written).
 *
 * EFLAGS comparison masks out the per-entry ignore bits (uExtra); the iVar
 * loop runs the selected flavour first and then the native variant.
 */
#define TEST_MULDIV(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType a_aSubTests [] = \
{ \
    ENTRY_BIN_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_FIX_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_FIX_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static uint64_t MulDivU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDst1In = pEntry->uDst1In; \
    a_uType const uDst2In = pEntry->uDst2In; \
    a_uType const uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType uBenchDst1 = uDst1In; \
        a_uType uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst1 = paTests[iTest].uDst1In; \
                a_uType uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%04u%s: efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          " -> efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          " expected %#010x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? " " : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, MulDivU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun = MulDivU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate for the 16/32/64-bit widths (8-bit is hand-written above). */
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
3318
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates binary test data for all multiply/divide widths, stopping at the
 *  first failure and returning its exit code. */
static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnGen[])(uint32_t, const char * const *) =
    {
        MulDivU8Generate, MulDivU16Generate, MulDivU32Generate, MulDivU64Generate
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnGen[i](cTests, papszNameFmts);
    return rcExit;
}

/** Dumps the multiply/divide test data for all widths, stopping at the first failure. */
static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
{
    static RTEXITCODE (* const s_apfnDump[])(const char * const *) =
    {
        MulDivU8DumpAll, MulDivU16DumpAll, MulDivU32DumpAll, MulDivU64DumpAll
    };
    RTEXITCODE rcExit = RTEXITCODE_SUCCESS;
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnDump) && rcExit == RTEXITCODE_SUCCESS; i++)
        rcExit = s_apfnDump[i](papszNameFmts);
    return rcExit;
}
#endif
3344
3345static void MulDivTest(void)
3346{
3347 MulDivU8Test();
3348 MulDivU16Test();
3349 MulDivU32Test();
3350 MulDivU64Test();
3351}
3352
3353
3354/*
3355 * BSWAP
3356 */
/**
 * Tests the BSWAP helpers for 16, 32 and 64-bit operands using fixed
 * input/output pairs via the page-boundary globals.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        /* NOTE(review): the active expectation is that the 16-bit helper zeroes
           the low word rather than byte-swapping it; the #if 0 branches keep the
           byte-swapping alternative.  Presumably this mirrors the undefined
           16-bit-operand BSWAP behaviour of the reference CPU - confirm against
           the assembly implementation. */
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
3391
3392
3393
3394/*********************************************************************************************************************************
3395* Floating point (x87 style) *
3396*********************************************************************************************************************************/
3397
3398/*
3399 * FPU constant loading.
3400 */
3401TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
3402
3403static FPU_LD_CONST_T g_aFpuLdConst[] =
3404{
3405 ENTRY_BIN(fld1),
3406 ENTRY_BIN(fldl2t),
3407 ENTRY_BIN(fldl2e),
3408 ENTRY_BIN(fldpi),
3409 ENTRY_BIN(fldlg2),
3410 ENTRY_BIN(fldln2),
3411 ENTRY_BIN(fldz),
3412};
3413
3414#ifdef TSTIEMAIMPL_WITH_GENERATOR
3415static RTEXITCODE FpuLdConstGenerate(uint32_t cTests, const char * const *papszNameFmts)
3416{
3417 X86FXSTATE State;
3418 RT_ZERO(State);
3419 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3420 {
3421 IEMBINARYOUTPUT BinOut;
3422 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdConst[iFn]), RTEXITCODE_FAILURE);
3423 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
3424 {
3425 State.FCW = RandFcw();
3426 State.FSW = RandFsw();
3427
3428 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3429 {
3430 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3431 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3432 g_aFpuLdConst[iFn].pfn(&State, &Res);
3433 FPU_LD_CONST_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result };
3434 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3435 }
3436 }
3437 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3438 }
3439 return RTEXITCODE_SUCCESS;
3440}
3441DUMP_ALL_FN(FpuLdConst, g_aFpuLdConst)
3442#endif
3443
3444static void FpuLdConstTest(void)
3445{
3446 /*
3447 * Inputs:
3448 * - FSW: C0, C1, C2, C3
3449 * - FCW: Exception masks, Precision control, Rounding control.
3450 *
3451 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
3452 */
3453 X86FXSTATE State;
3454 RT_ZERO(State);
3455 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3456 {
3457 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
3458 continue;
3459
3460 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
3461 uint32_t const cTests = g_aFpuLdConst[iFn].cTests;
3462 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
3463 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
3464 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3465 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3466 {
3467 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3468 {
3469 State.FCW = paTests[iTest].fFcw;
3470 State.FSW = paTests[iTest].fFswIn;
3471 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3472 pfn(&State, &Res);
3473 if ( Res.FSW != paTests[iTest].fFswOut
3474 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3475 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
3476 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3477 Res.FSW, FormatR80(&Res.r80Result),
3478 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3479 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3480 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3481 FormatFcw(paTests[iTest].fFcw) );
3482 }
3483 pfn = g_aFpuLdConst[iFn].pfnNative;
3484 }
3485
3486 FREE_DECOMPRESSED_TESTS(g_aFpuLdConst[iFn]);
3487 }
3488}
3489
3490
3491/*
3492 * Load floating point values from memory.
3493 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdR<a_cBits>Generate(): produces binary test data for a
 * fld-from-memory worker.  Each test record holds a random FCW/FSW, a random
 * input value and the worker output, generated once per rounding mode.
 * Also emits the matching DumpAll function.
 *
 * @param a_cBits      Source floating point width: 32, 64 or 80.
 * @param a_rdTypeIn   Input value C type (e.g. RTFLOAT64U).
 * @param a_aSubTests  Subtest descriptor table to iterate.
 * @param a_TestType   Binary test record type.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdR ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
3527
/**
 * Emits the worker typedefs, the subtest table, the generator (via
 * GEN_FPU_LOAD) and the FpuLdR<a_cBits>Test() replay function for one
 * fld-from-memory width.  The replay function feeds each recorded
 * FCW/FSW/input triplet to the worker and checks that the produced FSW and
 * 80-bit result are identical to the recorded ones.
 *
 * @param a_cBits       Source floating point width: 32, 64 or 80.
 * @param a_rdTypeIn    Input value C type.
 * @param a_SubTestType Name for the subtest descriptor type.
 * @param a_aSubTests   Name for the subtest table.
 * @param a_TestType    Binary test record type.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3581
/* Instantiate typedefs, tables, generator and test for each fld source width. */
TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
3585
3586#ifdef TSTIEMAIMPL_WITH_GENERATOR
3587static RTEXITCODE FpuLdMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3588{
3589 RTEXITCODE rcExit = FpuLdR80Generate(cTests, papszNameFmts);
3590 if (rcExit == RTEXITCODE_SUCCESS)
3591 rcExit = FpuLdR64Generate(cTests, papszNameFmts);
3592 if (rcExit == RTEXITCODE_SUCCESS)
3593 rcExit = FpuLdR32Generate(cTests, papszNameFmts);
3594 return rcExit;
3595}
3596
3597static RTEXITCODE FpuLdMemDumpAll(const char * const *papszNameFmts)
3598{
3599 RTEXITCODE rcExit = FpuLdR80DumpAll(papszNameFmts);
3600 if (rcExit == RTEXITCODE_SUCCESS)
3601 rcExit = FpuLdR64DumpAll(papszNameFmts);
3602 if (rcExit == RTEXITCODE_SUCCESS)
3603 rcExit = FpuLdR32DumpAll(papszNameFmts);
3604 return rcExit;
3605}
3606#endif
3607
/** Runs the replay tests for all three fld-from-memory widths. */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
3614
3615
3616/*
3617 * Load integer values from memory.
3618 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdI<a_cBits>Generate(): produces binary test data for a
 * fild (load integer) worker.  Each record holds a random FCW/FSW, a random
 * input integer and the worker output, generated once per rounding mode.
 * Also emits the matching DumpAll function.
 *
 * @param a_cBits      Input integer width: 16, 32 or 64.
 * @param a_iTypeIn    Input integer C type (e.g. int64_t).
 * @param a_szFmtIn    Input format string (used by the test macro, not here).
 * @param a_aSubTests  Subtest descriptor table to iterate.
 * @param a_TestType   Binary test record type.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                /* Fix: record InVal too - it was omitted, leaving the iInVal member \
                   zero-initialized in every generated test record. */ \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdI ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
3652
/**
 * Emits the worker typedefs, the subtest table, the generator (via
 * GEN_FPU_LOAD_INT) and the FpuLdI<a_cBits>Test() replay function for one
 * fild source width.  The replay function feeds each recorded
 * FCW/FSW/integer triplet to the worker and checks the resulting FSW and
 * 80-bit value against the recorded output.
 *
 * @param a_cBits       Input integer width: 16, 32 or 64.
 * @param a_iTypeIn     Input integer C type.
 * @param a_szFmtIn     printf format specifier for the input integer.
 * @param a_SubTestType Name for the subtest descriptor type.
 * @param a_aSubTests   Name for the subtest table.
 * @param a_TestType    Binary test record type.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3704
/* Instantiate typedefs, tables, generator and test for each fild source width. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
3708
3709#ifdef TSTIEMAIMPL_WITH_GENERATOR
3710static RTEXITCODE FpuLdIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3711{
3712 RTEXITCODE rcExit = FpuLdI64Generate(cTests, papszNameFmts);
3713 if (rcExit == RTEXITCODE_SUCCESS)
3714 rcExit = FpuLdI32Generate(cTests, papszNameFmts);
3715 if (rcExit == RTEXITCODE_SUCCESS)
3716 rcExit = FpuLdI16Generate(cTests, papszNameFmts);
3717 return rcExit;
3718}
3719
3720static RTEXITCODE FpuLdIntDumpAll(const char * const *papszNameFmts)
3721{
3722 RTEXITCODE rcExit = FpuLdI64DumpAll(papszNameFmts);
3723 if (rcExit == RTEXITCODE_SUCCESS)
3724 rcExit = FpuLdI32DumpAll(papszNameFmts);
3725 if (rcExit == RTEXITCODE_SUCCESS)
3726 rcExit = FpuLdI16DumpAll(papszNameFmts);
3727 return rcExit;
3728}
3729#endif
3730
/** Runs the replay tests for all three fild source widths. */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
3737
3738
3739/*
3740 * Load binary coded decimal values from memory.
3741 */
/** Worker function type for loading an 80-bit packed BCD value (fbld). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
/** Subtest descriptor type for the BCD load worker. */
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The packed BCD loading worker(s) under test. */
static FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY_BIN(fld_r80_from_d80)
};
3750
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for the packed BCD load worker(s).
 *
 * Each test picks a random FCW/FSW and BCD input, then runs the worker once
 * per rounding mode, recording the inputs and the resulting FSW/value.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of test records to generate per worker.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuLdD80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdD80[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                FPU_D80_IN_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuLdD80, g_aFpuLdD80)
#endif
3781
/**
 * Replays the recorded test data against the packed BCD load worker(s),
 * checking that the resulting FSW and 80-bit value match the records.
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
            continue;

        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        uint32_t const cTests = g_aFpuLdD80[iFn].cTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                 "%s -> fsw=%#06x %s\n"
                                 "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuLdD80[iFn].pfnNative; /* second variation uses the native worker, if any */
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuLdD80[iFn]);
    }
}
3824
3825
3826/*
3827 * Store values floating point values to memory.
3828 */
3829#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs exercising rounding edge cases when storing as
 *  32-bit floats (appended after the random inputs by the generator). */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Hand-picked 80-bit inputs exercising rounding edge cases when storing as
 *  64-bit floats. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Special inputs for the 80-bit store (identity conversion, so just a
 *  placeholder entry). */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/**
 * Emits FpuStR<a_cBits>Generate(): produces binary test data for a
 * fst-to-memory worker.  Random inputs plus the g_aFpuStR<bits>Specials
 * entries are each run for every rounding mode and a sweep of the
 * OM/UM/PM exception-mask bits.  Also emits the matching DumpAll function.
 *
 * @param a_cBits     Destination floating point width: 32, 64 or 80.
 * @param a_rdType    Destination value C type (e.g. RTFLOAT64U).
 * @param a_aSubTests Subtest descriptor table to iterate.
 * @param a_TestType  Binary test record type.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStR ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
3894
/**
 * Emits the worker typedefs, the subtest table, the generator (via
 * GEN_FPU_STORE) and the FpuStR<a_cBits>Test() replay function for one
 * fst destination width.  The replay function feeds each recorded
 * FCW/FSW/input triplet to the worker and checks the resulting FSW and
 * stored value against the records.
 *
 * @param a_cBits       Destination floating point width: 32, 64 or 80.
 * @param a_rdType      Destination value C type.
 * @param a_SubTestType Name for the subtest descriptor type.
 * @param a_aSubTests   Name for the subtest table.
 * @param a_TestType    Binary test record type.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3951
/* Instantiate typedefs, tables, generator and test for each fst destination width. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3955
3956#ifdef TSTIEMAIMPL_WITH_GENERATOR
3957static RTEXITCODE FpuStMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3958{
3959 RTEXITCODE rcExit = FpuStR80Generate(cTests, papszNameFmts);
3960 if (rcExit == RTEXITCODE_SUCCESS)
3961 rcExit = FpuStR64Generate(cTests, papszNameFmts);
3962 if (rcExit == RTEXITCODE_SUCCESS)
3963 rcExit = FpuStR32Generate(cTests, papszNameFmts);
3964 return rcExit;
3965}
3966
3967static RTEXITCODE FpuStMemDumpAll(const char * const *papszNameFmts)
3968{
3969 RTEXITCODE rcExit = FpuStR80DumpAll(papszNameFmts);
3970 if (rcExit == RTEXITCODE_SUCCESS)
3971 rcExit = FpuStR64DumpAll(papszNameFmts);
3972 if (rcExit == RTEXITCODE_SUCCESS)
3973 rcExit = FpuStR32DumpAll(papszNameFmts);
3974 return rcExit;
3975}
3976#endif
3977
/** Runs the replay tests for all three fst destination widths. */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
3984
3985
3986/*
3987 * Store integer values to memory or register.
3988 */
/** Subtest descriptor types for the integer store workers (fist/fistt). */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/** 16-bit integer store workers; fistt_r80_to_i16 has separate AMD and Intel
 *  entries since its results differ between the two vendors. */
static FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY_BIN(fist_r80_to_i16),
    ENTRY_BIN_AMD( fistt_r80_to_i16, 0),
    ENTRY_BIN_INTEL(fistt_r80_to_i16, 0),
};
/** 32-bit integer store workers. */
static FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY_BIN(fist_r80_to_i32),
    ENTRY_BIN(fistt_r80_to_i32),
};
/** 64-bit integer store workers. */
static FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY_BIN(fist_r80_to_i64),
    ENTRY_BIN(fistt_r80_to_i64),
};
4009
4010#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs around the int16 overflow/rounding boundaries
 *  (appended after the random inputs by the generator). */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked 80-bit inputs around the int32 overflow/rounding boundaries. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked 80-bit inputs around the int64 overflow/rounding boundaries. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
4100
/**
 * Emits FpuStI<a_cBits>Generate(): produces binary test data for a
 * fist/fistt (store integer) worker.  Random inputs plus the
 * g_aFpuStI<bits>Specials entries are each run for every rounding mode and a
 * sweep of the OM/UM/PM exception-mask bits.  Vendor-specific subtests that
 * don't match the host's EFL flavour are skipped (the native worker is
 * preferred when available).  Also emits the matching DumpAll function.
 *
 * @param a_cBits     Destination integer width: 16, 32 or 64.
 * @param a_iType     Destination integer C type.
 * @param a_szFmt     printf format specifier for the output (used by the test macro).
 * @param a_aSubTests Subtest descriptor table to iterate.
 * @param a_TestType  Binary test record type.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, iOutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStI ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
4150
/**
 * Emits the generator (via GEN_FPU_STORE_INT) and the FpuStI<a_cBits>Test()
 * replay function for one fist/fistt destination width.  The replay function
 * feeds each recorded FCW/FSW/input triplet to the worker and checks the
 * resulting FSW and integer output against the records.
 *
 * @param a_cBits       Destination integer width: 16, 32 or 64.
 * @param a_iType       Destination integer C type.
 * @param a_szFmt       printf format specifier for the output integer.
 * @param a_SubTestType Subtest descriptor type (declared separately above).
 * @param a_aSubTests   The subtest table.
 * @param a_TestType    Binary test record type.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_iType iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x " a_szFmt "\n" \
                                 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4195
//fistt_r80_to_i16 diffs for AMD, of course :-)

/* Instantiate generator + test driver for the 64-, 32- and 16-bit integer store workers. */
TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
4201
4202#ifdef TSTIEMAIMPL_WITH_GENERATOR
4203static RTEXITCODE FpuStIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
4204{
4205 RTEXITCODE rcExit = FpuStI64Generate(cTests, papszNameFmts);
4206 if (rcExit == RTEXITCODE_SUCCESS)
4207 rcExit = FpuStI32Generate(cTests, papszNameFmts);
4208 if (rcExit == RTEXITCODE_SUCCESS)
4209 rcExit = FpuStI16Generate(cTests, papszNameFmts);
4210 return rcExit;
4211}
4212static RTEXITCODE FpuStIntDumpAll(const char * const *papszNameFmts)
4213{
4214 RTEXITCODE rcExit = FpuStI64DumpAll(papszNameFmts);
4215 if (rcExit == RTEXITCODE_SUCCESS)
4216 rcExit = FpuStI32DumpAll(papszNameFmts);
4217 if (rcExit == RTEXITCODE_SUCCESS)
4218 rcExit = FpuStI16DumpAll(papszNameFmts);
4219 return rcExit;
4220}
4221#endif
4222
4223static void FpuStIntTest(void)
4224{
4225 FpuStI64Test();
4226 FpuStI32Test();
4227 FpuStI16Test();
4228}
4229
4230
4231/*
4232 * Store as packed BCD value (memory).
4233 */
4234typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
4235typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
4236TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
4237
4238static FPU_ST_D80_T g_aFpuStD80[] =
4239{
4240 ENTRY_BIN(fst_r80_to_d80),
4241};
4242
4243#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test vectors for fst_r80_to_d80, combining random inputs
 * and boundary specials with all four rounding modes and a sweep over the
 * OM/UM/PM exception mask bits.
 *
 * NOTE(review): the vector stream depends on the exact RandFcw/RandFsw/
 * RandR80Src call order, so the loop structure must not be reordered.
 */
static RTEXITCODE FpuStD80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    /* Values around the packed BCD range limits (at exponent bias + 59). */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuStD80[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; /* iMask>>1 sweeps the OM/UM/PM mask bits */
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    FPU_ST_D80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal };
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuStD80, g_aFpuStD80)
4297#endif
4298
4299
4300static void FpuStD80Test(void)
4301{
4302 X86FXSTATE State;
4303 RT_ZERO(State);
4304 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4305 {
4306 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
4307 continue;
4308
4309 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
4310 uint32_t const cTests = g_aFpuStD80[iFn].cTests;
4311 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
4312 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
4313 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4314 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4315 {
4316 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4317 {
4318 RTFLOAT80U const InVal = paTests[iTest].InVal;
4319 uint16_t uFswOut = 0;
4320 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4321 State.FCW = paTests[iTest].fFcw;
4322 State.FSW = paTests[iTest].fFswIn;
4323 pfn(&State, &uFswOut, &OutVal, &InVal);
4324 if ( uFswOut != paTests[iTest].fFswOut
4325 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
4326 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4327 "%s -> fsw=%#06x %s\n"
4328 "%s expected %#06x %s%s%s (%s)\n",
4329 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4330 FormatR80(&paTests[iTest].InVal),
4331 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
4332 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
4333 FswDiff(uFswOut, paTests[iTest].fFswOut),
4334 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
4335 FormatFcw(paTests[iTest].fFcw) );
4336 }
4337 pfn = g_aFpuStD80[iFn].pfnNative;
4338 }
4339
4340 FREE_DECOMPRESSED_TESTS(g_aFpuStD80[iFn]);
4341 }
4342}
4343
4344
4345
4346/*********************************************************************************************************************************
4347* x87 FPU Binary Operations *
4348*********************************************************************************************************************************/
4349
4350/*
4351 * Binary FPU operations on two 80-bit floating point values.
4352 */
4353TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
4354enum { kFpuBinaryHint_fprem = 1, };
4355
4356static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
4357{
4358 ENTRY_BIN(fadd_r80_by_r80),
4359 ENTRY_BIN(fsub_r80_by_r80),
4360 ENTRY_BIN(fsubr_r80_by_r80),
4361 ENTRY_BIN(fmul_r80_by_r80),
4362 ENTRY_BIN(fdiv_r80_by_r80),
4363 ENTRY_BIN(fdivr_r80_by_r80),
4364 ENTRY_BIN_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
4365 ENTRY_BIN_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
4366 ENTRY_BIN(fscale_r80_by_r80),
4367 ENTRY_BIN_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4368 ENTRY_BIN_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4369 ENTRY_BIN_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4370 ENTRY_BIN_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4371 ENTRY_BIN_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4372 ENTRY_BIN_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4373};
4374
4375#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test vectors for the r80-by-r80 binary workers.
 *
 * For each input pair it sweeps rounding mode and precision, recording a
 * fully-masked run, a fully-unmasked run, and then targeted runs probing the
 * exceptions that actually triggered.  fprem/fprem1 workers additionally get
 * whole partial-remainder sequences recorded until C2 clears.
 *
 * NOTE(review): the output stream depends on the exact RNG call order
 * (RandR80Src1/2, RandFcw, RandFsw, RTRandU32Ex), so statements must not be
 * reordered.
 */
static RTEXITCODE FpuBinaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
          RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
          RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
           once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Negative variants for the essentials of the above. */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
        /* fscale: Some fun with denormals and pseudo-denormals. */
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
        { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
        { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs       = (cTests - 144) / 4;
    uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        /* Prefer the native worker for generation when one is registered. */
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        if (   g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputPairs  = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
            bool fTargetRange = false;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
            {
                cNormalInputPairs++;
                if (   g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
                    && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
                    cTargetRangeInputs += fTargetRange = true;
                else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
                    if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
                        InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
                        InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
                        cTargetRangeInputs += fTargetRange = true;
                    }
            }
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1; /* retry this slot to guarantee a minimum of normal-pair inputs */
                continue;
            }

            uint16_t const fFcwExtra = 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Pass 1: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal1, &InVal2);
                    FPU_BINARY_R80_TEST_T const TestM
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal1, InVal2, ResM.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));

                    /* Pass 2: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal1, &InVal2);
                    FPU_BINARY_R80_TEST_T const TestU
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal1, InVal2, ResU.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));

                    /* Pass 3+: mask exactly the exceptions that fired, then variations thereof. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal1, &InVal2);
                        FPU_BINARY_R80_TEST_T const Test1
                            = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal1, InVal2, Res1.r80Result };
                        GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));

                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal1, &InVal2);
                            FPU_BINARY_R80_TEST_T const Test2
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal1, InVal2, Res2.r80Result };
                            GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal1, &InVal2);
                                    FPU_BINARY_R80_TEST_T const Test3
                                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal1, InVal2, Res3.r80Result };
                                    GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
                                }
                    }

                    /* If the values are in range and caused no exceptions, do the whole series of
                       partial reminders till we get the non-partial one or run into an exception. */
                    if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    {
                        IEMFPURESULT ResPrev = ResM;
                        for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
                        {
                            State.FCW = State.FCW | X86_FCW_MASK_ALL;
                            State.FSW = ResPrev.FSW;
                            IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
                            FPU_BINARY_R80_TEST_T const TestSeq
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResSeq.FSW, ResPrev.r80Result, InVal2, ResSeq.r80Result };
                            GenerateBinaryWrite(&BinOut, &TestSeq, sizeof(TestSeq));
                            ResPrev = ResSeq;
                        }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuBinaryR80, g_aFpuBinaryR80)
4572#endif
4573
4574
4575static void FpuBinaryR80Test(void)
4576{
4577 X86FXSTATE State;
4578 RT_ZERO(State);
4579 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4580 {
4581 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
4582 continue;
4583
4584 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
4585 uint32_t const cTests = g_aFpuBinaryR80[iFn].cTests;
4586 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
4587 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
4588 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4589 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4590 {
4591 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4592 {
4593 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4594 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4595 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4596 State.FCW = paTests[iTest].fFcw;
4597 State.FSW = paTests[iTest].fFswIn;
4598 pfn(&State, &Res, &InVal1, &InVal2);
4599 if ( Res.FSW != paTests[iTest].fFswOut
4600 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
4601 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4602 "%s -> fsw=%#06x %s\n"
4603 "%s expected %#06x %s%s%s (%s)\n",
4604 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4605 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4606 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4607 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4608 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4609 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4610 FormatFcw(paTests[iTest].fFcw) );
4611 }
4612 pfn = g_aFpuBinaryR80[iFn].pfnNative;
4613 }
4614
4615 FREE_DECOMPRESSED_TESTS(g_aFpuBinaryR80[iFn]);
4616 }
4617}
4618
4619
4620/*
4621 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
4622 */
4623#define int64_t_IS_NORMAL(a) 1
4624#define int32_t_IS_NORMAL(a) 1
4625#define int16_t_IS_NORMAL(a) 1
4626
4627#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special input pairs appended after the random tests by GEN_FPU_BINARY_SMALL;
   one arbitrary ("whatever") pair per second-operand type. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
4646
/**
 * Emits the test vector generator FpuBinary<a_UpBits>Generate() for binary
 * workers taking an r80 first operand and a smaller (r64/r32/i32/i16) second
 * operand, sweeping rounding, precision and the all-masked/all-unmasked
 * exception settings for each input pair.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinary ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* retry the slot to guarantee a minimum of normal input pairs */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        a_TestType const Test = { State.FCW, State.FSW, Res.FSW, InVal1, InVal2, Res.r80Result }; \
                        GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                    } \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinary ## a_UpBits, a_aSubTests)
4700#else
4701# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4702#endif
4703
/**
 * Instantiates the subtest table (fadd/fmul/fsub/fsubr/fdiv/fdivr variants),
 * the generator (via GEN_FPU_BINARY_SMALL) and the replay test driver
 * FpuBinary<a_UpBits>Test() for r80-by-<small-operand> binary workers.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* second variation uses the native worker */ \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4761
/* Instantiate generator + test driver for r64, r32, i32 and i16 second operands. */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
4766
4767
4768/*
4769 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
4770 */
4771#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special input pairs appended after the random tests by GEN_FPU_BINARY_FSW;
   one arbitrary ("whatever") pair per second-operand type. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
4795
/**
 * Emits the test vector generator FpuBinaryFsw<a_UpBits>Generate() for binary
 * operations that only produce an FSW (compares etc.); only the exception
 * mask is varied since rounding/precision are assumed not to matter here.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinaryFsw ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* retry the slot to guarantee a minimum of normal input pairs */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                a_TestType const Test = { State.FCW, State.FSW, fFswOut, InVal1, InVal2 }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinaryFsw ## a_UpBits, a_aSubTests)
4841#else
4842# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4843#endif
4844
/* Expands the subtest function-pointer typedef, subtest table (entries given
   via __VA_ARGS__), optional generator (see GEN_FPU_BINARY_FSW), and the
   FpuBinaryFsw<Width>Test() driver that replays recorded test data against
   both the default and native worker (cVars variations), comparing the FSW
   output only. */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4894
/* Instantiate the FSW-only binary subtests: fcom/fucom with an r80 operand,
   fcom with r64/r32 memory operands, and ficom with i32/i16 integers. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY_BIN(fcom_r80_by_r80), ENTRY_BIN(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY_BIN(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY_BIN(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t,    FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY_BIN(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t,    FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY_BIN(ficom_r80_by_i16))
4900
4901
4902/*
4903 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
4904 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** Subtest table for the r80-by-r80 compares that set EFLAGS (and FSW). */
static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY_BIN(fcomi_r80_by_r80),
    ENTRY_BIN(fucomi_r80_by_r80),
};
4912
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pair appended after the random tests. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for fcomi/fucomi (r80 by r80, result in EFLAGS + FSW).
 *
 * Each input pair is recorded twice: once with all FCW exception bits masked
 * and once unmasked (the iMask loop).  Non-normal random inputs are re-rolled
 * near the end of the run if too few normal/normal pairs came up, to
 * guarantee minimum arithmetic coverage.
 */
static RTEXITCODE FpuBinaryEflR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryEflR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Re-roll this iteration with a fresh random pair. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                FPU_BINARY_EFL_R80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal1, InVal2, fEflOut, };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuBinaryEflR80, g_aFpuBinaryEflR80)
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4963
4964static void FpuBinaryEflR80Test(void)
4965{
4966 X86FXSTATE State;
4967 RT_ZERO(State);
4968 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4969 {
4970 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
4971 continue;
4972
4973 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4974 uint32_t const cTests = g_aFpuBinaryEflR80[iFn].cTests;
4975 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4976 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4977 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4978 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4979 {
4980 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4981 {
4982 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4983 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4984 State.FCW = paTests[iTest].fFcw;
4985 State.FSW = paTests[iTest].fFswIn;
4986 uint16_t uFswOut = 0;
4987 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4988 if ( uFswOut != paTests[iTest].fFswOut
4989 || fEflOut != paTests[iTest].fEflOut)
4990 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4991 "%s -> fsw=%#06x efl=%#08x\n"
4992 "%s expected %#06x %#08x %s%s (%s)\n",
4993 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4994 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4995 iVar ? " " : "", uFswOut, fEflOut,
4996 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4997 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4998 FormatFcw(paTests[iTest].fFcw));
4999 }
5000 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
5001 }
5002
5003 FREE_DECOMPRESSED_TESTS(g_aFpuBinaryEflR80[iFn]);
5004 }
5005}
5006
5007
5008/*********************************************************************************************************************************
5009* x87 FPU Unary Operations *
5010*********************************************************************************************************************************/
5011
5012/*
5013 * Unary FPU operations on one 80-bit floating point value.
5014 *
5015 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
5016 * a rounding error or not.
5017 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Per-entry uExtra classification used by the generator to pick input ranges
   and to decide whether 1-ULP rounding differences are acceptable. */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_BIN_EX(      fabs_r80,    kUnary_Accurate),
    ENTRY_BIN_EX(      fchs_r80,    kUnary_Accurate),
    ENTRY_BIN_AMD_EX(  f2xm1_r80, 0, kUnary_Accurate),               // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_BIN_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_BIN_EX(      fsqrt_r80,   kUnary_Accurate),
    ENTRY_BIN_EX(      frndint_r80, kUnary_Accurate),
    ENTRY_BIN_AMD_EX(  fsin_r80, 0, kUnary_Accurate_Trigonometry),   // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_BIN_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_BIN_AMD_EX(  fcos_r80, 0, kUnary_Accurate_Trigonometry),   // value & C1 differences
    ENTRY_BIN_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
5034
5035#ifdef TSTIEMAIMPL_WITH_GENERATOR
5036
5037static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
5038{
5039 if ( enmKind == kUnary_Rounding_F2xm1
5040 && RTFLOAT80U_IS_NORMAL(pr80Val)
5041 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
5042 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
5043 return true;
5044 return false;
5045}
5046
DUMP_ALL_FN(FpuUnaryR80, g_aFpuUnaryR80)
/**
 * Generates test data for the one-result unary r80 workers (fabs, fchs,
 * f2xm1, fsqrt, frndint, fsin, fcos).
 *
 * Each input is exercised with all 4 rounding modes x 4 precision settings:
 * first with all exceptions masked, then all unmasked, and - whenever the
 * worker raised exceptions - with those specific exceptions masked and with
 * each of them individually unmasked.  The FCW MBZ bit 7 is (ab)used in the
 * recorded data to mark tests where a 1-ULP rounding difference is OK.
 */
static RTEXITCODE FpuUnaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),     /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),     /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0),                       /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0),                       /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0),                       /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0),                       /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0),                       /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0),                       /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker when present; skip flavours not matching this host. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        if (   g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                /* For f2xm1, steer a share of the inputs into the exponent range
                   where rounding actually matters (2^0..2^-69). */
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Re-roll to guarantee a minimum number of normal inputs. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Pass 1: everything masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    FPU_UNARY_R80_TEST_T const TestM
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));

                    /* Pass 2: everything unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    FPU_UNARY_R80_TEST_T const TestU
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));

                    /* Pass 3+: only the raised exceptions masked, then each unmasked in turn. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        FPU_UNARY_R80_TEST_T const Test1
                            = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result };
                        GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* New exceptions showed up; extend the mask and retry. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            FPU_UNARY_R80_TEST_T const Test2
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result };
                            GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    FPU_UNARY_R80_TEST_T const Test3
                                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result };
                                    GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
                                }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
5164#endif
5165
5166static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
5167{
5168 if (fFcw1 == fFcw2)
5169 return true;
5170 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
5171 {
5172 *pfRndErr = true;
5173 return true;
5174 }
5175 return false;
5176}
5177
/**
 * Compares two 80-bit values, optionally accepting a one-ULP difference as a
 * rounding error.
 *
 * When @a fRndErrOk is set and the values have the same sign, three 1-ULP
 * cases are accepted (setting *pfRndErr): same exponent with mantissas
 * differing by exactly 1, and - in either direction - the wrap-around at an
 * exponent boundary where 0xffff... at exponent e is adjacent to 1.0 (bit 63
 * only set) at exponent e+1.
 *
 * @returns true if identical or within the allowed rounding slack.
 */
static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
{
    if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
        return true;
    if (   fRndErrOk
        && pr80Val1->s.fSign == pr80Val2->s.fSign)
    {
        /* Same exponent: mantissas must differ by exactly one. */
        if (   (   pr80Val1->s.uExponent == pr80Val2->s.uExponent
                && (  pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
                    ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
                    : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
            ||
               /* One ULP across the exponent boundary, either direction. */
               (   pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
                && pr80Val1->s.uMantissa == UINT64_MAX
                && pr80Val2->s.uMantissa == RT_BIT_64(63))
            ||
               (   pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
                && pr80Val2->s.uMantissa == UINT64_MAX
                && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
        {
            *pfRndErr = true;
            return true;
        }
    }
    return false;
}
5204
5205
/**
 * Replays the recorded one-result unary r80 tests (fabs, fchs, f2xm1, fsqrt,
 * frndint, fsin, fcos), comparing FSW and the r80 result.
 *
 * FCW bit 7 in the recorded data marks tests where a one-ULP rounding
 * difference is acceptable (see the generator); it is stripped before use and
 * the number of such deviations is reported at the end.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
            continue;

        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        uint32_t const                     cTests  = g_aFpuUnaryR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARY             pfn     = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const                     cVars   = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80); /* marker bit, see generator */
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x    %s\n"
                                          "%s expected %#06x    %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs         += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryR80[iFn]);
    }
}
5255
5256
5257/*
5258 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
5259 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/** FSW-only unary worker table; uExtra=1 flags the fxam entry, which gets
 * special empty-register (FTW) treatment in the generator and test driver. */
static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY_BIN(ftst_r80),
    ENTRY_BIN_EX(fxam_r80, 1),
};
5267
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary workers (ftst, fxam).
 *
 * For ftst every input is run over all rounding/precision combinations, both
 * with exceptions masked and unmasked.  For fxam one record per input is
 * produced, randomly marking the register as empty; since FCW bit 7 must be
 * zero, it is reused in the stored FCW to encode the empty-tag state.
 */
static RTEXITCODE FpuUnaryFswR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        if (   g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        State.FTW = 0;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryFswR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Re-roll to guarantee a minimum number of normal inputs. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding  << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            FPU_UNARY_R80_TEST_T const Test = { State.FCW, State.FSW, fFswOut, InVal };
                            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                FPU_UNARY_R80_TEST_T const Test = { (uint16_t)(fFcw | fEmpty), State.FSW, fFswOut, InVal };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuUnaryFswR80, g_aFpuUnaryFswR80)
#endif
5341
5342
/**
 * Replays the recorded FSW-only unary tests (ftst, fxam), comparing just the
 * FSW output.
 *
 * FCW bit 7 in the recorded data encodes "register empty" for fxam (see the
 * generator); it is stripped from FCW and translated back into the FTW tag.
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
            continue;

        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        uint32_t const                     cTests  = g_aFpuUnaryFswR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARYFSW          pfn     = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const                     cVars   = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", fFswOut,
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryFswR80[iFn]);
    }
}
5384
5385/*
5386 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
5387 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/** Subtest table for unary workers pushing two results (fxtract, fptan,
 * fsincos); AMD/Intel flavours are split where observed behavior differs. */
static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY_BIN(fxtract_r80_r80),
    ENTRY_BIN_AMD(  fptan_r80_r80, 0),   // rounding differences
    ENTRY_BIN_INTEL(fptan_r80_r80, 0),
    ENTRY_BIN_AMD(  fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_BIN_INTEL(fsincos_r80_r80, 0),
};
5398
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the two-result unary r80 workers (fxtract, fptan,
 * fsincos).
 *
 * Same exception-mask exploration scheme as FpuUnaryR80Generate: each input
 * is run over all rounding/precision combinations with exceptions masked,
 * unmasked, and partially (un)masked when exceptions actually triggered.
 * For the trigonometric entries (iFn != 0) a share of the normal inputs is
 * steered into the 2^64..2^-64 exponent range.
 */
static RTEXITCODE FpuUnaryTwoR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
        if (   g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryTwoR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (iFn != 0) /* fptan/fsincos: bias inputs toward the interesting range. */
                {
                    unsigned const uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    /* Fix: this used to read g_aFpuUnaryR80[iFn].uExtra (the one-result
                       unary table) - a copy & paste leftover that doesn't correspond to
                       this table's entries, none of which use kUnary_Rounding_F2xm1.
                       The intended range size for 2^64..2^-64 is a constant. */
                    unsigned const cTargetExp = 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Re-roll to guarantee a minimum number of normal inputs. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0; /* for rounding error indication */
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Pass 1: everything masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                    pfn(&State, &ResM, &InVal);
                    FPU_UNARY_TWO_R80_TEST_T const TestM
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result1, ResM.r80Result2 };
                    GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));

                    /* Pass 2: everything unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                    pfn(&State, &ResU, &InVal);
                    FPU_UNARY_TWO_R80_TEST_T const TestU
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result1, ResU.r80Result2 };
                    GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));

                    /* Pass 3+: only the raised exceptions masked, then each unmasked in turn. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                        pfn(&State, &Res1, &InVal);
                        FPU_UNARY_TWO_R80_TEST_T const Test1
                            = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result1, Res1.r80Result2 };
                        GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));

                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* New exceptions showed up; extend the mask and retry. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                            pfn(&State, &Res2, &InVal);
                            FPU_UNARY_TWO_R80_TEST_T const Test2
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result1, Res2.r80Result2 };
                            GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                                    pfn(&State, &Res3, &InVal);
                                    FPU_UNARY_TWO_R80_TEST_T const Test3
                                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result1, Res3.r80Result2 };
                                    GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
                                }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuUnaryTwoR80, g_aFpuUnaryTwoR80)
#endif
5510
5511
/**
 * Replays the recorded two-result unary tests (fxtract, fptan, fsincos),
 * comparing the FSW and both 80-bit results against the recorded values.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
            continue;

        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        uint32_t const                         cTests  = g_aFpuUnaryTwoR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARYTWO              pfn     = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const                         cVars   = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryTwoR80[iFn]);
    }
}
5556
5557
5558/*********************************************************************************************************************************
5559* SSE floating point Binary Operations *
5560*********************************************************************************************************************************/
5561
5562/*
5563 * Binary SSE operations on packed single precision floating point values.
5564 */
5565TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5566
5567static SSE_BINARY_R32_T g_aSseBinaryR32[] =
5568{
5569 ENTRY_BIN(addps_u128),
5570 ENTRY_BIN(mulps_u128),
5571 ENTRY_BIN(subps_u128),
5572 ENTRY_BIN(minps_u128),
5573 ENTRY_BIN(divps_u128),
5574 ENTRY_BIN(maxps_u128),
5575 ENTRY_BIN(haddps_u128),
5576 ENTRY_BIN(hsubps_u128),
5577 ENTRY_BIN(sqrtps_u128),
5578 ENTRY_BIN(addsubps_u128),
5579 ENTRY_BIN(cvtps2pd_u128),
5580};
5581
5582#ifdef TSTIEMAIMPL_WITH_GENERATOR
5583DUMP_ALL_FN(SseBinaryR32, g_aSseBinaryR32)
/**
 * Generates binary test data for the packed single precision SSE workers
 * in g_aSseBinaryR32.
 *
 * Every input pair is run through all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then fully unmasked, and
 * finally - when exception flags were raised - with those flags replayed as
 * pending input flags and with individual exceptions unmasked, so the data
 * also covers the sticky-flag handling.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs to generate (raised to a
 *                          minimum of 192 below).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked special value pairs appended after the random inputs. */
    static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
          { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
        /** @todo More specials. */
    };

    /* Require a minimum share of all-normal input pairs among the random tests. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        /* Prefer the native worker for generating reference results. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the special values. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
            TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
            TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
            TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];

            /* Count all-normal pairs; if the quota is at risk near the end, redo
               this slot with fresh random values. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1. All exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResM; RT_ZERO(ResM);
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        TestData.OutVal    = ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2. All exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResU; RT_ZERO(ResU);
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        TestData.OutVal    = ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 3. If flags were raised, replay them as pending input flags. */
                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG Res1; RT_ZERO(Res1);
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            TestData.OutVal    = Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* 4. If new flags appeared, mask exactly the accumulated set. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res2; RT_ZERO(Res2);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                TestData.OutVal    = Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* 5. For multi-flag results, unmask each raised exception in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        X86XMMREG Res3; RT_ZERO(Res3);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        TestData.OutVal    = Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
5696#endif
5697
/**
 * Tests the packed single precision SSE binary workers against the recorded
 * test data in g_aSseBinaryR32.
 */
static void SseBinaryR32Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
            continue;

        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
        /* NOTE(review): this driver treats cTests as a byte count and divides
           by the entry size in the loop below, while the sibling drivers
           (e.g. SseBinaryR64Test) use cTests directly as an entry count -
           confirm which unit cTests holds after decompression; one of the
           two patterns is presumably wrong. */
        uint32_t const                  cbTests = g_aSseBinaryR32[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128          pfn     = g_aSseBinaryR32[iFn].pfn;
        uint32_t const                  cVars   = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
        if (!cbTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                uint32_t uMxCsrOut = pfn(paTests[iTest].fMxcsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                /* Compare all four lanes bit-for-bit. */
                bool fValsIdentical =    RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
                                      && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
                                      && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
                                      && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
                if (   uMxCsrOut != paTests[iTest].fMxcsrOut
                    || !fValsIdentical)
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
                                          "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
                                 iVar ? " " : "", uMxCsrOut,
                                 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
                                 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
                                 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
                                 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
                                 !fValsIdentical ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            /* Second variation (if any) exercises the native implementation. */
            pfn = g_aSseBinaryR32[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32[iFn]);
    }
}
5747
5748
5749/*
5750 * Binary SSE operations on packed single precision floating point values.
5751 */
5752TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5753
5754static SSE_BINARY_R64_T g_aSseBinaryR64[] =
5755{
5756 ENTRY_BIN(addpd_u128),
5757 ENTRY_BIN(mulpd_u128),
5758 ENTRY_BIN(subpd_u128),
5759 ENTRY_BIN(minpd_u128),
5760 ENTRY_BIN(divpd_u128),
5761 ENTRY_BIN(maxpd_u128),
5762 ENTRY_BIN(haddpd_u128),
5763 ENTRY_BIN(hsubpd_u128),
5764 ENTRY_BIN(sqrtpd_u128),
5765 ENTRY_BIN(addsubpd_u128),
5766 ENTRY_BIN(cvtpd2ps_u128),
5767};
5768
5769#ifdef TSTIEMAIMPL_WITH_GENERATOR
5770DUMP_ALL_FN(SseBinaryR64, g_aSseBinaryR32)
5771static RTEXITCODE SseBinaryR64Generate(uint32_t cTests, const char * const *papszNameFmts)
5772{
5773 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5774
5775 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
5776 {
5777 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
5778 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
5779 /** @todo More specials. */
5780 };
5781
5782 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5783 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5784 {
5785 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
5786
5787 IEMBINARYOUTPUT BinOut;
5788 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64[iFn]), RTEXITCODE_FAILURE);
5789
5790 uint32_t cNormalInputPairs = 0;
5791 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5792 {
5793 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5794
5795 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5796 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5797 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5798 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5799
5800 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5801 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
5802 cNormalInputPairs++;
5803 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5804 {
5805 iTest -= 1;
5806 continue;
5807 }
5808
5809 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5810 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5811 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5812 for (uint8_t iFz = 0; iFz < 2; iFz++)
5813 {
5814 uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
5815 | (iRounding << X86_MXCSR_RC_SHIFT)
5816 | (iDaz ? X86_MXCSR_DAZ : 0)
5817 | (iFz ? X86_MXCSR_FZ : 0)
5818 | X86_MXCSR_XCPT_MASK;
5819 X86XMMREG ResM; RT_ZERO(ResM);
5820 uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
5821 TestData.fMxcsrIn = uMxCsrIn;
5822 TestData.fMxcsrOut = uMxCsrOutM;
5823 TestData.OutVal = ResM;
5824 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5825
5826 uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
5827 X86XMMREG ResU; RT_ZERO(ResU);
5828 uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
5829 TestData.fMxcsrIn = uMxCsrIn;
5830 TestData.fMxcsrOut = uMxCsrOutU;
5831 TestData.OutVal = ResU;
5832 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5833
5834 uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
5835 if (fXcpt)
5836 {
5837 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5838 X86XMMREG Res1; RT_ZERO(Res1);
5839 uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
5840 TestData.fMxcsrIn = uMxCsrIn;
5841 TestData.fMxcsrOut = uMxCsrOut1;
5842 TestData.OutVal = Res1;
5843 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5844
5845 if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
5846 {
5847 fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
5848 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5849 X86XMMREG Res2; RT_ZERO(Res2);
5850 uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
5851 TestData.fMxcsrIn = uMxCsrIn;
5852 TestData.fMxcsrOut = uMxCsrOut2;
5853 TestData.OutVal = Res2;
5854 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5855 }
5856 if (!RT_IS_POWER_OF_TWO(fXcpt))
5857 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5858 if (fUnmasked & fXcpt)
5859 {
5860 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5861 X86XMMREG Res3; RT_ZERO(Res3);
5862 uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
5863 TestData.fMxcsrIn = uMxCsrIn;
5864 TestData.fMxcsrOut = uMxCsrOut3;
5865 TestData.OutVal = Res3;
5866 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5867 }
5868 }
5869 }
5870 }
5871 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5872 }
5873
5874 return RTEXITCODE_SUCCESS;
5875}
5876#endif
5877
5878
5879static void SseBinaryR64Test(void)
5880{
5881 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5882 {
5883 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
5884 continue;
5885
5886 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
5887 uint32_t const cTests = g_aSseBinaryR64[iFn].cTests;
5888 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
5889 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
5890 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5891 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5892 {
5893 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5894 {
5895 X86XMMREG Res; RT_ZERO(Res);
5896
5897 uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
5898 uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5899 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
5900 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
5901 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5902 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5903 "%s -> mxcsr=%#08x %s'%s\n"
5904 "%s expected %#08x %s'%s%s%s (%s)\n",
5905 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5906 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5907 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5908 iVar ? " " : "", uMxCsrOut,
5909 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
5910 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5911 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5912 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
5913 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
5914 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5915 ? " - val" : "",
5916 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5917 }
5918 pfn = g_aSseBinaryR64[iFn].pfnNative;
5919 }
5920
5921 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64[iFn]);
5922 }
5923}
5924
5925
5926/*
5927 * Binary SSE operations on packed single precision floating point values.
5928 */
5929TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5930
5931static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5932{
5933 ENTRY_BIN(addss_u128_r32),
5934 ENTRY_BIN(mulss_u128_r32),
5935 ENTRY_BIN(subss_u128_r32),
5936 ENTRY_BIN(minss_u128_r32),
5937 ENTRY_BIN(divss_u128_r32),
5938 ENTRY_BIN(maxss_u128_r32),
5939 ENTRY_BIN(cvtss2sd_u128_r32),
5940 ENTRY_BIN(sqrtss_u128_r32),
5941};
5942
5943#ifdef TSTIEMAIMPL_WITH_GENERATOR
5944DUMP_ALL_FN(SseBinaryU128R32, g_aSseBinaryU128R32)
/**
 * Generates binary test data for the scalar single precision SSE workers
 * (addss & friends) in g_aSseBinaryU128R32.
 *
 * Same scheme as SseBinaryR32Generate: all rounding modes and DAZ/FZ
 * combinations, masked and unmasked, with raised exception flags replayed
 * as pending input flags.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryU128R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked special values appended after the random inputs. */
    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    /* Require a minimum share of all-normal inputs among the random tests. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Prefer the native worker for generating reference results. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* 128-bit first operand plus a scalar 32-bit second operand. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Count all-normal inputs; if the quota is at risk near the end,
               redo this slot with fresh random values. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1. All exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResM; RT_ZERO(ResM);
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        TestData.OutVal    = ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2. All exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResU; RT_ZERO(ResU);
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        TestData.OutVal    = ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 3. If flags were raised, replay them as pending input flags. */
                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG Res1; RT_ZERO(Res1);
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.r32Val2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            TestData.OutVal    = Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* 4. If new flags appeared, mask exactly the accumulated set. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res2; RT_ZERO(Res2);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.r32Val2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                TestData.OutVal    = Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* 5. For multi-flag results, unmask each raised exception in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        X86XMMREG Res3; RT_ZERO(Res3);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.r32Val2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        TestData.OutVal    = Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
6054#endif
6055
6056static void SseBinaryU128R32Test(void)
6057{
6058 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
6059 {
6060 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
6061 continue;
6062
6063 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
6064 uint32_t const cTests = g_aSseBinaryU128R32[iFn].cTests;
6065 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
6066 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
6067 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6068 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6069 {
6070 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6071 {
6072 X86XMMREG Res; RT_ZERO(Res);
6073
6074 uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6075 uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
6076 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
6077 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
6078 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
6079 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
6080 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6081 || !fValsIdentical)
6082 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
6083 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
6084 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
6085 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6086 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
6087 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
6088 FormatR32(&paTests[iTest].r32Val2),
6089 iVar ? " " : "", uMxCsrOut,
6090 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
6091 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
6092 iVar ? " " : "", paTests[iTest].fMxcsrOut,
6093 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
6094 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
6095 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6096 !fValsIdentical ? " - val" : "",
6097 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6098 }
6099 }
6100
6101 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R32[iFn]);
6102 }
6103}
6104
6105
6106/*
6107 * Binary SSE operations on packed single precision floating point values (xxxsd xmm1, r/m64).
6108 */
6109TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
6110
6111static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
6112{
6113 ENTRY_BIN(addsd_u128_r64),
6114 ENTRY_BIN(mulsd_u128_r64),
6115 ENTRY_BIN(subsd_u128_r64),
6116 ENTRY_BIN(minsd_u128_r64),
6117 ENTRY_BIN(divsd_u128_r64),
6118 ENTRY_BIN(maxsd_u128_r64),
6119 ENTRY_BIN(cvtsd2ss_u128_r64),
6120 ENTRY_BIN(sqrtsd_u128_r64),
6121};
6122
6123#ifdef TSTIEMAIMPL_WITH_GENERATOR
6124DUMP_ALL_FN(SseBinaryU128R64, g_aSseBinaryU128R64)
/**
 * Generates binary test data for the scalar double precision SSE workers
 * (addsd & friends) in g_aSseBinaryU128R64.
 *
 * Same scheme as the other binary generators: all rounding modes and DAZ/FZ
 * combinations, masked and unmasked, with raised exception flags replayed as
 * pending input flags.  Note that results are written straight into
 * TestData.OutVal rather than a separate local.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryU128R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked special values appended after the random inputs. */
    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    /* Require a minimum share of all-normal inputs among the random tests. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Prefer the native worker for generating reference results. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* 128-bit first operand plus a scalar 64-bit second operand. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.r64Val2        = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Count all-normal inputs; if the quota is at risk near the end,
               redo this slot with fresh random values. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1. All exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2. All exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 3. If flags were raised, replay them as pending input flags. */
                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* 4. If new flags appeared, mask exactly the accumulated set. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* 5. For multi-flag results, unmask each raised exception in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
6218#endif
6219
6220
6221static void SseBinaryU128R64Test(void)
6222{
6223 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
6224 {
6225 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
6226 continue;
6227
6228 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
6229 uint32_t const cTests = g_aSseBinaryU128R64[iFn].cTests;
6230 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
6231 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
6232 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6233 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6234 {
6235 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6236 {
6237 X86XMMREG Res; RT_ZERO(Res);
6238
6239 uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6240 uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
6241 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6242 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6243 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6244 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
6245 "%s -> mxcsr=%#08x %s'%s\n"
6246 "%s expected %#08x %s'%s%s%s (%s)\n",
6247 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6248 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
6249 FormatR64(&paTests[iTest].r64Val2),
6250 iVar ? " " : "", uMxCsrOut,
6251 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
6252 iVar ? " " : "", paTests[iTest].fMxcsrOut,
6253 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
6254 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6255 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6256 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6257 ? " - val" : "",
6258 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6259 }
6260 }
6261
6262 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R64[iFn]);
6263 }
6264}
6265
6266
6267/*
6268 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
6269 */
6270TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
6271
6272static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
6273{
6274 ENTRY_BIN(cvttsd2si_i32_r64),
6275 ENTRY_BIN(cvtsd2si_i32_r64),
6276};
6277
6278#ifdef TSTIEMAIMPL_WITH_GENERATOR
6279DUMP_ALL_FN(SseBinaryI32R64, g_aSseBinaryI32R64)
/**
 * Generates test data for the SSE r64 -> i32 conversion helpers
 * (cvttsd2si, cvtsd2si).
 *
 * Each input value is run thru all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then all unmasked, and if
 * anything was raised, again with the raised flags pre-set and with each
 * raised exception individually unmasked.  Every invocation is written out
 * as one test record (input MXCSR, output MXCSR, output value).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          couldn't be opened or closed.
 * @param   cTests          Number of random inputs to generate per function
 *                          (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseBinaryI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Prefer the native worker when generating reference data. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Towards the end of the random range, redo inputs until we've met the
               quota of normal (non-denormal/NaN/Inf) values. */
            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int32_t i32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i32ValOut = i32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int32_t i32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i32ValOut = i32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If anything was raised, re-run with those flags already set on input... */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int32_t i32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r64ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i32ValOut = i32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* ...and if that raised something new, once more with the union masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int32_t i32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r64ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i32ValOut = i32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int32_t i32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r64ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i32ValOut = i32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
6380#endif
6381
6382
6383static void SseBinaryI32R64Test(void)
6384{
6385 X86FXSTATE State;
6386 RT_ZERO(State);
6387 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6388 {
6389 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
6390 continue;
6391
6392 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
6393 uint32_t const cTests = g_aSseBinaryI32R64[iFn].cTests;
6394 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
6395 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6396 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6397 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6398 {
6399 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6400 {
6401 int32_t i32Dst = 0;
6402
6403 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r64ValIn.u);
6404 if ( fMxcsr != paTests[iTest].fMxcsrOut
6405 || i32Dst != paTests[iTest].i32ValOut)
6406 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6407 "%s -> mxcsr=%#08x %RI32\n"
6408 "%s expected %#08x %RI32%s%s (%s)\n",
6409 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6410 FormatR64(&paTests[iTest].r64ValIn),
6411 iVar ? " " : "", fMxcsr, i32Dst,
6412 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6413 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6414 i32Dst != paTests[iTest].i32ValOut
6415 ? " - val" : "",
6416 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6417 }
6418 }
6419
6420 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R64[iFn]);
6421 }
6422}
6423
6424
6425/*
6426 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
6427 */
/** Subtest descriptor type for r64 -> i64 conversions (worker returns updated MXCSR). */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/** The SSE helpers converting a double-precision value to a signed quad-word. */
static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
6435
6436#ifdef TSTIEMAIMPL_WITH_GENERATOR
6437DUMP_ALL_FN(SseBinaryI64R64, g_aSseBinaryI64R64)
/**
 * Generates test data for the SSE r64 -> i64 conversion helpers
 * (cvttsd2si, cvtsd2si).
 *
 * Each input value is run thru all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then all unmasked, and if
 * anything was raised, again with the raised flags pre-set and with each
 * raised exception individually unmasked.  Every invocation is written out
 * as one test record (input MXCSR, output MXCSR, output value).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          couldn't be opened or closed.
 * @param   cTests          Number of random inputs to generate per function
 *                          (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseBinaryI64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Prefer the native worker when generating reference data. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Towards the end of the random range, redo inputs until we've met the
               quota of normal (non-denormal/NaN/Inf) values. */
            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int64_t i64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i64ValOut = i64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int64_t i64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i64OutU, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i64ValOut = i64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If anything was raised, re-run with those flags already set on input... */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int64_t i64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r64ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i64ValOut = i64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* ...and if that raised something new, once more with the union masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int64_t i64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r64ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i64ValOut = i64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int64_t i64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r64ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i64ValOut = i64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
6538#endif
6539
6540
6541static void SseBinaryI64R64Test(void)
6542{
6543 X86FXSTATE State;
6544 RT_ZERO(State);
6545 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6546 {
6547 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6548 continue;
6549
6550 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6551 uint32_t const cTests = g_aSseBinaryI64R64[iFn].cTests;
6552 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6553 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6554 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6555 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6556 {
6557 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6558 {
6559 int64_t i64Dst = 0;
6560 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r64ValIn.u);
6561 if ( fMxcsr != paTests[iTest].fMxcsrOut
6562 || i64Dst != paTests[iTest].i64ValOut)
6563 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6564 "%s -> mxcsr=%#08x %RI64\n"
6565 "%s expected %#08x %RI64%s%s (%s)\n",
6566 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6567 FormatR64(&paTests[iTest].r64ValIn),
6568 iVar ? " " : "", fMxcsr, i64Dst,
6569 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6570 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6571 i64Dst != paTests[iTest].i64ValOut
6572 ? " - val" : "",
6573 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6574 }
6575 }
6576
6577 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R64[iFn]);
6578 }
6579}
6580
6581
6582/*
6583 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
6584 */
/** Subtest descriptor type for r32 -> i32 conversions (worker returns updated MXCSR). */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);

/** The SSE helpers converting a single-precision value to a signed double-word. */
static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
{
    ENTRY_BIN(cvttss2si_i32_r32),
    ENTRY_BIN(cvtss2si_i32_r32),
};
6592
6593#ifdef TSTIEMAIMPL_WITH_GENERATOR
6594DUMP_ALL_FN(SseBinaryI32R32, g_aSseBinaryI32R32)
/**
 * Generates test data for the SSE r32 -> i32 conversion helpers
 * (cvttss2si, cvtss2si).
 *
 * Each input value is run thru all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then all unmasked, and if
 * anything was raised, again with the raised flags pre-set and with each
 * raised exception individually unmasked.  Every invocation is written out
 * as one test record (input MXCSR, output MXCSR, output value).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          couldn't be opened or closed.
 * @param   cTests          Number of random inputs to generate per function
 *                          (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseBinaryI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
    {
        /* Prefer the native worker when generating reference data. */
        PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Towards the end of the random range, redo inputs until we've met the
               quota of normal (non-denormal/NaN/Inf) values. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int32_t i32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i32ValOut = i32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int32_t i32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i32ValOut = i32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If anything was raised, re-run with those flags already set on input... */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int32_t i32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r32ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i32ValOut = i32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* ...and if that raised something new, once more with the union masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int32_t i32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r32ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i32ValOut = i32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int32_t i32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r32ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i32ValOut = i32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
6695#endif
6696
6697
6698static void SseBinaryI32R32Test(void)
6699{
6700 X86FXSTATE State;
6701 RT_ZERO(State);
6702 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6703 {
6704 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6705 continue;
6706
6707 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6708 uint32_t const cTests = g_aSseBinaryI32R32[iFn].cTests;
6709 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6710 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6711 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6712 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6713 {
6714 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6715 {
6716 int32_t i32Dst = 0;
6717
6718 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r32ValIn.u);
6719 if ( fMxcsr != paTests[iTest].fMxcsrOut
6720 || i32Dst != paTests[iTest].i32ValOut)
6721 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6722 "%s -> mxcsr=%#08x %RI32\n"
6723 "%s expected %#08x %RI32%s%s (%s)\n",
6724 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6725 FormatR32(&paTests[iTest].r32ValIn),
6726 iVar ? " " : "", fMxcsr, i32Dst,
6727 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6728 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6729 i32Dst != paTests[iTest].i32ValOut
6730 ? " - val" : "",
6731 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6732 }
6733 }
6734
6735 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R32[iFn]);
6736 }
6737}
6738
6739
6740/*
6741 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
6742 */
/** Subtest descriptor type for r32 -> i64 conversions (worker returns updated MXCSR). */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);

/** The SSE helpers converting a single-precision value to a signed quad-word. */
static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
{
    ENTRY_BIN(cvttss2si_i64_r32),
    ENTRY_BIN(cvtss2si_i64_r32),
};
6750
6751#ifdef TSTIEMAIMPL_WITH_GENERATOR
6752DUMP_ALL_FN(SseBinaryI64R32, g_aSseBinaryI64R32)
6753static RTEXITCODE SseBinaryI64R32Generate(uint32_t cTests, const char * const *papszNameFmts)
6754{
6755 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6756
6757 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6758 {
6759 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6760 /** @todo More specials. */
6761 };
6762
6763 X86FXSTATE State;
6764 RT_ZERO(State);
6765 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6766 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6767 {
6768 PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
6769
6770 IEMBINARYOUTPUT BinOut;
6771 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R32[iFn]), RTEXITCODE_FAILURE);
6772
6773 uint32_t cNormalInputPairs = 0;
6774 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6775 {
6776 SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
6777
6778 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6779
6780 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6781 cNormalInputPairs++;
6782 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6783 {
6784 iTest -= 1;
6785 continue;
6786 }
6787
6788 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6789 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6790 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6791 for (uint8_t iFz = 0; iFz < 2; iFz++)
6792 {
6793 uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6794 | (iRounding << X86_MXCSR_RC_SHIFT)
6795 | (iDaz ? X86_MXCSR_DAZ : 0)
6796 | (iFz ? X86_MXCSR_FZ : 0)
6797 | X86_MXCSR_XCPT_MASK;
6798 uint32_t fMxcsrM; int64_t i64OutM;
6799 fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r32ValIn.u);
6800 TestData.fMxcsrIn = State.MXCSR;
6801 TestData.fMxcsrOut = fMxcsrM;
6802 TestData.i64ValOut = i64OutM;
6803 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6804
6805 uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6806 uint32_t fMxcsrU; int64_t i64OutU;
6807 fMxcsrU = pfn(uMxCsrIn, &i64OutU, &TestData.r32ValIn.u);
6808 TestData.fMxcsrIn = State.MXCSR;
6809 TestData.fMxcsrOut = fMxcsrU;
6810 TestData.i64ValOut = i64OutU;
6811 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6812
6813 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6814 if (fXcpt)
6815 {
6816 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6817 uint32_t fMxcsr1; int64_t i64Out1;
6818 fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r32ValIn.u);
6819 TestData.fMxcsrIn = State.MXCSR;
6820 TestData.fMxcsrOut = fMxcsr1;
6821 TestData.i64ValOut = i64Out1;
6822 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6823
6824 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6825 {
6826 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6827 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6828 uint32_t fMxcsr2; int64_t i64Out2;
6829 fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r32ValIn.u);
6830 TestData.fMxcsrIn = State.MXCSR;
6831 TestData.fMxcsrOut = fMxcsr2;
6832 TestData.i64ValOut = i64Out2;
6833 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6834 }
6835 if (!RT_IS_POWER_OF_TWO(fXcpt))
6836 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6837 if (fUnmasked & fXcpt)
6838 {
6839 uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6840 uint32_t fMxcsr3; int64_t i64Out3;
6841 fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r32ValIn.u);
6842 TestData.fMxcsrIn = State.MXCSR;
6843 TestData.fMxcsrOut = fMxcsr3;
6844 TestData.i64ValOut = i64Out3;
6845 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6846 }
6847 }
6848 }
6849 }
6850 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6851 }
6852
6853 return RTEXITCODE_SUCCESS;
6854}
6855#endif
6856
6857
6858static void SseBinaryI64R32Test(void)
6859{
6860 X86FXSTATE State;
6861 RT_ZERO(State);
6862 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6863 {
6864 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6865 continue;
6866
6867 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6868 uint32_t const cTests = g_aSseBinaryI64R32[iFn].cTests;
6869 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6870 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6871 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6872 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6873 {
6874 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6875 {
6876 int64_t i64Dst = 0;
6877
6878 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r32ValIn.u);
6879 if ( fMxcsr != paTests[iTest].fMxcsrOut
6880 || i64Dst != paTests[iTest].i64ValOut)
6881 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6882 "%s -> mxcsr=%#08x %RI64\n"
6883 "%s expected %#08x %RI64%s%s (%s)\n",
6884 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6885 FormatR32(&paTests[iTest].r32ValIn),
6886 iVar ? " " : "", fMxcsr, i64Dst,
6887 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6888 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6889 i64Dst != paTests[iTest].i64ValOut
6890 ? " - val" : "",
6891 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6892 }
6893 }
6894
6895 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R32[iFn]);
6896 }
6897}
6898
6899
6900/*
6901 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6902 */
/** Subtest descriptor type for i32 -> r64 conversions (worker returns updated MXCSR). */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);

/** The SSE helpers converting a signed double-word to a double-precision value. */
static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i32)
};
6909
6910#ifdef TSTIEMAIMPL_WITH_GENERATOR
6911DUMP_ALL_FN(SseBinaryR64I32, g_aSseBinaryR64I32)
/**
 * Generates test data for the SSE i32 -> r64 conversion helper (cvtsi2sd).
 *
 * Each input integer is run thru all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then all unmasked, and if
 * anything was raised, again with the raised flags pre-set and with each
 * raised exception individually unmasked.  Every invocation is written out
 * as one test record (input MXCSR, output MXCSR, output value).
 *
 * No normal-value quota is needed here since every int32 input is valid.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          couldn't be opened or closed.
 * @param   cTests          Number of random inputs to generate per function
 *                          (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseBinaryR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
    {
        /* Prefer the native worker when generating reference data. */
        PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r64OutM, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r64OutU, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If anything was raised, re-run with those flags already set on input... */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r64Out1, &TestData.i32ValIn);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* ...and if that raised something new, once more with the union masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r64Out2, &TestData.i32ValIn);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r64Out3, &TestData.i32ValIn);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
7003#endif
7004
7005
7006static void SseBinaryR64I32Test(void)
7007{
7008 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
7009 {
7010 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
7011 continue;
7012
7013 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
7014 uint32_t const cTests = g_aSseBinaryR64I32[iFn].cTests;
7015 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
7016 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
7017 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7018 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7019 {
7020 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7021 {
7022 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7023
7024 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i32ValIn);
7025 if ( fMxcsr != paTests[iTest].fMxcsrOut
7026 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7027 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7028 "%s -> mxcsr=%#08x %s\n"
7029 "%s expected %#08x %s%s%s (%s)\n",
7030 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7031 &paTests[iTest].i32ValIn,
7032 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7033 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7034 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7035 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7036 ? " - val" : "",
7037 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7038 }
7039 }
7040
7041 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I32[iFn]);
7042 }
7043}
7044
7045
7046/*
7047 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
7048 */
/** Subtest descriptor type for i64 -> r64 conversions (worker returns updated MXCSR). */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);

/** The SSE helpers converting a signed quad-word to a double-precision value. */
static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i64),
};
7055
7056#ifdef TSTIEMAIMPL_WITH_GENERATOR
7057DUMP_ALL_FN(SseBinaryR64I64, g_aSseBinaryR64I64)
/**
 * Generates test data for the SSE i64 -> r64 conversion helper (cvtsi2sd).
 *
 * Each input integer is run thru all four rounding modes and all DAZ/FZ
 * combinations: first with all exceptions masked, then all unmasked, and if
 * anything was raised, again with the raised flags pre-set and with each
 * raised exception individually unmasked.  Every invocation is written out
 * as one test record (input MXCSR, output MXCSR, output value).
 *
 * No normal-value quota is needed here since every int64 input is valid.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          couldn't be opened or closed.
 * @param   cTests          Number of random inputs to generate per function
 *                          (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseBinaryR64I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Prefer the native worker when generating reference data. */
        PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);

            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r64OutM, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r64OutU, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If anything was raised, re-run with those flags already set on input... */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r64Out1, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* ...and if that raised something new, once more with the union masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r64Out2, &TestData.i64ValIn);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r64Out3, &TestData.i64ValIn);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
7149#endif
7150
7151
7152static void SseBinaryR64I64Test(void)
7153{
7154 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
7155 {
7156 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
7157 continue;
7158
7159 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
7160 uint32_t const cTests = g_aSseBinaryR64I64[iFn].cTests;
7161 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
7162 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
7163 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7164 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7165 {
7166 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7167 {
7168 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7169
7170 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i64ValIn);
7171 if ( fMxcsr != paTests[iTest].fMxcsrOut
7172 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7173 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7174 "%s -> mxcsr=%#08x %s\n"
7175 "%s expected %#08x %s%s%s (%s)\n",
7176 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7177 &paTests[iTest].i64ValIn,
7178 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7179 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7180 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7181 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7182 ? " - val" : "",
7183 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7184 }
7185 }
7186
7187 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I64[iFn]);
7188 }
7189}
7190
7191
/*
 * SSE operations converting single signed double-word integers to single-precision
 * floating point values (probably only cvtsi2ss).
 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);

/** Subtest table for the i32 -> r32 conversion workers. */
static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
{
    ENTRY_BIN(cvtsi2ss_r32_i32),
};
7201
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I32, g_aSseBinaryR32I32)
/**
 * Generates binary test data for the i32 -> r32 conversion workers.
 *
 * Every input value is run for all four rounding modes and all DAZ/FZ
 * combinations; each combination is exercised with all exceptions masked,
 * all unmasked, and with raised exception flags fed back in, writing one
 * test record per invocation.  The record sequence defines the binary
 * test-data format, so the ordering below must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random input values (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
    {
        /* Use the native (assembly) worker for reference output when available. */
        PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT32U r32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r32OutM, &TestData.i32ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r32ValOut = r32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT32U r32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r32OutU, &TestData.i32ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r32ValOut = r32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: exception flags raised above set on input, still unmasked. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT32U r32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r32Out1, &TestData.i32ValIn);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r32ValOut = r32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if additional flags were raised, rerun with exactly those masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT32U r32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r32Out2, &TestData.i32ValIn);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r32ValOut = r32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: when several exceptions fired, try each with just it unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT32U r32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r32Out3, &TestData.i32ValIn);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r32ValOut = r32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7296
7297
7298static void SseBinaryR32I32Test(void)
7299{
7300 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
7301 {
7302 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
7303 continue;
7304
7305 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
7306 uint32_t const cTests = g_aSseBinaryR32I32[iFn].cTests;
7307 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
7308 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
7309 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7310 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7311 {
7312 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7313 {
7314 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7315
7316 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i32ValIn);
7317 if ( fMxcsr != paTests[iTest].fMxcsrOut
7318 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7319 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7320 "%s -> mxcsr=%#08x %RI32\n"
7321 "%s expected %#08x %RI32%s%s (%s)\n",
7322 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7323 &paTests[iTest].i32ValIn,
7324 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7325 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7326 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7327 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7328 ? " - val" : "",
7329 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7330 }
7331 }
7332
7333 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I32[iFn]);
7334 }
7335}
7336
7337
/*
 * SSE operations converting single signed quad-word integers to single-precision
 * floating point values (probably only cvtsi2ss).
 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);

/** Subtest table for the i64 -> r32 conversion workers. */
static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
{
    ENTRY_BIN(cvtsi2ss_r32_i64),
};
7347
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I64, g_aSseBinaryR32I64)
/**
 * Generates binary test data for the i64 -> r32 conversion workers.
 *
 * Every input value is run for all four rounding modes and all DAZ/FZ
 * combinations; each combination is exercised with all exceptions masked,
 * all unmasked, and with raised exception flags fed back in, writing one
 * test record per invocation.  The record sequence defines the binary
 * test-data format, so the ordering below must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random input values (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
    {
        /* Use the native (assembly) worker for reference output when available. */
        PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);

            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT32U r32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r32OutM, &TestData.i64ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r32ValOut = r32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT32U r32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r32OutU, &TestData.i64ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r32ValOut = r32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: exception flags raised above set on input, still unmasked. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT32U r32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r32Out1, &TestData.i64ValIn);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r32ValOut = r32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if additional flags were raised, rerun with exactly those masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT32U r32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r32Out2, &TestData.i64ValIn);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r32ValOut = r32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: when several exceptions fired, try each with just it unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT32U r32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r32Out3, &TestData.i64ValIn);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r32ValOut = r32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7442
7443
7444static void SseBinaryR32I64Test(void)
7445{
7446 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7447 {
7448 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
7449 continue;
7450
7451 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
7452 uint32_t const cTests = g_aSseBinaryR32I64[iFn].cTests;
7453 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
7454 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
7455 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7456 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7457 {
7458 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7459 {
7460 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7461
7462 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i64ValIn);
7463 if ( fMxcsr != paTests[iTest].fMxcsrOut
7464 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7465 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7466 "%s -> mxcsr=%#08x %RI32\n"
7467 "%s expected %#08x %RI32%s%s (%s)\n",
7468 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7469 &paTests[iTest].i64ValIn,
7470 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7471 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7472 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7473 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7474 ? " - val" : "",
7475 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7476 }
7477 }
7478
7479 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I64[iFn]);
7480 }
7481}
7482
7483
/*
 * Compare SSE operations on scalar single-precision floating point values - outputting only EFLAGS.
 */
TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSRR32R32);

/** Subtest table for the EFLAGS-producing r32 compare workers. */
static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
{
    ENTRY_BIN(ucomiss_u128),
    ENTRY_BIN(comiss_u128),
    ENTRY_BIN_AVX(vucomiss_u128),
    ENTRY_BIN_AVX(vcomiss_u128),
};
7496
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR32R32, g_aSseCompareEflR32R32)
/**
 * Generates binary test data for the EFLAGS-producing r32 compare workers
 * ((v)ucomiss/(v)comiss).
 *
 * Random input pairs (with a minimum quota of normal-number pairs) are run for
 * all rounding modes and DAZ/FZ combinations, each with all exceptions masked,
 * all unmasked, and with raised exception flags fed back in.  One test record
 * is written per invocation; the record sequence defines the binary test-data
 * format, so the ordering below must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareEflR32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
    {
        /* Use the native (assembly) worker for reference output when available. */
        PFNIEMAIMPLF2EFLMXCSRR32R32 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR32R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Redraw near the end of the run if the normal-pair quota hasn't been met yet. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
                && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = fMxcsrIn;
                        uint32_t fEFlagsM = fEFlags;
                        fMxcsrM = pfn(fMxcsrIn, &fEFlagsM, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = fMxcsrIn;
                        uint32_t fEFlagsU = fEFlags;
                        fMxcsrU = pfn(fMxcsrIn, &fEFlagsU, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: exception flags raised above set on input, still unmasked. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = fMxcsrIn;
                            uint32_t fEFlags1 = fEFlags;
                            fMxcsr1 = pfn(fMxcsrIn, &fEFlags1, TestData.r32ValIn1, TestData.r32ValIn2);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.fEflIn = fEFlags;
                            TestData.fEflOut = fEFlags1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if additional flags were raised, rerun with exactly those masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = fMxcsrIn;
                                uint32_t fEFlags2 = fEFlags;
                                fMxcsr2 = pfn(fMxcsrIn, &fEFlags2, TestData.r32ValIn1, TestData.r32ValIn2);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.fEflIn = fEFlags;
                                TestData.fEflOut = fEFlags2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: when several exceptions fired, try each with just it unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = fMxcsrIn;
                                        uint32_t fEFlags3 = fEFlags;
                                        fMxcsr3 = pfn(fMxcsrIn, &fEFlags3, TestData.r32ValIn1, TestData.r32ValIn2);
                                        TestData.fMxcsrIn = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.fEflIn = fEFlags;
                                        TestData.fEflOut = fEFlags3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7620
7621static void SseCompareEflR32R32Test(void)
7622{
7623 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7624 {
7625 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7626 continue;
7627
7628 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7629 uint32_t const cTests = g_aSseCompareEflR32R32[iFn].cTests;
7630 PFNIEMAIMPLF2EFLMXCSRR32R32 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7631 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7632 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7633 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7634 {
7635 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7636 {
7637 uint32_t fEFlags = paTests[iTest].fEflIn;
7638 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r32ValIn1, paTests[iTest].r32ValIn2);
7639 if ( fMxcsr != paTests[iTest].fMxcsrOut
7640 || fEFlags != paTests[iTest].fEflOut)
7641 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7642 "%s -> mxcsr=%#08x %#08x\n"
7643 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7644 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7645 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7646 iVar ? " " : "", fMxcsr, fEFlags,
7647 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7648 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7649 FormatMxcsr(paTests[iTest].fMxcsrIn),
7650 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7651 }
7652 }
7653
7654 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR32R32[iFn]);
7655 }
7656}
7657
7658
/*
 * Compare SSE operations on scalar double-precision floating point values - outputting only EFLAGS.
 * (Note: the original comment said "single-precision"; these are the r64 workers.)
 */
TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSRR64R64);

/** Subtest table for the EFLAGS-producing r64 compare workers. */
static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
{
    ENTRY_BIN(ucomisd_u128),
    ENTRY_BIN(comisd_u128),
    ENTRY_BIN_AVX(vucomisd_u128),
    ENTRY_BIN_AVX(vcomisd_u128)
};
7671
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR64R64, g_aSseCompareEflR64R64)
/**
 * Generates binary test data for the EFLAGS-producing r64 compare workers
 * ((v)ucomisd/(v)comisd).
 *
 * Random input pairs (with a minimum quota of normal-number pairs) are run for
 * all rounding modes and DAZ/FZ combinations, each with all exceptions masked,
 * all unmasked, and with raised exception flags fed back in.  One test record
 * is written per invocation; the record sequence defines the binary test-data
 * format, so the ordering below must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareEflR64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
    {
        /* Use the native (assembly) worker for reference output when available. */
        PFNIEMAIMPLF2EFLMXCSRR64R64 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR64R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Redraw near the end of the run if the normal-pair quota hasn't been met yet. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
                && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = fMxcsrIn;
                        uint32_t fEFlagsM = fEFlags;
                        fMxcsrM = pfn(fMxcsrIn, &fEFlagsM, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = fMxcsrIn;
                        uint32_t fEFlagsU = fEFlags;
                        fMxcsrU = pfn(fMxcsrIn, &fEFlagsU, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: exception flags raised above set on input, still unmasked. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = fMxcsrIn;
                            uint32_t fEFlags1 = fEFlags;
                            fMxcsr1 = pfn(fMxcsrIn, &fEFlags1, TestData.r64ValIn1, TestData.r64ValIn2);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.fEflIn = fEFlags;
                            TestData.fEflOut = fEFlags1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if additional flags were raised, rerun with exactly those masked. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = fMxcsrIn;
                                uint32_t fEFlags2 = fEFlags;
                                fMxcsr2 = pfn(fMxcsrIn, &fEFlags2, TestData.r64ValIn1, TestData.r64ValIn2);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.fEflIn = fEFlags;
                                TestData.fEflOut = fEFlags2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: when several exceptions fired, try each with just it unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = fMxcsrIn;
                                        uint32_t fEFlags3 = fEFlags;
                                        fMxcsr3 = pfn(fMxcsrIn, &fEFlags3, TestData.r64ValIn1, TestData.r64ValIn2);
                                        TestData.fMxcsrIn = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.fEflIn = fEFlags;
                                        TestData.fEflOut = fEFlags3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7795
7796static void SseCompareEflR64R64Test(void)
7797{
7798 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7799 {
7800 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7801 continue;
7802
7803 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7804 uint32_t const cTests = g_aSseCompareEflR64R64[iFn].cTests;
7805 PFNIEMAIMPLF2EFLMXCSRR64R64 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7806 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7807 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7808 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7809 {
7810 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7811 {
7812 uint32_t fEFlags = paTests[iTest].fEflIn;
7813 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r64ValIn1, paTests[iTest].r64ValIn2);
7814 if ( fMxcsr != paTests[iTest].fMxcsrOut
7815 || fEFlags != paTests[iTest].fEflOut)
7816 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7817 "%s -> mxcsr=%#08x %#08x\n"
7818 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7819 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7820 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7821 iVar ? " " : "", fMxcsr, fEFlags,
7822 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7823 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7824 FormatMxcsr(paTests[iTest].fMxcsrIn),
7825 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7826 }
7827 }
7828
7829 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR64R64[iFn]);
7830 }
7831}
7832
7833
/*
 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
 */
/** Maximum immediate to try to keep the testdata size under control (at least a little bit). */
#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f

TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);

/** Subtest table for the mask-producing r32 compare workers (cmpps/cmpss). */
static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
{
    ENTRY_BIN(cmpps_u128),
    ENTRY_BIN(cmpss_u128)
};
7847
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareF2XmmR32Imm8, g_aSseCompareF2XmmR32Imm8)
/**
 * Generates binary test data for the mask-producing r32 compare workers
 * (cmpps/cmpss).
 *
 * Random 4x r32 input pairs (with a minimum quota of all-normal pairs) are run
 * for every immediate up to SSE_COMPARE_F2_XMM_IMM8_MAX, every rounding mode
 * and DAZ/FZ combination, each with all exceptions masked, all unmasked, and
 * with raised exception flags fed back in.  One test record is written per
 * invocation; the record sequence defines the binary test-data format, so the
 * ordering below must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random input sets (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareF2XmmR32Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
    {
        /* Use the native (assembly) worker for reference output when available. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR32Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Redraw near the end of the run if the all-normal quota hasn't been met yet. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* Pass 1: all exceptions masked. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz ? X86_MXCSR_FZ : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResM;
                            uint32_t fMxcsrM = pfn(fMxcsrIn, &ResM, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrM;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResM;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 2: all exceptions unmasked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResU;
                            uint32_t fMxcsrU = pfn(fMxcsrIn, &ResU, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrU;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResU;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (fXcpt)
                            {
                                /* Pass 3: exception flags raised above set on input, still unmasked. */
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                                X86XMMREG Res1;
                                uint32_t fMxcsr1 = pfn(fMxcsrIn, &Res1, &Src, bImm);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr1;
                                TestData.bImm = bImm;
                                TestData.OutVal = Res1;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                                /* Pass 4: if additional flags were raised, rerun with exactly those masked. */
                                if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                                {
                                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                    X86XMMREG Res2;
                                    uint32_t fMxcsr2 = pfn(fMxcsrIn, &Res2, &Src, bImm);
                                    TestData.fMxcsrIn = fMxcsrIn;
                                    TestData.fMxcsrOut = fMxcsr2;
                                    TestData.bImm = bImm;
                                    TestData.OutVal = Res2;
                                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                }
                                /* Pass 5: when several exceptions fired, try each with just it unmasked. */
                                if (!RT_IS_POWER_OF_TWO(fXcpt))
                                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                        if (fUnmasked & fXcpt)
                                        {
                                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                            X86XMMREG Res3;
                                            uint32_t fMxcsr3 = pfn(fMxcsrIn, &Res3, &Src, bImm);
                                            TestData.fMxcsrIn = fMxcsrIn;
                                            TestData.fMxcsrOut = fMxcsr3;
                                            TestData.bImm = bImm;
                                            TestData.OutVal = Res3;
                                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                        }
                            }
                        }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7982
/**
 * Replays the recorded test vectors for the imm8-predicate SSE compare
 * workers on packed/scalar single-precision operands (cmpps/cmpss style).
 *
 * For each recorded test the stored MXCSR input, both 128-bit sources and
 * the immediate are fed to the worker, and the returned MXCSR plus all four
 * 32-bit result lanes are checked against the recorded reference output.
 */
static void SseCompareF2XmmR32Imm8Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
    {
        /* Skips disabled subtests; otherwise decompresses the test data. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR32Imm8[iFn]))
            continue;

        SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
        uint32_t const cTests = g_aSseCompareF2XmmR32Imm8[iFn].cTests;
        PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++) /* iVar=0: C worker, iVar>0: native variation (if any). */
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMMEDIAF2XMMSRC Src;
                X86XMMREG ValOut;

                Src.uSrc1 = paTests[iTest].InVal1;
                Src.uSrc2 = paTests[iTest].InVal2;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
                /* Fail if either the resulting MXCSR or any output lane differs. */
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
                    || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
                    || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
                    || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
                                          "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
                                          "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
                                 paTests[iTest].bImm,
                                 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
                                 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
                                  || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
                                  || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
                                  || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR32Imm8[iFn]);
    }
}
8036
8037
8038/*
8039 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
8040 */
/** Workers taking an imm8 compare predicate and double-precision operands
 *  (packed cmppd and scalar cmpsd). */
static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
{
    ENTRY_BIN(cmppd_u128),
    ENTRY_BIN(cmpsd_u128)
};
8046
8047#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareF2XmmR64Imm8, g_aSseCompareF2XmmR64Imm8)
/**
 * Generates reference test data for the imm8-predicate SSE compare workers
 * on packed/scalar double-precision operands (g_aSseCompareF2XmmR64Imm8).
 *
 * For every input pair and every imm8 predicate the worker is invoked in up
 * to five MXCSR configurations (all masked, all unmasked, pending flags set,
 * raised-flags masked, single-flag unmasked) and each result is written to
 * the binary output stream in the order produced.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output errors.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked operand pairs covering signed zero and infinity combinations. */
    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        /* Prefer the native worker for producing reference data when available. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative : g_aSseCompareF2XmmR64Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR64Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs while iTest < cTests, then the special values
               (specials use the same value in both lanes). */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Ensure a minimum number of all-normal input pairs by retrying the
               iteration near the end of the random range when short. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* 1st call: all exceptions masked. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz ? X86_MXCSR_FZ : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResM;
                            uint32_t fMxcsrM = pfn(fMxcsrIn, &ResM, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrM;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResM;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* 2nd call: all exceptions unmasked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResU;
                            uint32_t fMxcsrU = pfn(fMxcsrIn, &ResU, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrU;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResU;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (fXcpt)
                            {
                                /* 3rd call: the raised exception flags already set on input. */
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                                X86XMMREG Res1;
                                uint32_t fMxcsr1 = pfn(fMxcsrIn, &Res1, &Src, bImm);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr1;
                                TestData.bImm = bImm;
                                TestData.OutVal = Res1;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                                /* If new flags were raised, repeat with exactly those masked. */
                                if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                                {
                                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                    X86XMMREG Res2;
                                    uint32_t fMxcsr2 = pfn(fMxcsrIn, &Res2, &Src, bImm);
                                    TestData.fMxcsrIn = fMxcsrIn;
                                    TestData.fMxcsrOut = fMxcsr2;
                                    TestData.bImm = bImm;
                                    TestData.OutVal = Res2;
                                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                }
                                /* When several flags fired, also unmask them one at a time. */
                                if (!RT_IS_POWER_OF_TWO(fXcpt))
                                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                        if (fUnmasked & fXcpt)
                                        {
                                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                            X86XMMREG Res3;
                                            uint32_t fMxcsr3 = pfn(fMxcsrIn, &Res3, &Src, bImm);
                                            TestData.fMxcsrIn = fMxcsrIn;
                                            TestData.fMxcsrOut = fMxcsr3;
                                            TestData.bImm = bImm;
                                            TestData.OutVal = Res3;
                                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                        }
                            }
                        }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8172#endif
8173
/**
 * Replays the recorded test vectors for the imm8-predicate SSE compare
 * workers on packed/scalar double-precision operands (cmppd/cmpsd style).
 *
 * Verifies the returned MXCSR and both 64-bit result lanes against the
 * recorded reference output for every test record.
 */
static void SseCompareF2XmmR64Imm8Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        /* Skips disabled subtests; otherwise decompresses the test data. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR64Imm8[iFn]))
            continue;

        SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
        uint32_t const cTests = g_aSseCompareF2XmmR64Imm8[iFn].cTests;
        PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++) /* iVar=0: C worker, iVar>0: native variation (if any). */
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMMEDIAF2XMMSRC Src;
                X86XMMREG ValOut;

                Src.uSrc1 = paTests[iTest].InVal1;
                Src.uSrc2 = paTests[iTest].InVal2;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
                /* Fail if either the resulting MXCSR or any output lane differs. */
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
                    || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
                                          "%s -> mxcsr=%#08x %RX64'%RX64\n"
                                          "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
                                 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
                                 paTests[iTest].bImm,
                                 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
                                  || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR64Imm8[iFn]);
    }
}
8220
8221
8222/*
8223 * Convert SSE operations converting signed double-words to single-precision floating point values.
8224 */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);

/** cvtdq2ps: packed signed double-words -> packed single-precision floats. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
{
    ENTRY_BIN(cvtdq2ps_u128)
};
8231
8232#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmI32R32, g_aSseConvertXmmI32R32)
/**
 * Generates reference test data for the int32 -> single-precision conversion
 * workers (g_aSseConvertXmmI32R32, i.e. cvtdq2ps).
 *
 * Each input is run through the worker in several MXCSR configurations
 * (all masked, all unmasked, pending flags set, raised-flags masked,
 * single-flag unmasked) and every result is written to the binary stream.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output errors.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary integer values appended after the random inputs. */
    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Prefer the native worker for producing reference data when available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative ? g_aSseConvertXmmI32R32[iFn].pfnNative : g_aSseConvertXmmI32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs while iTest < cTests, then the special values. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st call: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd call: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd call: the raised exception flags already set on input. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If new flags were raised, repeat with exactly those masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* When several flags fired, also unmask them one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8322#endif
8323
/**
 * Replays the recorded test vectors for the int32 -> single-precision
 * conversion workers (cvtdq2ps).
 *
 * Verifies the returned MXCSR and all four single-precision result lanes
 * (bit-exact comparison) against the recorded reference output.
 */
static void SseConvertXmmI32R32Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Skips disabled subtests; otherwise decompresses the test data. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
            continue;

        SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
        uint32_t const cTests = g_aSseConvertXmmI32R32[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++) /* iVar=0: C worker, iVar>0: native variation (if any). */
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
                /* Fail if either the resulting MXCSR or any output lane differs. */
                if (   fMxCsr != paTests[iTest].fMxcsrOut
                    || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
                    || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
                    || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
                    || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
                                          "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
                                          "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
                                 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
                                 iVar ? " " : "", fMxCsr,
                                 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
                                 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
                                 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
                                 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
                                  || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
                                  || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
                                  || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R32[iFn]);
    }
}
8373
8374
8375/*
 * Convert SSE operations converting packed single-precision floating point values to signed double-words.
8377 */
/** cvtps2dq (rounds per MXCSR.RC) and cvttps2dq (truncates):
 *  packed single-precision floats -> packed signed double-words. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
{
    ENTRY_BIN(cvtps2dq_u128),
    ENTRY_BIN(cvttps2dq_u128)
};
8383
8384#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR32I32, g_aSseConvertXmmR32I32)
/**
 * Generates reference test data for the single-precision -> int32 conversion
 * workers (g_aSseConvertXmmR32I32, i.e. cvtps2dq/cvttps2dq).
 *
 * Each input is run through the worker in several MXCSR configurations
 * (all masked, all unmasked, pending flags set, raised-flags masked,
 * single-flag unmasked) and every result is written to the binary stream.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output errors.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked inputs covering signed zeroes and infinities in all lanes. */
    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Prefer the native worker for producing reference data when available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative ? g_aSseConvertXmmR32I32[iFn].pfnNative : g_aSseConvertXmmR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs while iTest < cTests, then the special values. */
            TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            /* Ensure a minimum number of all-normal inputs by retrying the
               iteration near the end of the random range when short. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st call: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd call: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd call: the raised exception flags already set on input. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If new flags were raised, repeat with exactly those masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* When several flags fired, also unmask them one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8485#endif
8486
/**
 * Replays the recorded test vectors for the single-precision -> int32
 * conversion workers (cvtps2dq/cvttps2dq).
 *
 * Verifies the returned MXCSR and all four signed double-word result lanes
 * against the recorded reference output.
 */
static void SseConvertXmmR32I32Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Skips disabled subtests; otherwise decompresses the test data. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
            continue;

        SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
        uint32_t const cTests = g_aSseConvertXmmR32I32[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++) /* iVar=0: C worker, iVar>0: native variation (if any). */
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
                /* Fail if either the resulting MXCSR or any output lane differs. */
                if (   fMxCsr != paTests[iTest].fMxcsrOut
                    || Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                    || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                    || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                    || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
                                          "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
                                          "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
                                 iVar ? " " : "", fMxCsr,
                                 Res.ai32[0], Res.ai32[1],
                                 Res.ai32[2], Res.ai32[3],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
                                 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
                                 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
                                 (   Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                                  || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                                  || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                                  || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32I32[iFn]);
    }
}
8536
8537
8538/*
8539 * Convert SSE operations converting signed double-words to double-precision floating point values.
8540 */
/** cvtdq2pd: packed signed double-words (low half) -> packed double-precision floats. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
{
    ENTRY_BIN(cvtdq2pd_u128)
};
8545
8546#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmI32R64, g_aSseConvertXmmI32R64)
/**
 * Generates reference test data for the int32 -> double-precision conversion
 * workers (g_aSseConvertXmmI32R64, i.e. cvtdq2pd).
 *
 * Each input is run through the worker in several MXCSR configurations
 * (all masked, all unmasked, pending flags set, raised-flags masked,
 * single-flag unmasked) and every result is written to the binary stream.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output errors.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary integer values appended after the random inputs. */
    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Prefer the native worker for producing reference data when available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative ? g_aSseConvertXmmI32R64[iFn].pfnNative : g_aSseConvertXmmI32R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs while iTest < cTests, then the special values. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st call: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd call: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd call: the raised exception flags already set on input. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If new flags were raised, repeat with exactly those masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* When several flags fired, also unmask them one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8636#endif
8637
/**
 * Tests the packed I32 -> packed R64 conversion workers against the recorded
 * test vectors in g_aSseConvertXmmI32R64.
 *
 * For each vector the worker is invoked with the recorded input MXCSR and
 * 128-bit input; the returned MXCSR and both R64 results must match the
 * recorded output bit for bit, otherwise the failure is reported via
 * RTTestFailed with a detailed diff.
 */
static void SseConvertXmmI32R64Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Skip subtests that are disabled or whose test data failed to decompress. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
            continue;

        SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
        uint32_t const                       cTests  = g_aSseConvertXmmI32R64[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128               pfn     = g_aSseConvertXmmI32R64[iFn].pfn;
        uint32_t const                       cVars   = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                /* The worker takes (MXCSR-in, result, op1, op2) and returns MXCSR-out;
                   Res doubles as first operand here, matching how it was generated. */
                uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
                if (   fMxCsr != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
                                 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
                                 iVar ? " " : "", fMxCsr,
                                 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R64[iFn]);
    }
}
8681
8682
/*
 * SSE operations converting double-precision floating point values to signed double-words.
 */
/** Worker table: packed R64 (XMM) -> packed I32 (XMM) conversions. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
{
    ENTRY_BIN(cvtpd2dq_u128),
    ENTRY_BIN(cvttpd2dq_u128)  /* truncating variant */
};
8691
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR64I32, g_aSseConvertXmmR64I32)
/**
 * Generates binary test vectors for the packed R64 -> packed I32 conversion
 * workers (cvtpd2dq/cvttpd2dq).
 *
 * Each random/special input pair is run under all four rounding modes and all
 * DAZ/FZ combinations: first with all exceptions masked, then all unmasked,
 * then with follow-up runs derived from the exception flags actually raised.
 * Every run appends one test record to the binary output file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the binary output
 *          file could not be opened or closed.
 * @param   cTests          Number of random inputs to generate (raised to at least 192).
 * @param   papszNameFmts   Filename format strings for the output files.
 */
static RTEXITCODE SseConvertXmmR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked edge-case inputs appended after the random ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
        { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Make sure a reasonable share of the random inputs are normal pairs. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Prefer the native worker (if any) as the reference implementation. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Quota not met near the end of the random range - redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the raised exception flags already set on input. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Pass 4: new flags appeared - mask the accumulated set and replay. */
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: for multi-flag sets, replay with each raised
                               exception left unmasked individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8790
/**
 * Tests the packed R64 -> packed I32 conversion workers (cvtpd2dq,
 * cvttpd2dq) against the recorded test vectors in g_aSseConvertXmmR64I32.
 *
 * Compares the returned MXCSR and all four I32 result elements against the
 * recorded output and reports mismatches via RTTestFailed.
 */
static void SseConvertXmmR64I32Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Skip subtests that are disabled or whose test data failed to decompress. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
            continue;

        SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
        uint32_t const                       cTests  = g_aSseConvertXmmR64I32[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128               pfn     = g_aSseConvertXmmR64I32[iFn].pfn;
        uint32_t const                       cVars   = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
                if (   fMxCsr != paTests[iTest].fMxcsrOut
                    || Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                    || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                    || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                    || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
                                          "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
                                          "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
                                 iVar ? " " : "", fMxCsr,
                                 Res.ai32[0], Res.ai32[1],
                                 Res.ai32[2], Res.ai32[3],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
                                 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
                                 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
                                 (   Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                                  || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                                  || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                                  || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64I32[iFn]);
    }
}
8839
8840
8841/*
8842 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8843 */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);

/** Worker table: packed R64 (XMM) -> packed I32 (MMX, 64-bit result) conversions. */
static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
{
    ENTRY_BIN(cvtpd2pi_u128),
    ENTRY_BIN(cvttpd2pi_u128)  /* truncating variant */
};
8851
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertMmXmm, g_aSseConvertMmXmm)
/**
 * Generates binary test vectors for the packed R64 (XMM) -> packed I32 (MMX)
 * conversion workers (cvtpd2pi/cvttpd2pi).
 *
 * Same MXCSR variation scheme as SseConvertXmmR64I32Generate: four rounding
 * modes x DAZ x FZ, masked and unmasked runs, plus follow-ups driven by the
 * exception flags that were actually raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on binary output open/close failure.
 * @param   cTests          Number of random inputs to generate (raised to at least 192).
 * @param   papszNameFmts   Filename format strings for the output files.
 */
static RTEXITCODE SseConvertMmXmmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked edge-case inputs appended after the random ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
        { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Make sure a reasonable share of the random inputs are normal pairs. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Prefer the native worker (if any) as the reference implementation. */
        PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmXmm[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Quota not met near the end of the random range - redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResM;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &u64ResM, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.OutVal.u  = u64ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResU;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &u64ResU, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.OutVal.u  = u64ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the raised exception flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint64_t u64Res1;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &u64Res1, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.OutVal.u  = u64Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Pass 4: new flags appeared - mask the accumulated set and replay. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint64_t u64Res2;
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &u64Res2, &TestData.InVal);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.OutVal.u  = u64Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: for multi-flag sets, replay with each raised
                               exception left unmasked individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint64_t u64Res3;
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &u64Res3, &TestData.InVal);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.OutVal.u  = u64Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8960
/**
 * Tests the packed R64 (XMM) -> packed I32 (MMX) conversion workers
 * (cvtpd2pi, cvttpd2pi) against the recorded test vectors.
 *
 * Compares the returned MXCSR and both I32 halves of the 64-bit result
 * against the recorded output, reporting mismatches via RTTestFailed.
 */
static void SseConvertMmXmmTest(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Skip subtests that are disabled or whose test data failed to decompress. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
            continue;

        SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
        uint32_t const                          cTests  = g_aSseConvertMmXmm[iFn].cTests;
        PFNIEMAIMPLMXCSRU64U128                 pfn     = g_aSseConvertMmXmm[iFn].pfn;
        uint32_t const                          cVars   = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTUINT64U ValOut;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, &paTests[iTest].InVal);
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
                    || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
                                          "%s -> mxcsr=%#08x %RI32'%RI32\n"
                                          "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
                                 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
                                  || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertMmXmm[iFn]);
    }
}
9001
9002
9003/*
9004 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
9005 */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);

/** Worker table: packed I32 (MMX) -> packed R64 (XMM) conversion. */
static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
{
    ENTRY_BIN(cvtpi2pd_u128)
};
9012
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR64Mm, g_aSseConvertXmmR64Mm)
/**
 * Generates binary test vectors for the packed I32 (MMX) -> packed R64 (XMM)
 * conversion worker (cvtpi2pd).
 *
 * Integer inputs cannot be "non-normal", so unlike the FP-input generators no
 * normal-pair quota is needed.  Each input is run under all four rounding
 * modes and all DAZ/FZ combinations, masked and unmasked, plus follow-up runs
 * driven by the exception flags raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on binary output open/close failure.
 * @param   cTests          Number of random inputs to generate (raised to at least 192).
 * @param   papszNameFmts   Filename format strings for the output files.
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Edge-case integer inputs appended after the random ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Prefer the native worker (if any) as the reference implementation. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the raised exception flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Pass 4: new flags appeared - mask the accumulated set and replay. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: for multi-flag sets, replay with each raised
                               exception left unmasked individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9098
/**
 * Tests the packed I32 (MMX) -> packed R64 (XMM) conversion worker
 * (cvtpi2pd) against the recorded test vectors.
 *
 * Compares the returned MXCSR and both R64 results (bit for bit) against the
 * recorded output, reporting mismatches via RTTestFailed.
 */
static void SseConvertXmmR64MmTest(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Skip subtests that are disabled or whose test data failed to decompress. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
            continue;

        SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
        uint32_t const                          cTests  = g_aSseConvertXmmR64Mm[iFn].cTests;
        PFNIEMAIMPLMXCSRU128U64                 pfn     = g_aSseConvertXmmR64Mm[iFn].pfn;
        uint32_t const                          cVars   = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG ValOut;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
                                 iVar ? " " : "", fMxcsr,
                                 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64Mm[iFn]);
    }
}
9140
9141
/*
 * SSE operations converting signed double-word values to single-precision floating-point values (probably only cvtpi2ps).
 */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);

/** Worker table: packed I32 (MMX) -> packed R32 (XMM) conversion. */
static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
{
    ENTRY_BIN(cvtpi2ps_u128)
};
9151
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR32Mm, g_aSseConvertXmmR32Mm)
/**
 * Generates binary test vectors for the packed I32 (MMX) -> packed R32 (XMM)
 * conversion worker (cvtpi2ps).
 *
 * Same variation scheme as SseConvertXmmR64MmGenerate: integer inputs need no
 * normal-pair quota; each input is run under all four rounding modes and all
 * DAZ/FZ combinations, masked and unmasked, plus exception-flag follow-ups.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on binary output open/close failure.
 * @param   cTests          Number of random inputs to generate (raised to at least 192).
 * @param   papszNameFmts   Filename format strings for the output files.
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Edge-case integer inputs appended after the random ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Prefer the native worker (if any) as the reference implementation. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the raised exception flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Pass 4: new flags appeared - mask the accumulated set and replay. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: for multi-flag sets, replay with each raised
                               exception left unmasked individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9237
/**
 * Tests the packed I32 (MMX) -> packed R32 (XMM) conversion worker
 * (cvtpi2ps) against the recorded test vectors.
 *
 * Compares the returned MXCSR and the two low R32 results (bit for bit)
 * against the recorded output, reporting mismatches via RTTestFailed.
 */
static void SseConvertXmmR32MmTest(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Skip subtests that are disabled or whose test data failed to decompress. */
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
            continue;

        SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
        uint32_t const                          cTests  = g_aSseConvertXmmR32Mm[iFn].cTests;
        PFNIEMAIMPLMXCSRU128U64                 pfn     = g_aSseConvertXmmR32Mm[iFn].pfn;
        uint32_t const                          cVars   = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG ValOut;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
                    || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
                                 iVar ? " " : "", fMxcsr,
                                 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
                                  || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32Mm[iFn]);
    }
}
9279
9280
9281/*
9282 * Convert SSE operations converting single-precision floating point values to signed double-word values.
9283 */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);

/** Worker table: packed R32 (XMM) -> packed I32 (MMX) conversions. */
static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
{
    ENTRY_BIN(cvtps2pi_u128),
    ENTRY_BIN(cvttps2pi_u128)  /* truncating variant */
};
9291
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertMmI32XmmR32, g_aSseConvertMmI32XmmR32)
/**
 * Generates binary test vectors for the packed R32 (XMM) -> packed I32 (MMX)
 * conversion workers (cvtps2pi/cvttps2pi).
 *
 * The two R32 inputs are packed into a 64-bit value before being fed to the
 * worker.  Uses the same MXCSR variation scheme as the other generators:
 * four rounding modes x DAZ x FZ, masked and unmasked, plus follow-up runs
 * driven by the exception flags actually raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on binary output open/close failure.
 * @param   cTests          Number of random inputs to generate (raised to at least 192).
 * @param   papszNameFmts   Filename format strings for the output files.
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked edge-case inputs appended after the random ones. */
    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
        { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Make sure a reasonable share of the random inputs are normal pairs. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        /* Prefer the native worker (if any) as the reference implementation. */
        PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmI32XmmR32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Quota not met near the end of the random range - redo this slot. */
                iTest -= 1;
                continue;
            }

            /* Pack the two R32 inputs into a 64-bit container for the worker. */
            RTFLOAT64U TestVal;
            TestVal.au32[0] = TestData.ar32InVal[0].u;
            TestVal.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResM;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &u64ResM, TestVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.OutVal.u  = u64ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResU;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &u64ResU, TestVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.OutVal.u  = u64ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the raised exception flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint64_t u64Res1;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &u64Res1, TestVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.OutVal.u  = u64Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* Pass 4: new flags appeared - mask the accumulated set and replay. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint64_t u64Res2;
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &u64Res2, TestVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.OutVal.u  = u64Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: for multi-flag sets, replay with each raised
                               exception left unmasked individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint64_t u64Res3;
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &u64Res3, TestVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.OutVal.u  = u64Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9404
9405static void SseConvertMmI32XmmR32Test(void)
9406{
9407 X86FXSTATE State;
9408 RT_ZERO(State);
9409
9410 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9411 {
9412 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9413 continue;
9414
9415 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9416 uint32_t const cTests = g_aSseConvertMmI32XmmR32[iFn].cTests;
9417 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9418 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9419 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9420 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9421 {
9422 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9423 {
9424 RTUINT64U ValOut;
9425 RTUINT64U ValIn;
9426
9427 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9428 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9429
9430 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, ValIn.u);
9431 if ( fMxcsr != paTests[iTest].fMxcsrOut
9432 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9433 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9434 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
9435 "%s -> mxcsr=%#08x %RI32'%RI32\n"
9436 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9437 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9438 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9439 iVar ? " " : "", fMxcsr,
9440 ValOut.ai32[0], ValOut.ai32[1],
9441 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9442 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9443 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9444 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9445 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9446 ? " - val" : "",
9447 FormatMxcsr(paTests[iTest].fMxcsrIn));
9448 }
9449 }
9450
9451 FREE_DECOMPRESSED_TESTS(g_aSseConvertMmI32XmmR32[iFn]);
9452 }
9453}
9454
9455
9456/*
9457 * SSE 4.2 pcmpxstrx instructions.
9458 */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);

/** Worker table: SSE4.2 PCMPISTRI (implicit-length string compare, index result). */
static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
{
    ENTRY_BIN_SSE_OPT(pcmpistri_u128),
};
9465
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits the dump-all helper for the pcmpistri test data. */
DUMP_ALL_FN(SseComparePcmpistri, g_aSsePcmpistri)
/**
 * Generates binary test data for the pcmpistri workers.
 *
 * For each random (or special) pair of 128-bit source values a single random
 * EFLAGS input is chosen and the reference worker is invoked once per possible
 * immediate (0..255), each invocation producing one test record.  The whole
 * round is then repeated with both sources set to the same value so the
 * all-elements-equal case is always covered.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random source pairs to generate (raised
 *                          to at least 192, see comment below).
 * @param   papszNameFmts   Output filename format strings, one slot per EFLAGS
 *                          behaviour flavour (NULL entries are skipped).
 */
static RTEXITCODE SseComparePcmpistriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked input pairs appended after the random ones. */
    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the specials. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            IEMPCMPISTRXSRC TestVal;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            /* One random EFLAGS input is reused for all 256 immediates. */
            uint32_t const fEFlagsIn = RandEFlags();
            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }

            /* Repeat the test with the input value being the same. */
            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9528
9529static void SseComparePcmpistriTest(void)
9530{
9531 X86FXSTATE State;
9532 RT_ZERO(State);
9533
9534 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9535 {
9536 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9537 continue;
9538
9539 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9540 uint32_t const cTests = g_aSsePcmpistri[iFn].cTests;
9541 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9542 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9543 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9544 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9545 {
9546 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9547 {
9548 IEMPCMPISTRXSRC TestVal;
9549 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9550 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9551
9552 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9553 uint32_t u32EcxOut = 0;
9554 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9555 if ( fEFlags != paTests[iTest].fEFlagsOut
9556 || u32EcxOut != paTests[iTest].u32EcxOut)
9557 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9558 "%s -> efl=%#08x %RU32\n"
9559 "%s expected %#08x %RU32%s%s\n",
9560 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9561 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9562 iVar ? " " : "", fEFlags, u32EcxOut,
9563 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9564 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9565 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9566 }
9567 }
9568
9569 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistri[iFn]);
9570 }
9571}
9572
9573
/** Subtest type for the implicit-length pcmpistrm workers (mask result written to an XMM register). */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);

/** The pcmpistrm worker(s) under test; a native variant, when present, is
 *  run as a second variation by the test loop. */
static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
};
9580
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits the dump-all helper for the pcmpistrm test data. */
DUMP_ALL_FN(SseComparePcmpistrm, g_aSsePcmpistrm)
/**
 * Generates binary test data for the pcmpistrm workers.
 *
 * Mirrors SseComparePcmpistriGenerate, except that the recorded result is the
 * 128-bit mask (TestData.OutVal) rather than an ECX index: per source pair one
 * random EFLAGS input is used for all 256 immediates, and the run is repeated
 * with both sources equal.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random source pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, one slot per EFLAGS
 *                          behaviour flavour (NULL entries are skipped).
 */
static RTEXITCODE SseComparePcmpistrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked input pairs appended after the random ones. */
    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistrm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the specials. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            IEMPCMPISTRXSRC TestVal;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            /* One random EFLAGS input is reused for all 256 immediates. */
            uint32_t const fEFlagsIn = RandEFlags();
            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }

            /* Repeat the test with the input value being the same. */
            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9643
9644static void SseComparePcmpistrmTest(void)
9645{
9646 X86FXSTATE State;
9647 RT_ZERO(State);
9648
9649 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9650 {
9651 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9652 continue;
9653
9654 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9655 uint32_t const cTests = g_aSsePcmpistrm[iFn].cTests;
9656 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9657 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9658 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9659 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9660 {
9661 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9662 {
9663 IEMPCMPISTRXSRC TestVal;
9664 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9665 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9666
9667 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9668 RTUINT128U OutVal;
9669 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9670 if ( fEFlags != paTests[iTest].fEFlagsOut
9671 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9672 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9673 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9674 "%s -> efl=%#08x %s\n"
9675 "%s expected %#08x %s%s%s\n",
9676 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9677 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9678 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9679 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9680 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9681 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9682 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9683 }
9684 }
9685
9686 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistrm[iFn]);
9687 }
9688}
9689
9690
/** Subtest type for the explicit-length pcmpestri workers (lengths supplied
 *  via RAX/RDX, index result written to ECX). */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);

/** The pcmpestri worker(s) under test; a native variant, when present, is
 *  run as a second variation by the test loop. */
static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestri_u128),
};
9697
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits the dump-all helper for the pcmpestri test data. */
DUMP_ALL_FN(SseComparePcmpestri, g_aSsePcmpestri)
/**
 * Generates binary test data for the pcmpestri workers.
 *
 * Like the pcmpistri generator, but additionally iterates explicit RAX/RDX
 * length values (-20 and 0, i.e. a negative and a zero length) for each source
 * pair; per length combination one random EFLAGS input is used for all 256
 * immediates, and the run is repeated with both sources equal.
 *
 * NOTE(review): after the equal-sources rerun, TestData.InVal2 remains set to
 * InVal1 for the remaining i64Rax/i64Rdx iterations of the same iTest, so
 * those also use equal sources — presumably harmless, but confirm intent.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random source pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, one slot per EFLAGS
 *                          behaviour flavour (NULL entries are skipped).
 */
static RTEXITCODE SseComparePcmpestriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked input pairs appended after the random ones. */
    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the specials. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Explicit lengths: each of RAX/RDX takes the values -20 and 0. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    /* One random EFLAGS input is reused for all 256 immediates. */
                    uint32_t const fEFlagsIn = RandEFlags();
                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }

                    /* Repeat the test with the input value being the same. */
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;

                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9769
9770static void SseComparePcmpestriTest(void)
9771{
9772 X86FXSTATE State;
9773 RT_ZERO(State);
9774
9775 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9776 {
9777 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9778 continue;
9779
9780 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9781 uint32_t const cTests = g_aSsePcmpestri[iFn].cTests;
9782 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9783 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9784 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9785 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9786 {
9787 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9788 {
9789 IEMPCMPESTRXSRC TestVal;
9790 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9791 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9792 TestVal.u64Rax = paTests[iTest].u64Rax;
9793 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9794
9795 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9796 uint32_t u32EcxOut = 0;
9797 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9798 if ( fEFlags != paTests[iTest].fEFlagsOut
9799 || u32EcxOut != paTests[iTest].u32EcxOut)
9800 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9801 "%s -> efl=%#08x %RU32\n"
9802 "%s expected %#08x %RU32%s%s\n",
9803 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9804 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9805 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9806 paTests[iTest].bImm,
9807 iVar ? " " : "", fEFlags, u32EcxOut,
9808 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9809 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9810 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9811 }
9812 }
9813
9814 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestri[iFn]);
9815 }
9816}
9817
9818
/** Subtest type for the explicit-length pcmpestrm workers (lengths supplied
 *  via RAX/RDX, mask result written to an XMM register). */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);

/** The pcmpestrm worker(s) under test; a native variant, when present, is
 *  run as a second variation by the test loop. */
static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
};
9825
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits the dump-all helper for the pcmpestrm test data. */
DUMP_ALL_FN(SseComparePcmpestrm, g_aSsePcmpestrm)
/**
 * Generates binary test data for the pcmpestrm workers.
 *
 * Mirrors SseComparePcmpestriGenerate, except that the recorded result is the
 * 128-bit mask (TestData.OutVal) rather than an ECX index: for each source
 * pair every RAX/RDX length combination (-20 and 0 each) is exercised with all
 * 256 immediates under a single random EFLAGS input, then repeated with both
 * sources equal.
 *
 * NOTE(review): as in the pcmpestri generator, TestData.InVal2 remains set to
 * InVal1 for the remaining i64Rax/i64Rdx iterations of the same iTest — so
 * those also use equal sources; confirm this is intentional.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random source pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, one slot per EFLAGS
 *                          behaviour flavour (NULL entries are skipped).
 */
static RTEXITCODE SseComparePcmpestrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked input pairs appended after the random ones. */
    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestrm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the specials. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Explicit lengths: each of RAX/RDX takes the values -20 and 0. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    /* One random EFLAGS input is reused for all 256 immediates. */
                    uint32_t const fEFlagsIn = RandEFlags();
                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }

                    /* Repeat the test with the input value being the same. */
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;

                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9897
9898static void SseComparePcmpestrmTest(void)
9899{
9900 X86FXSTATE State;
9901 RT_ZERO(State);
9902
9903 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9904 {
9905 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9906 continue;
9907
9908 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9909 uint32_t const cTests = g_aSsePcmpestrm[iFn].cTests;
9910 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9911 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9912 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9913 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9914 {
9915 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9916 {
9917 IEMPCMPESTRXSRC TestVal;
9918 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9919 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9920 TestVal.u64Rax = paTests[iTest].u64Rax;
9921 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9922
9923 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9924 RTUINT128U OutVal;
9925 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9926 if ( fEFlags != paTests[iTest].fEFlagsOut
9927 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9928 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9929 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9930 "%s -> efl=%#08x %s\n"
9931 "%s expected %#08x %s%s%s\n",
9932 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9933 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9934 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9935 paTests[iTest].bImm,
9936 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9937 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9938 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9939 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9940 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9941 }
9942 }
9943
9944 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestrm[iFn]);
9945 }
9946}
9947
9948
9949
9950int main(int argc, char **argv)
9951{
9952 int rc = RTR3InitExe(argc, &argv, 0);
9953 if (RT_FAILURE(rc))
9954 return RTMsgInitFailure(rc);
9955
9956 /*
9957 * Determin the host CPU.
9958 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9959 */
9960#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9961 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9962 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9963 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9964#else
9965 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9966#endif
9967
9968 /*
9969 * Parse arguments.
9970 */
9971 enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9972 enmMode = kModeNotSet;
9973#define CATEGORY_INT RT_BIT_32(0)
9974#define CATEGORY_FPU_LD_ST RT_BIT_32(1)
9975#define CATEGORY_FPU_BINARY_1 RT_BIT_32(2)
9976#define CATEGORY_FPU_BINARY_2 RT_BIT_32(3)
9977#define CATEGORY_FPU_OTHER RT_BIT_32(4)
9978#define CATEGORY_SSE_FP_BINARY RT_BIT_32(5)
9979#define CATEGORY_SSE_FP_OTHER RT_BIT_32(6)
9980#define CATEGORY_SSE_PCMPXSTRX RT_BIT_32(7)
9981 uint32_t fCategories = UINT32_MAX;
9982 bool fCpuData = true;
9983 bool fCommonData = true;
9984 uint32_t const cDefaultTests = 96;
9985 uint32_t cTests = cDefaultTests;
9986
9987 RTGETOPTDEF const s_aOptions[] =
9988 {
9989 // mode:
9990 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9991 { "--dump", 'G', RTGETOPT_REQ_NOTHING },
9992 { "--test", 't', RTGETOPT_REQ_NOTHING },
9993 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9994 // test selection (both)
9995 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9996 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9997 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9998 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9999 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
10000 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
10001 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
10002 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
10003 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
10004 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
10005 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
10006 { "--int", 'i', RTGETOPT_REQ_NOTHING },
10007 { "--include", 'I', RTGETOPT_REQ_STRING },
10008 { "--exclude", 'X', RTGETOPT_REQ_STRING },
10009 // generation parameters
10010 { "--common", 'm', RTGETOPT_REQ_NOTHING },
10011 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
10012 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
10013 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
10014 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
10015 { "--quiet-skipping", 'Q', RTGETOPT_REQ_NOTHING },
10016 };
10017
10018 RTGETOPTSTATE State;
10019 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
10020 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10021
10022 RTGETOPTUNION ValueUnion;
10023 while ((rc = RTGetOpt(&State, &ValueUnion)))
10024 {
10025 switch (rc)
10026 {
10027 case 'g':
10028 enmMode = kModeGenerate;
10029 g_cPicoSecBenchmark = 0;
10030 break;
10031 case 'G':
10032 enmMode = kModeDump;
10033 g_cPicoSecBenchmark = 0;
10034 break;
10035 case 't':
10036 enmMode = kModeTest;
10037 g_cPicoSecBenchmark = 0;
10038 break;
10039 case 'b':
10040 enmMode = kModeTest;
10041 g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
10042 break;
10043
10044 case 'a':
10045 fCpuData = true;
10046 fCommonData = true;
10047 fCategories = UINT32_MAX;
10048 break;
10049 case 'z':
10050 fCpuData = false;
10051 fCommonData = false;
10052 fCategories = 0;
10053 break;
10054
10055 case 'F':
10056 fCategories |= CATEGORY_FPU_LD_ST;
10057 break;
10058 case 'O':
10059 fCategories |= CATEGORY_FPU_OTHER;
10060 break;
10061 case 'B':
10062 fCategories |= CATEGORY_FPU_BINARY_1;
10063 break;
10064 case 'P':
10065 fCategories |= CATEGORY_FPU_BINARY_2;
10066 break;
10067 case 'S':
10068 fCategories |= CATEGORY_SSE_FP_BINARY;
10069 break;
10070 case 'T':
10071 fCategories |= CATEGORY_SSE_FP_OTHER;
10072 break;
10073 case 'C':
10074 fCategories |= CATEGORY_SSE_PCMPXSTRX;
10075 break;
10076 case 'i':
10077 fCategories |= CATEGORY_INT;
10078 break;
10079
10080 case 'I':
10081 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
10082 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
10083 RT_ELEMENTS(g_apszIncludeTestPatterns));
10084 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
10085 break;
10086 case 'X':
10087 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
10088 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
10089 RT_ELEMENTS(g_apszExcludeTestPatterns));
10090 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
10091 break;
10092
10093 case 'm':
10094 fCommonData = true;
10095 break;
10096 case 'c':
10097 fCpuData = true;
10098 break;
10099 case 'n':
10100 cTests = ValueUnion.u32;
10101 break;
10102
10103 case 'q':
10104 g_cVerbosity = 0;
10105 break;
10106 case 'v':
10107 g_cVerbosity++;
10108 break;
10109 case 'Q':
10110 g_fVerboseSkipping = false;
10111 break;
10112
10113 case 'h':
10114 RTPrintf("usage: %Rbn <-g|-t> [options]\n"
10115 "\n"
10116 "Mode:\n"
10117 " -g, --generate\n"
10118 " Generate test data.\n"
10119 " -t, --test\n"
10120 " Execute tests.\n"
10121 " -b, --benchmark\n"
10122 " Execute tests and do 1/2 seconds of benchmarking.\n"
10123 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
10124 "\n"
10125 "Test selection (both modes):\n"
10126 " -a, --all\n"
10127 " Enable all tests and generated test data. (default)\n"
10128 " -z, --zap, --none\n"
10129 " Disable all tests and test data types.\n"
10130 " -i, --int\n"
10131 " Enable non-FPU tests.\n"
10132 " -F, --fpu-ld-st\n"
10133 " Enable FPU load and store tests.\n"
10134 " -B, --fpu-binary-1\n"
10135 " Enable FPU binary 80-bit FP tests.\n"
10136 " -P, --fpu-binary-2\n"
10137 " Enable FPU binary 64- and 32-bit FP tests.\n"
10138 " -O, --fpu-other\n"
10139 " Enable FPU binary 64- and 32-bit FP tests.\n"
10140 " -S, --sse-fp-binary\n"
10141 " Enable SSE binary 64- and 32-bit FP tests.\n"
10142 " -T, --sse-fp-other\n"
10143 " Enable misc SSE 64- and 32-bit FP tests.\n"
10144 " -C, --sse-pcmpxstrx\n"
10145 " Enable SSE pcmpxstrx tests.\n"
10146 " -I,--include=<test-patter>\n"
10147 " Enable tests matching the given pattern.\n"
10148 " -X,--exclude=<test-patter>\n"
10149 " Skip tests matching the given pattern (overrides --include).\n"
10150 "\n"
10151 "Generation:\n"
10152 " -m, --common\n"
10153 " Enable generating common test data.\n"
10154 " -c, --only-cpu\n"
10155 " Enable generating CPU specific test data.\n"
10156 " -n, --number-of-test <count>\n"
10157 " Number of tests to generate. Default: %u\n"
10158 "\n"
10159 "Other:\n"
10160 " -v, --verbose\n"
10161 " -q, --quiet\n"
10162 " Noise level. Default: --quiet\n"
10163 " -Q, --quiet-skipping\n"
10164 " Don't display skipped tests.\n"
10165 "\n"
10166 "Tip! When working on a single instruction, use the the -I and -Q options to\n"
10167 " restrict the testing: %Rbn -tiQI \"shr_*\"\n"
10168 , argv[0], cDefaultTests, argv[0]);
10169 return RTEXITCODE_SUCCESS;
10170 default:
10171 return RTGetOptPrintError(rc, &ValueUnion);
10172 }
10173 }
10174
10175 static const struct
10176 {
10177 uint32_t fCategory;
10178 void (*pfnTest)(void);
10179#ifdef TSTIEMAIMPL_WITH_GENERATOR
10180 const char *pszFilenameFmt;
10181 RTEXITCODE (*pfnGenerate)(uint32_t cTests, const char * const *papszNameFmts);
10182 RTEXITCODE (*pfnDumpAll)(const char * const *papszNameFmts);
10183 uint32_t cMinTests;
10184# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10185 { a_fCategory, a_BaseNm ## Test, a_szFilenameFmt, a_BaseNm ## Generate, a_BaseNm ## DumpAll, a_cMinTests }
10186#else
10187# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10188 { a_fCategory, a_BaseNm ## Test }
10189#endif
10190#define GROUP_ENTRY_MANUAL(a_fCategory, a_BaseNm) \
10191 { a_fCategory, a_BaseNm ## Test }
10192 } s_aGroups[] =
10193 {
10194 GROUP_ENTRY(CATEGORY_INT, BinU8, "tstIEMAImplDataInt-%s.bin.gz", 0),
10195 GROUP_ENTRY(CATEGORY_INT, BinU16, "tstIEMAImplDataInt-%s.bin.gz", 0),
10196 GROUP_ENTRY(CATEGORY_INT, BinU32, "tstIEMAImplDataInt-%s.bin.gz", 0),
10197 GROUP_ENTRY(CATEGORY_INT, BinU64, "tstIEMAImplDataInt-%s.bin.gz", 0),
10198 GROUP_ENTRY(CATEGORY_INT, ShiftDbl, "tstIEMAImplDataInt-%s.bin.gz", 128),
10199 GROUP_ENTRY(CATEGORY_INT, Unary, "tstIEMAImplDataInt-%s.bin.gz", 0),
10200 GROUP_ENTRY(CATEGORY_INT, Shift, "tstIEMAImplDataInt-%s.bin.gz", 0),
10201 GROUP_ENTRY(CATEGORY_INT, MulDiv, "tstIEMAImplDataInt-%s.bin.gz", 0),
10202 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xchg),
10203 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xadd),
10204 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg),
10205 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg8b),
10206 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg16b),
10207 GROUP_ENTRY_MANUAL(CATEGORY_INT, Bswap),
10208
10209 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdConst, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10210 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10211 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10212 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10213
10214 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10215 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10216 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10217
10218 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10219 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryFswR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10220 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryEflR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10221
10222 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10223 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10224 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10225 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10226
10227 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10228 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10229 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10230 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10231
10232 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10233 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryFswR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10234 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryTwoR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10235
10236 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10237 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10238 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10239 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10240
10241 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10242 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10243 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10244 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10245
10246 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10247 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10248 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10249 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10250
10251 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR32R32, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10252 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR64R64, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10253 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR32Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10254 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR64Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10255
10256 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10257 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10258 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R64, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10259 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10260 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmXmm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10261 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10262 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10263 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmI32XmmR32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10264
10265 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10266 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10267 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10268 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10269 };
10270
    /*
     * Generate data?
     *
     * Produces the per-group binary test data files (see pszFilenameFmt in
     * s_aGroups) for every enabled category.  Only available when the
     * generator code is compiled in (TSTIEMAIMPL_WITH_GENERATOR).
     */
    if (enmMode == kModeGenerate)
    {
#ifdef TSTIEMAIMPL_WITH_GENERATOR
        /* Fall back to the default test count when the user didn't specify one. */
        if (cTests == 0)
            cTests = cDefaultTests;
        /* Reserve a small slice of the tests for zeroed operands (names suggest
           dst/src zero-value coverage — presumably; confirm in the generators). */
        g_cZeroDstTests = RT_MIN(cTests / 16, 32);
        g_cZeroSrcTests = g_cZeroDstTests * 2;

        /* Record which CPU produced the data (embedded in the output files). */
        RTMpGetDescription(NIL_RTCPUID, g_szCpuDesc, sizeof(g_szCpuDesc));

        /* For the revision, use the highest for this file and VBoxRT. */
        static const char s_szRev[] = "$Revision: 104208 $";
        const char *pszRev = s_szRev;
        while (*pszRev && !RT_C_IS_DIGIT(*pszRev))  /* skip the "$Revision: " prefix */
            pszRev++;
        g_uSvnRev = RTStrToUInt32(pszRev);
        g_uSvnRev = RT_MAX(g_uSvnRev, RTBldCfgRevision());

        /* Loop thru the groups and call the generate for any that's enabled. */
        for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
            if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnGenerate)
            {
                /* One filename format per EFLAGS behaviour variant; a NULL entry
                   suppresses output for that variant (common vs. per-CPU data). */
                const char * const apszNameFmts[] =
                {
                    /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
                    /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
                    /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
                };
                /* Honour the group's minimum test count; bail out on the first failure. */
                RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
                if (rcExit != RTEXITCODE_SUCCESS)
                    return rcExit;
            }
        return RTEXITCODE_SUCCESS;
#else
        return RTMsgErrorExitFailure("Test data generator not compiled in!");
#endif
    }
10311
10312 /*
10313 * Dump tables (used for the conversion, mostly useless now).
10314 */
10315 if (enmMode == kModeDump)
10316 {
10317#ifdef TSTIEMAIMPL_WITH_GENERATOR
10318 /* Loop thru the groups and call the generate for any that's enabled. */
10319 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10320 if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnDumpAll)
10321 {
10322 const char * const apszNameFmts[] =
10323 {
10324 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10325 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10326 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10327 };
10328 RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10329 if (rcExit != RTEXITCODE_SUCCESS)
10330 return rcExit;
10331 }
10332 return RTEXITCODE_SUCCESS;
10333#else
10334 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10335#endif
10336 }
10337

    /*
     * Do testing. Currently disabled by default as data needs to be checked
     * on both intel and AMD systems first.
     */
    rc = RTTestCreate("tstIEMAImpl", &g_hTest);
    AssertRCReturn(rc, RTEXITCODE_FAILURE);
    if (enmMode == kModeTest)
    {
        RTTestBanner(g_hTest);

        /* Allocate guarded memory for use in the tests.  With fHead=false the
           guard page presumably follows the object, so helper overruns fault
           immediately — confirm RTTestGuardedAlloc semantics.  Failures are
           recorded on g_hTest rather than aborting here. */
#define ALLOC_GUARDED_VAR(a_puVar) do { \
            rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
            if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
        } while (0)
        ALLOC_GUARDED_VAR(g_pu8);
        ALLOC_GUARDED_VAR(g_pu16);
        ALLOC_GUARDED_VAR(g_pu32);
        ALLOC_GUARDED_VAR(g_pu64);
        ALLOC_GUARDED_VAR(g_pu128);
        ALLOC_GUARDED_VAR(g_pu8Two);
        ALLOC_GUARDED_VAR(g_pu16Two);
        ALLOC_GUARDED_VAR(g_pu32Two);
        ALLOC_GUARDED_VAR(g_pu64Two);
        ALLOC_GUARDED_VAR(g_pu128Two);
        ALLOC_GUARDED_VAR(g_pfEfl);
        /* Only run the tests if every guarded allocation above succeeded. */
        if (RTTestErrorCount(g_hTest) == 0)
        {
            /* Loop thru the groups and call test function for anything that's enabled. */
            for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
                if ((s_aGroups[i].fCategory & fCategories))
                    s_aGroups[i].pfnTest();
        }
        /* Reports pass/fail counts and returns the appropriate exit code. */
        return RTTestSummaryAndDestroy(g_hTest);
    }
    return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10375}
10376
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette