tstIEMAImpl.cpp

Last change on this file was 106179, checked in by vboxsync, 2 months ago
VMM/IEM: Reworked the div, idiv, mul and imul assembly workers and how we raise division error exceptions. The latter is to simplify eflags management. bugref:10720
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 489.4 KB

Line
1	/* $Id: tstIEMAImpl.cpp 106179 2024-09-29 01:14:19Z vboxsync $ */
2	/** @file
3	* IEM Assembly Instruction Helper Testcase.
4	*/
5
6	/*
7	* Copyright (C) 2022-2024 Oracle and/or its affiliates.
8	*
9	* This file is part of VirtualBox base platform packages, as
10	* available from https://www.virtualbox.org.
11	*
12	* This program is free software; you can redistribute it and/or
13	* modify it under the terms of the GNU General Public License
14	* as published by the Free Software Foundation, in version 3 of the
15	* License.
16	*
17	* This program is distributed in the hope that it will be useful, but
18	* WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	* General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, see <https://www.gnu.org/licenses>.
24	*
25	* SPDX-License-Identifier: GPL-3.0-only
26	*/
27
28
29	/*********************************************************************************************************************************
30	* Header Files *
31	*********************************************************************************************************************************/
32	#include "../include/IEMInternal.h"
33
34	#include <iprt/errcore.h>
35	#include <VBox/log.h>
36	#include <iprt/assert.h>
37	#include <iprt/buildconfig.h>
38	#include <iprt/ctype.h>
39	#include <iprt/err.h>
40	#include <iprt/getopt.h>
41	#include <iprt/initterm.h>
42	#include <iprt/file.h>
43	#include <iprt/mem.h>
44	#include <iprt/message.h>
45	#include <iprt/mp.h>
46	#include <iprt/rand.h>
47	#include <iprt/stream.h>
48	#include <iprt/string.h>
49	#include <iprt/test.h>
50	#include <iprt/time.h>
51	#include <iprt/thread.h>
52	#include <iprt/vfs.h>
53	#include <iprt/zip.h>
54	#include <VBox/version.h>
55
56	#include "tstIEMAImpl.h"
57
58
59	/*********************************************************************************************************************************
60	* Defined Constants And Macros *
61	*********************************************************************************************************************************/
/**
 * Sub-test table initializers for helpers that also have a fixed test table.
 *
 * ENTRY_BIN_FIX is ENTRY_BIN_FIX_EX with a_uExtra = 0.  When
 * TSTIEMAIMPL_WITH_GENERATOR is defined the initializer is the same as
 * ENTRY_BIN_EX (name string, iemAImpl_<name>, NULL native function,
 * g_abTests_<name>, &g_cbTests_<name>, a_uExtra, native EFLAGS behaviour)
 * with the element count and address of g_aFixedTests_<name> appended.
 * Otherwise it simply forwards to ENTRY_BIN_EX.
 */
62	#define ENTRY_BIN_FIX(a_Name) ENTRY_BIN_FIX_EX(a_Name, 0)
63	#ifdef TSTIEMAIMPL_WITH_GENERATOR
64	# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) \
65	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
66	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
67	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
68	RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
69	#else
70	# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) ENTRY_BIN_EX(a_Name, a_uExtra)
71	#endif
72
/**
 * Like ENTRY_BIN / ENTRY_BIN_EX, except that iemAImpl_<name> is cast to
 * @a a_pfnType before being stored as the function pointer.
 * ENTRY_BIN_PFN_CAST passes 0 for a_uExtra.
 */
73	#define ENTRY_BIN_PFN_CAST(a_Name, a_pfnType) ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, 0)
74	#define ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
75	{ RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
76	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
77	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
78
/**
 * Basic sub-test table initializer: the stringified name, iemAImpl_<name> as
 * the function, NULL as the native function, the g_abTests_<name> data and a
 * pointer to its size g_cbTests_<name>, the extra value and the native EFLAGS
 * behaviour constant.  ENTRY_BIN passes 0 for a_uExtra.
 */
79	#define ENTRY_BIN(a_Name) ENTRY_BIN_EX(a_Name, 0)
80	#define ENTRY_BIN_EX(a_Name, a_uExtra) \
81	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
82	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
83	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
84
/**
 * Sub-test table initializer for AVX helpers.
 *
 * Same layout as ENTRY_BIN_EX.  When IEM_WITHOUT_ASSEMBLY is defined the
 * function pointer is iemAImpl_<name>_fallback instead of iemAImpl_<name>;
 * the test data symbols (g_abTests_<name>, g_cbTests_<name>) are the same in
 * both configurations.  ENTRY_BIN_AVX passes 0 for a_uExtra.
 */
85	#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
86	#ifndef IEM_WITHOUT_ASSEMBLY
87	# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
88	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
89	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
90	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
91	#else
92	# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
93	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
94	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
95	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
96	#endif
97
/**
 * Sub-test table initializer for the SSE helpers using this macro family.
 *
 * Expands exactly like ENTRY_BIN_AVX_EX: iemAImpl_<name> normally, and
 * iemAImpl_<name>_fallback when IEM_WITHOUT_ASSEMBLY is defined.
 * ENTRY_BIN_SSE_OPT passes 0 for a_uExtra.
 */
98	#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
99	#ifndef IEM_WITHOUT_ASSEMBLY
100	# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
101	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
102	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
103	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
104	#else
105	# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
106	{ RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
107	g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
108	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
109	#endif
110
/**
 * Sub-test table initializer for the Intel EFLAGS flavour of a helper.
 *
 * The entry name gets an "_intel" suffix, the function is
 * iemAImpl_<name>_intel, the native function is the unsuffixed
 * iemAImpl_<name>, the test data is g_abTests_<name>_intel and the flavour is
 * IEMTARGETCPU_EFL_BEHAVIOR_INTEL.  Note that @a a_fEflUndef is accepted but
 * does not appear in the expansion.  ENTRY_BIN_INTEL passes 0 for a_uExtra.
 */
111	#define ENTRY_BIN_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, 0)
112	#define ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
113	{ RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
114	g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
115	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
116
/**
 * Sub-test table initializer for the AMD EFLAGS flavour of a helper.
 *
 * Mirror image of ENTRY_BIN_INTEL_EX using the "_amd" suffix for the entry
 * name, function and test data symbols, and IEMTARGETCPU_EFL_BEHAVIOR_AMD as
 * the flavour.  @a a_fEflUndef is accepted but does not appear in the
 * expansion.  ENTRY_BIN_AMD passes 0 for a_uExtra.
 */
117	#define ENTRY_BIN_AMD(a_Name, a_fEflUndef) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, 0)
118	#define ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
119	{ RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
120	g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
121	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
122
/**
 * Intel flavour initializer for helpers with a fixed test table.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined this is the ENTRY_BIN_INTEL_EX
 * initializer with the element count and address of g_aFixedTests_<name>
 * appended (the fixed table symbol carries no "_intel" suffix).  Otherwise it
 * forwards to ENTRY_BIN_INTEL_EX.  ENTRY_BIN_FIX_INTEL passes 0 for a_uExtra.
 */
123	#define ENTRY_BIN_FIX_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, 0)
124	#ifdef TSTIEMAIMPL_WITH_GENERATOR
125	# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
126	{ RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
127	g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
128	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL, \
129	RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
130	#else
131	# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra)
132	#endif
133
/**
 * AMD flavour initializer for helpers with a fixed test table.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined this is the ENTRY_BIN_AMD_EX
 * initializer with the element count and address of g_aFixedTests_<name>
 * appended (the fixed table symbol carries no "_amd" suffix).  Otherwise it
 * forwards to ENTRY_BIN_AMD_EX.  ENTRY_BIN_FIX_AMD passes 0 for a_uExtra.
 */
134	#define ENTRY_BIN_FIX_AMD(a_Name, a_fEflUndef) ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, 0)
135	#ifdef TSTIEMAIMPL_WITH_GENERATOR
136	# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
137	{ RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
138	g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
139	a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD, \
140	RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
141	#else
142	# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra)
143	#endif
144
145
/**
 * Declares a sub-test descriptor structure type.
 *
 * The member order matches the brace initializers produced by the ENTRY_BIN*
 * macros: name, function, native function, compressed test data and a pointer
 * to its size, the extra value, the EFLAGS flavour and (optionally) the fixed
 * test count and table.  The remaining members are left to be filled in at
 * runtime.
 *
 * @param   a_TypeName          Name of the structure type to declare.
 * @param   a_TestType          The test record type the tables consist of.
 * @param   a_FunctionPtrType   The function pointer type of the helper.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char                 *pszName; \
        const a_FunctionPtrType     pfn; \
        const a_FunctionPtrType     pfnNative; \
        void const * const          pvCompressedTests; \
        uint32_t const             *pcbCompressedTests; \
        uint32_t const              uExtra; \
        uint8_t const               idxCpuEflFlavour; \
        uint16_t const              cFixedTests; \
        a_TestType const * const    paFixedTests; \
        a_TestType const           *paTests;    /**< The decompressed info. */ \
        uint32_t                    cTests;     /**< The decompressed info. */ \
        IEMTESTENTRYINFO            Info; \
    } a_TypeName
162
/**
 * Number of function variations a sub-test entry has: 2 when the entry has a
 * native function and its EFLAGS flavour equals g_idxCpuEflFlavour, else 1.
 */
#define COUNT_VARIATIONS(a_SubTest) \
    ( ((a_SubTest).pfnNative && (a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour) ? 2 : 1 )
165
166
167	/*********************************************************************************************************************************
168	* Structures and Typedefs *
169	*********************************************************************************************************************************/
/**
 * Header structure for the binary test data; asserted to be exactly 128 bytes.
 */
170	typedef struct IEMBINARYHEADER
171	{
/** Magic string; same size as IEMBINARYHEADER_MAGIC including terminator (presumably holds it). */
172	char szMagic[16];
/** Entry size in bytes (presumably that of one test record - TODO confirm against the writer). */
173	uint32_t cbEntry;
/** SVN revision, see g_uSvnRev. */
174	uint32_t uSvnRev;
/** Unused space padding the structure up to 128 bytes. */
175	uint32_t auUnused[6];
/** CPU description, same size as g_szCpuDesc. */
176	char szCpuDesc[80];
177	} IEMBINARYHEADER;
178	AssertCompileSize(IEMBINARYHEADER, 128);
179
/* The header magic string.  The assertion below checks that it occupies
   exactly 16 bytes including the terminator, i.e. the size of
   IEMBINARYHEADER::szMagic.  The digit line is a column ruler for counting. */
180	// 01234567890123456
181	#define IEMBINARYHEADER_MAGIC "IEMAImpl Bin v1"
182	AssertCompile(sizeof(IEMBINARYHEADER_MAGIC) == 16);
183
184
/**
 * Footer structure for the binary test data; asserted to be exactly 32 bytes.
 */
185	typedef struct IEMBINARYFOOTER
186	{
/** Magic string; same size as IEMBINARYFOOTER_MAGIC including terminator (presumably holds it). */
187	char szMagic[24];
/** Entry size in bytes (presumably matching IEMBINARYHEADER::cbEntry - TODO confirm). */
188	uint32_t cbEntry;
/** Number of entries (presumably those preceding the footer - TODO confirm against the writer). */
189	uint32_t cEntries;
190	} IEMBINARYFOOTER;
191	AssertCompileSize(IEMBINARYFOOTER, 32);
/* The footer magic string, asserted to occupy exactly 24 bytes including the
   terminator.  The digit line is a column ruler for counting. */
192	// 012345678901234567890123
193	#define IEMBINARYFOOTER_MAGIC "\nIEMAImpl Bin Footer v1"
194	AssertCompile(sizeof(IEMBINARYFOOTER_MAGIC) == 24);
195
196
197	/** Fixed part of TYPEDEF_SUBTEST_TYPE and friends. */
198	typedef struct IEMTESTENTRYINFO
199	{
/** Uncompressed data buffer (presumably the decompressed test data - TODO confirm who allocates/frees). */
200	void *pvUncompressed;
/** Size of the uncompressed data in bytes. */
201	uint32_t cbUncompressed;
/** CPU description string (presumably taken from the binary header - TODO confirm). */
202	const char *pszCpuDesc;
/** SVN revision (presumably taken from the binary header - TODO confirm). */
203	uint32_t uSvnRev;
204	} IEMTESTENTRYINFO;
205
206
207	#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * State of one binary test data output file.  Only available when the
 * generator is compiled in (TSTIEMAIMPL_WITH_GENERATOR).
 */
208	typedef struct IEMBINARYOUTPUT
209	{
210	/** The output file. */
211	RTVFSFILE hVfsFile;
212	/** The stream we write uncompressed binary test data to. */
213	RTVFSIOSTREAM hVfsUncompressed;
214	/** The number of bytes written (ignoring write failures). */
215	size_t cbWritten;
216	/** The entry size. */
217	uint32_t cbEntry;
218	/** Write status. */
219	int rcWrite;
220	/** Set if NULL. */
221	bool fNull;
222	/** Set if we wrote a header and should write a footer as well. */
223	bool fWroteHeader;
224	/** Filename. */
225	char szFilename[94];
226	} IEMBINARYOUTPUT;
/** Pointer to the state of a binary test data output file. */
227	typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
228	#endif /* TSTIEMAIMPL_WITH_GENERATOR */
229
230
231	/*********************************************************************************************************************************
232	* Global Variables *
233	*********************************************************************************************************************************/
234	static RTTEST g_hTest;
235	static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
236	#ifdef TSTIEMAIMPL_WITH_GENERATOR
237	static uint32_t g_cZeroDstTests = 2;
238	static uint32_t g_cZeroSrcTests = 4;
239	#endif
240	static uint8_t g_pu8, g_pu8Two;
241	static uint16_t g_pu16, g_pu16Two;
242	static uint32_t g_pu32, g_pu32Two, *g_pfEfl;
243	static uint64_t g_pu64, g_pu64Two;
244	static RTUINT128U g_pu128, g_pu128Two;
245
246	static char g_aszBuf[32][256];
247	static unsigned g_idxBuf = 0;
248
249	static uint32_t g_cIncludeTestPatterns;
250	static uint32_t g_cExcludeTestPatterns;
251	static const char *g_apszIncludeTestPatterns[64];
252	static const char *g_apszExcludeTestPatterns[64];
253
254	/** Higher value, means longer benchmarking. */
255	static uint64_t g_cPicoSecBenchmark = 0;
256
257	static unsigned g_cVerbosity = 0;
258	static bool g_fVerboseSkipping = true;
259
260
261	#ifdef TSTIEMAIMPL_WITH_GENERATOR
262	/** The SVN revision (for use in the binary headers). */
263	static uint32_t g_uSvnRev = 0;
264	/** The CPU description (for use in the binary headers). */
265	static char g_szCpuDesc[80] = "";
266	#endif
267
268
269	/*********************************************************************************************************************************
270	* Internal Functions *
271	*********************************************************************************************************************************/
/* Forward declarations of the floating point formatters.  They are needed
   early because the random value generators below use them in their
   assertion messages. */
272	static const char *FormatR80(PCRTFLOAT80U pr80);
273	static const char *FormatR64(PCRTFLOAT64U pr64);
274	static const char *FormatR32(PCRTFLOAT32U pr32);
275
276
277	/*
278	* Random helpers.
279	*/
280
281	static uint32_t RandEFlags(void)
282	{
283	uint32_t fEfl = RTRandU32();
284	return (fEfl & X86_EFL_LIVE_MASK) \| X86_EFL_RA1_MASK;
285	}
286
287	#ifdef TSTIEMAIMPL_WITH_GENERATOR
288
289	static uint8_t RandU8(void)
290	{
291	return RTRandU32Ex(0, 0xff);
292	}
293
294
295	static uint16_t RandU16(void)
296	{
297	return RTRandU32Ex(0, 0xffff);
298	}
299
300
301	static uint32_t RandU32(void)
302	{
303	return RTRandU32();
304	}
305
306	#endif
307
308	static uint64_t RandU64(void)
309	{
310	return RTRandU64();
311	}
312
313
314	static RTUINT128U RandU128(void)
315	{
316	RTUINT128U Ret;
317	Ret.s.Hi = RTRandU64();
318	Ret.s.Lo = RTRandU64();
319	return Ret;
320	}
321
322	#ifdef TSTIEMAIMPL_WITH_GENERATOR
323
324	static uint8_t RandU8Dst(uint32_t iTest)
325	{
326	if (iTest < g_cZeroDstTests)
327	return 0;
328	return RandU8();
329	}
330
331
332	static uint8_t RandU8Src(uint32_t iTest)
333	{
334	if (iTest < g_cZeroSrcTests)
335	return 0;
336	return RandU8();
337	}
338
339
340	static uint16_t RandU16Dst(uint32_t iTest)
341	{
342	if (iTest < g_cZeroDstTests)
343	return 0;
344	return RandU16();
345	}
346
347
348	static uint16_t RandU16Src(uint32_t iTest)
349	{
350	if (iTest < g_cZeroSrcTests)
351	return 0;
352	return RandU16();
353	}
354
355
356	static uint32_t RandU32Dst(uint32_t iTest)
357	{
358	if (iTest < g_cZeroDstTests)
359	return 0;
360	return RandU32();
361	}
362
363
364	static uint32_t RandU32Src(uint32_t iTest)
365	{
366	if (iTest < g_cZeroSrcTests)
367	return 0;
368	return RandU32();
369	}
370
371
372	static uint64_t RandU64Dst(uint32_t iTest)
373	{
374	if (iTest < g_cZeroDstTests)
375	return 0;
376	return RandU64();
377	}
378
379
380	static uint64_t RandU64Src(uint32_t iTest)
381	{
382	if (iTest < g_cZeroSrcTests)
383	return 0;
384	return RandU64();
385	}
386
387
388	/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
389	static int16_t RandI16Src2(uint32_t iTest)
390	{
391	if (iTest < 18 * 4)
392	switch (iTest % 4)
393	{
394	case 0: return 0;
395	case 1: return INT16_MAX;
396	case 2: return INT16_MIN;
397	case 3: break;
398	}
399	return (int16_t)RandU16();
400	}
401
402
403	/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
404	static int32_t RandI32Src2(uint32_t iTest)
405	{
406	if (iTest < 18 * 4)
407	switch (iTest % 4)
408	{
409	case 0: return 0;
410	case 1: return INT32_MAX;
411	case 2: return INT32_MIN;
412	case 3: break;
413	}
414	return (int32_t)RandU32();
415	}
416
417
418	static int64_t RandI64Src(uint32_t iTest)
419	{
420	RT_NOREF(iTest);
421	return (int64_t)RandU64();
422	}
423
424
425	static uint16_t RandFcw(void)
426	{
427	return RandU16() & ~X86_FCW_ZERO_MASK;
428	}
429
430
431	static uint16_t RandFsw(void)
432	{
433	AssertCompile((X86_FSW_C_MASK \| X86_FSW_XCPT_ES_MASK \| X86_FSW_TOP_MASK \| X86_FSW_B) == 0xffff);
434	return RandU16();
435	}
436
437
438	static uint32_t RandMxcsr(void)
439	{
440	return RandU32() & ~X86_MXCSR_ZERO_MASK;
441	}
442
443
444	static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
445	{
446	if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
447	pr80->sj64.uFraction >>= cShift;
448	else
449	pr80->sj64.uFraction = (cShift % 19) + 1;
450	}
451
452
453
454	static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
455	{
456	Assert(cTarget == (!fIntTarget ? 80U : 16U) \|\| cTarget == 64U \|\| cTarget == 32U \|\| (cTarget == 59U && fIntTarget));
457
458	RTFLOAT80U r80;
459	r80.au64[0] = RandU64();
460	r80.au16[4] = RandU16();
461
462	/*
463	* Adjust the random stuff according to bType.
464	*/
465	bType &= 0x1f;
466	if (bType == 0 \|\| bType == 1 \|\| bType == 2 \|\| bType == 3)
467	{
468	/* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
469	r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
470	r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
471	r80.sj64.fInteger = bType >= 2 ? 1 : 0;
472	AssertMsg(bType != 0 \|\| RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
473	AssertMsg(bType != 1 \|\| RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
474	Assert( bType != 1 \|\| RTFLOAT80U_IS_387_INVALID(&r80));
475	AssertMsg(bType != 2 \|\| RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
476	AssertMsg(bType != 3 \|\| RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
477	}
478	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
479	{
480	/* Denormals (4,5) and Pseudo denormals (6,7) */
481	if (bType & 1)
482	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
483	else if (r80.sj64.uFraction == 0 && bType < 6)
484	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
485	r80.sj64.uExponent = 0;
486	r80.sj64.fInteger = bType >= 6;
487	AssertMsg(bType >= 6 \|\| RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
488	AssertMsg(bType < 6 \|\| RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
489	}
490	else if (bType == 8 \|\| bType == 9)
491	{
492	/* Pseudo NaN. */
493	if (bType & 1)
494	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
495	else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
496	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
497	r80.sj64.uExponent = 0x7fff;
498	if (r80.sj64.fInteger)
499	r80.sj64.uFraction \|= RT_BIT_64(62);
500	else
501	r80.sj64.uFraction &= ~RT_BIT_64(62);
502	r80.sj64.fInteger = 0;
503	AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
504	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
505	Assert(RTFLOAT80U_IS_387_INVALID(&r80));
506	}
507	else if (bType == 10 \|\| bType == 11 \|\| bType == 12 \|\| bType == 13)
508	{
509	/* Quiet and signalling NaNs. */
510	if (bType & 1)
511	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
512	else if (r80.sj64.uFraction == 0)
513	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
514	r80.sj64.uExponent = 0x7fff;
515	if (bType < 12)
516	r80.sj64.uFraction \|= RT_BIT_64(62); /* quiet */
517	else
518	r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
519	r80.sj64.fInteger = 1;
520	AssertMsg(bType >= 12 \|\| RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
521	AssertMsg(bType < 12 \|\| RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
522	AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) \|\| RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
523	AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
524	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
525	}
526	else if (bType == 14 \|\| bType == 15)
527	{
528	/* Unnormals */
529	if (bType & 1)
530	SafeR80FractionShift(&r80, RandU8() % 62);
531	r80.sj64.fInteger = 0;
532	if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX \|\| r80.sj64.uExponent == 0)
533	r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
534	AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
535	Assert(RTFLOAT80U_IS_387_INVALID(&r80));
536	}
537	else if (bType < 26)
538	{
539	/* Make sure we have lots of normalized values. */
540	if (!fIntTarget)
541	{
542	const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
543	: cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
544	const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
545	: cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
546	r80.sj64.fInteger = 1;
547	if (r80.sj64.uExponent <= uMinExp)
548	r80.sj64.uExponent = uMinExp + 1;
549	else if (r80.sj64.uExponent >= uMaxExp)
550	r80.sj64.uExponent = uMaxExp - 1;
551
552	if (bType == 16)
553	{ /* All 1s is useful to testing rounding. Also try trigger special
554	behaviour by sometimes rounding out of range, while we're at it. */
555	r80.sj64.uFraction = RT_BIT_64(63) - 1;
556	uint8_t bExp = RandU8();
557	if ((bExp & 3) == 0)
558	r80.sj64.uExponent = uMaxExp - 1;
559	else if ((bExp & 3) == 1)
560	r80.sj64.uExponent = uMinExp + 1;
561	else if ((bExp & 3) == 2)
562	r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
563	}
564	}
565	else
566	{
567	/* integer target: */
568	const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
569	const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
570	r80.sj64.fInteger = 1;
571	if (r80.sj64.uExponent < uMinExp)
572	r80.sj64.uExponent = uMinExp;
573	else if (r80.sj64.uExponent > uMaxExp)
574	r80.sj64.uExponent = uMaxExp;
575
576	if (bType == 16)
577	{ /* All 1s is useful to testing rounding. Also try trigger special
578	behaviour by sometimes rounding out of range, while we're at it. */
579	r80.sj64.uFraction = RT_BIT_64(63) - 1;
580	uint8_t bExp = RandU8();
581	if ((bExp & 3) == 0)
582	r80.sj64.uExponent = uMaxExp;
583	else if ((bExp & 3) == 1)
584	r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
585	}
586	}
587
588	AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
589	}
590	return r80;
591	}
592
593
594	static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
595	{
596	/*
597	* Make it more likely that we get a good selection of special values.
598	*/
599	return RandR80Ex(RandU8(), cTarget, fIntTarget);
600
601	}
602
603
604	static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
605	{
606	/* Make sure we cover all the basic types first before going for random selection: */
607	if (iTest <= 18)
608	return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
609	return RandR80(cTarget, fIntTarget);
610	}
611
612
613	/**
614	* Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
615	* to a 0..17, covering all basic value types.
616	*/
617	static uint8_t RandR80Src12RemapType(uint8_t bType)
618	{
619	switch (bType)
620	{
621	case 0: return 18; /* normal */
622	case 1: return 16; /* normal extreme rounding */
623	case 2: return 14; /* unnormal */
624	case 3: return 12; /* Signalling NaN */
625	case 4: return 10; /* Quiet NaN */
626	case 5: return 8; /* PseudoNaN */
627	case 6: return 6; /* Pseudo Denormal */
628	case 7: return 4; /* Denormal */
629	case 8: return 3; /* Indefinite */
630	case 9: return 2; /* Infinity */
631	case 10: return 1; /* Pseudo-Infinity */
632	case 11: return 0; /* Zero */
633	default: AssertFailedReturn(18);
634	}
635	}
636
637
638	/**
639	* This works in tandem with RandR80Src2 to make sure we cover all operand
640	* type mixes first before we venture into regular random testing.
641	*
642	* There are 11 basic variations, when we leave out the five odd ones using
643	* SafeR80FractionShift. Because of the special normalized value targetting at
644	* rounding, we make it an even 12. So 144 combinations for two operands.
645	*/
646	static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
647	{
648	if (cPartnerBits == 80)
649	{
650	Assert(!fPartnerInt);
651	if (iTest < 12 * 12)
652	return RandR80Ex(RandR80Src12RemapType(iTest / 12));
653	}
654	else if ((cPartnerBits == 64 \|\| cPartnerBits == 32) && !fPartnerInt)
655	{
656	if (iTest < 12 * 10)
657	return RandR80Ex(RandR80Src12RemapType(iTest / 10));
658	}
659	else if (iTest < 18 * 4 && fPartnerInt)
660	return RandR80Ex(iTest / 4);
661	return RandR80();
662	}
663
664
665	/** Partner to RandR80Src1. */
666	static RTFLOAT80U RandR80Src2(uint32_t iTest)
667	{
668	if (iTest < 12 * 12)
669	return RandR80Ex(RandR80Src12RemapType(iTest % 12));
670	return RandR80();
671	}
672
673
674	static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
675	{
676	if (pr64->s64.uFraction >= RT_BIT_64(cShift))
677	pr64->s64.uFraction >>= cShift;
678	else
679	pr64->s64.uFraction = (cShift % 19) + 1;
680	}
681
682
683	static RTFLOAT64U RandR64Ex(uint8_t bType)
684	{
685	RTFLOAT64U r64;
686	r64.u = RandU64();
687
688	/*
689	* Make it more likely that we get a good selection of special values.
690	* On average 6 out of 16 calls should return a special value.
691	*/
692	bType &= 0xf;
693	if (bType == 0 \|\| bType == 1)
694	{
695	/* 0 or Infinity. We only keep fSign here. */
696	r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
697	r64.s.uFractionHigh = 0;
698	r64.s.uFractionLow = 0;
699	AssertMsg(bType != 0 \|\| RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
700	AssertMsg(bType != 1 \|\| RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
701	}
702	else if (bType == 2 \|\| bType == 3)
703	{
704	/* Subnormals */
705	if (bType == 3)
706	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
707	else if (r64.s64.uFraction == 0)
708	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
709	r64.s64.uExponent = 0;
710	AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
711	}
712	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
713	{
714	/* NaNs */
715	if (bType & 1)
716	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
717	else if (r64.s64.uFraction == 0)
718	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
719	r64.s64.uExponent = 0x7ff;
720	if (bType < 6)
721	r64.s64.uFraction \|= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
722	else
723	r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
724	AssertMsg(bType >= 6 \|\| RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
725	AssertMsg(bType < 6 \|\| RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
726	AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
727	}
728	else if (bType < 12)
729	{
730	/* Make sure we have lots of normalized values. */
731	if (r64.s.uExponent == 0)
732	r64.s.uExponent = 1;
733	else if (r64.s.uExponent == 0x7ff)
734	r64.s.uExponent = 0x7fe;
735	AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
736	}
737	return r64;
738	}
739
740
741	static RTFLOAT64U RandR64Src(uint32_t iTest)
742	{
743	if (iTest < 16)
744	return RandR64Ex(iTest);
745	return RandR64Ex(RandU8());
746	}
747
748
749	/** Pairing with a 80-bit floating point arg. */
750	static RTFLOAT64U RandR64Src2(uint32_t iTest)
751	{
752	if (iTest < 12 * 10)
753	return RandR64Ex(9 - iTest % 10); /* start with normal values */
754	return RandR64Ex(RandU8());
755	}
756
757
758	static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
759	{
760	if (pr32->s.uFraction >= RT_BIT_32(cShift))
761	pr32->s.uFraction >>= cShift;
762	else
763	pr32->s.uFraction = (cShift % 19) + 1;
764	}
765
766
767	static RTFLOAT32U RandR32Ex(uint8_t bType)
768	{
769	RTFLOAT32U r32;
770	r32.u = RandU32();
771
772	/*
773	* Make it more likely that we get a good selection of special values.
774	* On average 6 out of 16 calls should return a special value.
775	*/
776	bType &= 0xf;
777	if (bType == 0 \|\| bType == 1)
778	{
779	/* 0 or Infinity. We only keep fSign here. */
780	r32.s.uExponent = bType == 0 ? 0 : 0xff;
781	r32.s.uFraction = 0;
782	AssertMsg(bType != 0 \|\| RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
783	AssertMsg(bType != 1 \|\| RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
784	}
785	else if (bType == 2 \|\| bType == 3)
786	{
787	/* Subnormals */
788	if (bType == 3)
789	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
790	else if (r32.s.uFraction == 0)
791	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
792	r32.s.uExponent = 0;
793	AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
794	}
795	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
796	{
797	/* NaNs */
798	if (bType & 1)
799	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
800	else if (r32.s.uFraction == 0)
801	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
802	r32.s.uExponent = 0xff;
803	if (bType < 6)
804	r32.s.uFraction \|= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
805	else
806	r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
807	AssertMsg(bType >= 6 \|\| RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
808	AssertMsg(bType < 6 \|\| RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
809	AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
810	}
811	else if (bType < 12)
812	{
813	/* Make sure we have lots of normalized values. */
814	if (r32.s.uExponent == 0)
815	r32.s.uExponent = 1;
816	else if (r32.s.uExponent == 0xff)
817	r32.s.uExponent = 0xfe;
818	AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
819	}
820	return r32;
821	}
822
823
824	static RTFLOAT32U RandR32Src(uint32_t iTest)
825	{
826	if (iTest < 16)
827	return RandR32Ex(iTest);
828	return RandR32Ex(RandU8());
829	}
830
831
832	/** Pairing with a 80-bit floating point arg. */
833	static RTFLOAT32U RandR32Src2(uint32_t iTest)
834	{
835	if (iTest < 12 * 10)
836	return RandR32Ex(9 - iTest % 10); /* start with normal values */
837	return RandR32Ex(RandU8());
838	}
839
840
841	static RTPBCD80U RandD80Src(uint32_t iTest)
842	{
843	if (iTest < 3)
844	{
845	RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
846	return d80Zero;
847	}
848	if (iTest < 5)
849	{
850	RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
851	return d80Ind;
852	}
853
854	RTPBCD80U d80;
855	uint8_t b = RandU8();
856	d80.s.fSign = b & 1;
857
858	if ((iTest & 7) >= 6)
859	{
860	/* Illegal */
861	d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
862	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
863	d80.s.abPairs[iPair] = RandU8();
864	}
865	else
866	{
867	/* Normal */
868	d80.s.uPad = 0;
869	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
870	{
871	uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
872	uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
873	d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
874	}
875	}
876	return d80;
877	}
878
879	# if 0 /* unused */
880
881	static const char *GenFormatR80(PCRTFLOAT80U plrd)
882	{
883	if (RTFLOAT80U_IS_ZERO(plrd))
884	return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
885	if (RTFLOAT80U_IS_INF(plrd))
886	return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
887	if (RTFLOAT80U_IS_INDEFINITE(plrd))
888	return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
889	if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
890	return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
891	if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
892	return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
893
894	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
895	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
896	plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
897	return pszBuf;
898	}
899
900	static const char *GenFormatR64(PCRTFLOAT64U prd)
901	{
902	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
903	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
904	prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
905	return pszBuf;
906	}
907
908
909	static const char *GenFormatR32(PCRTFLOAT32U pr)
910	{
911	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
912	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
913	return pszBuf;
914	}
915
916
917	static const char *GenFormatD80(PCRTPBCD80U pd80)
918	{
919	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
920	size_t off;
921	if (pd80->s.uPad == 0)
922	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
923	else
924	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
925	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
926	while (iPair-- > 0)
927	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
928	RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
929	RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
930	pszBuf[off++] = ')';
931	pszBuf[off++] = '\0';
932	return pszBuf;
933	}
934
935
936	static const char *GenFormatI64(int64_t i64)
937	{
938	if (i64 == INT64_MIN) /* This one is problematic */
939	return "INT64_MIN";
940	if (i64 == INT64_MAX)
941	return "INT64_MAX";
942	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
943	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
944	return pszBuf;
945	}
946
# if 0 /* unused */
/**
 * Pointer variant of GenFormatI64(), formatting the value pointed to.
 *
 * @returns See GenFormatI64(int64_t).
 * @param   pi64    Pointer to the value to format.
 */
static const char *GenFormatI64(int64_t const *pi64)
{
    return GenFormatI64(*pi64);
}
# endif
953
954	static const char *GenFormatI32(int32_t i32)
955	{
956	if (i32 == INT32_MIN) /* This one is problematic */
957	return "INT32_MIN";
958	if (i32 == INT32_MAX)
959	return "INT32_MAX";
960	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
961	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
962	return pszBuf;
963	}
964
965
966	const char GenFormatI32(int32_t const pi32)
967	{
968	return GenFormatI32(*pi32);
969	}
970
971
972	const char *GenFormatI16(int16_t i16)
973	{
974	if (i16 == INT16_MIN) /* This one is problematic */
975	return "INT16_MIN";
976	if (i16 == INT16_MAX)
977	return "INT16_MAX";
978	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
979	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
980	return pszBuf;
981	}
982
983
984	const char GenFormatI16(int16_t const pi16)
985	{
986	return GenFormatI16(*pi16);
987	}
988
989
990	static void GenerateHeader(PRTSTREAM pOut, const char pszCpuDesc, const char pszCpuType)
991	{
992	/* We want to tag the generated source code with the revision that produced it. */
993	static char s_szRev[] = "$Revision: 106179 $";
994	const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
995	size_t cchRev = 0;
996	while (RT_C_IS_DIGIT(pszRev[cchRev]))
997	cchRev++;
998
999	RTStrmPrintf(pOut,
1000	"/* $Id: tstIEMAImpl.cpp 106179 2024-09-29 01:14:19Z vboxsync $ */\n"
1001	"/** @file\n"
1002	" * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
1003	" */\n"
1004	"\n"
1005	"/*\n"
1006	" * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
1007	" *\n"
1008	" * This file is part of VirtualBox base platform packages, as\n"
1009	" * available from https://www.virtualbox.org.\n"
1010	" *\n"
1011	" * This program is free software; you can redistribute it and/or\n"
1012	" * modify it under the terms of the GNU General Public License\n"
1013	" * as published by the Free Software Foundation, in version 3 of the\n"
1014	" * License.\n"
1015	" *\n"
1016	" * This program is distributed in the hope that it will be useful, but\n"
1017	" * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
1018	" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
1019	" * General Public License for more details.\n"
1020	" *\n"
1021	" * You should have received a copy of the GNU General Public License\n"
1022	" * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
1023	" *\n"
1024	" * SPDX-License-Identifier: GPL-3.0-only\n"
1025	" */\n"
1026	"\n"
1027	"#include \"tstIEMAImpl.h\"\n"
1028	"\n"
1029	,
1030	pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
1031	}
1032
1033
1034	static PRTSTREAM GenerateOpenWithHdr(const char pszFilename, const char pszCpuDesc, const char *pszCpuType)
1035	{
1036	PRTSTREAM pOut = NULL;
1037	int rc = RTStrmOpen(pszFilename, "w", &pOut);
1038	if (RT_SUCCESS(rc))
1039	{
1040	GenerateHeader(pOut, pszCpuDesc, pszCpuType);
1041	return pOut;
1042	}
1043	RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
1044	return NULL;
1045	}
1046
1047
1048	static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
1049	{
1050	RTStrmPrintf(pOut,
1051	"\n"
1052	"/* end of file */\n");
1053	int rc = RTStrmClose(pOut);
1054	if (RT_SUCCESS(rc))
1055	return rcExit;
1056	return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
1057	}
1058
1059
1060	static void GenerateArrayStart(PRTSTREAM pOut, const char pszName, const char pszType)
1061	{
1062	RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
1063	}
1064
1065
1066	static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
1067	{
1068	RTStrmPrintf(pOut,
1069	"};\n"
1070	"uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
1071	"\n",
1072	pszName, pszName);
1073	}
1074
1075	# endif /* unused */
1076
1077	static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1078	{
1079	pBinOut->cbWritten += cbData; /* ignore errors - makes entry calculation simpler */
1080	if (RT_SUCCESS_NP(pBinOut->rcWrite))
1081	{
1082	pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /fBlocking/, NULL);
1083	if (RT_SUCCESS(pBinOut->rcWrite))
1084	return;
1085	RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1086	}
1087	}
1088
1089	static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char pszFilenameFmt, const char pszName,
1090	IEMTESTENTRYINFO const *pInfoToPreserve, uint32_t cbEntry)
1091	{
1092	pBinOut->cbEntry = cbEntry;
1093	pBinOut->cbWritten = 0;
1094	pBinOut->hVfsFile = NIL_RTVFSFILE;
1095	pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1096	if (pszFilenameFmt)
1097	{
1098	pBinOut->fNull = false;
1099	if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
1100	{
1101	RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
1102	pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
1103	RTFILE_O_CREATE_REPLACE \| RTFILE_O_WRITE \| RTFILE_O_DENY_READWRITE,
1104	&pBinOut->hVfsFile);
1105	if (RT_SUCCESS(pBinOut->rcWrite))
1106	{
1107	RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
1108	if (hVfsIoFile != NIL_RTVFSIOSTREAM)
1109	{
1110	pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /fFlags/, 9, &pBinOut->hVfsUncompressed);
1111	RTVfsIoStrmRelease(hVfsIoFile);
1112	if (RT_SUCCESS(pBinOut->rcWrite))
1113	{
1114	pBinOut->rcWrite = VINF_SUCCESS;
1115	pBinOut->fWroteHeader = false;
1116
1117	/* Write the header if applicable. */
1118	if ( !pInfoToPreserve
1119	\|\| (pInfoToPreserve->uSvnRev != 0 && *pInfoToPreserve->pszCpuDesc))
1120	{
1121	IEMBINARYHEADER Hdr;
1122	RT_ZERO(Hdr);
1123	memcpy(Hdr.szMagic, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC));
1124	Hdr.cbEntry = cbEntry;
1125	Hdr.uSvnRev = pInfoToPreserve ? pInfoToPreserve->uSvnRev : g_uSvnRev;
1126	RTStrCopy(Hdr.szCpuDesc, sizeof(Hdr.szCpuDesc),
1127	pInfoToPreserve ? pInfoToPreserve->pszCpuDesc : g_szCpuDesc);
1128	GenerateBinaryWrite(pBinOut, &Hdr, sizeof(Hdr));
1129	pBinOut->fWroteHeader = true;
1130	}
1131
1132	return true;
1133	}
1134
1135	RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
1136	}
1137	else
1138	{
1139	RTMsgError("RTVfsFileToIoStream failed!");
1140	pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
1141	}
1142	RTVfsFileRelease(pBinOut->hVfsFile);
1143	RTFileDelete(pBinOut->szFilename);
1144	}
1145	else
1146	RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1147	}
1148	else
1149	{
1150	RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
1151	pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
1152	}
1153	return false;
1154	}
1155	RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
1156	pBinOut->rcWrite = VERR_IGNORED;
1157	pBinOut->fNull = true;
1158	pBinOut->fWroteHeader = false;
1159	pBinOut->szFilename[0] = '\0';
1160	return true;
1161	}
1162
/** Opens the binary output for a sub-test entry: the filename format is picked
 *  by the entry's idxCpuEflFlavour, the entry size is taken from its paTests
 *  element type, and no info is preserved (NULL). */
# define GENERATE_BINARY_OPEN(a_pBinOut, a_papszNameFmts, a_Entry) \
    GenerateBinaryOpen((a_pBinOut), a_papszNameFmts[(a_Entry).idxCpuEflFlavour], (a_Entry).pszName, \
                       NULL /*pInfo*/, sizeof((a_Entry).paTests[0]))
1166
1167	static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
1168	{
1169	if (!pBinOut->fNull)
1170	{
1171	/* Write footer if we've written a header. */
1172	if (pBinOut->fWroteHeader)
1173	{
1174	IEMBINARYFOOTER Ftr;
1175	RT_ZERO(Ftr);
1176	memcpy(Ftr.szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC));
1177	Ftr.cbEntry = pBinOut->cbEntry;
1178	Ftr.cEntries = (uint32_t)((pBinOut->cbWritten - sizeof(IEMBINARYHEADER)) / pBinOut->cbEntry);
1179	Assert(Ftr.cEntries * pBinOut->cbEntry + sizeof(IEMBINARYHEADER) == pBinOut->cbWritten);
1180	GenerateBinaryWrite(pBinOut, &Ftr, sizeof(Ftr));
1181	}
1182
1183	/* This is rather jovial about rcWrite. */
1184	int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
1185	RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
1186	pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1187	if (RT_FAILURE(rc1))
1188	RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);
1189
1190	int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
1191	RTVfsFileRelease(pBinOut->hVfsFile);
1192	pBinOut->hVfsFile = NIL_RTVFSFILE;
1193	if (RT_FAILURE(rc2))
1194	RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);
1195
1196	return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
1197	}
1198	return true;
1199	}
1200
/**
 * Helper for DumpAll.
 *
 * Defines <a_FnBaseName>DumpAll(), which for each entry in @a a_aSubTests
 * decompresses the test data and writes it back out as a binary file, passing
 * the entry's Info to GenerateBinaryOpen() so the original header values are
 * preserved.
 *
 * @note GenerateBinaryWrite() takes a byte count, so the entry count must be
 *       scaled by the entry size.
 */
# define DUMP_ALL_FN(a_FnBaseName, a_aSubTests) \
    static RTEXITCODE a_FnBaseName ## DumpAll(const char * const * papszNameFmts) \
    { \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
        { \
            AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
            IEMBINARYOUTPUT BinOut; \
            AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], \
                                            a_aSubTests[iFn].pszName, &a_aSubTests[iFn].Info, \
                                            sizeof(a_aSubTests[iFn].paTests[0])), \
                         RTEXITCODE_FAILURE); \
            GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, \
                                (size_t)a_aSubTests[iFn].cTests * sizeof(a_aSubTests[iFn].paTests[0])); \
            FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
            AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
        } \
        return RTEXITCODE_SUCCESS; \
    }
1219	#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1220
1221
1222	/*
1223	* Test helpers.
1224	*/
1225	static bool IsTestEnabled(const char *pszName)
1226	{
1227	/* Process excludes first: */
1228	uint32_t i = g_cExcludeTestPatterns;
1229	while (i-- > 0)
1230	if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1231	return false;
1232
1233	/* If no include patterns, everything is included: */
1234	i = g_cIncludeTestPatterns;
1235	if (!i)
1236	return true;
1237
1238	/* Otherwise only tests in the include patters gets tested: */
1239	while (i-- > 0)
1240	if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1241	return true;
1242
1243	return false;
1244	}
1245
1246
1247	static bool SubTestAndCheckIfEnabled(const char *pszName)
1248	{
1249	bool const fEnabled = IsTestEnabled(pszName);
1250	if (g_fVerboseSkipping \|\| fEnabled)
1251	{
1252	RTTestSub(g_hTest, pszName);
1253	if (fEnabled)
1254	return true;
1255	RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1256	}
1257	return false;
1258	}
1259
1260
1261	/** Decompresses test data before use as required. */
1262	static int DecompressBinaryTest(void const pvCompressed, uint32_t cbCompressed, size_t cbEntry, const char pszWhat,
1263	void *ppvTests, uint32_t pcTests, IEMTESTENTRYINFO *pInfo)
1264	{
1265	/* Don't do it again. */
1266	if (pInfo->pvUncompressed && *ppvTests)
1267	return VINF_SUCCESS;
1268
1269	/* Open a memory stream for the compressed binary data. */
1270	RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
1271	int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, pvCompressed, cbCompressed, &hVfsIos);
1272	RTTESTI_CHECK_RC_OK_RET(rc, rc);
1273
1274	/* Open a decompressed stream for it. */
1275	RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
1276	rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
1277	RTTESTI_CHECK_RC_OK(rc);
1278	if (RT_SUCCESS(rc))
1279	{
1280	/* Initial output buffer allocation. */
1281	size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
1282	uint8_t pbDecompressed = (uint8_t )RTMemAllocZ(cbDecompressedAlloc);
1283	if (pbDecompressed)
1284	{
1285	size_t off = 0;
1286	for (;;)
1287	{
1288	size_t cbRead = 0;
1289	rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /fBlocking/, &cbRead);
1290	if (RT_FAILURE(rc))
1291	break;
1292	if (rc == VINF_EOF && cbRead == 0)
1293	break;
1294	off += cbRead;
1295
1296	if (cbDecompressedAlloc < off + 256)
1297	{
1298	size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
1299	void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
1300	AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
1301	cbDecompressedAlloc = cbNew;
1302	pbDecompressed = (uint8_t *)pvNew;
1303	}
1304	}
1305	if (RT_SUCCESS(rc))
1306	{
1307	size_t const cbUncompressed = off;
1308
1309	/* Validate the header and footer if present and subtract them from 'off'. */
1310	IEMBINARYHEADER const *pHdr = NULL;
1311	if ( off >= sizeof(IEMTESTENTRYINFO)
1312	&& memcmp(pbDecompressed, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC)) == 0)
1313	{
1314	pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1315	IEMBINARYFOOTER const pFtr = (IEMBINARYFOOTER const )&pbDecompressed[off - sizeof(IEMBINARYFOOTER)];
1316
1317	off -= sizeof(pHdr) + sizeof(pFtr);
1318	rc = VERR_IO_BAD_UNIT;
1319	if (pHdr->cbEntry != cbEntry)
1320	RTTestIFailed("Test entry size differs for '%s': %#x (header r%u), expected %#zx (uncompressed size %#zx)",
1321	pszWhat, pHdr->cbEntry, pHdr->uSvnRev, cbEntry, off + sizeof(pHdr) + sizeof(pFtr));
1322	else if (memcmp(pFtr->szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC)) != 0)
1323	RTTestIFailed("Wrong footer magic for '%s': %.*Rhxs\n", pszWhat, sizeof(pFtr->szMagic), pFtr->szMagic);
1324	else if (pFtr->cbEntry != cbEntry)
1325	RTTestIFailed("Wrong footer entry size for '%s': %#x, expected %#x\n", pszWhat, pFtr->cbEntry, cbEntry);
1326	else if (pFtr->cEntries != off / cbEntry)
1327	RTTestIFailed("Wrong footer entry count for '%s': %#x, expected %#x\n",
1328	pszWhat, pFtr->cEntries, off / cbEntry);
1329	else
1330	rc = VINF_SUCCESS;
1331	}
1332
1333	/* Validate the decompressed size wrt entry size. */
1334	if ((off % cbEntry) != 0 && RT_SUCCESS(rc))
1335	{
1336	RTTestIFailed("Uneven decompressed data size for '%s': %#zx vs entry size %#zx -> %#zx",
1337	pszWhat, off, cbEntry, off % cbEntry);
1338	rc = VERR_IO_BAD_LENGTH;
1339	}
1340
1341	if (RT_SUCCESS(rc))
1342	{
1343	/*
1344	* We're good.
1345	*/
1346	/* Reallocate the block if it's way to big. */
1347	if (cbDecompressedAlloc - cbUncompressed > _512K)
1348	{
1349	void * const pvNew = RTMemRealloc(pbDecompressed, cbUncompressed);
1350	if (pvNew)
1351	{
1352	pbDecompressed = (uint8_t *)pvNew;
1353	if (pHdr)
1354	pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1355	}
1356	}
1357	RTMEM_MAY_LEAK(pbDecompressed);
1358
1359	/* Fill in the info and other return values. */
1360	pInfo->cbUncompressed = (uint32_t)cbUncompressed;
1361	pInfo->pvUncompressed = pbDecompressed;
1362	pInfo->pszCpuDesc = pHdr ? pHdr->szCpuDesc : NULL;
1363	pInfo->uSvnRev = pHdr ? pHdr->uSvnRev : 0;
1364	*pcTests = (uint32_t)(off / cbEntry);
1365	ppvTests = pHdr ? (uint8_t )(pHdr + 1) : pbDecompressed;
1366
1367	pbDecompressed = NULL;
1368	rc = VINF_SUCCESS;
1369	}
1370	}
1371	else
1372	RTTestIFailed("Failed to decompress binary stream '%s': %Rrc (off=%#zx, cbCompressed=%#x)",
1373	pszWhat, rc, off, cbCompressed);
1374	RTMemFree(pbDecompressed);
1375	}
1376	else
1377	{
1378	RTTestIFailed("Out of memory decompressing test data '%s'", pszWhat);
1379	rc = VERR_NO_MEMORY;
1380	}
1381	RTVfsIoStrmRelease(hVfsIosDecomp);
1382	}
1383	RTVfsIoStrmRelease(hVfsIos);
1384	return rc;
1385	}
1386
/** Decompresses the test data of a sub-test table entry via
 *  DecompressBinaryTest(), using the entry's own fields for all arguments.
 *  Evaluates to true on success and false on failure. */
#define DECOMPRESS_TESTS(a_Entry) \
    RT_SUCCESS(DecompressBinaryTest((a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                    sizeof((a_Entry).paTests[0]), (a_Entry).pszName, \
                                    (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info))
1391
1392	/** Frees the decompressed test data. */
1393	static void FreeDecompressedTests(void *ppvTests, uint32_t pcTests, IEMTESTENTRYINFO *pInfo)
1394	{
1395	RTMemFree(pInfo->pvUncompressed);
1396	pInfo->pvUncompressed = NULL;
1397	pInfo->cbUncompressed = 0;
1398	*ppvTests = NULL;
1399	*pcTests = 0;
1400	}
1401
/** Frees the decompressed test data of a sub-test table entry via
 *  FreeDecompressedTests(), resetting the entry's paTests and cTests. */
#define FREE_DECOMPRESSED_TESTS(a_Entry) \
    FreeDecompressedTests((void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1404
1405
1406	/** Check if the test is enabled and decompresses test data. */
1407	static int SubTestAndCheckIfEnabledAndDecompress(const char pszName, void const pvCompressed, uint32_t cbCompressed,
1408	size_t cbEntry, void *ppvTests, uint32_t pcTests, IEMTESTENTRYINFO *pInfo)
1409	{
1410	if (SubTestAndCheckIfEnabled(pszName))
1411	{
1412	int const rc = DecompressBinaryTest(pvCompressed, cbCompressed, cbEntry, pszName, ppvTests, pcTests, pInfo);
1413	if (RT_SUCCESS(rc))
1414	return true;
1415	}
1416	return false;
1417	}
1418
/** Starts the sub-test for a table entry if it is enabled and decompresses its
 *  test data, using the entry's own fields for all arguments.  Evaluates to
 *  non-zero when the test should be run. */
#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
    SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, (a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                          sizeof((a_Entry).paTests[0]), \
                                          (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1423
1424
1425	static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1426	{
1427	if (fActual == fExpected)
1428	return "";
1429
1430	uint32_t const fXor = fActual ^ fExpected;
1431	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1432	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1433
1434	static struct
1435	{
1436	const char *pszName;
1437	uint32_t fFlag;
1438	} const s_aFlags[] =
1439	{
1440	#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1441	EFL_ENTRY(CF),
1442	EFL_ENTRY(PF),
1443	EFL_ENTRY(AF),
1444	EFL_ENTRY(ZF),
1445	EFL_ENTRY(SF),
1446	EFL_ENTRY(TF),
1447	EFL_ENTRY(IF),
1448	EFL_ENTRY(DF),
1449	EFL_ENTRY(OF),
1450	EFL_ENTRY(IOPL),
1451	EFL_ENTRY(NT),
1452	EFL_ENTRY(RF),
1453	EFL_ENTRY(VM),
1454	EFL_ENTRY(AC),
1455	EFL_ENTRY(VIF),
1456	EFL_ENTRY(VIP),
1457	EFL_ENTRY(ID),
1458	};
1459	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1460	if (s_aFlags[i].fFlag & fXor)
1461	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1462	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1463	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1464	return pszBuf;
1465	}
1466
1467
1468	static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1469	{
1470	if (fActual == fExpected)
1471	return "";
1472
1473	uint16_t const fXor = fActual ^ fExpected;
1474	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1475	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1476
1477	static struct
1478	{
1479	const char *pszName;
1480	uint32_t fFlag;
1481	} const s_aFlags[] =
1482	{
1483	#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1484	FSW_ENTRY(IE),
1485	FSW_ENTRY(DE),
1486	FSW_ENTRY(ZE),
1487	FSW_ENTRY(OE),
1488	FSW_ENTRY(UE),
1489	FSW_ENTRY(PE),
1490	FSW_ENTRY(SF),
1491	FSW_ENTRY(ES),
1492	FSW_ENTRY(C0),
1493	FSW_ENTRY(C1),
1494	FSW_ENTRY(C2),
1495	FSW_ENTRY(C3),
1496	FSW_ENTRY(B),
1497	};
1498	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1499	if (s_aFlags[i].fFlag & fXor)
1500	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1501	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1502	if (fXor & X86_FSW_TOP_MASK)
1503	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1504	X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1505	#if 0 /* For debugging fprem & fprem1 */
1506	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1507	X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1508	#endif
1509	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1510	return pszBuf;
1511	}
1512
1513
1514	static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1515	{
1516	if (fActual == fExpected)
1517	return "";
1518
1519	uint16_t const fXor = fActual ^ fExpected;
1520	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1521	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1522
1523	static struct
1524	{
1525	const char *pszName;
1526	uint32_t fFlag;
1527	} const s_aFlags[] =
1528	{
1529	#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1530	MXCSR_ENTRY(IE),
1531	MXCSR_ENTRY(DE),
1532	MXCSR_ENTRY(ZE),
1533	MXCSR_ENTRY(OE),
1534	MXCSR_ENTRY(UE),
1535	MXCSR_ENTRY(PE),
1536
1537	MXCSR_ENTRY(IM),
1538	MXCSR_ENTRY(DM),
1539	MXCSR_ENTRY(ZM),
1540	MXCSR_ENTRY(OM),
1541	MXCSR_ENTRY(UM),
1542	MXCSR_ENTRY(PM),
1543
1544	MXCSR_ENTRY(DAZ),
1545	MXCSR_ENTRY(FZ),
1546	#undef MXCSR_ENTRY
1547	};
1548	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1549	if (s_aFlags[i].fFlag & fXor)
1550	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1551	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1552	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1553	return pszBuf;
1554	}
1555
1556
1557	static const char *FormatFcw(uint16_t fFcw)
1558	{
1559	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1560
1561	const char pszPC = NULL; / (msc+gcc are too stupid) */
1562	switch (fFcw & X86_FCW_PC_MASK)
1563	{
1564	case X86_FCW_PC_24: pszPC = "PC24"; break;
1565	case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1566	case X86_FCW_PC_53: pszPC = "PC53"; break;
1567	case X86_FCW_PC_64: pszPC = "PC64"; break;
1568	}
1569
1570	const char pszRC = NULL; / (msc+gcc are too stupid) */
1571	switch (fFcw & X86_FCW_RC_MASK)
1572	{
1573	case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1574	case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1575	case X86_FCW_RC_UP: pszRC = "UP"; break;
1576	case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1577	}
1578	size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1579
1580	static struct
1581	{
1582	const char *pszName;
1583	uint32_t fFlag;
1584	} const s_aFlags[] =
1585	{
1586	#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1587	FCW_ENTRY(IM),
1588	FCW_ENTRY(DM),
1589	FCW_ENTRY(ZM),
1590	FCW_ENTRY(OM),
1591	FCW_ENTRY(UM),
1592	FCW_ENTRY(PM),
1593	{ "6M", 64 },
1594	};
1595	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1596	if (fFcw & s_aFlags[i].fFlag)
1597	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1598
1599	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1600	return pszBuf;
1601	}
1602
1603
1604	static const char *FormatMxcsr(uint32_t fMxcsr)
1605	{
1606	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1607
1608	const char pszRC = NULL; / (msc+gcc are too stupid) */
1609	switch (fMxcsr & X86_MXCSR_RC_MASK)
1610	{
1611	case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1612	case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1613	case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1614	case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1615	}
1616
1617	const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1618	const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1619	size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1620
1621	static struct
1622	{
1623	const char *pszName;
1624	uint32_t fFlag;
1625	} const s_aFlags[] =
1626	{
1627	#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1628	MXCSR_ENTRY(IE),
1629	MXCSR_ENTRY(DE),
1630	MXCSR_ENTRY(ZE),
1631	MXCSR_ENTRY(OE),
1632	MXCSR_ENTRY(UE),
1633	MXCSR_ENTRY(PE),
1634
1635	MXCSR_ENTRY(IM),
1636	MXCSR_ENTRY(DM),
1637	MXCSR_ENTRY(ZM),
1638	MXCSR_ENTRY(OM),
1639	MXCSR_ENTRY(UM),
1640	MXCSR_ENTRY(PM),
1641	{ "6M", 64 },
1642	};
1643	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1644	if (fMxcsr & s_aFlags[i].fFlag)
1645	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1646
1647	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1648	return pszBuf;
1649	}
1650
1651
1652	static const char *FormatR80(PCRTFLOAT80U pr80)
1653	{
1654	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1655	RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1656	return pszBuf;
1657	}
1658
1659
1660	static const char *FormatR64(PCRTFLOAT64U pr64)
1661	{
1662	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1663	RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1664	return pszBuf;
1665	}
1666
1667
1668	static const char *FormatR32(PCRTFLOAT32U pr32)
1669	{
1670	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1671	RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1672	return pszBuf;
1673	}
1674
1675
1676	static const char *FormatD80(PCRTPBCD80U pd80)
1677	{
1678	/* There is only one indefinite endcoding (same as for 80-bit
1679	floating point), so get it out of the way first: */
1680	if (RTPBCD80U_IS_INDEFINITE(pd80))
1681	return "Ind";
1682
1683	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1684	size_t off = 0;
1685	pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1686	unsigned cBadDigits = 0;
1687	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1688	while (iPair-- > 0)
1689	{
1690	static const char s_szDigits[] = "0123456789abcdef";
1691	static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1692	pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1693	pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1694	cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1695	+ s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1696	}
1697	if (cBadDigits \|\| pd80->s.uPad != 0)
1698	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1699	pszBuf[off] = '\0';
1700	return pszBuf;
1701	}
1702
1703
#if 0
/**
 * Formats a signed 64-bit integer for display as base 16 text.
 *
 * @returns Pointer to the g_aszBuf entry (used round-robin) holding the text.
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1712
1713
1714	static const char FormatI32(int32_t const piVal)
1715	{
1716	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1717	RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
1718	return pszBuf;
1719	}
1720
1721
1722	static const char FormatI16(int16_t const piVal)
1723	{
1724	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1725	RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
1726	return pszBuf;
1727	}
1728
1729
1730	static const char *FormatU128(PCRTUINT128U puVal)
1731	{
1732	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1733	RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1734	return pszBuf;
1735	}
1736
1737
1738	/*
1739	* Binary operations.
1740	*/
/* Sub-test table entry types for the 8, 16, 32 and 64-bit binary operations.
   NOTE(review): presumably TYPEDEF_SUBTEST_TYPE declares the type named by the
   first argument from the test entry type and the worker function pointer
   type - confirm against the macro definition. */
TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1745
/**
 * Defines the test data generator BinU<a_cBits>Generate() and, via
 * DUMP_ALL_FN, BinU<a_cBits>DumpAll() for the g_aBinU<a_cBits> table.
 *
 * For each table entry matching the current EFLAGS flavour (or the native one)
 * the generator writes @a cTests entries with random inputs followed by the
 * entry's fixed tests, the outputs being produced by the native worker when
 * there is one and otherwise by the regular worker.
 *
 * Expands to nothing when TSTIEMAIMPL_WITH_GENERATOR isn't defined.
 * The a_Fmt argument is not used by the expansion.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        /* Prefer the native worker for producing the expected output. */ \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        IEMBINARYOUTPUT BinOut; \
        /* Skip entries for another EFLAGS flavour than the current one. */ \
        if (   g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        /* The random tests. */ \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            Test.fEflOut   = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        /* The fixed tests; an fEflIn of UINT32_MAX requests random input flags. */ \
        for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                           : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
            Test.uDstIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
            Test.uMisc     = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
            Test.fEflOut   = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(BinU ## a_cBits, g_aBinU ## a_cBits)

#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1793
1794
1795	/** Based on a quick probe run, guess how long to run the benchmark. */
1796	static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1797	{
1798	uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1799	uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1800	if (cIterations > _2G)
1801	return _2G;
1802	if (cIterations < _4K)
1803	return _4K;
1804	return RT_ALIGN_32((uint32_t)cIterations, _4K);
1805	}
1806
1807
/**
 * Defines the generator (see GEN_BINARY_TESTS), the benchmark worker
 * BinU<a_cBits>Bench() and the test function BinU<a_cBits>Test() for one
 * operand width.
 *
 * The test function runs every entry of @a a_aSubTests through its worker and
 * compares the destination and EFLAGS results with the recorded ones.  When
 * they match, the call is repeated with the destination in the g_pu<a_cBits>
 * buffer.  If the sub-test has no errors and benchmarking is enabled
 * (g_cPicoSecBenchmark), the middle test entry is benchmarked.  COUNT_VARIATIONS
 * decides how many variations are run, the second one using pfnNative.
 *
 * @param   a_cBits     The operand width in bits.
 * @param   a_uType     The operand type.
 * @param   a_Fmt       Format string snippet for an operand value.
 * @param   a_TestType  The test entry type.
 * @param   a_aSubTests The sub-test table.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const  uDstIn = pEntry->uDstIn; \
    a_uType const  uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; /* the loop body makes four calls */ \
    RTThreadYield(); \
    uint64_t const nsStart     = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        a_uType uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_uType  uDst = paTests[iTest].uDstIn; \
                uint32_t fEfl = pfn(paTests[iTest].fEflIn, &uDst, paTests[iTest].uSrcIn); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                     /* Repeat with the destination in the g_pu<bits> buffer. */ \
                     *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                     fEfl = pfn(paTests[iTest].fEflIn, g_pu ## a_cBits, paTests[iTest].uSrcIn); \
                     RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                     RTTEST_CHECK(g_hTest, fEfl == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest       = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun  = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
1885
1886
1887	/*
1888	* 8-bit binary operations.
1889	*/
/* Sub-test table for the 8-bit binary operators: one entry per worker, each
   plain variant followed by its _locked variant where one is listed.  The cmp
   and test workers are cast to PFNIEMAIMPLBINU8 to fit the table. */
1890	static BINU8_T g_aBinU8[] =
1891	{
1892	ENTRY_BIN(add_u8),
1893	ENTRY_BIN(add_u8_locked),
1894	ENTRY_BIN(adc_u8),
1895	ENTRY_BIN(adc_u8_locked),
1896	ENTRY_BIN(sub_u8),
1897	ENTRY_BIN(sub_u8_locked),
1898	ENTRY_BIN(sbb_u8),
1899	ENTRY_BIN(sbb_u8_locked),
1900	ENTRY_BIN(or_u8),
1901	ENTRY_BIN(or_u8_locked),
1902	ENTRY_BIN(xor_u8),
1903	ENTRY_BIN(xor_u8_locked),
1904	ENTRY_BIN(and_u8),
1905	ENTRY_BIN(and_u8_locked),
1906	ENTRY_BIN_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
1907	ENTRY_BIN_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
1908	};
/* Instantiates BinU8Bench() and BinU8Test() for the table above. */
1909	TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1910
1911
1912	/*
1913	* 16-bit binary operations.
1914	*/
/* Hand-written add_u16 test vector, only compiled when TSTIEMAIMPL_WITH_GENERATOR
   is defined; the column comment inside gives the field order. */
1915	#ifdef TSTIEMAIMPL_WITH_GENERATOR
1916	static const BINU16_TEST_T g_aFixedTests_add_u16[] =
1917	{
1918	/* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1919	{ UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
1920	};
1921	#endif
/* Sub-test table for the 16-bit binary operators.  add_u16 uses ENTRY_BIN_FIX,
   the cmp, test and bt workers are cast to PFNIEMAIMPLBINU16, and bsf, bsr and
   imul_two each have an AMD and an Intel entry carrying an EFLAGS mask.
   NOTE(review): the mask presumably names the flags whose result differs per
   vendor, and the trailing 1 of the _EX entries is an extra macro argument
   whose meaning is not visible here - confirm against the ENTRY_BIN_* macros. */
1922	static BINU16_T g_aBinU16[] =
1923	{
1924	ENTRY_BIN_FIX(add_u16),
1925	ENTRY_BIN(add_u16_locked),
1926	ENTRY_BIN(adc_u16),
1927	ENTRY_BIN(adc_u16_locked),
1928	ENTRY_BIN(sub_u16),
1929	ENTRY_BIN(sub_u16_locked),
1930	ENTRY_BIN(sbb_u16),
1931	ENTRY_BIN(sbb_u16_locked),
1932	ENTRY_BIN(or_u16),
1933	ENTRY_BIN(or_u16_locked),
1934	ENTRY_BIN(xor_u16),
1935	ENTRY_BIN(xor_u16_locked),
1936	ENTRY_BIN(and_u16),
1937	ENTRY_BIN(and_u16_locked),
1938	ENTRY_BIN_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
1939	ENTRY_BIN_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
1940	ENTRY_BIN_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
1941	ENTRY_BIN_EX(btc_u16, 1),
1942	ENTRY_BIN_EX(btc_u16_locked, 1),
1943	ENTRY_BIN_EX(btr_u16, 1),
1944	ENTRY_BIN_EX(btr_u16_locked, 1),
1945	ENTRY_BIN_EX(bts_u16, 1),
1946	ENTRY_BIN_EX(bts_u16_locked, 1),
1947	ENTRY_BIN_AMD( bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1948	ENTRY_BIN_INTEL(bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1949	ENTRY_BIN_AMD( bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1950	ENTRY_BIN_INTEL(bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1951	ENTRY_BIN_AMD( imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1952	ENTRY_BIN_INTEL(imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1953	ENTRY_BIN(arpl),
1954	};
/* Instantiates BinU16Bench() and BinU16Test() for the table above. */
1955	TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1956
1957
1958	/*
1959	* 32-bit binary operations.
1960	*/
/* Hand-written add_u32 test vector, only compiled when TSTIEMAIMPL_WITH_GENERATOR
   is defined; the column comment inside gives the field order. */
1961	#ifdef TSTIEMAIMPL_WITH_GENERATOR
1962	static const BINU32_TEST_T g_aFixedTests_add_u32[] =
1963	{
1964	/* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1965	{ UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
1966	};
1967	#endif
/* Sub-test table for the 32-bit binary operators.  Same layout as the 16-bit
   table, with adcx_u32 and adox_u32 in place of arpl.
   NOTE(review): the meaning of the EFLAGS masks on the AMD/INTEL entries and of
   the trailing 1 on the _EX entries is defined by the ENTRY_BIN_* macros, which
   are not visible here - confirm there. */
1968	static BINU32_T g_aBinU32[] =
1969	{
1970	ENTRY_BIN_FIX(add_u32),
1971	ENTRY_BIN(add_u32_locked),
1972	ENTRY_BIN(adc_u32),
1973	ENTRY_BIN(adc_u32_locked),
1974	ENTRY_BIN(sub_u32),
1975	ENTRY_BIN(sub_u32_locked),
1976	ENTRY_BIN(sbb_u32),
1977	ENTRY_BIN(sbb_u32_locked),
1978	ENTRY_BIN(or_u32),
1979	ENTRY_BIN(or_u32_locked),
1980	ENTRY_BIN(xor_u32),
1981	ENTRY_BIN(xor_u32_locked),
1982	ENTRY_BIN(and_u32),
1983	ENTRY_BIN(and_u32_locked),
1984	ENTRY_BIN_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
1985	ENTRY_BIN_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
1986	ENTRY_BIN_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
1987	ENTRY_BIN_EX(btc_u32, 1),
1988	ENTRY_BIN_EX(btc_u32_locked, 1),
1989	ENTRY_BIN_EX(btr_u32, 1),
1990	ENTRY_BIN_EX(btr_u32_locked, 1),
1991	ENTRY_BIN_EX(bts_u32, 1),
1992	ENTRY_BIN_EX(bts_u32_locked, 1),
1993	ENTRY_BIN_AMD( bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1994	ENTRY_BIN_INTEL(bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1995	ENTRY_BIN_AMD( bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1996	ENTRY_BIN_INTEL(bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1997	ENTRY_BIN_AMD( imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1998	ENTRY_BIN_INTEL(imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1999	ENTRY_BIN(adcx_u32),
2000	ENTRY_BIN(adox_u32),
2001	};
/* Instantiates BinU32Bench() and BinU32Test() for the table above. */
2002	TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
2003
2004
2005	/*
2006	* 64-bit binary operations.
2007	*/
/* Hand-written add_u64 test vector, only compiled when TSTIEMAIMPL_WITH_GENERATOR
   is defined; the column comment inside gives the field order. */
2008	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2009	static const BINU64_TEST_T g_aFixedTests_add_u64[] =
2010	{
2011	/* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
2012	{ UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
2013	};
2014	#endif
/* Sub-test table for the 64-bit binary operators.  Same layout as the 32-bit
   table; popcnt, tzcnt and lzcnt are not covered yet (see the todo markers).
   NOTE(review): the meaning of the EFLAGS masks on the AMD/INTEL entries and of
   the trailing 1 on the _EX entries is defined by the ENTRY_BIN_* macros, which
   are not visible here - confirm there. */
2015	static BINU64_T g_aBinU64[] =
2016	{
2017	ENTRY_BIN_FIX(add_u64),
2018	ENTRY_BIN(add_u64_locked),
2019	ENTRY_BIN(adc_u64),
2020	ENTRY_BIN(adc_u64_locked),
2021	ENTRY_BIN(sub_u64),
2022	ENTRY_BIN(sub_u64_locked),
2023	ENTRY_BIN(sbb_u64),
2024	ENTRY_BIN(sbb_u64_locked),
2025	ENTRY_BIN(or_u64),
2026	ENTRY_BIN(or_u64_locked),
2027	ENTRY_BIN(xor_u64),
2028	ENTRY_BIN(xor_u64_locked),
2029	ENTRY_BIN(and_u64),
2030	ENTRY_BIN(and_u64_locked),
2031	ENTRY_BIN_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
2032	ENTRY_BIN_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
2033	ENTRY_BIN_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
2034	ENTRY_BIN_EX(btc_u64, 1),
2035	ENTRY_BIN_EX(btc_u64_locked, 1),
2036	ENTRY_BIN_EX(btr_u64, 1),
2037	ENTRY_BIN_EX(btr_u64_locked, 1),
2038	ENTRY_BIN_EX(bts_u64, 1),
2039	ENTRY_BIN_EX(bts_u64_locked, 1),
2040	ENTRY_BIN_AMD( bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
2041	ENTRY_BIN_INTEL(bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
2042	ENTRY_BIN_AMD( bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
2043	ENTRY_BIN_INTEL(bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
2044	ENTRY_BIN_AMD( imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
2045	ENTRY_BIN_INTEL(imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
2046	ENTRY_BIN(adcx_u64),
2047	ENTRY_BIN(adox_u64),
2048	/** @todo popcnt */
2049	/** @todo tzcnt */
2050	/** @todo lzcnt */
2051	};
/* Instantiates BinU64Bench() and BinU64Test() for the table above. */
2052	TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
2053
2054
2055	/*
2056	* XCHG
2057	*/
2058	static void XchgTest(void)
2059	{
2060	if (!SubTestAndCheckIfEnabled("xchg"))
2061	return;
2062	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t pu8Mem, uint8_t pu8Reg));
2063	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t pu16Mem, uint16_t pu16Reg));
2064	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t pu32Mem, uint32_t pu32Reg));
2065	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t pu64Mem, uint64_t pu64Reg));
2066
2067	static struct
2068	{
2069	uint8_t cb; uint64_t fMask;
2070	union
2071	{
2072	uintptr_t pfn;
2073	FNIEMAIMPLXCHGU8 *pfnU8;
2074	FNIEMAIMPLXCHGU16 *pfnU16;
2075	FNIEMAIMPLXCHGU32 *pfnU32;
2076	FNIEMAIMPLXCHGU64 *pfnU64;
2077	} u;
2078	}
2079	s_aXchgWorkers[] =
2080	{
2081	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
2082	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
2083	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
2084	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
2085	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
2086	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
2087	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
2088	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
2089	};
2090	for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
2091	{
2092	RTUINT64U uIn1, uIn2, uMem, uDst;
2093	uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2094	uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2095	if (uIn1.u == uIn2.u)
2096	uDst.u = uIn2.u = ~uIn2.u;
2097
2098	switch (s_aXchgWorkers[i].cb)
2099	{
2100	case 1:
2101	s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
2102	s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
2103	break;
2104	case 2:
2105	s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
2106	s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
2107	break;
2108	case 4:
2109	s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
2110	s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
2111	break;
2112	case 8:
2113	s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
2114	s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
2115	break;
2116	default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
2117	}
2118
2119	if (uMem.u != uIn2.u \|\| uDst.u != uIn1.u)
2120	RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
2121	}
2122	}
2123
2124
2125	/*
2126	* XADD
2127	*/
2128	static void XaddTest(void)
2129	{
2130	#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
2131	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type , a_Type , uint32_t *)); \
2132	static struct \
2133	{ \
2134	const char * const pszName; \
2135	FNIEMAIMPLXADDU ## a_cBits * const pfn; \
2136	void const * const pvCompressedTests; \
2137	uint32_t const * const pcbCompressedTests; \
2138	BINU ## a_cBits ## _TEST_T const *paTests; \
2139	uint32_t cTests; \
2140	IEMTESTENTRYINFO Info; \
2141	} s_aFuncs[] = \
2142	{ \
2143	{ "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
2144	g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2145	{ "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
2146	g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2147	}; \
2148	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2149	{ \
2150	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2151	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2152	uint32_t const cTests = s_aFuncs[iFn].cTests; \
2153	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2154	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2155	{ \
2156	uint32_t fEfl = paTests[iTest].fEflIn; \
2157	a_Type uSrc = paTests[iTest].uSrcIn; \
2158	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
2159	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
2160	if ( fEfl != paTests[iTest].fEflOut \
2161	\|\| *g_pu ## a_cBits != paTests[iTest].uDstOut \
2162	\|\| uSrc != paTests[iTest].uDstIn) \
2163	RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2164	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
2165	fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
2166	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2167	} \
2168	FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2169	} \
2170	} while(0)
2171	TEST_XADD(8, uint8_t, "%#04x");
2172	TEST_XADD(16, uint16_t, "%#06x");
2173	TEST_XADD(32, uint32_t, "%#010RX32");
2174	TEST_XADD(64, uint64_t, "%#010RX64");
2175	}
2176
2177
2178	/*
2179	* CMPXCHG
2180	*/
2181
2182	static void CmpXchgTest(void)
2183	{
2184	#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
2185	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type , a_Type , a_Type, uint32_t *)); \
2186	static struct \
2187	{ \
2188	const char * const pszName; \
2189	FNIEMAIMPLCMPXCHGU ## a_cBits * const pfn; \
2190	PFNIEMAIMPLBINU ## a_cBits const pfnSub; \
2191	void const * const pvCompressedTests; \
2192	uint32_t const * const pcbCompressedTests; \
2193	BINU ## a_cBits ## _TEST_T const *paTests; \
2194	uint32_t cTests; \
2195	IEMTESTENTRYINFO Info; \
2196	} s_aFuncs[] = \
2197	{ \
2198	{ "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
2199	g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2200	{ "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
2201	g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2202	}; \
2203	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2204	{ \
2205	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2206	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2207	uint32_t const cTests = s_aFuncs[iFn].cTests; \
2208	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2209	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2210	{ \
2211	/* as is (99% likely to be negative). */ \
2212	uint32_t fEfl = paTests[iTest].fEflIn; \
2213	a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
2214	a_Type uA = paTests[iTest].uDstIn; \
2215	*g_pu ## a_cBits = paTests[iTest].uSrcIn; \
2216	a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
2217	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2218	if ( fEfl != paTests[iTest].fEflOut \
2219	\|\| *g_pu ## a_cBits != uExpect \
2220	\|\| uA != paTests[iTest].uSrcIn) \
2221	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2222	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
2223	uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
2224	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2225	/* positive */ \
2226	uA = paTests[iTest].uDstIn; \
2227	uint32_t fEflExpect = s_aFuncs[iFn].pfnSub(paTests[iTest].fEflIn, &uA, uA); \
2228	fEfl = paTests[iTest].fEflIn; \
2229	uA = paTests[iTest].uDstIn; \
2230	*g_pu ## a_cBits = uA; \
2231	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2232	if ( fEfl != fEflExpect \
2233	\|\| *g_pu ## a_cBits != uNew \
2234	\|\| uA != paTests[iTest].uDstIn) \
2235	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2236	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
2237	uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
2238	EFlagsDiff(fEfl, fEflExpect)); \
2239	} \
2240	FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2241	} \
2242	} while(0)
2243	TEST_CMPXCHG(8, uint8_t, "%#04RX8");
2244	TEST_CMPXCHG(16, uint16_t, "%#06x");
2245	TEST_CMPXCHG(32, uint32_t, "%#010RX32");
2246	#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
2247	TEST_CMPXCHG(64, uint64_t, "%#010RX64");
2248	#endif
2249	}
2250
2251
2252	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t , PRTUINT64U, PRTUINT64U, uint32_t ));
2253
2254	static uint64_t CmpXchg8bBench(uint32_t cIterations, FNIEMAIMPLCMPXCHG8B *pfn, uint64_t const uDstValue,
2255	uint64_t const uOldValue, uint64_t const uNewValue, uint32_t const fEflIn)
2256	{
2257	cIterations /= 4;
2258	RTThreadYield();
2259	uint64_t const nsStart = RTTimeNanoTS();
2260	for (uint32_t i = 0; i < cIterations; i++)
2261	{
2262	RTUINT64U uA, uB;
2263	uint32_t fEfl = fEflIn;
2264	uint64_t uDst = uDstValue;
2265	uB.u = uNewValue;
2266	uA.u = uOldValue;
2267	pfn(&uDst, &uA, &uB, &fEfl);
2268
2269	fEfl = fEflIn;
2270	uDst = uDstValue;
2271	uB.u = uNewValue;
2272	uA.u = uOldValue;
2273	pfn(&uDst, &uA, &uB, &fEfl);
2274
2275	fEfl = fEflIn;
2276	uDst = uDstValue;
2277	uB.u = uNewValue;
2278	uA.u = uOldValue;
2279	pfn(&uDst, &uA, &uB, &fEfl);
2280
2281	fEfl = fEflIn;
2282	uDst = uDstValue;
2283	uB.u = uNewValue;
2284	uA.u = uOldValue;
2285	pfn(&uDst, &uA, &uB, &fEfl);
2286	}
2287	return RTTimeNanoTS() - nsStart;
2288	}
2289
2290	static void CmpXchg8bTest(void)
2291	{
2292	static struct
2293	{
2294	const char *pszName;
2295	FNIEMAIMPLCMPXCHG8B *pfn;
2296	} const s_aFuncs[] =
2297	{
2298	{ "cmpxchg8b", iemAImpl_cmpxchg8b },
2299	{ "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
2300	};
2301	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2302	{
2303	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2304	continue;
2305	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2306	{
2307	uint64_t const uOldValue = RandU64();
2308	uint64_t const uNewValue = RandU64();
2309
2310	/* positive test. */
2311	RTUINT64U uA, uB;
2312	uB.u = uNewValue;
2313	uA.u = uOldValue;
2314	*g_pu64 = uOldValue;
2315	uint32_t fEflIn = RandEFlags();
2316	uint32_t fEfl = fEflIn;
2317	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2318	if ( fEfl != (fEflIn \| X86_EFL_ZF)
2319	\|\| *g_pu64 != uNewValue
2320	\|\| uA.u != uOldValue)
2321	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2322	iTest, fEflIn, uOldValue, uOldValue, uNewValue,
2323	fEfl, *g_pu64, uA.u,
2324	(fEflIn \| X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
2325	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2326
2327	/* negative */
2328	uint64_t const uExpect = ~uOldValue;
2329	*g_pu64 = uExpect;
2330	uA.u = uOldValue;
2331	uB.u = uNewValue;
2332	fEfl = fEflIn = RandEFlags();
2333	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2334	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2335	\|\| *g_pu64 != uExpect
2336	\|\| uA.u != uExpect)
2337	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2338	iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
2339	fEfl, *g_pu64, uA.u,
2340	(fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2341	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2342
2343	if (iTest == 2 && g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
2344	{
2345	uint32_t cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
2346	uOldValue, uOldValue, uNewValue, fEflIn));
2347	uint64_t cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, uOldValue, uOldValue, uNewValue, fEflIn);
2348	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
2349	"%s-positive", s_aFuncs[iFn].pszName);
2350
2351	cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
2352	~uOldValue, uOldValue, uNewValue, fEflIn));
2353	cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, ~uOldValue, uOldValue, uNewValue, fEflIn);
2354	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
2355	"%s-negative", s_aFuncs[iFn].pszName);
2356	}
2357	}
2358	}
2359	}
2360
2361	static void CmpXchg16bTest(void)
2362	{
2363	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
2364	static struct
2365	{
2366	const char *pszName;
2367	FNIEMAIMPLCMPXCHG16B *pfn;
2368	} const s_aFuncs[] =
2369	{
2370	{ "cmpxchg16b", iemAImpl_cmpxchg16b },
2371	{ "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
2372	#if !defined(RT_ARCH_ARM64)
2373	{ "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
2374	#endif
2375	};
2376	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2377	{
2378	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2379	continue;
2380	#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
2381	if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
2382	{
2383	RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
2384	continue;
2385	}
2386	#endif
2387	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2388	{
2389	RTUINT128U const uOldValue = RandU128();
2390	RTUINT128U const uNewValue = RandU128();
2391
2392	/* positive test. */
2393	RTUINT128U uA, uB;
2394	uB = uNewValue;
2395	uA = uOldValue;
2396	*g_pu128 = uOldValue;
2397	uint32_t fEflIn = RandEFlags();
2398	uint32_t fEfl = fEflIn;
2399	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2400	if ( fEfl != (fEflIn \| X86_EFL_ZF)
2401	\|\| g_pu128->s.Lo != uNewValue.s.Lo
2402	\|\| g_pu128->s.Hi != uNewValue.s.Hi
2403	\|\| uA.s.Lo != uOldValue.s.Lo
2404	\|\| uA.s.Hi != uOldValue.s.Hi)
2405	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2406	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2407	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2408	iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2409	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2410	(fEflIn \| X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
2411	EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
2412	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2413
2414	/* negative */
2415	RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
2416	*g_pu128 = uExpect;
2417	uA = uOldValue;
2418	uB = uNewValue;
2419	fEfl = fEflIn = RandEFlags();
2420	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2421	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2422	\|\| g_pu128->s.Lo != uExpect.s.Lo
2423	\|\| g_pu128->s.Hi != uExpect.s.Hi
2424	\|\| uA.s.Lo != uExpect.s.Lo
2425	\|\| uA.s.Hi != uExpect.s.Hi)
2426	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2427	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2428	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2429	iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2430	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2431	(fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
2432	EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2433	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2434	}
2435	}
2436	}
2437
2438
2439	/*
2440	* Double shifts.
2441	*
2442	* Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2443	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits ShiftDblU<bits>Generate() and the matching dump-all function.
 *
 * The generator writes cTests random records per selected sub-test, with the
 * outputs produced by the sub-test's pfnNative worker.  Only sub-tests whose
 * idxCpuEflFlavour is IEMTARGETCPU_EFL_BEHAVIOR_NATIVE or equals
 * g_idxCpuEflFlavour are processed.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            || a_aSubTests[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour) \
        { \
            IEMBINARYOUTPUT BinOut; \
            AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
            for (uint32_t iEntry = 0; iEntry != cTests; ++iEntry) \
            { \
                a_TestType Entry; \
                Entry.fEflOut = Entry.fEflIn = RandEFlags(); \
                Entry.uDstOut = Entry.uDstIn = RandU ## a_cBits ## Dst(iEntry); \
                Entry.uSrcIn  = RandU ## a_cBits ## Src(iEntry); \
                /* need to go way beyond the a_cBits limit */ \
                Entry.uMisc   = RandU8() & (a_cBits * 4 - 1); \
                a_aSubTests[iFn].pfnNative(&Entry.uDstOut, Entry.uSrcIn, Entry.uMisc, &Entry.fEflOut); \
                GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
            } \
            AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
        } \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftDblU ## a_cBits, a_aSubTests)

#else
/* No generator in this build: the macro expands to nothing. */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2476
2477	#define TEST_SHIFT_DBL(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2478	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2479	\
2480	static a_SubTestType a_aSubTests[] = \
2481	{ \
2482	ENTRY_BIN_AMD(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
2483	ENTRY_BIN_INTEL(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
2484	ENTRY_BIN_AMD(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
2485	ENTRY_BIN_INTEL(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
2486	}; \
2487	\
2488	GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2489	\
2490	static uint64_t ShiftDblU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn, a_TestType const *pEntry) \
2491	{ \
2492	uint32_t const fEflIn = pEntry->fEflIn; \
2493	a_uType const uDstIn = pEntry->uDstIn; \
2494	a_uType const uSrcIn = pEntry->uSrcIn; \
2495	a_uType const cShift = pEntry->uMisc; \
2496	cIterations /= 4; \
2497	RTThreadYield(); \
2498	uint64_t const nsStart = RTTimeNanoTS(); \
2499	for (uint32_t i = 0; i < cIterations; i++) \
2500	{ \
2501	uint32_t fBenchEfl = fEflIn; \
2502	a_uType uBenchDst = uDstIn; \
2503	pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2504	\
2505	fBenchEfl = fEflIn; \
2506	uBenchDst = uDstIn; \
2507	pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2508	\
2509	fBenchEfl = fEflIn; \
2510	uBenchDst = uDstIn; \
2511	pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2512	\
2513	fBenchEfl = fEflIn; \
2514	uBenchDst = uDstIn; \
2515	pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2516	} \
2517	return RTTimeNanoTS() - nsStart; \
2518	} \
2519	\
2520	static void ShiftDblU ## a_cBits ## Test(void) \
2521	{ \
2522	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2523	{ \
2524	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2525	continue; \
2526	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2527	uint32_t const cTests = a_aSubTests[iFn].cTests; \
2528	PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2529	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2530	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2531	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2532	{ \
2533	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2534	{ \
2535	uint32_t fEfl = paTests[iTest].fEflIn; \
2536	a_uType uDst = paTests[iTest].uDstIn; \
2537	pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2538	if ( uDst != paTests[iTest].uDstOut \
2539	\|\| fEfl != paTests[iTest].fEflOut) \
2540	RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2541	iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2542	paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2543	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2544	EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2545	else \
2546	{ \
2547	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
2548	*g_pfEfl = paTests[iTest].fEflIn; \
2549	pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2550	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2551	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2552	} \
2553	} \
2554	\
2555	/* Benchmark if all succeeded. */ \
2556	if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2557	{ \
2558	uint32_t const iTest = cTests / 2; \
2559	uint32_t const cIterations = EstimateIterations(_64K, ShiftDblU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2560	uint64_t const cNsRealRun = ShiftDblU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2561	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
2562	"%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
2563	} \
2564	\
2565	/* Next variation is native. */ \
2566	pfn = a_aSubTests[iFn].pfnNative; \
2567	} \
2568	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2569	} \
2570	}
2571	TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2572	TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2573	TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2574
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the double shift test data for all operand widths.
 *
 * Runs the 16, 32 and 64-bit generators in that order and stops at the first
 * one that does not return RTEXITCODE_SUCCESS.
 *
 * @returns Exit code of the first failing generator, otherwise that of the last one.
 * @param   cTests          Number of tests to generate per sub-test.
 * @param   papszNameFmts   Passed on unchanged to the per-width generators.
 */
static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
{
    RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return ShiftDblU64Generate(cTests, papszNameFmts);
}

/**
 * Runs the double shift dump-all functions for all operand widths.
 *
 * Runs the 16, 32 and 64-bit variants in that order and stops at the first one
 * that does not return RTEXITCODE_SUCCESS.
 *
 * @returns Exit code of the first failing variant, otherwise that of the last one.
 * @param   papszNameFmts   Passed on unchanged to the per-width functions.
 */
static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
{
    RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = ShiftDblU32DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return ShiftDblU64DumpAll(papszNameFmts);
}
#endif
2596
2597	static void ShiftDblTest(void)
2598	{
2599	ShiftDblU16Test();
2600	ShiftDblU32Test();
2601	ShiftDblU64Test();
2602	}
2603
2604
2605	/*
2606	* Unary operators.
2607	*
2608	* Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
2609	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits UnaryU<bits>Generate() and the matching dump-all function.
 *
 * The generator writes cTests random records for every entry of
 * g_aUnaryU<bits>, with the outputs produced by the entry's pfn worker.  The
 * uSrcIn and uMisc fields are not used by unary operators and are zeroed.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        PFNIEMAIMPLUNARYU ## a_cBits const pfnWorker = g_aUnaryU ## a_cBits[iFn].pfn; \
        for (uint32_t iEntry = 0; iEntry != cTests; ++iEntry) \
        { \
            a_TestType Entry; \
            Entry.fEflOut = Entry.fEflIn = RandEFlags(); \
            Entry.uDstOut = Entry.uDstIn = RandU ## a_cBits(); \
            Entry.uSrcIn  = 0; \
            Entry.uMisc   = 0; \
            pfnWorker(&Entry.uDstOut, &Entry.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(UnaryU ## a_cBits, g_aUnaryU ## a_cBits)
#else
/* No generator in this build: the macro expands to nothing. */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
2638
2639	#define TEST_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2640	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2641	static a_SubTestType a_aSubTests[] = \
2642	{ \
2643	ENTRY_BIN(inc_u ## a_cBits), \
2644	ENTRY_BIN(inc_u ## a_cBits ## _locked), \
2645	ENTRY_BIN(dec_u ## a_cBits), \
2646	ENTRY_BIN(dec_u ## a_cBits ## _locked), \
2647	ENTRY_BIN(not_u ## a_cBits), \
2648	ENTRY_BIN(not_u ## a_cBits ## _locked), \
2649	ENTRY_BIN(neg_u ## a_cBits), \
2650	ENTRY_BIN(neg_u ## a_cBits ## _locked), \
2651	}; \
2652	\
2653	GEN_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType) \
2654	\
2655	static uint64_t UnaryU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLUNARYU ## a_cBits pfn, a_TestType const *pEntry) \
2656	{ \
2657	uint32_t const fEflIn = pEntry->fEflIn; \
2658	a_uType const uDstIn = pEntry->uDstIn; \
2659	cIterations /= 4; \
2660	RTThreadYield(); \
2661	uint64_t const nsStart = RTTimeNanoTS(); \
2662	for (uint32_t i = 0; i < cIterations; i++) \
2663	{ \
2664	uint32_t fBenchEfl = fEflIn; \
2665	a_uType uBenchDst = uDstIn; \
2666	pfn(&uBenchDst, &fBenchEfl); \
2667	\
2668	fBenchEfl = fEflIn; \
2669	uBenchDst = uDstIn; \
2670	pfn(&uBenchDst, &fBenchEfl); \
2671	\
2672	fBenchEfl = fEflIn; \
2673	uBenchDst = uDstIn; \
2674	pfn(&uBenchDst, &fBenchEfl); \
2675	\
2676	fBenchEfl = fEflIn; \
2677	uBenchDst = uDstIn; \
2678	pfn(&uBenchDst, &fBenchEfl); \
2679	} \
2680	return RTTimeNanoTS() - nsStart; \
2681	} \
2682	\
2683	static void UnaryU ## a_cBits ## Test(void) \
2684	{ \
2685	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2686	{ \
2687	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2688	continue; \
2689	PFNIEMAIMPLUNARYU ## a_cBits const pfn = a_aSubTests[iFn].pfn; \
2690	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2691	uint32_t const cTests = a_aSubTests[iFn].cTests; \
2692	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2693	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2694	{ \
2695	uint32_t fEfl = paTests[iTest].fEflIn; \
2696	a_uType uDst = paTests[iTest].uDstIn; \
2697	pfn(&uDst, &fEfl); \
2698	if ( uDst != paTests[iTest].uDstOut \
2699	\|\| fEfl != paTests[iTest].fEflOut) \
2700	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2701	iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2702	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2703	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2704	else \
2705	{ \
2706	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
2707	*g_pfEfl = paTests[iTest].fEflIn; \
2708	pfn(g_pu ## a_cBits, g_pfEfl); \
2709	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2710	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2711	} \
2712	} \
2713	\
2714	if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2715	{ \
2716	uint32_t const iTest = cTests / 2; \
2717	uint32_t const cIterations = EstimateIterations(_64K, UnaryU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2718	uint64_t const cNsRealRun = UnaryU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2719	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, "%s", a_aSubTests[iFn].pszName); \
2720	} \
2721	\
2722	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2723	} \
2724	}
2725	TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T, g_aUnaryU8)
2726	TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T, g_aUnaryU16)
2727	TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T, g_aUnaryU32)
2728	TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T, g_aUnaryU64)
2729
2730	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2731	static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
2732	{
2733	RTEXITCODE rcExit = UnaryU8Generate(cTests, papszNameFmts);
2734	if (rcExit == RTEXITCODE_SUCCESS)
2735	rcExit = UnaryU16Generate(cTests, papszNameFmts);
2736	if (rcExit == RTEXITCODE_SUCCESS)
2737	rcExit = UnaryU32Generate(cTests, papszNameFmts);
2738	if (rcExit == RTEXITCODE_SUCCESS)
2739	rcExit = UnaryU64Generate(cTests, papszNameFmts);
2740	return rcExit;
2741	}
2742
2743	static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
2744	{
2745	RTEXITCODE rcExit = UnaryU8DumpAll(papszNameFmts);
2746	if (rcExit == RTEXITCODE_SUCCESS)
2747	rcExit = UnaryU16DumpAll(papszNameFmts);
2748	if (rcExit == RTEXITCODE_SUCCESS)
2749	rcExit = UnaryU32DumpAll(papszNameFmts);
2750	if (rcExit == RTEXITCODE_SUCCESS)
2751	rcExit = UnaryU64DumpAll(papszNameFmts);
2752	return rcExit;
2753	}
2754	#endif
2755
2756	static void UnaryTest(void)
2757	{
2758	UnaryU8Test();
2759	UnaryU16Test();
2760	UnaryU32Test();
2761	UnaryU64Test();
2762	}
2763
2764
2765	/*
2766	* Shifts.
2767	*
2768	* Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2769	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftU<bits>Generate, the test data generator for the shift and
 * rotate subtests of one operand width, plus the matching DUMP_ALL_FN
 * expansion.
 *
 * Subtests whose EFLAGS flavour is neither native nor equal to
 * g_idxCpuEflFlavour are skipped.  Two records are written per test
 * iteration: one with random input flags, and one with the same operands and
 * shift count but with the input flags complemented within
 * X86_EFL_LIVE_MASK and X86_EFL_RA1_MASK set.
 *
 * @param   a_cBits         Operand width in bits.
 * @param   a_Fmt           Unused by the generator.
 * @param   a_TestType      Test data record type.
 * @param   a_aSubTests     The subtest table to generate data for.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        bool const fWantedFlavour =    a_aSubTests[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
                                    || a_aSubTests[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour; \
        if (!fWantedFlavour) \
            continue; \
        \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            /* Record #1: random flags, destination and shift count. */ \
            a_TestType Entry; \
            Entry.fEflIn  = RandEFlags(); \
            Entry.uDstIn  = RandU ## a_cBits ## Dst(iTest); \
            Entry.uSrcIn  = 0; \
            Entry.uMisc   = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            Entry.uDstOut = Entry.uDstIn; \
            Entry.fEflOut = a_aSubTests[iFn].pfnNative(Entry.fEflIn, &Entry.uDstOut, Entry.uMisc); \
            GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
            \
            /* Record #2: same operands, input flags flipped. */ \
            Entry.fEflIn  = (~Entry.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Entry.uDstOut = Entry.uDstIn; \
            Entry.fEflOut = a_aSubTests[iFn].pfnNative(Entry.fEflIn, &Entry.uDstOut, Entry.uMisc); \
            GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftU ## a_cBits, a_aSubTests)
#else
/** No generator in this build configuration; expands to nothing. */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2805
2806	#define TEST_SHIFT(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2807	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2808	static a_SubTestType a_aSubTests[] = \
2809	{ \
2810	ENTRY_BIN_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2811	ENTRY_BIN_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2812	ENTRY_BIN_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2813	ENTRY_BIN_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2814	ENTRY_BIN_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2815	ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2816	ENTRY_BIN_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2817	ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2818	ENTRY_BIN_AMD( shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2819	ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2820	ENTRY_BIN_AMD( shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2821	ENTRY_BIN_INTEL(shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2822	ENTRY_BIN_AMD( sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2823	ENTRY_BIN_INTEL(sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2824	}; \
2825	\
2826	GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2827	\
2828	static uint64_t ShiftU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTU ## a_cBits pfn, a_TestType const *pEntry) \
2829	{ \
2830	uint32_t const fEflIn = pEntry->fEflIn; \
2831	a_uType const uDstIn = pEntry->uDstIn; \
2832	a_uType const cShift = pEntry->uMisc; \
2833	cIterations /= 4; \
2834	RTThreadYield(); \
2835	uint64_t const nsStart = RTTimeNanoTS(); \
2836	for (uint32_t i = 0; i < cIterations; i++) \
2837	{ \
2838	a_uType uBenchDst = uDstIn; \
2839	pfn(fEflIn, &uBenchDst, cShift); \
2840	\
2841	uBenchDst = uDstIn; \
2842	pfn(fEflIn, &uBenchDst, cShift); \
2843	\
2844	uBenchDst = uDstIn; \
2845	pfn(fEflIn, &uBenchDst, cShift); \
2846	\
2847	uBenchDst = uDstIn; \
2848	pfn(fEflIn, &uBenchDst, cShift); \
2849	} \
2850	return RTTimeNanoTS() - nsStart; \
2851	} \
2852	\
2853	static void ShiftU ## a_cBits ## Test(void) \
2854	{ \
2855	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2856	{ \
2857	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2858	continue; \
2859	PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2860	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2861	uint32_t const cTests = a_aSubTests[iFn].cTests; \
2862	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2863	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2864	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2865	{ \
2866	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2867	{ \
2868	a_uType uDst = paTests[iTest].uDstIn; \
2869	uint32_t fEflOut = pfn(paTests[iTest].fEflIn, &uDst, paTests[iTest].uMisc); \
2870	if ( uDst != paTests[iTest].uDstOut \
2871	\|\| fEflOut != paTests[iTest].fEflOut ) \
2872	RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2873	iTest, iVar == 0 ? "" : "/n", \
2874	paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2875	fEflOut, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2876	EFlagsDiff(fEflOut, paTests[iTest].fEflOut)); \
2877	else \
2878	{ \
2879	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
2880	fEflOut = pfn(paTests[iTest].fEflIn, g_pu ## a_cBits, paTests[iTest].uMisc); \
2881	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2882	RTTEST_CHECK(g_hTest, fEflOut == paTests[iTest].fEflOut); \
2883	} \
2884	} \
2885	\
2886	/* Benchmark if all succeeded. */ \
2887	if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2888	{ \
2889	uint32_t const iTest = cTests / 2; \
2890	uint32_t const cIterations = EstimateIterations(_64K, ShiftU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2891	uint64_t const cNsRealRun = ShiftU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2892	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
2893	"%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
2894	} \
2895	\
2896	/* Next variation is native. */ \
2897	pfn = a_aSubTests[iFn].pfnNative; \
2898	} \
2899	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2900	} \
2901	}
2902	TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2903	TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2904	TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2905	TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2906
2907	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2908	static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
2909	{
2910	RTEXITCODE rcExit = ShiftU8Generate(cTests, papszNameFmts);
2911	if (rcExit == RTEXITCODE_SUCCESS)
2912	rcExit = ShiftU16Generate(cTests, papszNameFmts);
2913	if (rcExit == RTEXITCODE_SUCCESS)
2914	rcExit = ShiftU32Generate(cTests, papszNameFmts);
2915	if (rcExit == RTEXITCODE_SUCCESS)
2916	rcExit = ShiftU64Generate(cTests, papszNameFmts);
2917	return rcExit;
2918	}
2919
2920	static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
2921	{
2922	RTEXITCODE rcExit = ShiftU8DumpAll(papszNameFmts);
2923	if (rcExit == RTEXITCODE_SUCCESS)
2924	rcExit = ShiftU16DumpAll(papszNameFmts);
2925	if (rcExit == RTEXITCODE_SUCCESS)
2926	rcExit = ShiftU32DumpAll(papszNameFmts);
2927	if (rcExit == RTEXITCODE_SUCCESS)
2928	rcExit = ShiftU64DumpAll(papszNameFmts);
2929	return rcExit;
2930	}
2931	#endif
2932
2933	static void ShiftTest(void)
2934	{
2935	ShiftU8Test();
2936	ShiftU16Test();
2937	ShiftU32Test();
2938	ShiftU64Test();
2939	}
2940
2941
2942	/*
2943	* Multiplication and division.
2944	*
2945	* Note! The 8-bit functions have a different format, so we need to duplicate things.
2946	* Note! Currently ignoring undefined bits.
2947	*/
2948
2949	/* U8 */
2950	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2951	static const MULDIVU8_TEST_T g_aFixedTests_idiv_u8[] =
2952	{
2953	/* efl in, efl out, uDstIn, uDstOut, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
2954	{ UINT32_MAX, 0, 0x8000, 0, 0xc7, -1 }, /* -32768 / -57 = #DE (574.8771929824...) */
2955	{ UINT32_MAX, 0, 0x8000, 0, 0xdd, -128 }, /* -32768 / -35 = #DE (936.2285714285...) */
2956	{ UINT32_MAX, 0, 0x7f00, 0, 0x7f, -1 }, /* 0x7f00 / 0x7f = #DE (0x100) */
2957	{ UINT32_MAX, 0, 0x3f80, 0, 0x7f, -1 }, /* 0x3F80 / 0x7f = #DE (0x80) */
2958	{ UINT32_MAX, 0, 0x3f7f, 0, 0x7f, 0 }, /* 0x3F7F / 0x7f = 127.992125984... */
2959	{ UINT32_MAX, 0, 0xc000, 0, 0x80, -1 }, /* -16384 / -128 = #DE (0x80) */
2960	{ UINT32_MAX, 0, 0xc001, 0, 0x80, 0 }, /* -16383 / -128 = 127.9921875 */
2961	};
2962	#endif
2963	TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2964	static INT_MULDIV_U8_T g_aMulDivU8[] =
2965	{
2966	ENTRY_BIN_AMD_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
2967	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
2968	ENTRY_BIN_INTEL_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
2969	ENTRY_BIN_AMD_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
2970	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
2971	ENTRY_BIN_INTEL_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
2972	ENTRY_BIN_AMD_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2973	ENTRY_BIN_INTEL_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2974	ENTRY_BIN_FIX_AMD_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2975	ENTRY_BIN_FIX_INTEL_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2976	};
2977
2978	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2979	DUMP_ALL_FN(MulDivU8, g_aMulDivU8)
2980	static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
2981	{
2982	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2983	{
2984	if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2985	&& g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2986	continue;
2987	IEMBINARYOUTPUT BinOut; \
2988	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aMulDivU8[iFn]), RTEXITCODE_FAILURE); \
2989	for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2990	{
2991	MULDIVU8_TEST_T Test;
2992	Test.fEflIn = RandEFlags();
2993	Test.uDstIn = RandU16Dst(iTest);
2994	Test.uDstOut = Test.uDstIn;
2995	Test.uSrcIn = RandU8Src(iTest);
2996	uint32_t const fEflRet = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.fEflIn);
2997	Test.fEflOut = fEflRet ? fEflRet : Test.fEflIn;
2998	Test.rc = fEflRet ? 0 : -1;
2999	GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3000	}
3001	for (uint32_t iTest = 0; iTest < g_aMulDivU8[iFn].cFixedTests; iTest++)
3002	{
3003	MULDIVU8_TEST_T Test;
3004	Test.fEflIn = g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags()
3005	: g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn;
3006	Test.uDstIn = g_aMulDivU8[iFn].paFixedTests[iTest].uDstIn;
3007	Test.uDstOut = Test.uDstIn;
3008	Test.uSrcIn = g_aMulDivU8[iFn].paFixedTests[iTest].uSrcIn;
3009	uint32_t const fEflRet = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.fEflIn);
3010	Test.fEflOut = fEflRet ? fEflRet : Test.fEflIn;
3011	Test.rc = fEflRet ? 0 : -1;
3012	if (g_aMulDivU8[iFn].paFixedTests[iTest].rc == 0 \|\| g_aMulDivU8[iFn].paFixedTests[iTest].rc == -1)
3013	Test.rc = g_aMulDivU8[iFn].paFixedTests[iTest].rc;
3014	GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3015	}
3016	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3017	}
3018	return RTEXITCODE_SUCCESS;
3019	}
3020	#endif
3021
3022	static uint64_t MulDivU8Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU8 pfn, MULDIVU8_TEST_T const *pEntry)
3023	{
3024	uint32_t const fEflIn = pEntry->fEflIn;
3025	uint16_t const uDstIn = pEntry->uDstIn;
3026	uint8_t const uSrcIn = pEntry->uSrcIn;
3027	cIterations /= 4;
3028	RTThreadYield();
3029	uint64_t const nsStart = RTTimeNanoTS();
3030	for (uint32_t i = 0; i < cIterations; i++)
3031	{
3032	uint16_t uBenchDst = uDstIn;
3033	pfn(&uBenchDst, uSrcIn, fEflIn);
3034
3035	uBenchDst = uDstIn;
3036	pfn(&uBenchDst, uSrcIn, fEflIn);
3037
3038	uBenchDst = uDstIn;
3039	pfn(&uBenchDst, uSrcIn, fEflIn);
3040
3041	uBenchDst = uDstIn;
3042	pfn(&uBenchDst, uSrcIn, fEflIn);
3043	}
3044	return RTTimeNanoTS() - nsStart;
3045	}
3046
3047	static void MulDivU8Test(void)
3048	{
3049	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
3050	{
3051	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
3052	continue;
3053	MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
3054	uint32_t const cTests = g_aMulDivU8[iFn].cTests;
3055	uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
3056	PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
3057	uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
3058	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3059	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3060	{
3061	for (uint32_t iTest = 0; iTest < cTests; iTest++ )
3062	{
3063	uint16_t uDst = paTests[iTest].uDstIn;
3064	uint32_t fEfl = pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].fEflIn);
3065	int rc = fEfl ? 0 : -1;
3066	fEfl = fEfl ? fEfl : paTests[iTest].fEflIn;
3067	if ( uDst != paTests[iTest].uDstOut
3068	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)
3069	\|\| rc != paTests[iTest].rc)
3070	RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
3071	" %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
3072	"%sexpected %#08x %#06RX16 %d%s\n",
3073	iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
3074	iVar ? " " : "", fEfl, uDst, rc,
3075	iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
3076	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn));
3077	else
3078	{
3079	*g_pu16 = paTests[iTest].uDstIn;
3080	fEfl = pfn(g_pu16, paTests[iTest].uSrcIn, paTests[iTest].fEflIn);
3081	rc = fEfl ? 0 : -1;
3082	fEfl = fEfl ? fEfl : paTests[iTest].fEflIn;
3083	RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
3084	RTTEST_CHECK(g_hTest, (fEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn));
3085	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
3086	}
3087	}
3088
3089	/* Benchmark if all succeeded. */
3090	if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
3091	{
3092	uint32_t const iTest = cTests / 2;
3093	uint32_t const cIterations = EstimateIterations(_64K, MulDivU8Bench(_64K, pfn, &paTests[iTest]));
3094	uint64_t const cNsRealRun = MulDivU8Bench(cIterations, pfn, &paTests[iTest]);
3095	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
3096	"%s%s", g_aMulDivU8[iFn].pszName, iVar ? "-native" : "");
3097	}
3098
3099	/* Next variation is native. */
3100	pfn = g_aMulDivU8[iFn].pfnNative;
3101	}
3102	FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]);
3103	}
3104	}
3105
3106	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3107	static const MULDIVU16_TEST_T g_aFixedTests_idiv_u16[] =
3108	{
3109	/* low high */
3110	/* --- eflags ---, -- uDst1 --, -- uDst2 --, */
3111	/* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
3112	{ UINT32_MAX, 0, 0x0000, 0, 0x8000, 0, 0xc004, -1 }, /* -2147483648 /-16380 = #DE (131104.00781...) */
3113	{ UINT32_MAX, 0, 0xffff, 0, 0x7fff, 0, 0x7fff, -1 }, /* 2147483647 / 32767 = #DE (65538.000030...) */
3114	{ UINT32_MAX, 0, 0x8000, 0, 0x3fff, 0, 0x7fff, -1 }, /* 0x3fff8000 / 0x7fff = #DE (0x8000) */
3115	{ UINT32_MAX, 0, 0x7fff, 0, 0x3fff, 0, 0x7fff, 0 }, /* 0x3fff7fff / 0x7fff = 32767.99996948... */
3116	{ UINT32_MAX, 0, 0x0000, 0, 0xc000, 0, 0x8000, -1 }, /* -1073741824 / -32768 = #DE (0x8000) */
3117	{ UINT32_MAX, 0, 0x0001, 0, 0xc000, 0, 0x8000, 0 }, /* -1073741823 / -32768 = 32767.999969482421875 */
3118	};
3119
3120	static const MULDIVU32_TEST_T g_aFixedTests_idiv_u32[] =
3121	{
3122	/* low high */
3123	/* --- eflags ---, ---- uDst1 ----, ---- uDst2 ----, */
3124	/* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
3125	{ UINT32_MAX, 0, 0x00000000, 0, 0x80000000, 0, 0xc0000004, -1 },
3126	{ UINT32_MAX, 0, 0xffffffff, 0, 0x7fffffff, 0, 0x7fffffff, -1 },
3127	{ UINT32_MAX, 0, 0x80000000, 0, 0x3fffffff, 0, 0x7fffffff, -1 },
3128	{ UINT32_MAX, 0, 0x7fffffff, 0, 0x3fffffff, 0, 0x7fffffff, 0 },
3129	{ UINT32_MAX, 0, 0x00000000, 0, 0xc0000000, 0, 0x80000000, -1 },
3130	{ UINT32_MAX, 0, 0x00000001, 0, 0xc0000000, 0, 0x80000000, 0 },
3131	};
3132
3133	static const MULDIVU64_TEST_T g_aFixedTests_idiv_u64[] =
3134	{
3135	/* low high */
3136	/* --- eflags ---, -------- uDst1 --------, -------- uDst2 --------, */
3137	/* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
3138	{ UINT32_MAX, 0, 0x0000000000000000, 0, 0x8000000000000000, 0, 0xc000000000000004, -1 },
3139	{ UINT32_MAX, 0, 0xffffffffffffffff, 0, 0x7fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
3140	{ UINT32_MAX, 0, 0x8000000000000000, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
3141	{ UINT32_MAX, 0, 0x7fffffffffffffff, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, 0 },
3142	{ UINT32_MAX, 0, 0x0000000000000000, 0, 0xc000000000000000, 0, 0x8000000000000000, -1 },
3143	{ UINT32_MAX, 0, 0x0000000000000001, 0, 0xc000000000000000, 0, 0x8000000000000000, 0 },
3144	};
3145
/**
 * Defines the DUMP_ALL_FN expansion and MulDivU<bits>Generate, the test data
 * generator for the MUL/IMUL/DIV/IDIV subtests of one operand width.
 *
 * Subtests whose EFLAGS flavour is neither native nor equal to
 * g_idxCpuEflFlavour are skipped.  For each remaining subtest cTests records
 * with random inputs are written, followed by one record per fixed test
 * attached to the subtest.  A zero return from the native worker is recorded
 * as rc -1 with the input flags as output flags.
 *
 * @param   a_cBits         Operand width in bits.
 * @param   a_Fmt           Unused by the generator.
 * @param   a_TestType      Test data record type.
 * @param   a_aSubTests     The subtest table to generate data for.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
DUMP_ALL_FN(MulDivU ## a_cBits, a_aSubTests) \
static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        bool const fWantedFlavour =    a_aSubTests[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
                                    || a_aSubTests[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour; \
        if (!fWantedFlavour) \
            continue; \
        \
        IEMBINARYOUTPUT BinOut; \
        a_TestType      Entry; \
        RT_ZERO(Entry); /* 64-bit variant contains alignment padding */ \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        \
        /* Records with random inputs. */ \
        for (uint32_t iRand = 0; iRand < cTests; iRand++ ) \
        { \
            Entry.fEflIn   = RandEFlags(); \
            Entry.uDst1In  = RandU ## a_cBits ## Dst(iRand); \
            Entry.uDst1Out = Entry.uDst1In; \
            Entry.uDst2In  = RandU ## a_cBits ## Dst(iRand); \
            Entry.uDst2Out = Entry.uDst2In; \
            Entry.uSrcIn   = RandU ## a_cBits ## Src(iRand); \
            uint32_t const fEflRet = a_aSubTests[iFn].pfnNative(&Entry.uDst1Out, &Entry.uDst2Out, Entry.uSrcIn, Entry.fEflIn); \
            if (fEflRet) \
            { \
                Entry.fEflOut = fEflRet; \
                Entry.rc      = 0; \
            } \
            else \
            { \
                Entry.fEflOut = Entry.fEflIn; \
                Entry.rc      = -1; \
            } \
            GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
        } \
        \
        /* Records with fixed inputs; UINT32_MAX as input flags requests random flags. */ \
        for (uint32_t iFixed = 0; iFixed < a_aSubTests[iFn].cFixedTests; iFixed++ ) \
        { \
            if (a_aSubTests[iFn].paFixedTests[iFixed].fEflIn == UINT32_MAX) \
                Entry.fEflIn = RandEFlags(); \
            else \
                Entry.fEflIn = a_aSubTests[iFn].paFixedTests[iFixed].fEflIn; \
            Entry.uDst1In  = a_aSubTests[iFn].paFixedTests[iFixed].uDst1In; \
            Entry.uDst1Out = Entry.uDst1In; \
            Entry.uDst2In  = a_aSubTests[iFn].paFixedTests[iFixed].uDst2In; \
            Entry.uDst2Out = Entry.uDst2In; \
            Entry.uSrcIn   = a_aSubTests[iFn].paFixedTests[iFixed].uSrcIn; \
            uint32_t const fEflRet = a_aSubTests[iFn].pfnNative(&Entry.uDst1Out, &Entry.uDst2Out, Entry.uSrcIn, Entry.fEflIn); \
            if (fEflRet) \
            { \
                Entry.fEflOut = fEflRet; \
                Entry.rc      = 0; \
            } \
            else \
            { \
                Entry.fEflOut = Entry.fEflIn; \
                Entry.rc      = -1; \
            } \
            /* An explicit 0 or -1 in the fixed test overrides the computed rc. */ \
            if (a_aSubTests[iFn].paFixedTests[iFixed].rc == 0 || a_aSubTests[iFn].paFixedTests[iFixed].rc == -1) \
                Entry.rc = a_aSubTests[iFn].paFixedTests[iFixed].rc; \
            GenerateBinaryWrite(&BinOut, &Entry, sizeof(Entry)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
}
3192	#else
3193	# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
3194	#endif
3195
3196	#define TEST_MULDIV(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
3197	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
3198	static a_SubTestType a_aSubTests [] = \
3199	{ \
3200	ENTRY_BIN_AMD_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
3201	ENTRY_BIN_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
3202	ENTRY_BIN_AMD_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
3203	ENTRY_BIN_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
3204	ENTRY_BIN_AMD_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
3205	ENTRY_BIN_INTEL_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
3206	ENTRY_BIN_FIX_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
3207	ENTRY_BIN_FIX_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
3208	}; \
3209	\
3210	GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
3211	\
3212	static uint64_t MulDivU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU ## a_cBits pfn, a_TestType const *pEntry) \
3213	{ \
3214	uint32_t const fEflIn = pEntry->fEflIn; \
3215	a_uType const uDst1In = pEntry->uDst1In; \
3216	a_uType const uDst2In = pEntry->uDst2In; \
3217	a_uType const uSrcIn = pEntry->uSrcIn; \
3218	cIterations /= 4; \
3219	RTThreadYield(); \
3220	uint64_t const nsStart = RTTimeNanoTS(); \
3221	for (uint32_t i = 0; i < cIterations; i++) \
3222	{ \
3223	a_uType uBenchDst1 = uDst1In; \
3224	a_uType uBenchDst2 = uDst2In; \
3225	pfn(&uBenchDst1, &uBenchDst2, uSrcIn, fEflIn); \
3226	\
3227	uBenchDst1 = uDst1In; \
3228	uBenchDst2 = uDst2In; \
3229	pfn(&uBenchDst1, &uBenchDst2, uSrcIn, fEflIn); \
3230	\
3231	uBenchDst1 = uDst1In; \
3232	uBenchDst2 = uDst2In; \
3233	pfn(&uBenchDst1, &uBenchDst2, uSrcIn, fEflIn); \
3234	\
3235	uBenchDst1 = uDst1In; \
3236	uBenchDst2 = uDst2In; \
3237	pfn(&uBenchDst1, &uBenchDst2, uSrcIn, fEflIn); \
3238	} \
3239	return RTTimeNanoTS() - nsStart; \
3240	} \
3241	\
3242	static void MulDivU ## a_cBits ## Test(void) \
3243	{ \
3244	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3245	{ \
3246	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3247	continue; \
3248	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3249	uint32_t const cTests = a_aSubTests[iFn].cTests; \
3250	uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
3251	PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3252	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3253	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3254	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3255	{ \
3256	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
3257	{ \
3258	a_uType uDst1 = paTests[iTest].uDst1In; \
3259	a_uType uDst2 = paTests[iTest].uDst2In; \
3260	uint32_t fEfl = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, paTests[iTest].fEflIn); \
3261	int rc = fEfl ? 0 : -1; \
3262	fEfl = fEfl ? fEfl : paTests[iTest].fEflIn; \
3263	if ( uDst1 != paTests[iTest].uDst1Out \
3264	\|\| uDst2 != paTests[iTest].uDst2Out \
3265	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)\
3266	\|\| rc != paTests[iTest].rc) \
3267	RTTestFailed(g_hTest, "#%04u%s: efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
3268	" -> efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
3269	" expected %#010x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
3270	iTest, iVar == 0 ? " " : "/n", \
3271	paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
3272	fEfl, uDst1, uDst2, rc, \
3273	paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
3274	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn), \
3275	uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
3276	(fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn) ? " eflags" : ""); \
3277	else \
3278	{ \
3279	*g_pu ## a_cBits = paTests[iTest].uDst1In; \
3280	*g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
3281	fEfl = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, paTests[iTest].fEflIn); \
3282	rc = fEfl ? 0 : -1; \
3283	fEfl = fEfl ? fEfl : paTests[iTest].fEflIn; \
3284	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
3285	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
3286	RTTEST_CHECK(g_hTest, (fEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn)); \
3287	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
3288	} \
3289	} \
3290	\
3291	/* Benchmark if all succeeded. */ \
3292	if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
3293	{ \
3294	uint32_t const iTest = cTests / 2; \
3295	uint32_t const cIterations = EstimateIterations(_64K, MulDivU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
3296	uint64_t const cNsRealRun = MulDivU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
3297	RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
3298	"%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
3299	} \
3300	\
3301	/* Next variation is native. */ \
3302	pfn = a_aSubTests[iFn].pfnNative; \
3303	} \
3304	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3305	} \
3306	} //1068553096 = 0x3FB0D388 (1068553096)
3307	TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
3308	TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
3309	TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
3310
3311	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3312	static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
3313	{
3314	RTEXITCODE rcExit = MulDivU8Generate(cTests, papszNameFmts);
3315	if (rcExit == RTEXITCODE_SUCCESS)
3316	rcExit = MulDivU16Generate(cTests, papszNameFmts);
3317	if (rcExit == RTEXITCODE_SUCCESS)
3318	rcExit = MulDivU32Generate(cTests, papszNameFmts);
3319	if (rcExit == RTEXITCODE_SUCCESS)
3320	rcExit = MulDivU64Generate(cTests, papszNameFmts);
3321	return rcExit;
3322	}
3323
3324	static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
3325	{
3326	RTEXITCODE rcExit = MulDivU8DumpAll(papszNameFmts);
3327	if (rcExit == RTEXITCODE_SUCCESS)
3328	rcExit = MulDivU16DumpAll(papszNameFmts);
3329	if (rcExit == RTEXITCODE_SUCCESS)
3330	rcExit = MulDivU32DumpAll(papszNameFmts);
3331	if (rcExit == RTEXITCODE_SUCCESS)
3332	rcExit = MulDivU64DumpAll(papszNameFmts);
3333	return rcExit;
3334	}
3335	#endif
3336
3337	static void MulDivTest(void)
3338	{
3339	MulDivU8Test();
3340	MulDivU16Test();
3341	MulDivU32Test();
3342	MulDivU64Test();
3343	}
3344
3345
3346	/*
3347	* BSWAP
3348	*/
3349	static void BswapTest(void)
3350	{
3351	if (SubTestAndCheckIfEnabled("bswap_u16"))
3352	{
3353	*g_pu32 = UINT32_C(0x12345678);
3354	iemAImpl_bswap_u16(g_pu32);
3355	#if 0
3356	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12347856), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
3357	#else
3358	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12340000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
3359	#endif
3360	*g_pu32 = UINT32_C(0xffff1122);
3361	iemAImpl_bswap_u16(g_pu32);
3362	#if 0
3363	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff2211), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
3364	#else
3365	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff0000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
3366	#endif
3367	}
3368
3369	if (SubTestAndCheckIfEnabled("bswap_u32"))
3370	{
3371	*g_pu32 = UINT32_C(0x12345678);
3372	iemAImpl_bswap_u32(g_pu32);
3373	RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
3374	}
3375
3376	if (SubTestAndCheckIfEnabled("bswap_u64"))
3377	{
3378	*g_pu64 = UINT64_C(0x0123456789abcdef);
3379	iemAImpl_bswap_u64(g_pu64);
3380	RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
3381	}
3382	}
3383
3384
3385
3386	/*********************************************************************************************************************************
3387	* Floating point (x87 style) *
3388	*********************************************************************************************************************************/
3389
3390	/*
3391	* FPU constant loading.
3392	*/
3393	TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
3394
3395	static FPU_LD_CONST_T g_aFpuLdConst[] =
3396	{
3397	ENTRY_BIN(fld1),
3398	ENTRY_BIN(fldl2t),
3399	ENTRY_BIN(fldl2e),
3400	ENTRY_BIN(fldpi),
3401	ENTRY_BIN(fldlg2),
3402	ENTRY_BIN(fldln2),
3403	ENTRY_BIN(fldz),
3404	};
3405
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for every worker in g_aFpuLdConst.
 *
 * A random FCW/FSW pair is drawn per batch and the worker is then run once for
 * each of the four rounding control values, writing one record per run.  The
 * outer loop therefore advances in steps of four.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if opening or closing the
 *          binary output fails.
 * @param   cTests          Number of test records to produce per worker.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE FpuLdConstGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdConst[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Replace only the rounding control field, keep the rest of the random FCW. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                FPU_LD_CONST_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuLdConst, g_aFpuLdConst)
#endif
3435
3436	static void FpuLdConstTest(void)
3437	{
3438	/*
3439	* Inputs:
3440	* - FSW: C0, C1, C2, C3
3441	* - FCW: Exception masks, Precision control, Rounding control.
3442	*
3443	* C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
3444	*/
3445	X86FXSTATE State;
3446	RT_ZERO(State);
3447	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3448	{
3449	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
3450	continue;
3451
3452	FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
3453	uint32_t const cTests = g_aFpuLdConst[iFn].cTests;
3454	PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
3455	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
3456	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3457	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3458	{
3459	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3460	{
3461	State.FCW = paTests[iTest].fFcw;
3462	State.FSW = paTests[iTest].fFswIn;
3463	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3464	pfn(&State, &Res);
3465	if ( Res.FSW != paTests[iTest].fFswOut
3466	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3467	RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
3468	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3469	Res.FSW, FormatR80(&Res.r80Result),
3470	paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3471	FswDiff(Res.FSW, paTests[iTest].fFswOut),
3472	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3473	FormatFcw(paTests[iTest].fFcw) );
3474	}
3475	pfn = g_aFpuLdConst[iFn].pfnNative;
3476	}
3477
3478	FREE_DECOMPRESSED_TESTS(g_aFpuLdConst[iFn]);
3479	}
3480	}
3481
3482
3483	/*
3484	* Load floating point values from memory.
3485	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines FpuLdR<a_cBits>Generate (plus its DUMP_ALL_FN companion).
 *
 * For each of the cTests iterations a random FCW, FSW and input value are
 * drawn, and the worker is run once per rounding control value (0..3).  Every
 * run is written as one a_TestType record including the input value.
 * Expands to nothing when the generator is not compiled in.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdR ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
3519
/*
 * Defines everything needed to test one fld_r80_from_r<a_cBits> worker: the
 * worker function pointer typedefs, the sub-test descriptor type, the
 * one-entry sub-test array, the generator (via GEN_FPU_LOAD) and
 * FpuLdR<a_cBits>Test.
 *
 * The test function replays each record (FCW, input FSW, input value) and
 * fails when the output FSW or the 80-bit result differs from the record.
 * After the first pass over the records the native worker is used for any
 * further variation.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const           paTests = a_aSubTests[iFn].paTests; \
        uint32_t const                     cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                     cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3573
3574	TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
3575	TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
3576	TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
3577
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the floating point load test data for the 80, 64 and 32-bit
 * sources, stopping at the first generator that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing generator.
 * @param   cTests          Number of tests, passed on to each generator.
 * @param   papszNameFmts   Passed on to each generator.
 */
static RTEXITCODE FpuLdMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuLdR80Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuLdR64Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuLdR32Generate(cTests, papszNameFmts);
}

/**
 * Dumps the floating point load test data for the 80, 64 and 32-bit sources,
 * stopping at the first dumper that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing dumper.
 * @param   papszNameFmts   Passed on to each dumper.
 */
static RTEXITCODE FpuLdMemDumpAll(const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuLdR80DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuLdR64DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuLdR32DumpAll(papszNameFmts);
}
#endif
3599
3600	static void FpuLdMemTest(void)
3601	{
3602	FpuLdR80Test();
3603	FpuLdR64Test();
3604	FpuLdR32Test();
3605	}
3606
3607
3608	/*
3609	* Load integer values from memory.
3610	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines FpuLdI<a_cBits>Generate (plus its DUMP_ALL_FN companion).
 *
 * For each of the cTests iterations a random FCW, FSW and integer input are
 * drawn, and the worker is run once per rounding control value (0..3).  Every
 * run is written as one a_TestType record.
 *
 * The input value is stored as the last member of the record, since the
 * matching test function reads it back from paTests[].iInVal.
 * NOTE(review): this assumes iInVal directly follows rdResult in a_TestType,
 * mirroring the floating point and BCD load records - confirm against the
 * structure definitions.
 *
 * Expands to nothing when the generator is not compiled in.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdI ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
3644
/*
 * Defines everything needed to test one fild_r80_from_i<a_cBits> worker: the
 * worker function pointer typedefs, the sub-test descriptor type, the
 * one-entry sub-test array, the generator (via GEN_FPU_LOAD_INT) and
 * FpuLdI<a_cBits>Test.
 *
 * The test function replays each record (FCW, input FSW, integer input) and
 * fails when the output FSW or the 80-bit result differs from the record.
 * a_szFmtIn is the format specifier used to print the integer input.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const            paTests = a_aSubTests[iFn].paTests; \
        uint32_t const                      cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                      cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3696
3697	TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
3698	TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
3699	TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
3700
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the integer load test data for the 64, 32 and 16-bit sources,
 * stopping at the first generator that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing generator.
 * @param   cTests          Number of tests, passed on to each generator.
 * @param   papszNameFmts   Passed on to each generator.
 */
static RTEXITCODE FpuLdIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuLdI64Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuLdI32Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuLdI16Generate(cTests, papszNameFmts);
}

/**
 * Dumps the integer load test data for the 64, 32 and 16-bit sources,
 * stopping at the first dumper that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing dumper.
 * @param   papszNameFmts   Passed on to each dumper.
 */
static RTEXITCODE FpuLdIntDumpAll(const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuLdI64DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuLdI32DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuLdI16DumpAll(papszNameFmts);
}
#endif
3722
3723	static void FpuLdIntTest(void)
3724	{
3725	FpuLdI64Test();
3726	FpuLdI32Test();
3727	FpuLdI16Test();
3728	}
3729
3730
3731	/*
3732	* Load binary coded decimal values from memory.
3733	*/
3734	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
3735	typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
3736	TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
3737
3738	static FPU_LD_D80_T g_aFpuLdD80[] =
3739	{
3740	ENTRY_BIN(fld_r80_from_d80)
3741	};
3742
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for the BCD load worker(s) in g_aFpuLdD80.
 *
 * For each of the @a cTests iterations a random FCW, FSW and BCD input are
 * drawn, and the worker is run once per rounding control value (0..3).  Every
 * run is written as one record including the input value.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if opening or closing the
 *          binary output fails.
 * @param   cTests          Number of random inputs per worker.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE FpuLdD80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdD80[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                FPU_D80_IN_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuLdD80, g_aFpuLdD80)
#endif
3773
3774	static void FpuLdD80Test(void)
3775	{
3776	X86FXSTATE State;
3777	RT_ZERO(State);
3778	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3779	{
3780	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
3781	continue;
3782
3783	FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
3784	uint32_t const cTests = g_aFpuLdD80[iFn].cTests;
3785	PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
3786	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
3787	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3788	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3789	{
3790	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3791	{
3792	RTPBCD80U const InVal = paTests[iTest].InVal;
3793	State.FCW = paTests[iTest].fFcw;
3794	State.FSW = paTests[iTest].fFswIn;
3795	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3796	pfn(&State, &Res, &InVal);
3797	if ( Res.FSW != paTests[iTest].fFswOut
3798	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3799	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
3800	"%s -> fsw=%#06x %s\n"
3801	"%s expected %#06x %s%s%s (%s)\n",
3802	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3803	FormatD80(&paTests[iTest].InVal),
3804	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3805	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3806	FswDiff(Res.FSW, paTests[iTest].fFswOut),
3807	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3808	FormatFcw(paTests[iTest].fFcw) );
3809	}
3810	pfn = g_aFpuLdD80[iFn].pfnNative;
3811	}
3812
3813	FREE_DECOMPRESSED_TESTS(g_aFpuLdD80[iFn]);
3814	}
3815	}
3816
3817
3818	/*
 * Store floating point values to memory.
3820	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Extra inputs appended after the random ones by FpuStR32Generate. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Extra inputs appended after the random ones by FpuStR64Generate. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Extra inputs appended after the random ones by FpuStR80Generate. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};

/*
 * Defines FpuStR<a_cBits>Generate (plus its DUMP_ALL_FN companion).
 *
 * The inputs are cTests random values followed by the entries of
 * g_aFpuStR<a_cBits>Specials.  Each input is run with every rounding control
 * value (0..3) combined with every setting of the OM, UM and PM mask bits
 * (iMask >> 1 = 0..7), i.e. 32 records per input.  The output buffer is
 * filled with 0xfe before each call.
 * Expands to nothing when the generator is not compiled in.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStR ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
3886
/*
 * Defines everything needed to test one fst_r80_to_r<a_cBits> worker: the
 * worker function pointer typedefs, the sub-test descriptor type, the
 * one-entry sub-test array, the generator (via GEN_FPU_STORE) and
 * FpuStR<a_cBits>Test.
 *
 * The test function replays each record (FCW, input FSW, 80-bit input) with
 * the output buffer pre-filled with 0xfe, and fails when the output FSW or the
 * stored value differs from the record.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const          paTests = a_aSubTests[iFn].paTests; \
        uint32_t const                    cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                    cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_rdType         OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
3943
3944	TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3945	TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3946	TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3947
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the floating point store test data for the 80, 64 and 32-bit
 * destinations, stopping at the first generator that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing generator.
 * @param   cTests          Number of tests, passed on to each generator.
 * @param   papszNameFmts   Passed on to each generator.
 */
static RTEXITCODE FpuStMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuStR80Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuStR64Generate(cTests, papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuStR32Generate(cTests, papszNameFmts);
}

/**
 * Dumps the floating point store test data for the 80, 64 and 32-bit
 * destinations, stopping at the first dumper that does not succeed.
 *
 * @returns RTEXITCODE_SUCCESS or the exit code of the first failing dumper.
 * @param   papszNameFmts   Passed on to each dumper.
 */
static RTEXITCODE FpuStMemDumpAll(const char * const *papszNameFmts)
{
    RTEXITCODE rcExit = FpuStR80DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    rcExit = FpuStR64DumpAll(papszNameFmts);
    if (rcExit != RTEXITCODE_SUCCESS)
        return rcExit;

    return FpuStR32DumpAll(papszNameFmts);
}
#endif
3969
3970	static void FpuStMemTest(void)
3971	{
3972	FpuStR80Test();
3973	FpuStR64Test();
3974	FpuStR32Test();
3975	}
3976
3977
3978	/*
3979	* Store integer values to memory or register.
3980	*/
3981	TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3982	TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3983	TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3984
3985	static FPU_ST_I16_T g_aFpuStI16[] =
3986	{
3987	ENTRY_BIN(fist_r80_to_i16),
3988	ENTRY_BIN_AMD( fistt_r80_to_i16, 0),
3989	ENTRY_BIN_INTEL(fistt_r80_to_i16, 0),
3990	};
3991	static FPU_ST_I32_T g_aFpuStI32[] =
3992	{
3993	ENTRY_BIN(fist_r80_to_i32),
3994	ENTRY_BIN(fistt_r80_to_i32),
3995	};
3996	static FPU_ST_I64_T g_aFpuStI64[] =
3997	{
3998	ENTRY_BIN(fist_r80_to_i64),
3999	ENTRY_BIN(fistt_r80_to_i64),
4000	};
4001
4002	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4003	static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
4004	{
4005	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
4006	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
4007	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4008	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4009	RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
4010	RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
4011	RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
4012	RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
4013	RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
4014	RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
4015	RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
4016	RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
4017	RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
4018	RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
4019	RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
4020	RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4021	RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4022	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
4023	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
4024	RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
4025	RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
4026	RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4027	RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
4028	RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
4029	RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
4030	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
4031	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
4032	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
4033	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
4034	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
4035	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
4036	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
4037	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
4038	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
4039	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
4040	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
4041	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4042	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4043	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
4044	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
4045	RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
4046	RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
4047	RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4048	RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4049	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
4050	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
4051	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
4052	};
4053	static const RTFLOAT80U g_aFpuStI32Specials[] =
4054	{
4055	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
4056	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
4057	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
4058	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
4059	RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
4060	RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
4061	RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
4062	RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
4063	RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
4064	RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
4065	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4066	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
4067	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
4068	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
4069	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
4070	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
4071	};
4072	static const RTFLOAT80U g_aFpuStI64Specials[] =
4073	{
4074	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
4075	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
4076	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
4077	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
4078	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
4079	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
4080	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
4081	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
4082	RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
4083	RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
4084	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
4085	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
4086	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
4087	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
4088	RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
4089	RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
4090	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
4091	};
4092
/*
 * Defines FpuStI<a_cBits>Generate (plus its DUMP_ALL_FN companion).
 *
 * The native worker is used when the entry has one, otherwise the regular
 * one.  Entries whose EFLAGS flavour is neither native nor the currently
 * selected g_idxCpuEflFlavour are skipped.
 *
 * The inputs are cTests random values followed by the entries of
 * g_aFpuStI<a_cBits>Specials.  Each input is run with every rounding control
 * value (0..3) combined with every setting of the OM, UM and PM mask bits
 * (iMask >> 1 = 0..7), i.e. 32 records per input.  The output integer is
 * preset to ~2 before each call.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType  iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, iOutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStI ## a_cBits, a_aSubTests)
4139	#else
4140	# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
4141	#endif
4142
4143	#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
4144	GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
4145	\
4146	static void FpuStI ## a_cBits ## Test(void) \
4147	{ \
4148	X86FXSTATE State; \
4149	RT_ZERO(State); \
4150	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4151	{ \
4152	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4153	continue; \
4154	\
4155	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4156	uint32_t const cTests = a_aSubTests[iFn].cTests; \
4157	PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
4158	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4159	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4160	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4161	{ \
4162	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4163	{ \
4164	RTFLOAT80U const InVal = paTests[iTest].InVal; \
4165	uint16_t uFswOut = 0; \
4166	a_iType iOutVal = ~(a_iType)2; \
4167	State.FCW = paTests[iTest].fFcw; \
4168	State.FSW = paTests[iTest].fFswIn; \
4169	pfn(&State, &uFswOut, &iOutVal, &InVal); \
4170	if ( uFswOut != paTests[iTest].fFswOut \
4171	\|\| iOutVal != paTests[iTest].iOutVal) \
4172	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
4173	"%s -> fsw=%#06x " a_szFmt "\n" \
4174	"%s expected %#06x " a_szFmt "%s%s (%s)\n", \
4175	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4176	FormatR80(&paTests[iTest].InVal), \
4177	iVar ? " " : "", uFswOut, iOutVal, \
4178	iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
4179	FswDiff(uFswOut, paTests[iTest].fFswOut), \
4180	iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
4181	} \
4182	pfn = a_aSubTests[iFn].pfnNative; \
4183	} \
4184	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
4185	} \
4186	}
4187
4188	//fistt_r80_to_i16 diffs for AMD, of course :-)
4189
4190	TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
4191	TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
4192	TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
4193
4194	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4195	static RTEXITCODE FpuStIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
4196	{
4197	RTEXITCODE rcExit = FpuStI64Generate(cTests, papszNameFmts);
4198	if (rcExit == RTEXITCODE_SUCCESS)
4199	rcExit = FpuStI32Generate(cTests, papszNameFmts);
4200	if (rcExit == RTEXITCODE_SUCCESS)
4201	rcExit = FpuStI16Generate(cTests, papszNameFmts);
4202	return rcExit;
4203	}
4204	static RTEXITCODE FpuStIntDumpAll(const char * const *papszNameFmts)
4205	{
4206	RTEXITCODE rcExit = FpuStI64DumpAll(papszNameFmts);
4207	if (rcExit == RTEXITCODE_SUCCESS)
4208	rcExit = FpuStI32DumpAll(papszNameFmts);
4209	if (rcExit == RTEXITCODE_SUCCESS)
4210	rcExit = FpuStI16DumpAll(papszNameFmts);
4211	return rcExit;
4212	}
4213	#endif
4214
4215	static void FpuStIntTest(void)
4216	{
4217	FpuStI64Test();
4218	FpuStI32Test();
4219	FpuStI16Test();
4220	}
4221
4222
4223	/*
4224	* Store as packed BCD value (memory).
4225	*/
4226	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
4227	typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
4228	TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
4229
4230	static FPU_ST_D80_T g_aFpuStD80[] =
4231	{
4232	ENTRY_BIN(fst_r80_to_d80),
4233	};
4234
4235	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4236	static RTEXITCODE FpuStD80Generate(uint32_t cTests, const char * const *papszNameFmts)
4237	{
4238	static RTFLOAT80U const s_aSpecials[] =
4239	{
4240	RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
4241	RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
4242	RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
4243	RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
4244	RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
4245	RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
4246	RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
4247	RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
4248	RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
4249	RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
4250	RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
4251	RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
4252	};
4253
4254	X86FXSTATE State;
4255	RT_ZERO(State);
4256	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4257	{
4258	IEMBINARYOUTPUT BinOut;
4259	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuStD80[iFn]), RTEXITCODE_FAILURE);
4260	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4261	{
4262	uint16_t const fFcw = RandFcw();
4263	State.FSW = RandFsw();
4264	RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
4265
4266	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4267	{
4268	/* PC doesn't influence these, so leave as is. */
4269	AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
4270	for (uint16_t iMask = 0; iMask < 16; iMask += 2 /1/)
4271	{
4272	uint16_t uFswOut = 0;
4273	RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4274	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_OM \| X86_FCW_UM \| X86_FCW_PM))
4275	\| (iRounding << X86_FCW_RC_SHIFT);
4276	/if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;/
4277	State.FCW \|= (iMask >> 1) << X86_FCW_OM_BIT;
4278	g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
4279	FPU_ST_D80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal };
4280	GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4281	}
4282	}
4283	}
4284	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4285	}
4286	return RTEXITCODE_SUCCESS;
4287	}
4288	DUMP_ALL_FN(FpuStD80, g_aFpuStD80)
4289	#endif
4290
4291
4292	static void FpuStD80Test(void)
4293	{
4294	X86FXSTATE State;
4295	RT_ZERO(State);
4296	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4297	{
4298	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
4299	continue;
4300
4301	FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
4302	uint32_t const cTests = g_aFpuStD80[iFn].cTests;
4303	PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
4304	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
4305	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4306	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4307	{
4308	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4309	{
4310	RTFLOAT80U const InVal = paTests[iTest].InVal;
4311	uint16_t uFswOut = 0;
4312	RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4313	State.FCW = paTests[iTest].fFcw;
4314	State.FSW = paTests[iTest].fFswIn;
4315	pfn(&State, &uFswOut, &OutVal, &InVal);
4316	if ( uFswOut != paTests[iTest].fFswOut
4317	\|\| !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
4318	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4319	"%s -> fsw=%#06x %s\n"
4320	"%s expected %#06x %s%s%s (%s)\n",
4321	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4322	FormatR80(&paTests[iTest].InVal),
4323	iVar ? " " : "", uFswOut, FormatD80(&OutVal),
4324	iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
4325	FswDiff(uFswOut, paTests[iTest].fFswOut),
4326	RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
4327	FormatFcw(paTests[iTest].fFcw) );
4328	}
4329	pfn = g_aFpuStD80[iFn].pfnNative;
4330	}
4331
4332	FREE_DECOMPRESSED_TESTS(g_aFpuStD80[iFn]);
4333	}
4334	}
4335
4336
4337
4338	/*********************************************************************************************************************************
4339	* x87 FPU Binary Operations *
4340	*********************************************************************************************************************************/
4341
4342	/*
4343	* Binary FPU operations on two 80-bit floating point values.
4344	*/
4345	TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
4346	enum { kFpuBinaryHint_fprem = 1, };
4347
4348	static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
4349	{
4350	ENTRY_BIN(fadd_r80_by_r80),
4351	ENTRY_BIN(fsub_r80_by_r80),
4352	ENTRY_BIN(fsubr_r80_by_r80),
4353	ENTRY_BIN(fmul_r80_by_r80),
4354	ENTRY_BIN(fdiv_r80_by_r80),
4355	ENTRY_BIN(fdivr_r80_by_r80),
4356	ENTRY_BIN_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
4357	ENTRY_BIN_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
4358	ENTRY_BIN(fscale_r80_by_r80),
4359	ENTRY_BIN_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4360	ENTRY_BIN_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4361	ENTRY_BIN_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4362	ENTRY_BIN_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4363	ENTRY_BIN_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4364	ENTRY_BIN_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4365	};
4366
4367	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4368	static RTEXITCODE FpuBinaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4369	{
4370	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4371
4372	static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
4373	{
4374	{ RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
4375	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4376	{ RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
4377	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4378	{ RTFLOAT80U_INIT_ZERO(0), /* minus variant */
4379	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4380	{ RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
4381	RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
4382	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
4383	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
4384	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
4385	RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4386	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
4387	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4388	/* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
4389	once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
4390	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
4391	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4392	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
4393	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
4394	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
4395	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
4396	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
4397	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
4398	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
4399	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
4400	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4401	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
4402	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
4403	RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
4404	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
4405	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
4406	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
4407	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
4408	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
4409	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
4410	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
4411	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
4412	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4413	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
4414	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4415	RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
4416	/* fscale: Negative variants for the essentials of the above. */
4417	{ RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4418	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
4419	{ RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
4420	RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
4421	{ RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4422	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
4423	{ RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4424	RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
4425	/* fscale: Some fun with denormals and pseudo-denormals. */
4426	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
4427	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4428	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
4429	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4430	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
4431	{ RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
4432	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4433	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
4434	{ RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
4435	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
4436	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4437	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
4438	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4439	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
4440	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
4441	};
4442
4443	X86FXSTATE State;
4444	RT_ZERO(State);
4445	uint32_t cMinNormalPairs = (cTests - 144) / 4;
4446	uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
4447	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4448	{
4449	PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
4450	if ( g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4451	&& g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4452	continue;
4453
4454	IEMBINARYOUTPUT BinOut;
4455	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryR80[iFn]), RTEXITCODE_FAILURE);
4456	uint32_t cNormalInputPairs = 0;
4457	uint32_t cTargetRangeInputs = 0;
4458	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4459	{
4460	RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
4461	RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
4462	bool fTargetRange = false;
4463	if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4464	{
4465	cNormalInputPairs++;
4466	if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
4467	&& (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
4468	cTargetRangeInputs += fTargetRange = true;
4469	else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
4470	if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4471	{ /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
4472	InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
4473	InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
4474	cTargetRangeInputs += fTargetRange = true;
4475	}
4476	}
4477	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4478	{
4479	iTest -= 1;
4480	continue;
4481	}
4482
4483	uint16_t const fFcwExtra = 0;
4484	uint16_t const fFcw = RandFcw();
4485	State.FSW = RandFsw();
4486
4487	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4488	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4489	{
4490	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
4491	\| (iRounding << X86_FCW_RC_SHIFT)
4492	\| (iPrecision << X86_FCW_PC_SHIFT)
4493	\| X86_FCW_MASK_ALL;
4494	IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4495	pfn(&State, &ResM, &InVal1, &InVal2);
4496	FPU_BINARY_R80_TEST_T const TestM
4497	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, ResM.FSW, InVal1, InVal2, ResM.r80Result };
4498	GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
4499
4500	State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4501	IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4502	pfn(&State, &ResU, &InVal1, &InVal2);
4503	FPU_BINARY_R80_TEST_T const TestU
4504	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, ResU.FSW, InVal1, InVal2, ResU.r80Result };
4505	GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
4506
4507	uint16_t fXcpt = (ResM.FSW \| ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4508	if (fXcpt)
4509	{
4510	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4511	IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4512	pfn(&State, &Res1, &InVal1, &InVal2);
4513	FPU_BINARY_R80_TEST_T const Test1
4514	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res1.FSW, InVal1, InVal2, Res1.r80Result };
4515	GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
4516
4517	if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4518	{
4519	fXcpt \|= Res1.FSW & X86_FSW_XCPT_MASK;
4520	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4521	IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4522	pfn(&State, &Res2, &InVal1, &InVal2);
4523	FPU_BINARY_R80_TEST_T const Test2
4524	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res2.FSW, InVal1, InVal2, Res2.r80Result };
4525	GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
4526	}
4527	if (!RT_IS_POWER_OF_TWO(fXcpt))
4528	for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4529	if (fUnmasked & fXcpt)
4530	{
4531	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| (fXcpt & ~fUnmasked);
4532	IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4533	pfn(&State, &Res3, &InVal1, &InVal2);
4534	FPU_BINARY_R80_TEST_T const Test3
4535	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res3.FSW, InVal1, InVal2, Res3.r80Result };
4536	GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
4537	}
4538	}
4539
4540	/* If the values are in range and caused no exceptions, do the whole series of
4541	partial reminders till we get the non-partial one or run into an exception. */
4542	if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4543	{
4544	IEMFPURESULT ResPrev = ResM;
4545	for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 \| X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
4546	{
4547	State.FCW = State.FCW \| X86_FCW_MASK_ALL;
4548	State.FSW = ResPrev.FSW;
4549	IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4550	pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
4551	FPU_BINARY_R80_TEST_T const TestSeq
4552	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, ResSeq.FSW, ResPrev.r80Result, InVal2, ResSeq.r80Result };
4553	GenerateBinaryWrite(&BinOut, &TestSeq, sizeof(TestSeq));
4554	ResPrev = ResSeq;
4555	}
4556	}
4557	}
4558	}
4559	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4560	}
4561	return RTEXITCODE_SUCCESS;
4562	}
4563	DUMP_ALL_FN(FpuBinaryR80, g_aFpuBinaryR80)
4564	#endif
4565
4566
4567	static void FpuBinaryR80Test(void)
4568	{
4569	X86FXSTATE State;
4570	RT_ZERO(State);
4571	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4572	{
4573	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
4574	continue;
4575
4576	FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
4577	uint32_t const cTests = g_aFpuBinaryR80[iFn].cTests;
4578	PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
4579	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
4580	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4581	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4582	{
4583	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4584	{
4585	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4586	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4587	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4588	State.FCW = paTests[iTest].fFcw;
4589	State.FSW = paTests[iTest].fFswIn;
4590	pfn(&State, &Res, &InVal1, &InVal2);
4591	if ( Res.FSW != paTests[iTest].fFswOut
4592	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
4593	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4594	"%s -> fsw=%#06x %s\n"
4595	"%s expected %#06x %s%s%s (%s)\n",
4596	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4597	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4598	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4599	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4600	FswDiff(Res.FSW, paTests[iTest].fFswOut),
4601	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4602	FormatFcw(paTests[iTest].fFcw) );
4603	}
4604	pfn = g_aFpuBinaryR80[iFn].pfnNative;
4605	}
4606
4607	FREE_DECOMPRESSED_TESTS(g_aFpuBinaryR80[iFn]);
4608	}
4609	}
4610
4611
4612	/*
4613	* Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
4614	*/
/* Integer counterparts to the RTFLOATxxU_IS_NORMAL checks, so that the
   "type ## _IS_NORMAL" pasting in the generator macro below also works for
   integer second operands.  They ignore the argument and always yield 1. */
#define int64_t_IS_NORMAL(a_pValue)     1
#define int32_t_IS_NORMAL(a_pValue)     1
#define int16_t_IS_NORMAL(a_pValue)     1
4618
4619	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4620	static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
4621	{
4622	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4623	RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4624	};
4625	static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
4626	{
4627	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4628	RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4629	};
4630	static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
4631	{
4632	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4633	};
4634	static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
4635	{
4636	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4637	};
4638
/**
 * Instantiates FpuBinary<a_UpBits>Generate(), the test data generator for the
 * workers in @a a_aSubTests that combine an 80-bit float with a smaller second
 * operand.
 *
 * Inputs are cTests (at least 160) pairs from RandR80Src1() and
 * Rand<a_UpBits>Src2(), followed by the s_aFpuBinary<a_UpBits>Specials pairs.
 * Towards the end of the random range, pairs that are not both normal are
 * re-drawn until a minimum number of normal pairs has been seen.  Each pair is
 * run with every rounding/precision combination, first with no exception
 * masked and then with all of them masked.
 *
 * @param   a_fIntType  Passed on to RandR80Src1().
 * @param   a_cBits     Passed on to RandR80Src1().
 * @param   a_LoBits    Lower-case operand tag (not referenced here).
 * @param   a_UpBits    Upper-case operand tag pasted into function and array names.
 * @param   a_Type2     Type of the second operand.
 * @param   a_aSubTests The sub-test table to generate data for.
 * @param   a_TestType  The test record type written to the binary output.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinary ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[idxSub]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            bool const       fRandomInput = iTest < cTests; \
            RTFLOAT80U const r80Val1 = fRandomInput ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                     : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    Val2    = fRandomInput ? Rand ## a_UpBits ## Src2(iTest) \
                                     : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&r80Val1) && a_Type2 ## _IS_NORMAL(&Val2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && fRandomInput) \
            { \
                /* Short on normal pairs near the end of the random range: redo this index. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcwBase = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                    for (unsigned fMaskAll = 0; fMaskAll < 2; fMaskAll++) \
                    { \
                        FpuState.FCW = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                     | (iRounding  << X86_FCW_RC_SHIFT) \
                                     | (iPrecision << X86_FCW_PC_SHIFT) \
                                     | (fMaskAll ? X86_FCW_MASK_ALL : 0); \
                        IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[idxSub].pfn(&FpuState, &Result, &r80Val1, &Val2); \
                        a_TestType const Record = { FpuState.FCW, FpuState.FSW, Result.FSW, r80Val1, Val2, Result.r80Result }; \
                        GenerateBinaryWrite(&BinOut, &Record, sizeof(Record)); \
                    } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinary ## a_UpBits, a_aSubTests)
4692	#else
4693	# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4694	#endif
4695
4696	#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
4697	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
4698	\
4699	static a_SubTestType a_aSubTests[] = \
4700	{ \
4701	ENTRY_BIN(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
4702	ENTRY_BIN(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
4703	ENTRY_BIN(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
4704	ENTRY_BIN(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
4705	ENTRY_BIN(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
4706	ENTRY_BIN(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
4707	}; \
4708	\
4709	GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4710	\
4711	static void FpuBinary ## a_UpBits ## Test(void) \
4712	{ \
4713	X86FXSTATE State; \
4714	RT_ZERO(State); \
4715	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4716	{ \
4717	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4718	continue; \
4719	\
4720	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4721	uint32_t const cTests = a_aSubTests[iFn].cTests; \
4722	PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
4723	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4724	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4725	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4726	{ \
4727	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4728	{ \
4729	RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4730	a_Type2 const InVal2 = paTests[iTest].InVal2; \
4731	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4732	State.FCW = paTests[iTest].fFcw; \
4733	State.FSW = paTests[iTest].fFswIn; \
4734	pfn(&State, &Res, &InVal1, &InVal2); \
4735	if ( Res.FSW != paTests[iTest].fFswOut \
4736	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
4737	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4738	"%s -> fsw=%#06x %s\n" \
4739	"%s expected %#06x %s%s%s (%s)\n", \
4740	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4741	FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4742	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
4743	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
4744	FswDiff(Res.FSW, paTests[iTest].fFswOut), \
4745	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
4746	FormatFcw(paTests[iTest].fFcw) ); \
4747	} \
4748	pfn = a_aSubTests[iFn].pfnNative; \
4749	} \
4750	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
4751	} \
4752	}
4753
4754	TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
4755	TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
4756	TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
4757	TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
4758
4759
/*
 * Binary operations on an 80-bit floating point value and an 80-, 64- or 32-bit
 * floating point (or 32-/16-bit integer) value, only affecting FSW.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Fixed input pairs, one table per second operand type.  The generator below
 * appends these after the cTests random input pairs.
 */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/*
 * Defines FpuBinaryFsw<a_UpBits>Generate() plus the matching DUMP_ALL_FN.
 *
 * For every sub-test in a_aSubTests the generator feeds the worker cTests
 * (at least 160) random input pairs followed by the special pairs above.  Each
 * pair is run twice, with the FCW exception mask bits all clear and all set,
 * and each run is written out as one a_TestType record.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinaryFsw ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            /* Random inputs for the first cTests rounds, the special pairs after that. */ \
            bool const       fRandom = iTest < cTests; \
            RTFLOAT80U const InVal1  = fRandom ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2  = fRandom ? Rand ## a_UpBits ## Src2(iTest) \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (fRandom && cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests) \
            { \
                /* Short on normal pairs near the end of the random range: redo this round. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                a_TestType const Test = { State.FCW, State.FSW, fFswOut, InVal1, InVal2 }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinaryFsw ## a_UpBits, a_aSubTests)
#else
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
4836
4837	#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
4838	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
4839	\
4840	static a_SubTestType a_aSubTests[] = \
4841	{ \
4842	__VA_ARGS__ \
4843	}; \
4844	\
4845	GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4846	\
4847	static void FpuBinaryFsw ## a_UpBits ## Test(void) \
4848	{ \
4849	X86FXSTATE State; \
4850	RT_ZERO(State); \
4851	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4852	{ \
4853	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4854	continue; \
4855	\
4856	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4857	uint32_t const cTests = a_aSubTests[iFn].cTests; \
4858	PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
4859	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4860	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4861	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4862	{ \
4863	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4864	{ \
4865	uint16_t fFswOut = 0; \
4866	RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4867	a_Type2 const InVal2 = paTests[iTest].InVal2; \
4868	State.FCW = paTests[iTest].fFcw; \
4869	State.FSW = paTests[iTest].fFswIn; \
4870	pfn(&State, &fFswOut, &InVal1, &InVal2); \
4871	if (fFswOut != paTests[iTest].fFswOut) \
4872	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4873	"%s -> fsw=%#06x\n" \
4874	"%s expected %#06x %s (%s)\n", \
4875	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4876	FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4877	iVar ? " " : "", fFswOut, \
4878	iVar ? " " : "", paTests[iTest].fFswOut, \
4879	FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
4880	} \
4881	pfn = a_aSubTests[iFn].pfnNative; \
4882	} \
4883	FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
4884	} \
4885	}
4886
4887	TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY_BIN(fcom_r80_by_r80), ENTRY_BIN(fucom_r80_by_r80))
4888	TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY_BIN(fcom_r80_by_r64))
4889	TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY_BIN(fcom_r80_by_r32))
4890	TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY_BIN(ficom_r80_by_i32))
4891	TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY_BIN(ficom_r80_by_i16))
4892
4893
4894	/*
4895	* Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
4896	*/
4897	TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
4898
4899	static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
4900	{
4901	ENTRY_BIN(fcomi_r80_by_r80),
4902	ENTRY_BIN(fucomi_r80_by_r80),
4903	};
4904
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed input pairs appended after the random ones. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for the workers in g_aFpuBinaryEflR80.
 *
 * Each input pair is run twice, with the FCW exception mask bits all clear and
 * all set, and the resulting FSW and EFLAGS are recorded.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file cannot
 *          be opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 160.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE FpuBinaryEflR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryEflR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            /* Random inputs for the first cTests rounds, the special pairs after that. */
            bool const       fRandom = iTest < cTests;
            RTFLOAT80U const InVal1  = fRandom ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2  = fRandom ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (fRandom && cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests)
            {
                /* Short on normal pairs near the end of the random range: redo this round. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t       uFswOut = 0;
                uint32_t const fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                FPU_BINARY_EFL_R80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal1, InVal2, fEflOut, };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuBinaryEflR80, g_aFpuBinaryEflR80)
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4955
4956	static void FpuBinaryEflR80Test(void)
4957	{
4958	X86FXSTATE State;
4959	RT_ZERO(State);
4960	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4961	{
4962	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
4963	continue;
4964
4965	FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4966	uint32_t const cTests = g_aFpuBinaryEflR80[iFn].cTests;
4967	PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4968	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4969	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4970	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4971	{
4972	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4973	{
4974	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4975	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4976	State.FCW = paTests[iTest].fFcw;
4977	State.FSW = paTests[iTest].fFswIn;
4978	uint16_t uFswOut = 0;
4979	uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4980	if ( uFswOut != paTests[iTest].fFswOut
4981	\|\| fEflOut != paTests[iTest].fEflOut)
4982	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4983	"%s -> fsw=%#06x efl=%#08x\n"
4984	"%s expected %#06x %#08x %s%s (%s)\n",
4985	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4986	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4987	iVar ? " " : "", uFswOut, fEflOut,
4988	iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4989	FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4990	FormatFcw(paTests[iTest].fFcw));
4991	}
4992	pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4993	}
4994
4995	FREE_DECOMPRESSED_TESTS(g_aFpuBinaryEflR80[iFn]);
4996	}
4997	}
4998
4999
5000	/*********************************************************************************************************************************
5001	* x87 FPU Unary Operations *
5002	*********************************************************************************************************************************/
5003
5004	/*
5005	* Unary FPU operations on one 80-bit floating point value.
5006	*
5007	* Note! The FCW reserved bit 7 is used to indicate whether a test may produce
5008	* a rounding error or not.
5009	*/
5010	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
5011
5012	enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /probably not accurate, but need impl to know/, kUnary_Rounding_F2xm1 };
5013	static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
5014	{
5015	ENTRY_BIN_EX( fabs_r80, kUnary_Accurate),
5016	ENTRY_BIN_EX( fchs_r80, kUnary_Accurate),
5017	ENTRY_BIN_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
5018	ENTRY_BIN_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
5019	ENTRY_BIN_EX( fsqrt_r80, kUnary_Accurate),
5020	ENTRY_BIN_EX( frndint_r80, kUnary_Accurate),
5021	ENTRY_BIN_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
5022	ENTRY_BIN_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
5023	ENTRY_BIN_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
5024	ENTRY_BIN_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
5025	};
5026
5027	#ifdef TSTIEMAIMPL_WITH_GENERATOR
5028
5029	static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
5030	{
5031	if ( enmKind == kUnary_Rounding_F2xm1
5032	&& RTFLOAT80U_IS_NORMAL(pr80Val)
5033	&& pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
5034	&& pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
5035	return true;
5036	return false;
5037	}
5038
5039	DUMP_ALL_FN(FpuUnaryR80, g_aFpuUnaryR80)
5040	static RTEXITCODE FpuUnaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
5041	{
5042	static RTFLOAT80U const s_aSpecials[] =
5043	{
5044	RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
5045	RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
5046	RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
5047	RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
5048	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
5049	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
5050	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
5051	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
5052	RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
5053	RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
5054	};
5055	X86FXSTATE State;
5056	RT_ZERO(State);
5057	uint32_t cMinNormals = cTests / 4;
5058	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
5059	{
5060	PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
5061	if ( g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
5062	&& g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
5063	continue;
5064
5065	IEMBINARYOUTPUT BinOut;
5066	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryR80[iFn]), RTEXITCODE_FAILURE);
5067	uint32_t cNormalInputs = 0;
5068	uint32_t cTargetRangeInputs = 0;
5069	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5070	{
5071	RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
5072	if (RTFLOAT80U_IS_NORMAL(&InVal))
5073	{
5074	if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
5075	{
5076	unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
5077	? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 / : RTFLOAT80U_EXP_BIAS + 63 + 1 / 2^64..2^-64 */;
5078	unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
5079	if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
5080	cTargetRangeInputs++;
5081	else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
5082	{
5083	InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
5084	cTargetRangeInputs++;
5085	}
5086	}
5087	cNormalInputs++;
5088	}
5089	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
5090	{
5091	iTest -= 1;
5092	continue;
5093	}
5094
5095	uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
5096	uint16_t const fFcw = RandFcw();
5097	State.FSW = RandFsw();
5098
5099	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5100	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
5101	{
5102	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
5103	\| (iRounding << X86_FCW_RC_SHIFT)
5104	\| (iPrecision << X86_FCW_PC_SHIFT)
5105	\| X86_FCW_MASK_ALL;
5106	IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5107	pfn(&State, &ResM, &InVal);
5108	FPU_UNARY_R80_TEST_T const TestM
5109	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result };
5110	GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
5111
5112	State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
5113	IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5114	pfn(&State, &ResU, &InVal);
5115	FPU_UNARY_R80_TEST_T const TestU
5116	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result };
5117	GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
5118
5119	uint16_t fXcpt = (ResM.FSW \| ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
5120	if (fXcpt)
5121	{
5122	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
5123	IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5124	pfn(&State, &Res1, &InVal);
5125	FPU_UNARY_R80_TEST_T const Test1
5126	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result };
5127	GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
5128	if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
5129	{
5130	fXcpt \|= Res1.FSW & X86_FSW_XCPT_MASK;
5131	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
5132	IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5133	pfn(&State, &Res2, &InVal);
5134	FPU_UNARY_R80_TEST_T const Test2
5135	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result };
5136	GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
5137	}
5138	if (!RT_IS_POWER_OF_TWO(fXcpt))
5139	for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
5140	if (fUnmasked & fXcpt)
5141	{
5142	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| (fXcpt & ~fUnmasked);
5143	IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5144	pfn(&State, &Res3, &InVal);
5145	FPU_UNARY_R80_TEST_T const Test3
5146	= { (uint16_t)(State.FCW \| fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result };
5147	GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
5148	}
5149	}
5150	}
5151	}
5152	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5153	}
5154	return RTEXITCODE_SUCCESS;
5155	}
5156	#endif
5157
5158	static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
5159	{
5160	if (fFcw1 == fFcw2)
5161	return true;
5162	if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
5163	{
5164	*pfRndErr = true;
5165	return true;
5166	}
5167	return false;
5168	}
5169
5170	static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
5171	{
5172	if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
5173	return true;
5174	if ( fRndErrOk
5175	&& pr80Val1->s.fSign == pr80Val2->s.fSign)
5176	{
5177	if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
5178	&& ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
5179	? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
5180	: pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
5181	\|\|
5182	( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
5183	&& pr80Val1->s.uMantissa == UINT64_MAX
5184	&& pr80Val2->s.uMantissa == RT_BIT_64(63))
5185	\|\|
5186	( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
5187	&& pr80Val2->s.uMantissa == UINT64_MAX
5188	&& pr80Val1->s.uMantissa == RT_BIT_64(63)) )
5189	{
5190	*pfRndErr = true;
5191	return true;
5192	}
5193	}
5194	return false;
5195	}
5196
5197
5198	static void FpuUnaryR80Test(void)
5199	{
5200	X86FXSTATE State;
5201	RT_ZERO(State);
5202	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
5203	{
5204	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
5205	continue;
5206
5207	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
5208	uint32_t const cTests = g_aFpuUnaryR80[iFn].cTests;
5209	PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
5210	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
5211	uint32_t cRndErrs = 0;
5212	uint32_t cPossibleRndErrs = 0;
5213	if (!cTests) RTTestSkipped(g_hTest, "no tests");
5214	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5215	{
5216	for (uint32_t iTest = 0; iTest < cTests; iTest++)
5217	{
5218	RTFLOAT80U const InVal = paTests[iTest].InVal;
5219	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
5220	bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
5221	State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
5222	State.FSW = paTests[iTest].fFswIn;
5223	pfn(&State, &Res, &InVal);
5224	bool fRndErr = false;
5225	if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
5226	\|\| !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
5227	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5228	"%s -> fsw=%#06x %s\n"
5229	"%s expected %#06x %s%s%s%s (%s)\n",
5230	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5231	FormatR80(&paTests[iTest].InVal),
5232	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
5233	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
5234	FswDiff(Res.FSW, paTests[iTest].fFswOut),
5235	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
5236	fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
5237	cRndErrs += fRndErr;
5238	cPossibleRndErrs += fRndErrOk;
5239	}
5240	pfn = g_aFpuUnaryR80[iFn].pfnNative;
5241	}
5242	if (cPossibleRndErrs > 0)
5243	RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
5244	FREE_DECOMPRESSED_TESTS(g_aFpuUnaryR80[iFn]);
5245	}
5246	}
5247
5248
5249	/*
5250	* Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
5251	*/
5252	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
5253
5254	static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
5255	{
5256	ENTRY_BIN(ftst_r80),
5257	ENTRY_BIN_EX(fxam_r80, 1),
5258	};
5259
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the workers in g_aFpuUnaryFswR80.
 *
 * Entries with uExtra == 1 (fxam) get a single run per input with a random
 * FCW and a randomly empty or valid tag for the top register; the empty case
 * is recorded by setting bit 7 (0x80) in the stored FCW.  All other entries
 * are run for every rounding/precision combination with the exception mask
 * bits all clear and all set.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file cannot
 *          be opened or closed.
 * @param   cTests          Number of random inputs (the specials are added on top).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE FpuUnaryFswR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const                      fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        PFNIEMAIMPLFPUR80UNARYFSW const pfn     = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        if (   g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        State.FTW = 0;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryFswR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Short on normal inputs near the end of the random range: redo this round. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding  << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            FPU_UNARY_R80_TEST_T const Test = { State.FCW, State.FSW, fFswOut, InVal };
                            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
                        }
                    }
                }
            }
            else
            {
                uint16_t       fFswOut = 0;
                uint16_t const fEmpty  = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                FPU_UNARY_R80_TEST_T const Test = { (uint16_t)(fFcw | fEmpty), State.FSW, fFswOut, InVal };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuUnaryFswR80, g_aFpuUnaryFswR80)
#endif
5333
5334
5335	static void FpuUnaryFswR80Test(void)
5336	{
5337	X86FXSTATE State;
5338	RT_ZERO(State);
5339	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
5340	{
5341	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
5342	continue;
5343
5344	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
5345	uint32_t const cTests = g_aFpuUnaryFswR80[iFn].cTests;
5346	PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
5347	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
5348	if (!cTests) RTTestSkipped(g_hTest, "no tests");
5349	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5350	{
5351	for (uint32_t iTest = 0; iTest < cTests; iTest++)
5352	{
5353	RTFLOAT80U const InVal = paTests[iTest].InVal;
5354	uint16_t fFswOut = 0;
5355	State.FSW = paTests[iTest].fFswIn;
5356	State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
5357	State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
5358	pfn(&State, &fFswOut, &InVal);
5359	if (fFswOut != paTests[iTest].fFswOut)
5360	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5361	"%s -> fsw=%#06x\n"
5362	"%s expected %#06x %s (%s%s)\n",
5363	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5364	FormatR80(&paTests[iTest].InVal),
5365	iVar ? " " : "", fFswOut,
5366	iVar ? " " : "", paTests[iTest].fFswOut,
5367	FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
5368	paTests[iTest].fFcw & 0x80 ? " empty" : "");
5369	}
5370	pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
5371	}
5372
5373	FREE_DECOMPRESSED_TESTS(g_aFpuUnaryFswR80[iFn]);
5374	}
5375	}
5376
5377	/*
5378	* Unary FPU operations on one 80-bit floating point value, but with two outputs.
5379	*/
5380	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
5381
5382	static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
5383	{
5384	ENTRY_BIN(fxtract_r80_r80),
5385	ENTRY_BIN_AMD( fptan_r80_r80, 0), // rounding differences
5386	ENTRY_BIN_INTEL(fptan_r80_r80, 0),
5387	ENTRY_BIN_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
5388	ENTRY_BIN_INTEL(fsincos_r80_r80, 0),
5389	};
5390
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the current FPU state and writes the outcome as
 * one FPU_UNARY_TWO_R80_TEST_T record.
 *
 * @returns The FSW the worker produced.
 * @param   pBinOut     The output to append the record to.
 * @param   pfn         The worker to run.
 * @param   pState      The FPU state; FCW and FSW are recorded as inputs.
 * @param   fFcwExtra   Extra bits ORed into the recorded FCW.
 * @param   pr80In      The input value.
 */
static uint16_t FpuUnaryTwoR80GenerateOne(IEMBINARYOUTPUT *pBinOut, PFNIEMAIMPLFPUR80UNARYTWO pfn, X86FXSTATE *pState,
                                          uint16_t fFcwExtra, PCRTFLOAT80U pr80In)
{
    IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
    pfn(pState, &Res, pr80In);
    FPU_UNARY_TWO_R80_TEST_T const Test
        = { (uint16_t)(pState->FCW | fFcwExtra), pState->FSW, Res.FSW, *pr80In, Res.r80Result1, Res.r80Result2 };
    GenerateBinaryWrite(pBinOut, &Test, sizeof(Test));
    return Res.FSW;
}

/**
 * Generates test data for the workers in g_aFpuUnaryTwoR80.
 *
 * Every input is run for all 4x4 rounding/precision control combinations,
 * first with all exceptions masked and then with none masked.  If either run
 * raised exceptions, further runs are added with selected mask combinations
 * built from the raised exception bits.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file cannot
 *          be opened or closed.
 * @param   cTests          Number of random inputs (the specials are added on top).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE FpuUnaryTwoR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
        if (   g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryTwoR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (iFn != 0) /* every entry except the first one in g_aFpuUnaryTwoR80 (fxtract) */
                {
                    /* The range width used to be looked up in g_aFpuUnaryR80[iFn], i.e. in the table
                       of a different test group, which narrowed it to 69 for one of the entries here.
                       None of the workers in this table is f2xm1, so always use the full width. */
                    unsigned const uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned const cTargetExp = 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        /* Short on inputs in the target range: force the exponent into it. */
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Short on normal inputs near the end of the random range: redo this round. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0; /* for rounding error indication */
            uint16_t const fFcw      = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* All exception mask bits set. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    uint16_t const fFswM = FpuUnaryTwoR80GenerateOne(&BinOut, pfn, &State, fFcwExtra, &InVal);

                    /* All exception mask bits clear. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    uint16_t const fFswU = FpuUnaryTwoR80GenerateOne(&BinOut, pfn, &State, fFcwExtra, &InVal);

                    uint16_t fXcpt = (fFswM | fFswU) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        /* Mask exactly the exceptions seen so far. */
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        uint16_t const fFsw1 = FpuUnaryTwoR80GenerateOne(&BinOut, pfn, &State, fFcwExtra, &InVal);

                        if (((fFsw1 & X86_FSW_XCPT_MASK) & fXcpt) != (fFsw1 & X86_FSW_XCPT_MASK))
                        {
                            /* That run raised additional exceptions: mask those too and go again. */
                            fXcpt |= fFsw1 & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            FpuUnaryTwoR80GenerateOne(&BinOut, pfn, &State, fFcwExtra, &InVal);
                        }
                        /* With more than one exception in the set, unmask them one at a time. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    FpuUnaryTwoR80GenerateOne(&BinOut, pfn, &State, fFcwExtra, &InVal);
                                }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuUnaryTwoR80, g_aFpuUnaryTwoR80)
#endif
5502
5503
5504	static void FpuUnaryTwoR80Test(void)
5505	{
5506	X86FXSTATE State;
5507	RT_ZERO(State);
5508	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5509	{
5510	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
5511	continue;
5512
5513	FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
5514	uint32_t const cTests = g_aFpuUnaryTwoR80[iFn].cTests;
5515	PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
5516	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
5517	if (!cTests) RTTestSkipped(g_hTest, "no tests");
5518	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5519	{
5520	for (uint32_t iTest = 0; iTest < cTests; iTest++)
5521	{
5522	IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5523	RTFLOAT80U const InVal = paTests[iTest].InVal;
5524	State.FCW = paTests[iTest].fFcw;
5525	State.FSW = paTests[iTest].fFswIn;
5526	pfn(&State, &Res, &InVal);
5527	if ( Res.FSW != paTests[iTest].fFswOut
5528	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
5529	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
5530	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5531	"%s -> fsw=%#06x %s %s\n"
5532	"%s expected %#06x %s %s %s%s%s (%s)\n",
5533	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5534	FormatR80(&paTests[iTest].InVal),
5535	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
5536	iVar ? " " : "", paTests[iTest].fFswOut,
5537	FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
5538	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
5539	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
5540	FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
5541	}
5542	pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
5543	}
5544
5545	FREE_DECOMPRESSED_TESTS(g_aFpuUnaryTwoR80[iFn]);
5546	}
5547	}
5548
5549
5550	/*********************************************************************************************************************************
5551	* SSE floating point Binary Operations *
5552	*********************************************************************************************************************************/
5553
5554	/*
5555	* Binary SSE operations on packed single precision floating point values.
5556	*/
5557	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5558
5559	/** Ugly hack to keep it working after changing function arguments! */
5560	IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_cvtps2pd_u128x,(uint32_t uMxCsrIn, PX86XMMREG pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
5561	{
5562	RT_NOREF(puSrc1);
5563	return iemAImpl_cvtps2pd_u128(uMxCsrIn, pResult, &puSrc2->au64[0]);
5564	}
5565
/** Table of the packed single-precision workers that SseBinaryR32Test() (and
 * SseBinaryR32Generate() in generator builds) iterate over, in this order.
 * The last entry goes through the iemAImpl_cvtps2pd_u128x adapter because the
 * underlying worker does not have the two-source signature. */
5566	static SSE_BINARY_R32_T g_aSseBinaryR32[] =
5567	{
5568	ENTRY_BIN(addps_u128),
5569	ENTRY_BIN(mulps_u128),
5570	ENTRY_BIN(subps_u128),
5571	ENTRY_BIN(minps_u128),
5572	ENTRY_BIN(divps_u128),
5573	ENTRY_BIN(maxps_u128),
5574	ENTRY_BIN(haddps_u128),
5575	ENTRY_BIN(hsubps_u128),
5576	ENTRY_BIN(sqrtps_u128),
5577	ENTRY_BIN(addsubps_u128),
5578	ENTRY_BIN(cvtps2pd_u128x), /* conversion hack */
5579	};
5580
5581	#ifdef TSTIEMAIMPL_WITH_GENERATOR
5582	DUMP_ALL_FN(SseBinaryR32, g_aSseBinaryR32)
5583	static RTEXITCODE SseBinaryR32Generate(uint32_t cTests, const char * const *papszNameFmts)
5584	{
5585	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5586
5587	static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
5588	{
5589	{ { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
5590	{ RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
5591	/** @todo More specials. */
5592	};
5593
5594	uint32_t cMinNormalPairs = (cTests - 144) / 4;
5595	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5596	{
5597	PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
5598
5599	IEMBINARYOUTPUT BinOut;
5600	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32[iFn]), RTEXITCODE_FAILURE);
5601
5602	uint32_t cNormalInputPairs = 0;
5603	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5604	{
5605	SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5606
5607	TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5608	TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5609	TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5610	TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5611
5612	TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5613	TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5614	TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
5615	TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
5616
5617	if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
5618	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
5619	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
5620	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
5621	cNormalInputPairs++;
5622	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5623	{
5624	iTest -= 1;
5625	continue;
5626	}
5627
5628	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5629	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5630	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5631	for (uint8_t iFz = 0; iFz < 2; iFz++)
5632	{
5633	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
5634	\| (iRounding << X86_MXCSR_RC_SHIFT)
5635	\| (iDaz ? X86_MXCSR_DAZ : 0)
5636	\| (iFz ? X86_MXCSR_FZ : 0)
5637	\| X86_MXCSR_XCPT_MASK;
5638	X86XMMREG ResM; RT_ZERO(ResM);
5639	uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
5640	TestData.fMxcsrIn = uMxCsrIn;
5641	TestData.fMxcsrOut = uMxCsrOutM;
5642	TestData.OutVal = ResM;
5643	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5644
5645	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
5646	X86XMMREG ResU; RT_ZERO(ResU);
5647	uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
5648	TestData.fMxcsrIn = uMxCsrIn;
5649	TestData.fMxcsrOut = uMxCsrOutU;
5650	TestData.OutVal = ResU;
5651	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5652
5653	uint16_t fXcpt = (uMxCsrOutM \| uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
5654	if (fXcpt)
5655	{
5656	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
5657	X86XMMREG Res1; RT_ZERO(Res1);
5658	uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
5659	TestData.fMxcsrIn = uMxCsrIn;
5660	TestData.fMxcsrOut = uMxCsrOut1;
5661	TestData.OutVal = Res1;
5662	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5663
5664	if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
5665	{
5666	fXcpt \|= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
5667	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5668	X86XMMREG Res2; RT_ZERO(Res2);
5669	uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
5670	TestData.fMxcsrIn = uMxCsrIn;
5671	TestData.fMxcsrOut = uMxCsrOut2;
5672	TestData.OutVal = Res2;
5673	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5674	}
5675	if (!RT_IS_POWER_OF_TWO(fXcpt))
5676	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5677	if (fUnmasked & fXcpt)
5678	{
5679	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5680	X86XMMREG Res3; RT_ZERO(Res3);
5681	uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
5682	TestData.fMxcsrIn = uMxCsrIn;
5683	TestData.fMxcsrOut = uMxCsrOut3;
5684	TestData.OutVal = Res3;
5685	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5686	}
5687	}
5688	}
5689	}
5690	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5691	}
5692
5693	return RTEXITCODE_SUCCESS;
5694	}
5695	#endif
5696
5697	static void SseBinaryR32Test(void)
5698	{
5699	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5700	{
5701	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
5702	continue;
5703
5704	SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
5705	uint32_t const cbTests = g_aSseBinaryR32[iFn].cTests;
5706	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
5707	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
5708	if (!cbTests) RTTestSkipped(g_hTest, "no tests");
5709	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5710	{
5711	for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
5712	{
5713	X86XMMREG Res; RT_ZERO(Res);
5714
5715	uint32_t uMxCsrOut = pfn(paTests[iTest].fMxcsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5716	bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
5717	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
5718	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
5719	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5720	if ( uMxCsrOut != paTests[iTest].fMxcsrOut
5721	\|\| !fValsIdentical)
5722	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
5723	"%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5724	"%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5725	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5726	FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5727	FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5728	FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
5729	FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
5730	iVar ? " " : "", uMxCsrOut,
5731	FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
5732	FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
5733	iVar ? " " : "", paTests[iTest].fMxcsrOut,
5734	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5735	FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5736	MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
5737	!fValsIdentical ? " - val" : "",
5738	FormatMxcsr(paTests[iTest].fMxcsrIn) );
5739	}
5740	pfn = g_aSseBinaryR32[iFn].pfnNative;
5741	}
5742
5743	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32[iFn]);
5744	}
5745	}
5746
5747
5748	/*
5749	* Binary SSE operations on packed double precision floating point values.
5750	*/
5751	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5752
/** Table of the packed double-precision workers that SseBinaryR64Test() (and
 * SseBinaryR64Generate() in generator builds) iterate over, in this order. */
5753	static SSE_BINARY_R64_T g_aSseBinaryR64[] =
5754	{
5755	ENTRY_BIN(addpd_u128),
5756	ENTRY_BIN(mulpd_u128),
5757	ENTRY_BIN(subpd_u128),
5758	ENTRY_BIN(minpd_u128),
5759	ENTRY_BIN(divpd_u128),
5760	ENTRY_BIN(maxpd_u128),
5761	ENTRY_BIN(haddpd_u128),
5762	ENTRY_BIN(hsubpd_u128),
5763	ENTRY_BIN(sqrtpd_u128),
5764	ENTRY_BIN(addsubpd_u128),
5765	ENTRY_BIN(cvtpd2ps_u128),
5766	};
5767
5768	#ifdef TSTIEMAIMPL_WITH_GENERATOR
5769	DUMP_ALL_FN(SseBinaryR64, g_aSseBinaryR32)
5770	static RTEXITCODE SseBinaryR64Generate(uint32_t cTests, const char * const *papszNameFmts)
5771	{
5772	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5773
5774	static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
5775	{
5776	{ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
5777	{ RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
5778	/** @todo More specials. */
5779	};
5780
5781	uint32_t cMinNormalPairs = (cTests - 144) / 4;
5782	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5783	{
5784	PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
5785
5786	IEMBINARYOUTPUT BinOut;
5787	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64[iFn]), RTEXITCODE_FAILURE);
5788
5789	uint32_t cNormalInputPairs = 0;
5790	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5791	{
5792	SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5793
5794	TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5795	TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5796	TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5797	TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5798
5799	if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5800	&& RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
5801	cNormalInputPairs++;
5802	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5803	{
5804	iTest -= 1;
5805	continue;
5806	}
5807
5808	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5809	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5810	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5811	for (uint8_t iFz = 0; iFz < 2; iFz++)
5812	{
5813	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
5814	\| (iRounding << X86_MXCSR_RC_SHIFT)
5815	\| (iDaz ? X86_MXCSR_DAZ : 0)
5816	\| (iFz ? X86_MXCSR_FZ : 0)
5817	\| X86_MXCSR_XCPT_MASK;
5818	X86XMMREG ResM; RT_ZERO(ResM);
5819	uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
5820	TestData.fMxcsrIn = uMxCsrIn;
5821	TestData.fMxcsrOut = uMxCsrOutM;
5822	TestData.OutVal = ResM;
5823	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5824
5825	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
5826	X86XMMREG ResU; RT_ZERO(ResU);
5827	uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
5828	TestData.fMxcsrIn = uMxCsrIn;
5829	TestData.fMxcsrOut = uMxCsrOutU;
5830	TestData.OutVal = ResU;
5831	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5832
5833	uint16_t fXcpt = (uMxCsrOutM \| uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
5834	if (fXcpt)
5835	{
5836	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
5837	X86XMMREG Res1; RT_ZERO(Res1);
5838	uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
5839	TestData.fMxcsrIn = uMxCsrIn;
5840	TestData.fMxcsrOut = uMxCsrOut1;
5841	TestData.OutVal = Res1;
5842	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5843
5844	if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
5845	{
5846	fXcpt \|= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
5847	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5848	X86XMMREG Res2; RT_ZERO(Res2);
5849	uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
5850	TestData.fMxcsrIn = uMxCsrIn;
5851	TestData.fMxcsrOut = uMxCsrOut2;
5852	TestData.OutVal = Res2;
5853	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5854	}
5855	if (!RT_IS_POWER_OF_TWO(fXcpt))
5856	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5857	if (fUnmasked & fXcpt)
5858	{
5859	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5860	X86XMMREG Res3; RT_ZERO(Res3);
5861	uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
5862	TestData.fMxcsrIn = uMxCsrIn;
5863	TestData.fMxcsrOut = uMxCsrOut3;
5864	TestData.OutVal = Res3;
5865	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5866	}
5867	}
5868	}
5869	}
5870	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5871	}
5872
5873	return RTEXITCODE_SUCCESS;
5874	}
5875	#endif
5876
5877
5878	static void SseBinaryR64Test(void)
5879	{
5880	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5881	{
5882	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
5883	continue;
5884
5885	SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
5886	uint32_t const cTests = g_aSseBinaryR64[iFn].cTests;
5887	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
5888	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
5889	if (!cTests) RTTestSkipped(g_hTest, "no tests");
5890	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5891	{
5892	for (uint32_t iTest = 0; iTest < cTests; iTest++)
5893	{
5894	X86XMMREG Res; RT_ZERO(Res);
5895
5896	uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
5897	uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5898	if ( uMxCsrOut != paTests[iTest].fMxcsrOut
5899	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
5900	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5901	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5902	"%s -> mxcsr=%#08x %s'%s\n"
5903	"%s expected %#08x %s'%s%s%s (%s)\n",
5904	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5905	FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5906	FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5907	iVar ? " " : "", uMxCsrOut,
5908	FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
5909	iVar ? " " : "", paTests[iTest].fMxcsrOut,
5910	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5911	MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
5912	( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
5913	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5914	? " - val" : "",
5915	FormatMxcsr(paTests[iTest].fMxcsrIn) );
5916	}
5917	pfn = g_aSseBinaryR64[iFn].pfnNative;
5918	}
5919
5920	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64[iFn]);
5921	}
5922	}
5923
5924
5925	/*
5926	* Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
5927	*/
5928	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5929
/** Table of the workers taking a 128-bit register and a single 32-bit float
 * operand that SseBinaryU128R32Test() (and SseBinaryU128R32Generate() in
 * generator builds) iterate over, in this order. */
5930	static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5931	{
5932	ENTRY_BIN(addss_u128_r32),
5933	ENTRY_BIN(mulss_u128_r32),
5934	ENTRY_BIN(subss_u128_r32),
5935	ENTRY_BIN(minss_u128_r32),
5936	ENTRY_BIN(divss_u128_r32),
5937	ENTRY_BIN(maxss_u128_r32),
5938	ENTRY_BIN(cvtss2sd_u128_r32),
5939	ENTRY_BIN(sqrtss_u128_r32),
5940	};
5941
5942	#ifdef TSTIEMAIMPL_WITH_GENERATOR
5943	DUMP_ALL_FN(SseBinaryU128R32, g_aSseBinaryU128R32)
5944	static RTEXITCODE SseBinaryU128R32Generate(uint32_t cTests, const char * const *papszNameFmts)
5945	{
5946	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5947
5948	static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
5949	{
5950	{ { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5951	/** @todo More specials. */
5952	};
5953
5954	uint32_t cMinNormalPairs = (cTests - 144) / 4;
5955	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5956	{
5957	PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;
5958
5959	IEMBINARYOUTPUT BinOut;
5960	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R32[iFn]), RTEXITCODE_FAILURE);
5961
5962	uint32_t cNormalInputPairs = 0;
5963	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5964	{
5965	SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);
5966
5967	TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5968	TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5969	TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5970	TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5971
5972	TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5973
5974	if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
5975	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
5976	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
5977	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
5978	&& RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
5979	cNormalInputPairs++;
5980	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5981	{
5982	iTest -= 1;
5983	continue;
5984	}
5985
5986	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5987	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5988	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5989	for (uint8_t iFz = 0; iFz < 2; iFz++)
5990	{
5991	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
5992	\| (iRounding << X86_MXCSR_RC_SHIFT)
5993	\| (iDaz ? X86_MXCSR_DAZ : 0)
5994	\| (iFz ? X86_MXCSR_FZ : 0)
5995	\| X86_MXCSR_XCPT_MASK;
5996	X86XMMREG ResM; RT_ZERO(ResM);
5997	uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.r32Val2);
5998	TestData.fMxcsrIn = uMxCsrIn;
5999	TestData.fMxcsrOut = uMxCsrOutM;
6000	TestData.OutVal = ResM;
6001	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6002
6003	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6004	X86XMMREG ResU; RT_ZERO(ResU);
6005	uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.r32Val2);
6006	TestData.fMxcsrIn = uMxCsrIn;
6007	TestData.fMxcsrOut = uMxCsrOutU;
6008	TestData.OutVal = ResU;
6009	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6010
6011	uint16_t fXcpt = (uMxCsrOutM \| uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
6012	if (fXcpt)
6013	{
6014	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6015	X86XMMREG Res1; RT_ZERO(Res1);
6016	uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.r32Val2);
6017	TestData.fMxcsrIn = uMxCsrIn;
6018	TestData.fMxcsrOut = uMxCsrOut1;
6019	TestData.OutVal = Res1;
6020	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6021
6022	if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
6023	{
6024	fXcpt \|= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
6025	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6026	X86XMMREG Res2; RT_ZERO(Res2);
6027	uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.r32Val2);
6028	TestData.fMxcsrIn = uMxCsrIn;
6029	TestData.fMxcsrOut = uMxCsrOut2;
6030	TestData.OutVal = Res2;
6031	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6032	}
6033	if (!RT_IS_POWER_OF_TWO(fXcpt))
6034	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6035	if (fUnmasked & fXcpt)
6036	{
6037	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6038	X86XMMREG Res3; RT_ZERO(Res3);
6039	uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.r32Val2);
6040	TestData.fMxcsrIn = uMxCsrIn;
6041	TestData.fMxcsrOut = uMxCsrOut3;
6042	TestData.OutVal = Res3;
6043	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6044	}
6045	}
6046	}
6047	}
6048	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6049	}
6050
6051	return RTEXITCODE_SUCCESS;
6052	}
6053	#endif
6054
6055	static void SseBinaryU128R32Test(void)
6056	{
6057	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
6058	{
6059	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
6060	continue;
6061
6062	SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
6063	uint32_t const cTests = g_aSseBinaryU128R32[iFn].cTests;
6064	PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
6065	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
6066	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6067	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6068	{
6069	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6070	{
6071	X86XMMREG Res; RT_ZERO(Res);
6072
6073	uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6074	uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
6075	bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
6076	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
6077	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
6078	&& RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
6079	if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6080	\|\| !fValsIdentical)
6081	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
6082	"%s -> mxcsr=%#08x %s'%s'%s'%s\n"
6083	"%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
6084	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6085	FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
6086	FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
6087	FormatR32(&paTests[iTest].r32Val2),
6088	iVar ? " " : "", uMxCsrOut,
6089	FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
6090	FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
6091	iVar ? " " : "", paTests[iTest].fMxcsrOut,
6092	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
6093	FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
6094	MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6095	!fValsIdentical ? " - val" : "",
6096	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6097	}
6098	}
6099
6100	FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R32[iFn]);
6101	}
6102	}
6103
6104
6105	/*
6106	* Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
6107	*/
6108	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
6109
/** Table of the workers taking a 128-bit register and a single 64-bit float
 * operand that SseBinaryU128R64Test() (and SseBinaryU128R64Generate() in
 * generator builds) iterate over, in this order. */
6110	static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
6111	{
6112	ENTRY_BIN(addsd_u128_r64),
6113	ENTRY_BIN(mulsd_u128_r64),
6114	ENTRY_BIN(subsd_u128_r64),
6115	ENTRY_BIN(minsd_u128_r64),
6116	ENTRY_BIN(divsd_u128_r64),
6117	ENTRY_BIN(maxsd_u128_r64),
6118	ENTRY_BIN(cvtsd2ss_u128_r64),
6119	ENTRY_BIN(sqrtsd_u128_r64),
6120	};
6121
6122	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6123	DUMP_ALL_FN(SseBinaryU128R64, g_aSseBinaryU128R64)
6124	static RTEXITCODE SseBinaryU128R64Generate(uint32_t cTests, const char * const *papszNameFmts)
6125	{
6126	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6127
6128	static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
6129	{
6130	{ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6131	/** @todo More specials. */
6132	};
6133
6134	uint32_t cMinNormalPairs = (cTests - 144) / 4;
6135	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
6136	{
6137	PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;
6138
6139	IEMBINARYOUTPUT BinOut;
6140	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R64[iFn]), RTEXITCODE_FAILURE);
6141
6142	uint32_t cNormalInputPairs = 0;
6143	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6144	{
6145	SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);
6146
6147	TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
6148	TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
6149	TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
6150
6151	if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
6152	&& RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
6153	cNormalInputPairs++;
6154	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6155	{
6156	iTest -= 1;
6157	continue;
6158	}
6159
6160	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6161	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6162	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6163	for (uint8_t iFz = 0; iFz < 2; iFz++)
6164	{
6165	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6166	\| (iRounding << X86_MXCSR_RC_SHIFT)
6167	\| (iDaz ? X86_MXCSR_DAZ : 0)
6168	\| (iFz ? X86_MXCSR_FZ : 0)
6169	\| X86_MXCSR_XCPT_MASK;
6170	uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
6171	TestData.fMxcsrIn = uMxCsrIn;
6172	TestData.fMxcsrOut = uMxCsrOutM;
6173	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6174
6175	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6176	uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
6177	TestData.fMxcsrIn = uMxCsrIn;
6178	TestData.fMxcsrOut = uMxCsrOutU;
6179	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6180
6181	uint16_t fXcpt = (uMxCsrOutM \| uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
6182	if (fXcpt)
6183	{
6184	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6185	uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
6186	TestData.fMxcsrIn = uMxCsrIn;
6187	TestData.fMxcsrOut = uMxCsrOut1;
6188	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6189
6190	if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
6191	{
6192	fXcpt \|= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
6193	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6194	uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
6195	TestData.fMxcsrIn = uMxCsrIn;
6196	TestData.fMxcsrOut = uMxCsrOut2;
6197	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6198	}
6199	if (!RT_IS_POWER_OF_TWO(fXcpt))
6200	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6201	if (fUnmasked & fXcpt)
6202	{
6203	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6204	uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
6205	TestData.fMxcsrIn = uMxCsrIn;
6206	TestData.fMxcsrOut = uMxCsrOut3;
6207	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6208	}
6209	}
6210	}
6211	}
6212	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6213	}
6214
6215	return RTEXITCODE_SUCCESS;
6216	}
6217	#endif
6218
6219
6220	static void SseBinaryU128R64Test(void)
6221	{
6222	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
6223	{
6224	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
6225	continue;
6226
6227	SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
6228	uint32_t const cTests = g_aSseBinaryU128R64[iFn].cTests;
6229	PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
6230	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
6231	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6232	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6233	{
6234	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6235	{
6236	X86XMMREG Res; RT_ZERO(Res);
6237
6238	uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6239	uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
6240	if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6241	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6242	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6243	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
6244	"%s -> mxcsr=%#08x %s'%s\n"
6245	"%s expected %#08x %s'%s%s%s (%s)\n",
6246	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6247	FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
6248	FormatR64(&paTests[iTest].r64Val2),
6249	iVar ? " " : "", uMxCsrOut,
6250	FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
6251	iVar ? " " : "", paTests[iTest].fMxcsrOut,
6252	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
6253	MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6254	( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6255	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6256	? " - val" : "",
6257	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6258	}
6259	}
6260
6261	FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R64[iFn]);
6262	}
6263	}
6264
6265
6266	/*
6267	* SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
6268	*/
/* Declares the subtest type SSE_BINARY_I32_R64_T from the test record type
   SSE_BINARY_I32_R64_TEST_T and the worker type PFNIEMAIMPLSSEF2I32U64.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);

/** Subtest table walked by SseBinaryI32R64Generate and SseBinaryI32R64Test. */
static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
{
    ENTRY_BIN(cvttsd2si_i32_r64),
    ENTRY_BIN(cvtsd2si_i32_r64),
};
6276
6277	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6278	DUMP_ALL_FN(SseBinaryI32R64, g_aSseBinaryI32R64)
6279	static RTEXITCODE SseBinaryI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
6280	{
6281	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6282
6283	static struct { RTFLOAT64U Val; } const s_aSpecials[] =
6284	{
6285	{ RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6286	/** @todo More specials. */
6287	};
6288
6289	uint32_t cMinNormalPairs = (cTests - 144) / 4;
6290	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6291	{
6292	PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;
6293
6294	IEMBINARYOUTPUT BinOut;
6295	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R64[iFn]), RTEXITCODE_FAILURE);
6296
6297	uint32_t cNormalInputPairs = 0;
6298	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6299	{
6300	SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);
6301
6302	TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
6303
6304	if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
6305	cNormalInputPairs++;
6306	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6307	{
6308	iTest -= 1;
6309	continue;
6310	}
6311
6312	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6313	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6314	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6315	for (uint8_t iFz = 0; iFz < 2; iFz++)
6316	{
6317	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6318	\| (iRounding << X86_MXCSR_RC_SHIFT)
6319	\| (iDaz ? X86_MXCSR_DAZ : 0)
6320	\| (iFz ? X86_MXCSR_FZ : 0)
6321	\| X86_MXCSR_XCPT_MASK;
6322	uint32_t fMxcsrM; int32_t i32OutM;
6323	fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r64ValIn.u);
6324	TestData.fMxcsrIn = uMxCsrIn;
6325	TestData.fMxcsrOut = fMxcsrM;
6326	TestData.i32ValOut = i32OutM;
6327	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6328
6329	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6330	uint32_t fMxcsrU; int32_t i32OutU;
6331	fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r64ValIn.u);
6332	TestData.fMxcsrIn = uMxCsrIn;
6333	TestData.fMxcsrOut = fMxcsrU;
6334	TestData.i32ValOut = i32OutU;
6335	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6336
6337	uint16_t fXcpt = (fMxcsrM \| fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6338	if (fXcpt)
6339	{
6340	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6341	uint32_t fMxcsr1; int32_t i32Out1;
6342	fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r64ValIn.u);
6343	TestData.fMxcsrIn = uMxCsrIn;
6344	TestData.fMxcsrOut = fMxcsr1;
6345	TestData.i32ValOut = i32Out1;
6346	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6347
6348	if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6349	{
6350	fXcpt \|= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6351	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6352	uint32_t fMxcsr2; int32_t i32Out2;
6353	fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r64ValIn.u);
6354	TestData.fMxcsrIn = uMxCsrIn;
6355	TestData.fMxcsrOut = fMxcsr2;
6356	TestData.i32ValOut = i32Out2;
6357	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6358	}
6359	if (!RT_IS_POWER_OF_TWO(fXcpt))
6360	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6361	if (fUnmasked & fXcpt)
6362	{
6363	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6364	uint32_t fMxcsr3; int32_t i32Out3;
6365	fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r64ValIn.u);
6366	TestData.fMxcsrIn = uMxCsrIn;
6367	TestData.fMxcsrOut = fMxcsr3;
6368	TestData.i32ValOut = i32Out3;
6369	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6370	}
6371	}
6372	}
6373	}
6374	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6375	}
6376
6377	return RTEXITCODE_SUCCESS;
6378	}
6379	#endif
6380
6381
6382	static void SseBinaryI32R64Test(void)
6383	{
6384	X86FXSTATE State;
6385	RT_ZERO(State);
6386	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6387	{
6388	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
6389	continue;
6390
6391	SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
6392	uint32_t const cTests = g_aSseBinaryI32R64[iFn].cTests;
6393	PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
6394	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6395	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6396	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6397	{
6398	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6399	{
6400	int32_t i32Dst = 0;
6401
6402	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r64ValIn.u);
6403	if ( fMxcsr != paTests[iTest].fMxcsrOut
6404	\|\| i32Dst != paTests[iTest].i32ValOut)
6405	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6406	"%s -> mxcsr=%#08x %RI32\n"
6407	"%s expected %#08x %RI32%s%s (%s)\n",
6408	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6409	FormatR64(&paTests[iTest].r64ValIn),
6410	iVar ? " " : "", fMxcsr, i32Dst,
6411	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6412	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6413	i32Dst != paTests[iTest].i32ValOut
6414	? " - val" : "",
6415	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6416	}
6417	}
6418
6419	FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R64[iFn]);
6420	}
6421	}
6422
6423
6424	/*
6425	* SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
6426	*/
/* Declares the subtest type SSE_BINARY_I64_R64_T from the test record type
   SSE_BINARY_I64_R64_TEST_T and the worker type PFNIEMAIMPLSSEF2I64U64.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/** Subtest table walked by SseBinaryI64R64Generate and SseBinaryI64R64Test. */
static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
6434
6435	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6436	DUMP_ALL_FN(SseBinaryI64R64, g_aSseBinaryI64R64)
6437	static RTEXITCODE SseBinaryI64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
6438	{
6439	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6440
6441	static struct { RTFLOAT64U Val; } const s_aSpecials[] =
6442	{
6443	{ RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6444	/** @todo More specials. */
6445	};
6446
6447	uint32_t cMinNormalPairs = (cTests - 144) / 4;
6448	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6449	{
6450	PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;
6451
6452	IEMBINARYOUTPUT BinOut;
6453	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R64[iFn]), RTEXITCODE_FAILURE);
6454
6455	uint32_t cNormalInputPairs = 0;
6456	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6457	{
6458	SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);
6459
6460	TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
6461
6462	if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
6463	cNormalInputPairs++;
6464	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6465	{
6466	iTest -= 1;
6467	continue;
6468	}
6469
6470	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6471	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6472	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6473	for (uint8_t iFz = 0; iFz < 2; iFz++)
6474	{
6475	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6476	\| (iRounding << X86_MXCSR_RC_SHIFT)
6477	\| (iDaz ? X86_MXCSR_DAZ : 0)
6478	\| (iFz ? X86_MXCSR_FZ : 0)
6479	\| X86_MXCSR_XCPT_MASK;
6480	uint32_t fMxcsrM; int64_t i64OutM;
6481	fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r64ValIn.u);
6482	TestData.fMxcsrIn = uMxCsrIn;
6483	TestData.fMxcsrOut = fMxcsrM;
6484	TestData.i64ValOut = i64OutM;
6485	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6486
6487	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6488	uint32_t fMxcsrU; int64_t i64OutU;
6489	fMxcsrU =pfn(uMxCsrIn, &i64OutU, &TestData.r64ValIn.u);
6490	TestData.fMxcsrIn = uMxCsrIn;
6491	TestData.fMxcsrOut = fMxcsrU;
6492	TestData.i64ValOut = i64OutU;
6493	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6494
6495	uint16_t fXcpt = (fMxcsrM \| fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6496	if (fXcpt)
6497	{
6498	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6499	uint32_t fMxcsr1; int64_t i64Out1;
6500	fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r64ValIn.u);
6501	TestData.fMxcsrIn = uMxCsrIn;
6502	TestData.fMxcsrOut = fMxcsr1;
6503	TestData.i64ValOut = i64Out1;
6504	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6505
6506	if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6507	{
6508	fXcpt \|= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6509	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6510	uint32_t fMxcsr2; int64_t i64Out2;
6511	fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r64ValIn.u);
6512	TestData.fMxcsrIn = uMxCsrIn;
6513	TestData.fMxcsrOut = fMxcsr2;
6514	TestData.i64ValOut = i64Out2;
6515	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6516	}
6517	if (!RT_IS_POWER_OF_TWO(fXcpt))
6518	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6519	if (fUnmasked & fXcpt)
6520	{
6521	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6522	uint32_t fMxcsr3; int64_t i64Out3;
6523	fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r64ValIn.u);
6524	TestData.fMxcsrIn = uMxCsrIn;
6525	TestData.fMxcsrOut = fMxcsr3;
6526	TestData.i64ValOut = i64Out3;
6527	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6528	}
6529	}
6530	}
6531	}
6532	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6533	}
6534
6535	return RTEXITCODE_SUCCESS;
6536	}
6537	#endif
6538
6539
6540	static void SseBinaryI64R64Test(void)
6541	{
6542	X86FXSTATE State;
6543	RT_ZERO(State);
6544	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6545	{
6546	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6547	continue;
6548
6549	SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6550	uint32_t const cTests = g_aSseBinaryI64R64[iFn].cTests;
6551	PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6552	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6553	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6554	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6555	{
6556	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6557	{
6558	int64_t i64Dst = 0;
6559	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r64ValIn.u);
6560	if ( fMxcsr != paTests[iTest].fMxcsrOut
6561	\|\| i64Dst != paTests[iTest].i64ValOut)
6562	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6563	"%s -> mxcsr=%#08x %RI64\n"
6564	"%s expected %#08x %RI64%s%s (%s)\n",
6565	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6566	FormatR64(&paTests[iTest].r64ValIn),
6567	iVar ? " " : "", fMxcsr, i64Dst,
6568	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6569	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6570	i64Dst != paTests[iTest].i64ValOut
6571	? " - val" : "",
6572	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6573	}
6574	}
6575
6576	FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R64[iFn]);
6577	}
6578	}
6579
6580
6581	/*
6582	* SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
6583	*/
/* Declares the subtest type SSE_BINARY_I32_R32_T from the test record type
   SSE_BINARY_I32_R32_TEST_T and the worker type PFNIEMAIMPLSSEF2I32U32.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);

/** Subtest table walked by SseBinaryI32R32Generate and SseBinaryI32R32Test. */
static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
{
    ENTRY_BIN(cvttss2si_i32_r32),
    ENTRY_BIN(cvtss2si_i32_r32),
};
6591
6592	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6593	DUMP_ALL_FN(SseBinaryI32R32, g_aSseBinaryI32R32)
6594	static RTEXITCODE SseBinaryI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
6595	{
6596	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6597
6598	static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6599	{
6600	{ RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6601	/** @todo More specials. */
6602	};
6603
6604	uint32_t cMinNormalPairs = (cTests - 144) / 4;
6605	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6606	{
6607	PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;
6608
6609	IEMBINARYOUTPUT BinOut;
6610	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R32[iFn]), RTEXITCODE_FAILURE);
6611
6612	uint32_t cNormalInputPairs = 0;
6613	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6614	{
6615	SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);
6616
6617	TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6618
6619	if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6620	cNormalInputPairs++;
6621	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6622	{
6623	iTest -= 1;
6624	continue;
6625	}
6626
6627	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6628	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6629	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6630	for (uint8_t iFz = 0; iFz < 2; iFz++)
6631	{
6632	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6633	\| (iRounding << X86_MXCSR_RC_SHIFT)
6634	\| (iDaz ? X86_MXCSR_DAZ : 0)
6635	\| (iFz ? X86_MXCSR_FZ : 0)
6636	\| X86_MXCSR_XCPT_MASK;
6637	uint32_t fMxcsrM; int32_t i32OutM;
6638	fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r32ValIn.u);
6639	TestData.fMxcsrIn = uMxCsrIn;
6640	TestData.fMxcsrOut = fMxcsrM;
6641	TestData.i32ValOut = i32OutM;
6642	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6643
6644	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6645	uint32_t fMxcsrU; int32_t i32OutU;
6646	fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r32ValIn.u);
6647	TestData.fMxcsrIn = uMxCsrIn;
6648	TestData.fMxcsrOut = fMxcsrU;
6649	TestData.i32ValOut = i32OutU;
6650	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6651
6652	uint16_t fXcpt = (fMxcsrM \| fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6653	if (fXcpt)
6654	{
6655	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6656	uint32_t fMxcsr1; int32_t i32Out1;
6657	fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r32ValIn.u);
6658	TestData.fMxcsrIn = uMxCsrIn;
6659	TestData.fMxcsrOut = fMxcsr1;
6660	TestData.i32ValOut = i32Out1;
6661	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6662
6663	if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6664	{
6665	fXcpt \|= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6666	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6667	uint32_t fMxcsr2; int32_t i32Out2;
6668	fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r32ValIn.u);
6669	TestData.fMxcsrIn = uMxCsrIn;
6670	TestData.fMxcsrOut = fMxcsr2;
6671	TestData.i32ValOut = i32Out2;
6672	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6673	}
6674	if (!RT_IS_POWER_OF_TWO(fXcpt))
6675	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6676	if (fUnmasked & fXcpt)
6677	{
6678	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6679	uint32_t fMxcsr3; int32_t i32Out3;
6680	fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r32ValIn.u);
6681	TestData.fMxcsrIn = uMxCsrIn;
6682	TestData.fMxcsrOut = fMxcsr3;
6683	TestData.i32ValOut = i32Out3;
6684	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6685	}
6686	}
6687	}
6688	}
6689	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6690	}
6691
6692	return RTEXITCODE_SUCCESS;
6693	}
6694	#endif
6695
6696
6697	static void SseBinaryI32R32Test(void)
6698	{
6699	X86FXSTATE State;
6700	RT_ZERO(State);
6701	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6702	{
6703	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6704	continue;
6705
6706	SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6707	uint32_t const cTests = g_aSseBinaryI32R32[iFn].cTests;
6708	PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6709	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6710	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6711	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6712	{
6713	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6714	{
6715	int32_t i32Dst = 0;
6716
6717	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r32ValIn.u);
6718	if ( fMxcsr != paTests[iTest].fMxcsrOut
6719	\|\| i32Dst != paTests[iTest].i32ValOut)
6720	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6721	"%s -> mxcsr=%#08x %RI32\n"
6722	"%s expected %#08x %RI32%s%s (%s)\n",
6723	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6724	FormatR32(&paTests[iTest].r32ValIn),
6725	iVar ? " " : "", fMxcsr, i32Dst,
6726	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6727	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6728	i32Dst != paTests[iTest].i32ValOut
6729	? " - val" : "",
6730	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6731	}
6732	}
6733
6734	FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R32[iFn]);
6735	}
6736	}
6737
6738
6739	/*
6740	* SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
6741	*/
/* Declares the subtest type SSE_BINARY_I64_R32_T from the test record type
   SSE_BINARY_I64_R32_TEST_T and the worker type PFNIEMAIMPLSSEF2I64U32.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);

/** Subtest table walked by SseBinaryI64R32Generate and SseBinaryI64R32Test. */
static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
{
    ENTRY_BIN(cvttss2si_i64_r32),
    ENTRY_BIN(cvtss2si_i64_r32),
};
6749
6750	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6751	DUMP_ALL_FN(SseBinaryI64R32, g_aSseBinaryI64R32)
6752	static RTEXITCODE SseBinaryI64R32Generate(uint32_t cTests, const char * const *papszNameFmts)
6753	{
6754	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6755
6756	static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6757	{
6758	{ RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6759	/** @todo More specials. */
6760	};
6761
6762	X86FXSTATE State;
6763	RT_ZERO(State);
6764	uint32_t cMinNormalPairs = (cTests - 144) / 4;
6765	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6766	{
6767	PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
6768
6769	IEMBINARYOUTPUT BinOut;
6770	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R32[iFn]), RTEXITCODE_FAILURE);
6771
6772	uint32_t cNormalInputPairs = 0;
6773	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6774	{
6775	SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
6776
6777	TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6778
6779	if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6780	cNormalInputPairs++;
6781	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6782	{
6783	iTest -= 1;
6784	continue;
6785	}
6786
6787	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6788	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6789	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6790	for (uint8_t iFz = 0; iFz < 2; iFz++)
6791	{
6792	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6793	\| (iRounding << X86_MXCSR_RC_SHIFT)
6794	\| (iDaz ? X86_MXCSR_DAZ : 0)
6795	\| (iFz ? X86_MXCSR_FZ : 0)
6796	\| X86_MXCSR_XCPT_MASK;
6797	uint32_t fMxcsrM; int64_t i64OutM;
6798	fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r32ValIn.u);
6799	TestData.fMxcsrIn = State.MXCSR;
6800	TestData.fMxcsrOut = fMxcsrM;
6801	TestData.i64ValOut = i64OutM;
6802	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6803
6804	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6805	uint32_t fMxcsrU; int64_t i64OutU;
6806	fMxcsrU = pfn(uMxCsrIn, &i64OutU, &TestData.r32ValIn.u);
6807	TestData.fMxcsrIn = State.MXCSR;
6808	TestData.fMxcsrOut = fMxcsrU;
6809	TestData.i64ValOut = i64OutU;
6810	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6811
6812	uint16_t fXcpt = (fMxcsrM \| fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6813	if (fXcpt)
6814	{
6815	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6816	uint32_t fMxcsr1; int64_t i64Out1;
6817	fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r32ValIn.u);
6818	TestData.fMxcsrIn = State.MXCSR;
6819	TestData.fMxcsrOut = fMxcsr1;
6820	TestData.i64ValOut = i64Out1;
6821	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6822
6823	if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6824	{
6825	fXcpt \|= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6826	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6827	uint32_t fMxcsr2; int64_t i64Out2;
6828	fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r32ValIn.u);
6829	TestData.fMxcsrIn = State.MXCSR;
6830	TestData.fMxcsrOut = fMxcsr2;
6831	TestData.i64ValOut = i64Out2;
6832	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6833	}
6834	if (!RT_IS_POWER_OF_TWO(fXcpt))
6835	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6836	if (fUnmasked & fXcpt)
6837	{
6838	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6839	uint32_t fMxcsr3; int64_t i64Out3;
6840	fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r32ValIn.u);
6841	TestData.fMxcsrIn = State.MXCSR;
6842	TestData.fMxcsrOut = fMxcsr3;
6843	TestData.i64ValOut = i64Out3;
6844	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6845	}
6846	}
6847	}
6848	}
6849	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6850	}
6851
6852	return RTEXITCODE_SUCCESS;
6853	}
6854	#endif
6855
6856
6857	static void SseBinaryI64R32Test(void)
6858	{
6859	X86FXSTATE State;
6860	RT_ZERO(State);
6861	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6862	{
6863	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6864	continue;
6865
6866	SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6867	uint32_t const cTests = g_aSseBinaryI64R32[iFn].cTests;
6868	PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6869	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6870	if (!cTests) RTTestSkipped(g_hTest, "no tests");
6871	for (uint32_t iVar = 0; iVar < cVars; iVar++)
6872	{
6873	for (uint32_t iTest = 0; iTest < cTests; iTest++)
6874	{
6875	int64_t i64Dst = 0;
6876
6877	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r32ValIn.u);
6878	if ( fMxcsr != paTests[iTest].fMxcsrOut
6879	\|\| i64Dst != paTests[iTest].i64ValOut)
6880	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6881	"%s -> mxcsr=%#08x %RI64\n"
6882	"%s expected %#08x %RI64%s%s (%s)\n",
6883	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6884	FormatR32(&paTests[iTest].r32ValIn),
6885	iVar ? " " : "", fMxcsr, i64Dst,
6886	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6887	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6888	i64Dst != paTests[iTest].i64ValOut
6889	? " - val" : "",
6890	FormatMxcsr(paTests[iTest].fMxcsrIn) );
6891	}
6892	}
6893
6894	FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R32[iFn]);
6895	}
6896	}
6897
6898
6899	/*
6900	* SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6901	*/
/* Declares the subtest type SSE_BINARY_R64_I32_T from the test record type
   SSE_BINARY_R64_I32_TEST_T and the worker type PFNIEMAIMPLSSEF2R64I32.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);

/** Subtest table walked by SseBinaryR64I32Generate and SseBinaryR64I32Test. */
static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i32)
};
6908
6909	#ifdef TSTIEMAIMPL_WITH_GENERATOR
6910	DUMP_ALL_FN(SseBinaryR64I32, g_aSseBinaryR64I32)
6911	static RTEXITCODE SseBinaryR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
6912	{
6913	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6914
6915	static int32_t const s_aSpecials[] =
6916	{
6917	INT32_MIN,
6918	INT32_MAX,
6919	/** @todo More specials. */
6920	};
6921
6922	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6923	{
6924	PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;
6925
6926	IEMBINARYOUTPUT BinOut;
6927	AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I32[iFn]), RTEXITCODE_FAILURE);
6928
6929	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6930	{
6931	SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);
6932
6933	TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6934
6935	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6936	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6937	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6938	for (uint8_t iFz = 0; iFz < 2; iFz++)
6939	{
6940	uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
6941	\| (iRounding << X86_MXCSR_RC_SHIFT)
6942	\| (iDaz ? X86_MXCSR_DAZ : 0)
6943	\| (iFz ? X86_MXCSR_FZ : 0)
6944	\| X86_MXCSR_XCPT_MASK;
6945	uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6946	fMxcsrM = pfn(uMxCsrIn, &r64OutM, &TestData.i32ValIn);
6947	TestData.fMxcsrIn = uMxCsrIn;
6948	TestData.fMxcsrOut = fMxcsrM;
6949	TestData.r64ValOut = r64OutM;
6950	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6951
6952	uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
6953	uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6954	fMxcsrU = pfn(uMxCsrIn, &r64OutU, &TestData.i32ValIn);
6955	TestData.fMxcsrIn = uMxCsrIn;
6956	TestData.fMxcsrOut = fMxcsrU;
6957	TestData.r64ValOut = r64OutU;
6958	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6959
6960	uint16_t fXcpt = (fMxcsrM \| fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6961	if (fXcpt)
6962	{
6963	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
6964	uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6965	fMxcsr1 = pfn(uMxCsrIn, &r64Out1, &TestData.i32ValIn);
6966	TestData.fMxcsrIn = uMxCsrIn;
6967	TestData.fMxcsrOut = fMxcsr1;
6968	TestData.r64ValOut = r64Out1;
6969	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6970
6971	if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6972	{
6973	fXcpt \|= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6974	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6975	uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6976	fMxcsr2 = pfn(uMxCsrIn, &r64Out2, &TestData.i32ValIn);
6977	TestData.fMxcsrIn = uMxCsrIn;
6978	TestData.fMxcsrOut = fMxcsr2;
6979	TestData.r64ValOut = r64Out2;
6980	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6981	}
6982	if (!RT_IS_POWER_OF_TWO(fXcpt))
6983	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6984	if (fUnmasked & fXcpt)
6985	{
6986	uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6987	uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6988	fMxcsr3 = pfn(uMxCsrIn, &r64Out3, &TestData.i32ValIn);
6989	TestData.fMxcsrIn = uMxCsrIn;
6990	TestData.fMxcsrOut = fMxcsr3;
6991	TestData.r64ValOut = r64Out3;
6992	GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6993	}
6994	}
6995	}
6996	}
6997	AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6998	}
6999
7000	return RTEXITCODE_SUCCESS;
7001	}
7002	#endif
7003
7004
7005	static void SseBinaryR64I32Test(void)
7006	{
7007	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
7008	{
7009	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
7010	continue;
7011
7012	SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
7013	uint32_t const cTests = g_aSseBinaryR64I32[iFn].cTests;
7014	PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
7015	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
7016	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7017	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7018	{
7019	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7020	{
7021	RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7022
7023	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i32ValIn);
7024	if ( fMxcsr != paTests[iTest].fMxcsrOut
7025	\|\| !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7026	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7027	"%s -> mxcsr=%#08x %s\n"
7028	"%s expected %#08x %s%s%s (%s)\n",
7029	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7030	&paTests[iTest].i32ValIn,
7031	iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7032	iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7033	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7034	!RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7035	? " - val" : "",
7036	FormatMxcsr(paTests[iTest].fMxcsrIn) );
7037	}
7038	}
7039
7040	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I32[iFn]);
7041	}
7042	}
7043
7044
7045	/*
7046	* SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
7047	*/
/* Declares the subtest type SSE_BINARY_R64_I64_T from the test record type
   SSE_BINARY_R64_I64_TEST_T and the worker type PFNIEMAIMPLSSEF2R64I64.
   NOTE(review): TYPEDEF_SUBTEST_TYPE is defined outside this section - check
   there for the exact members. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);

/** Subtest table walked by SseBinaryR64I64Generate. */
static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i64),
};
7054
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR64I64, g_aSseBinaryR64I64)
/**
 * Generates the binary test data for every worker in g_aSseBinaryR64I64.
 *
 * Each input integer is run through the worker for all four rounding modes
 * with DAZ and FZ both off and on, first with all exceptions masked and then
 * with all of them unmasked.  When either run reports exception flags, further
 * records are written with those flags preset in the input MXCSR and with
 * different exception mask combinations.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output cannot be
 *          opened or closed.
 * @param   cTests          Number of random inputs; raised to at least 192.
 * @param   papszNameFmts   Name formats handed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryR64I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    if (cTests < 192) /* there are 144 standard input variations */
        cTests = 192;

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLSSEF2R64I64 pfnWorker = g_aSseBinaryR64I64[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryR64I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I64[iFn]), RTEXITCODE_FAILURE);

        size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_BINARY_R64_I64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special values at the end. */
            if (iTest < cTests)
                TestData.i64ValIn = RandI64Src(iTest);
            else
                TestData.i64ValIn = s_aSpecials[iTest - cTests];

            /* Starting exception flags shared by all variations of this input. */
            uint32_t const fStartFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fStartFlags & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        RTFLOAT64U r64Masked;
                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &r64Masked, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.r64ValOut = r64Masked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        RTFLOAT64U r64Unmasked;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &r64Unmasked, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.r64ValOut = r64Unmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* The remaining runs only apply when something was flagged. */
                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: the reported flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        RTFLOAT64U r64Preset;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &r64Preset, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.r64ValOut = r64Preset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: if run 3 reported additional flags, mask everything seen so far. */
                        uint32_t const fPresetFlags = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fPresetFlags & fXcpt) != fPresetFlags)
                        {
                            fXcpt |= fPresetFlags;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT64U r64AllSeen;
                            uint32_t const fMxcsrAllSeen = pfnWorker(fMxcsrIn, &r64AllSeen, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllSeen;
                            TestData.r64ValOut = r64AllSeen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* Run 5..n: with several flags seen, leave each one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT64U r64OneOpen;
                            uint32_t const fMxcsrOneOpen = pfnWorker(fMxcsrIn, &r64OneOpen, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOneOpen;
                            TestData.r64ValOut = r64OneOpen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7149
7150
7151	static void SseBinaryR64I64Test(void)
7152	{
7153	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
7154	{
7155	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
7156	continue;
7157
7158	SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
7159	uint32_t const cTests = g_aSseBinaryR64I64[iFn].cTests;
7160	PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
7161	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
7162	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7163	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7164	{
7165	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7166	{
7167	RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7168
7169	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i64ValIn);
7170	if ( fMxcsr != paTests[iTest].fMxcsrOut
7171	\|\| !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7172	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7173	"%s -> mxcsr=%#08x %s\n"
7174	"%s expected %#08x %s%s%s (%s)\n",
7175	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7176	&paTests[iTest].i64ValIn,
7177	iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7178	iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7179	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7180	!RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7181	? " - val" : "",
7182	FormatMxcsr(paTests[iTest].fMxcsrIn) );
7183	}
7184	}
7185
7186	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I64[iFn]);
7187	}
7188	}
7189
7190
7191	/*
7192	* SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
7193	*/
/* Declares SSE_BINARY_R32_I32_T, the table entry type used below.  Going by how
   the entries are used in this file they provide the worker pointers (pfn,
   pfnNative) and the recorded test vectors (paTests, cTests). */
7194	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
7195
/* Workers handled by SseBinaryR32I32Generate() and SseBinaryR32I32Test(). */
7196	static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
7197	{
7198	ENTRY_BIN(cvtsi2ss_r32_i32),
7199	};
7200
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I32, g_aSseBinaryR32I32)
/**
 * Generates the binary test data for every worker in g_aSseBinaryR32I32.
 *
 * Each input integer is run through the worker for all four rounding modes
 * with DAZ and FZ both off and on, first with all exceptions masked and then
 * with all of them unmasked.  When either run reports exception flags, further
 * records are written with those flags preset in the input MXCSR and with
 * different exception mask combinations.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output cannot be
 *          opened or closed.
 * @param   cTests          Number of random inputs; raised to at least 192.
 * @param   papszNameFmts   Name formats handed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    if (cTests < 192) /* there are 144 standard input variations */
        cTests = 192;

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLSSEF2R32I32 pfnWorker = g_aSseBinaryR32I32[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I32[iFn]), RTEXITCODE_FAILURE);

        size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_BINARY_R32_I32_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special values at the end. */
            if (iTest < cTests)
                TestData.i32ValIn = RandI32Src2(iTest);
            else
                TestData.i32ValIn = s_aSpecials[iTest - cTests];

            /* Starting exception flags shared by all variations of this input. */
            uint32_t const fStartFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fStartFlags & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        RTFLOAT32U r32Masked;
                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &r32Masked, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.r32ValOut = r32Masked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        RTFLOAT32U r32Unmasked;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &r32Unmasked, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.r32ValOut = r32Unmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* The remaining runs only apply when something was flagged. */
                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: the reported flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        RTFLOAT32U r32Preset;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &r32Preset, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.r32ValOut = r32Preset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: if run 3 reported additional flags, mask everything seen so far. */
                        uint32_t const fPresetFlags = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fPresetFlags & fXcpt) != fPresetFlags)
                        {
                            fXcpt |= fPresetFlags;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT32U r32AllSeen;
                            uint32_t const fMxcsrAllSeen = pfnWorker(fMxcsrIn, &r32AllSeen, &TestData.i32ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllSeen;
                            TestData.r32ValOut = r32AllSeen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* Run 5..n: with several flags seen, leave each one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT32U r32OneOpen;
                            uint32_t const fMxcsrOneOpen = pfnWorker(fMxcsrIn, &r32OneOpen, &TestData.i32ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOneOpen;
                            TestData.r32ValOut = r32OneOpen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7295
7296
7297	static void SseBinaryR32I32Test(void)
7298	{
7299	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
7300	{
7301	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
7302	continue;
7303
7304	SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
7305	uint32_t const cTests = g_aSseBinaryR32I32[iFn].cTests;
7306	PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
7307	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
7308	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7309	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7310	{
7311	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7312	{
7313	RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7314
7315	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i32ValIn);
7316	if ( fMxcsr != paTests[iTest].fMxcsrOut
7317	\|\| !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7318	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7319	"%s -> mxcsr=%#08x %RI32\n"
7320	"%s expected %#08x %RI32%s%s (%s)\n",
7321	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7322	&paTests[iTest].i32ValIn,
7323	iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7324	iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7325	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7326	!RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7327	? " - val" : "",
7328	FormatMxcsr(paTests[iTest].fMxcsrIn) );
7329	}
7330	}
7331
7332	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I32[iFn]);
7333	}
7334	}
7335
7336
7337	/*
7338	* SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
7339	*/
/* Declares SSE_BINARY_R32_I64_T, the table entry type used below.  Going by how
   the entries are used in this file they provide the worker pointers (pfn,
   pfnNative) and the recorded test vectors (paTests, cTests). */
7340	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
7341
/* Workers handled by SseBinaryR32I64Generate() and SseBinaryR32I64Test(). */
7342	static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
7343	{
7344	ENTRY_BIN(cvtsi2ss_r32_i64),
7345	};
7346
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I64, g_aSseBinaryR32I64)
/**
 * Generates the binary test data for every worker in g_aSseBinaryR32I64.
 *
 * Each input integer is run through the worker for all four rounding modes
 * with DAZ and FZ both off and on, first with all exceptions masked and then
 * with all of them unmasked.  When either run reports exception flags, further
 * records are written with those flags preset in the input MXCSR and with
 * different exception mask combinations.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output cannot be
 *          opened or closed.
 * @param   cTests          Number of random inputs; raised to at least 192.
 * @param   papszNameFmts   Name formats handed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryR32I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    if (cTests < 192) /* there are 144 standard input variations */
        cTests = 192;

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLSSEF2R32I64 pfnWorker = g_aSseBinaryR32I64[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryR32I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I64[iFn]), RTEXITCODE_FAILURE);

        size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_BINARY_R32_I64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special values at the end. */
            if (iTest < cTests)
                TestData.i64ValIn = RandI64Src(iTest);
            else
                TestData.i64ValIn = s_aSpecials[iTest - cTests];

            /* Starting exception flags shared by all variations of this input. */
            uint32_t const fStartFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fStartFlags & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        RTFLOAT32U r32Masked;
                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &r32Masked, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.r32ValOut = r32Masked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        RTFLOAT32U r32Unmasked;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &r32Unmasked, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.r32ValOut = r32Unmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* The remaining runs only apply when something was flagged. */
                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: the reported flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        RTFLOAT32U r32Preset;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &r32Preset, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.r32ValOut = r32Preset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: if run 3 reported additional flags, mask everything seen so far. */
                        uint32_t const fPresetFlags = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fPresetFlags & fXcpt) != fPresetFlags)
                        {
                            fXcpt |= fPresetFlags;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT32U r32AllSeen;
                            uint32_t const fMxcsrAllSeen = pfnWorker(fMxcsrIn, &r32AllSeen, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllSeen;
                            TestData.r32ValOut = r32AllSeen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* Run 5..n: with several flags seen, leave each one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            RTFLOAT32U r32OneOpen;
                            uint32_t const fMxcsrOneOpen = pfnWorker(fMxcsrIn, &r32OneOpen, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOneOpen;
                            TestData.r32ValOut = r32OneOpen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7441
7442
7443	static void SseBinaryR32I64Test(void)
7444	{
7445	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7446	{
7447	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
7448	continue;
7449
7450	SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
7451	uint32_t const cTests = g_aSseBinaryR32I64[iFn].cTests;
7452	PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
7453	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
7454	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7455	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7456	{
7457	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7458	{
7459	RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7460
7461	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i64ValIn);
7462	if ( fMxcsr != paTests[iTest].fMxcsrOut
7463	\|\| !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7464	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7465	"%s -> mxcsr=%#08x %RI32\n"
7466	"%s expected %#08x %RI32%s%s (%s)\n",
7467	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7468	&paTests[iTest].i64ValIn,
7469	iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7470	iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7471	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7472	!RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7473	? " - val" : "",
7474	FormatMxcsr(paTests[iTest].fMxcsrIn) );
7475	}
7476	}
7477
7478	FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I64[iFn]);
7479	}
7480	}
7481
7482
7483	/*
7484	* Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
7485	*/
/* Declares SSE_COMPARE_EFL_R32_R32_T, the table entry type used below.  Going by
   how the entries are used in this file they provide the worker pointers (pfn,
   pfnNative) and the recorded test vectors (paTests, cTests). */
7486	TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSRR32R32);
7487
/* Workers handled by SseCompareEflR32R32Generate() and SseCompareEflR32R32Test(). */
7488	static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
7489	{
7490	ENTRY_BIN(ucomiss_u128),
7491	ENTRY_BIN(comiss_u128),
7492	ENTRY_BIN_AVX(vucomiss_u128),
7493	ENTRY_BIN_AVX(vcomiss_u128),
7494	};
7495
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR32R32, g_aSseCompareEflR32R32)
/**
 * Generates the binary test data for every worker in g_aSseCompareEflR32R32.
 *
 * Each pair of single-precision inputs is run through the worker with a
 * random starting EFLAGS value for all four rounding modes with DAZ and FZ
 * both off and on, first with all exceptions masked and then with all of them
 * unmasked.  When either run reports exception flags, further records are
 * written with those flags preset in the input MXCSR and with different
 * exception mask combinations.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output cannot be
 *          opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Name formats handed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseCompareEflR32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    if (cTests < 192) /* there are 144 standard input variations */
        cTests = 192;

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLF2EFLMXCSRR32R32 pfnWorker = g_aSseCompareEflR32R32[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseCompareEflR32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR32R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t     cNormalInputPairs = 0;
        size_t const cTotalInputs      = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special pairs at the end. */
            if (iTest < cTests)
                TestData.r32ValIn1 = RandR32Src(iTest);
            else
                TestData.r32ValIn1 = s_aSpecials[iTest - cTests].Val1;
            if (iTest < cTests)
                TestData.r32ValIn2 = RandR32Src(iTest);
            else
                TestData.r32ValIn2 = s_aSpecials[iTest - cTests].Val2;

            /* Count the all-normal pairs; near the end of the random range retry
               the same index until the minimum number of them has been seen. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
                && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Starting exception flags and EFLAGS shared by all variations of this pair. */
            uint32_t const fStartFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags     = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fStartFlags & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t       fEflMasked   = fEFlags;
                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &fEflMasked, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflMasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t       fEflUnmasked   = fEFlags;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &fEflUnmasked, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflUnmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* The remaining runs only apply when something was flagged. */
                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: the reported flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t       fEflPreset   = fEFlags;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &fEflPreset, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflPreset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: if run 3 reported additional flags, mask everything seen so far. */
                        uint32_t const fPresetFlags = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fPresetFlags & fXcpt) != fPresetFlags)
                        {
                            fXcpt |= fPresetFlags;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t       fEflAllSeen   = fEFlags;
                            uint32_t const fMxcsrAllSeen = pfnWorker(fMxcsrIn, &fEflAllSeen, TestData.r32ValIn1, TestData.r32ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllSeen;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEflAllSeen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* Run 5..n: with several flags seen, leave each one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t       fEflOneOpen   = fEFlags;
                            uint32_t const fMxcsrOneOpen = pfnWorker(fMxcsrIn, &fEflOneOpen, TestData.r32ValIn1, TestData.r32ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOneOpen;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEflOneOpen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7619
7620	static void SseCompareEflR32R32Test(void)
7621	{
7622	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7623	{
7624	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7625	continue;
7626
7627	SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7628	uint32_t const cTests = g_aSseCompareEflR32R32[iFn].cTests;
7629	PFNIEMAIMPLF2EFLMXCSRR32R32 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7630	uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7631	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7632	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7633	{
7634	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7635	{
7636	uint32_t fEFlags = paTests[iTest].fEflIn;
7637	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r32ValIn1, paTests[iTest].r32ValIn2);
7638	if ( fMxcsr != paTests[iTest].fMxcsrOut
7639	\|\| fEFlags != paTests[iTest].fEflOut)
7640	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7641	"%s -> mxcsr=%#08x %#08x\n"
7642	"%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7643	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7644	FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7645	iVar ? " " : "", fMxcsr, fEFlags,
7646	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7647	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7648	FormatMxcsr(paTests[iTest].fMxcsrIn),
7649	EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7650	}
7651	}
7652
7653	FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR32R32[iFn]);
7654	}
7655	}
7656
7657
7658	/*
7659	* Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
7660	*/
/* Declares SSE_COMPARE_EFL_R64_R64_T, the table entry type used below.  Going by
   how the entries are used in this file they provide the worker pointers (pfn,
   pfnNative) and the recorded test vectors (paTests, cTests). */
7661	TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSRR64R64);
7662
/* Workers handled by SseCompareEflR64R64Generate() and SseCompareEflR64R64Test(). */
7663	static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
7664	{
7665	ENTRY_BIN(ucomisd_u128),
7666	ENTRY_BIN(comisd_u128),
7667	ENTRY_BIN_AVX(vucomisd_u128),
7668	ENTRY_BIN_AVX(vcomisd_u128)
7669	};
7670
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR64R64, g_aSseCompareEflR64R64)
/**
 * Generates the binary test data for every worker in g_aSseCompareEflR64R64.
 *
 * Each pair of double-precision inputs is run through the worker with a
 * random starting EFLAGS value for all four rounding modes with DAZ and FZ
 * both off and on, first with all exceptions masked and then with all of them
 * unmasked.  When either run reports exception flags, further records are
 * written with those flags preset in the input MXCSR and with different
 * exception mask combinations.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output cannot be
 *          opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Name formats handed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseCompareEflR64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    if (cTests < 192) /* there are 144 standard input variations */
        cTests = 192;

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLF2EFLMXCSRR64R64 pfnWorker = g_aSseCompareEflR64R64[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseCompareEflR64R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR64R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t     cNormalInputPairs = 0;
        size_t const cTotalInputs      = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special pairs at the end. */
            if (iTest < cTests)
                TestData.r64ValIn1 = RandR64Src(iTest);
            else
                TestData.r64ValIn1 = s_aSpecials[iTest - cTests].Val1;
            if (iTest < cTests)
                TestData.r64ValIn2 = RandR64Src(iTest);
            else
                TestData.r64ValIn2 = s_aSpecials[iTest - cTests].Val2;

            /* Count the all-normal pairs; near the end of the random range retry
               the same index until the minimum number of them has been seen. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
                && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Starting exception flags and EFLAGS shared by all variations of this pair. */
            uint32_t const fStartFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags     = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fStartFlags & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t       fEflMasked   = fEFlags;
                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &fEflMasked, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflMasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t       fEflUnmasked   = fEFlags;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &fEflUnmasked, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflUnmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* The remaining runs only apply when something was flagged. */
                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: the reported flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t       fEflPreset   = fEFlags;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &fEflPreset, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEflPreset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: if run 3 reported additional flags, mask everything seen so far. */
                        uint32_t const fPresetFlags = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fPresetFlags & fXcpt) != fPresetFlags)
                        {
                            fXcpt |= fPresetFlags;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t       fEflAllSeen   = fEFlags;
                            uint32_t const fMxcsrAllSeen = pfnWorker(fMxcsrIn, &fEflAllSeen, TestData.r64ValIn1, TestData.r64ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllSeen;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEflAllSeen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* Run 5..n: with several flags seen, leave each one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t       fEflOneOpen   = fEFlags;
                            uint32_t const fMxcsrOneOpen = pfnWorker(fMxcsrIn, &fEflOneOpen, TestData.r64ValIn1, TestData.r64ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOneOpen;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEflOneOpen;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7794
7795	static void SseCompareEflR64R64Test(void)
7796	{
7797	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7798	{
7799	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7800	continue;
7801
7802	SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7803	uint32_t const cTests = g_aSseCompareEflR64R64[iFn].cTests;
7804	PFNIEMAIMPLF2EFLMXCSRR64R64 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7805	uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7806	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7807	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7808	{
7809	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7810	{
7811	uint32_t fEFlags = paTests[iTest].fEflIn;
7812	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r64ValIn1, paTests[iTest].r64ValIn2);
7813	if ( fMxcsr != paTests[iTest].fMxcsrOut
7814	\|\| fEFlags != paTests[iTest].fEflOut)
7815	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7816	"%s -> mxcsr=%#08x %#08x\n"
7817	"%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7818	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7819	FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7820	iVar ? " " : "", fMxcsr, fEFlags,
7821	iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7822	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7823	FormatMxcsr(paTests[iTest].fMxcsrIn),
7824	EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7825	}
7826	}
7827
7828	FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR64R64[iFn]);
7829	}
7830	}
7831
7832
7833	/*
7834	* Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7835	*/
7836	/** Maximum immediate to try to keep the testdata size under control (at least a little bit). */
7837	#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
7838
/* Declares SSE_COMPARE_F3_XMM_IMM8_T, the table entry type used below.  Going by
   how the entries are used in this file they provide the worker pointers (pfn,
   pfnNative). */
7839	TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F3_XMM_IMM8_T, SSE_COMPARE_F3_XMM_IMM8_TEST_T, PFNIEMAIMPLMEDIAF3XMMIMM8);
7840
/* Workers handled by SseCompareF3XmmR32Imm8Generate(), which runs each of them
   with every immediate from 0 up to and including SSE_COMPARE_F2_XMM_IMM8_MAX. */
7841	static SSE_COMPARE_F3_XMM_IMM8_T g_aSseCompareF3XmmR32Imm8[] =
7842	{
7843	ENTRY_BIN(cmpps_u128),
7844	ENTRY_BIN(cmpss_u128)
7845	};
7846
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareF3XmmR32Imm8, g_aSseCompareF3XmmR32Imm8)

/**
 * Produces the binary test data for every worker in g_aSseCompareF3XmmR32Imm8.
 *
 * Each input pair is run with every imm8 value up to SSE_COMPARE_F2_XMM_IMM8_MAX,
 * the four rounding control values and both DAZ and FZ settings, once with all
 * exceptions masked and once with none masked.  Further records are written
 * when exception flags were raised by those two runs.
 *
 * @returns RTEXITCODE_SUCCESS; RTEXITCODE_FAILURE if an output could not be
 *          opened or closed.
 * @param   cTests          Requested number of random inputs (at least 192 are used).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseCompareF3XmmR32Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1)  },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0)  },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  },
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials); /* random inputs first, then the specials */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF3XmmR32Imm8); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLMEDIAF3XMMIMM8 pfnWorker = g_aSseCompareF3XmmR32Imm8[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseCompareF3XmmR32Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF3XmmR32Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; iTest += 1)
        {
            SSE_COMPARE_F3_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* Fill the four lanes of both operands (first operand first). */
            if (iTest < cTests)
            {
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal1.ar32[iLane] = RandR32Src(iTest);
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal2.ar32[iLane] = RandR32Src(iTest);
            }
            else
                for (unsigned iLane = 0; iLane < 4; iLane++)
                {
                    TestData.InVal1.ar32[iLane] = s_aSpecials[iTest - cTests].Val1;
                    TestData.InVal2.ar32[iLane] = s_aSpecials[iTest - cTests].Val2;
                }

            bool fAllNormal = true;
            for (unsigned iLane = 0; iLane < 4 && fAllNormal; iLane++)
                fAllNormal =    RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[iLane])
                             && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[iLane]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this slot with fresh random values. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            TestData.bImm = bImm;

                            /* Run 1: all exceptions masked. */
                            uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                            fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                            if (iDaz)
                                fMxcsrIn |= X86_MXCSR_DAZ;
                            if (iFz)
                                fMxcsrIn |= X86_MXCSR_FZ;
                            fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                            X86XMMREG      OutMasked;
                            uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &OutMasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrMasked;
                            TestData.OutVal    = OutMasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Run 2: no exceptions masked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            X86XMMREG      OutUnmasked;
                            uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &OutUnmasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrUnmasked;
                            TestData.OutVal    = OutUnmasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            /* Run 3: unmasked again, with the raised flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG      OutPreset;
                            uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &OutPreset, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrPreset;
                            TestData.OutVal    = OutPreset;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Run 4: only if run 3 raised flags not seen before; mask all raised ones. */
                            uint32_t const fXcptPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                            if ((fXcptPreset & fXcpt) != fXcptPreset)
                            {
                                fXcpt |= fXcptPreset;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG      OutAllMasked;
                                uint32_t const fMxcsrAllMasked = pfnWorker(fMxcsrIn, &OutAllMasked, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrAllMasked;
                                TestData.OutVal    = OutAllMasked;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }

                            /* With more than one flag raised: leave each raised one unmasked in turn. */
                            if (RT_IS_POWER_OF_TWO(fXcpt))
                                continue;
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG      OutOne;
                                uint32_t const fMxcsrOne = pfnWorker(fMxcsrIn, &OutOne, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrOne;
                                TestData.OutVal    = OutOne;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                        }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7981
7982	static void SseCompareF3XmmR32Imm8Test(void)
7983	{
7984	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF3XmmR32Imm8); iFn++)
7985	{
7986	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF3XmmR32Imm8[iFn]))
7987	continue;
7988
7989	SSE_COMPARE_F3_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF3XmmR32Imm8[iFn].paTests;
7990	uint32_t const cTests = g_aSseCompareF3XmmR32Imm8[iFn].cTests;
7991	PFNIEMAIMPLMEDIAF3XMMIMM8 pfn = g_aSseCompareF3XmmR32Imm8[iFn].pfn;
7992	uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF3XmmR32Imm8[iFn]);
7993	if (!cTests) RTTestSkipped(g_hTest, "no tests");
7994	for (uint32_t iVar = 0; iVar < cVars; iVar++)
7995	{
7996	for (uint32_t iTest = 0; iTest < cTests; iTest++)
7997	{
7998	IEMMEDIAF2XMMSRC Src;
7999	X86XMMREG ValOut;
8000
8001	Src.uSrc1 = paTests[iTest].InVal1;
8002	Src.uSrc2 = paTests[iTest].InVal2;
8003	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
8004	if ( fMxcsr != paTests[iTest].fMxcsrOut
8005	\|\| ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
8006	\|\| ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
8007	\|\| ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
8008	\|\| ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
8009	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
8010	"%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
8011	"%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
8012	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8013	FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
8014	FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
8015	FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
8016	FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
8017	paTests[iTest].bImm,
8018	iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
8019	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8020	paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
8021	paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
8022	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8023	( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
8024	\|\| ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
8025	\|\| ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
8026	\|\| ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
8027	? " - val" : "",
8028	FormatMxcsr(paTests[iTest].fMxcsrIn));
8029	}
8030	}
8031
8032	FREE_DECOMPRESSED_TESTS(g_aSseCompareF3XmmR32Imm8[iFn]);
8033	}
8034	}
8035
8036
/*
 * Compare SSE operations on packed and scalar double-precision floating point values - outputting a mask.
 */
8040	static SSE_COMPARE_F3_XMM_IMM8_T g_aSseCompareF3XmmR64Imm8[] =
8041	{
8042	ENTRY_BIN(cmppd_u128),
8043	ENTRY_BIN(cmpsd_u128)
8044	};
8045
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareF3XmmR64Imm8, g_aSseCompareF3XmmR64Imm8)

/**
 * Produces the binary test data for every worker in g_aSseCompareF3XmmR64Imm8.
 *
 * Each input pair is run with every imm8 value up to SSE_COMPARE_F2_XMM_IMM8_MAX,
 * the four rounding control values and both DAZ and FZ settings, once with all
 * exceptions masked and once with none masked.  Further records are written
 * when exception flags were raised by those two runs.
 *
 * @returns RTEXITCODE_SUCCESS; RTEXITCODE_FAILURE if an output could not be
 *          opened or closed.
 * @param   cTests          Requested number of random inputs (at least 192 are used).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseCompareF3XmmR64Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  },
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials); /* random inputs first, then the specials */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF3XmmR64Imm8); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLMEDIAF3XMMIMM8 pfnWorker = g_aSseCompareF3XmmR64Imm8[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseCompareF3XmmR64Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF3XmmR64Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; iTest += 1)
        {
            SSE_COMPARE_F3_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* Fill the two lanes of both operands (first operand first). */
            if (iTest < cTests)
            {
                for (unsigned iLane = 0; iLane < 2; iLane++)
                    TestData.InVal1.ar64[iLane] = RandR64Src(iTest);
                for (unsigned iLane = 0; iLane < 2; iLane++)
                    TestData.InVal2.ar64[iLane] = RandR64Src(iTest);
            }
            else
                for (unsigned iLane = 0; iLane < 2; iLane++)
                {
                    TestData.InVal1.ar64[iLane] = s_aSpecials[iTest - cTests].Val1;
                    TestData.InVal2.ar64[iLane] = s_aSpecials[iTest - cTests].Val2;
                }

            bool fAllNormal = true;
            for (unsigned iLane = 0; iLane < 2 && fAllNormal; iLane++)
                fAllNormal =    RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[iLane])
                             && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[iLane]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this slot with fresh random values. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            TestData.bImm = bImm;

                            /* Run 1: all exceptions masked. */
                            uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                            fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                            if (iDaz)
                                fMxcsrIn |= X86_MXCSR_DAZ;
                            if (iFz)
                                fMxcsrIn |= X86_MXCSR_FZ;
                            fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                            X86XMMREG      OutMasked;
                            uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &OutMasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrMasked;
                            TestData.OutVal    = OutMasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Run 2: no exceptions masked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            X86XMMREG      OutUnmasked;
                            uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &OutUnmasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrUnmasked;
                            TestData.OutVal    = OutUnmasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            /* Run 3: unmasked again, with the raised flags already set on input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG      OutPreset;
                            uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &OutPreset, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrPreset;
                            TestData.OutVal    = OutPreset;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Run 4: only if run 3 raised flags not seen before; mask all raised ones. */
                            uint32_t const fXcptPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                            if ((fXcptPreset & fXcpt) != fXcptPreset)
                            {
                                fXcpt |= fXcptPreset;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG      OutAllMasked;
                                uint32_t const fMxcsrAllMasked = pfnWorker(fMxcsrIn, &OutAllMasked, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrAllMasked;
                                TestData.OutVal    = OutAllMasked;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }

                            /* With more than one flag raised: leave each raised one unmasked in turn. */
                            if (RT_IS_POWER_OF_TWO(fXcpt))
                                continue;
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG      OutOne;
                                uint32_t const fMxcsrOne = pfnWorker(fMxcsrIn, &OutOne, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrOne;
                                TestData.OutVal    = OutOne;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                        }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8172
8173	static void SseCompareF3XmmR64Imm8Test(void)
8174	{
8175	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF3XmmR64Imm8); iFn++)
8176	{
8177	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF3XmmR64Imm8[iFn]))
8178	continue;
8179
8180	SSE_COMPARE_F3_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF3XmmR64Imm8[iFn].paTests;
8181	uint32_t const cTests = g_aSseCompareF3XmmR64Imm8[iFn].cTests;
8182	PFNIEMAIMPLMEDIAF3XMMIMM8 pfn = g_aSseCompareF3XmmR64Imm8[iFn].pfn;
8183	uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF3XmmR64Imm8[iFn]);
8184	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8185	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8186	{
8187	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8188	{
8189	IEMMEDIAF2XMMSRC Src;
8190	X86XMMREG ValOut;
8191
8192	Src.uSrc1 = paTests[iTest].InVal1;
8193	Src.uSrc2 = paTests[iTest].InVal2;
8194	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
8195	if ( fMxcsr != paTests[iTest].fMxcsrOut
8196	\|\| ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8197	\|\| ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8198	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
8199	"%s -> mxcsr=%#08x %RX64'%RX64\n"
8200	"%s expected %#08x %RX64'%RX64%s%s (%s)\n",
8201	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8202	FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
8203	FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
8204	paTests[iTest].bImm,
8205	iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
8206	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8207	paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
8208	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8209	( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8210	\|\| ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8211	? " - val" : "",
8212	FormatMxcsr(paTests[iTest].fMxcsrIn));
8213	}
8214	}
8215
8216	FREE_DECOMPRESSED_TESTS(g_aSseCompareF3XmmR64Imm8[iFn]);
8217	}
8218	}
8219
8220
8221	/*
8222	* Convert SSE operations converting signed double-words to single-precision floating point values.
8223	*/
8224	TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
8225
8226	static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
8227	{
8228	ENTRY_BIN(cvtdq2ps_u128)
8229	};
8230
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmI32R32, g_aSseConvertXmmI32R32)

/**
 * Produces the binary test data for every worker in g_aSseConvertXmmI32R32.
 *
 * Each input is run with the four rounding control values and both DAZ and FZ
 * settings, once with all exceptions masked and once with none masked.
 * Further records are written when exception flags were raised by those runs.
 *
 * @returns RTEXITCODE_SUCCESS; RTEXITCODE_FAILURE if an output could not be
 *          opened or closed.
 * @param   cTests          Requested number of random inputs (at least 192 are used).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    size_t const cInputs = cTests + RT_ELEMENTS(s_aSpecials); /* random inputs first, then the specials */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128 pfnWorker = g_aSseConvertXmmI32R32[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseConvertXmmI32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cInputs; iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* All four lanes: a random value each, or the same special in every lane. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ai32[iLane] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: no exceptions masked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: unmasked again, with the raised flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: only if run 3 raised flags not seen before; mask all raised ones. */
                        uint32_t const fXcptPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fXcptPreset & fXcpt) != fXcptPreset)
                        {
                            fXcpt |= fXcptPreset;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrAllMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* With more than one flag raised: leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrOne = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOne;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8322
8323	static void SseConvertXmmI32R32Test(void)
8324	{
8325	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
8326	{
8327	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
8328	continue;
8329
8330	SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
8331	uint32_t const cTests = g_aSseConvertXmmI32R32[iFn].cTests;
8332	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
8333	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
8334	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8335	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8336	{
8337	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8338	{
8339	X86XMMREG Res; RT_ZERO(Res);
8340
8341	uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8342	if ( fMxCsr != paTests[iTest].fMxcsrOut
8343	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
8344	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
8345	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
8346	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8347	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8348	"%s -> mxcsr=%#08x %s'%s'%s'%s\n"
8349	"%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
8350	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8351	paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8352	paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8353	iVar ? " " : "", fMxCsr,
8354	FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
8355	FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
8356	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8357	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8358	FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
8359	MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8360	( !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
8361	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
8362	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
8363	\|\| !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8364	? " - val" : "",
8365	FormatMxcsr(paTests[iTest].fMxcsrIn));
8366	}
8367	}
8368
8369	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R32[iFn]);
8370	}
8371	}
8372
8373
/*
 * Convert SSE operations converting single-precision floating point values to signed double-words.
 */
8377	static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
8378	{
8379	ENTRY_BIN(cvtps2dq_u128),
8380	ENTRY_BIN(cvttps2dq_u128)
8381	};
8382
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR32I32, g_aSseConvertXmmR32I32)

/**
 * Produces the binary test data for every worker in g_aSseConvertXmmR32I32.
 *
 * Each input is run with the four rounding control values and both DAZ and FZ
 * settings, once with all exceptions masked and once with none masked.
 * Further records are written when exception flags were raised by those runs.
 *
 * @returns RTEXITCODE_SUCCESS; RTEXITCODE_FAILURE if an output could not be
 *          opened or closed.
 * @param   cTests          Requested number of random inputs (at least 192 are used).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials); /* random inputs first, then the specials */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128 pfnWorker = g_aSseConvertXmmR32I32[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseConvertXmmR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Fill the four lanes from the random source or from the special entry. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ar32[iLane] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[iLane];

            bool fAllNormal = true;
            for (unsigned iLane = 0; iLane < 4 && fAllNormal; iLane++)
                fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[iLane]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this slot with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: no exceptions masked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: unmasked again, with the raised flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: only if run 3 raised flags not seen before; mask all raised ones. */
                        uint32_t const fXcptPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fXcptPreset & fXcpt) != fXcptPreset)
                        {
                            fXcpt |= fXcptPreset;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrAllMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* With more than one flag raised: leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrOne = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOne;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8485
8486	static void SseConvertXmmR32I32Test(void)
8487	{
8488	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8489	{
8490	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
8491	continue;
8492
8493	SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
8494	uint32_t const cTests = g_aSseConvertXmmR32I32[iFn].cTests;
8495	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
8496	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
8497	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8498	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8499	{
8500	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8501	{
8502	X86XMMREG Res; RT_ZERO(Res);
8503
8504	uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8505	if ( fMxCsr != paTests[iTest].fMxcsrOut
8506	\|\| Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8507	\|\| Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8508	\|\| Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8509	\|\| Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8510	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
8511	"%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8512	"%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8513	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8514	FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
8515	FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
8516	iVar ? " " : "", fMxCsr,
8517	Res.ai32[0], Res.ai32[1],
8518	Res.ai32[2], Res.ai32[3],
8519	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8520	paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8521	paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8522	MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8523	( Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8524	\|\| Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8525	\|\| Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8526	\|\| Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8527	? " - val" : "",
8528	FormatMxcsr(paTests[iTest].fMxcsrIn));
8529	}
8530	}
8531
8532	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32I32[iFn]);
8533	}
8534	}
8535
8536
8537	/*
8538	* Convert SSE operations converting signed double-words to double-precision floating point values.
8539	*/
8540	static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
8541	{
8542	ENTRY_BIN(cvtdq2pd_u128)
8543	};
8544
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmI32R64, g_aSseConvertXmmI32R64)

/**
 * Produces the binary test data for every worker in g_aSseConvertXmmI32R64.
 *
 * Each input is run with the four rounding control values and both DAZ and FZ
 * settings, once with all exceptions masked and once with none masked.
 * Further records are written when exception flags were raised by those runs.
 *
 * @returns RTEXITCODE_SUCCESS; RTEXITCODE_FAILURE if an output could not be
 *          opened or closed.
 * @param   cTests          Requested number of random inputs (at least 192 are used).
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    size_t const cInputs = cTests + RT_ELEMENTS(s_aSpecials); /* random inputs first, then the specials */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128 pfnWorker = g_aSseConvertXmmI32R64[iFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseConvertXmmI32R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cInputs; iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* All four lanes: a random value each, or the same special in every lane. */
            for (unsigned iLane = 0; iLane < 4; iLane++)
                TestData.InVal.ai32[iLane] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exceptions masked. */
                        uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t const fMxcsrMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 2: no exceptions masked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: unmasked again, with the raised flags already set on input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsrPreset = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Run 4: only if run 3 raised flags not seen before; mask all raised ones. */
                        uint32_t const fXcptPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fXcptPreset & fXcpt) != fXcptPreset)
                        {
                            fXcpt |= fXcptPreset;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrAllMasked = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }

                        /* With more than one flag raised: leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t const fMxcsrOne = pfnWorker(fMxcsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOne;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8636
8637	static void SseConvertXmmI32R64Test(void)
8638	{
8639	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8640	{
8641	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
8642	continue;
8643
8644	SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8645	uint32_t const cTests = g_aSseConvertXmmI32R64[iFn].cTests;
8646	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8647	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8648	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8649	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8650	{
8651	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8652	{
8653	X86XMMREG Res; RT_ZERO(Res);
8654
8655	uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8656	if ( fMxCsr != paTests[iTest].fMxcsrOut
8657	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
8658	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8659	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8660	"%s -> mxcsr=%#08x %s'%s\n"
8661	"%s expected %#08x %s'%s%s%s (%s)\n",
8662	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8663	paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8664	paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8665	iVar ? " " : "", fMxCsr,
8666	FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
8667	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8668	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8669	MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8670	( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
8671	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8672	? " - val" : "",
8673	FormatMxcsr(paTests[iTest].fMxcsrIn));
8674	}
8675	}
8676
8677	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R64[iFn]);
8678	}
8679	}
8680
8681
/*
 * Convert SSE operations converting double-precision floating point values to signed double-word values.
 */
8685	static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8686	{
8687	ENTRY_BIN(cvtpd2dq_u128),
8688	ENTRY_BIN(cvttpd2dq_u128)
8689	};
8690
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR64I32, g_aSseConvertXmmR64I32)

/**
 * Writes the binary test-vector stream for each worker in
 * g_aSseConvertXmmR64I32.
 *
 * Each input pair is run through all 16 combinations of rounding mode, DAZ
 * and FZ.  For every combination one record is written with all exceptions
 * masked and one with all unmasked; if any exception flag was raised, further
 * MXCSR variations are recorded as well.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when GENERATE_BINARY_OPEN
 *          or GenerateBinaryClose reports failure.
 * @param   cTests          Requested number of generated input pairs; raised
 *                          to at least 192.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN unchanged.
 */
static RTEXITCODE SseConvertXmmR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Fixed input pairs appended after the cTests generated ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aSpecials);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Use the native worker if there is one, otherwise the regular one. */
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfnNative;
        if (!pfn)
            pfn = g_aSseConvertXmmR64I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalSeen = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            bool const fGenerated = iTest < cTests;
            if (fGenerated)
            {
                TestData.InVal.ar64[0] = RandR64Src(iTest);
                TestData.InVal.ar64[1] = RandR64Src(iTest);
            }
            else
            {
                TestData.InVal.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
            }

            bool const fBothNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                                  && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]);
            if (fBothNormal)
                cNormalSeen++;
            else if (fGenerated && cNormalSeen < cMinNormalPairs && iTest + cMinNormalPairs >= cTests)
            {
                /* Too few all-normal pairs so far and running out of slots: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                /* iCombo bits: [3:2] rounding mode, [1] DAZ, [0] FZ. */
                uint32_t const iRounding = iCombo >> 2;
                uint32_t       fIn       = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                         | (iRounding << X86_MXCSR_RC_SHIFT)
                                         | X86_MXCSR_XCPT_MASK;
                if (iCombo & 2)
                    fIn |= X86_MXCSR_DAZ;
                if (iCombo & 1)
                    fIn |= X86_MXCSR_FZ;

                /* Record 1: all exception mask bits set. */
                uint32_t const fOutMasked = pfn(fIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutMasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 2: all exception mask bits clear. */
                fIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fOutUnmasked = pfn(fIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutUnmasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Record 3: mask bits clear, raised flag bits ORed into the input. */
                fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fOutFlagged = pfn(fIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutFlagged;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 4: only when record 3 raised flags not yet in fXcpt. */
                uint32_t const fRaised = fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                if ((fRaised & fXcpt) != fRaised)
                {
                    fXcpt |= fRaised;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutAllMasked = pfn(fIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutAllMasked;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }

                /* With several flags raised, leave each one unmasked in turn. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutOne = pfn(fIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutOne;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8789
8790	static void SseConvertXmmR64I32Test(void)
8791	{
8792	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8793	{
8794	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
8795	continue;
8796
8797	SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8798	uint32_t const cTests = g_aSseConvertXmmR64I32[iFn].cTests;
8799	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8800	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8801	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8802	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8803	{
8804	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8805	{
8806	X86XMMREG Res; RT_ZERO(Res);
8807
8808	uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8809	if ( fMxCsr != paTests[iTest].fMxcsrOut
8810	\|\| Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8811	\|\| Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8812	\|\| Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8813	\|\| Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8814	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8815	"%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8816	"%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8817	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8818	FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8819	iVar ? " " : "", fMxCsr,
8820	Res.ai32[0], Res.ai32[1],
8821	Res.ai32[2], Res.ai32[3],
8822	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8823	paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8824	paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8825	MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8826	( Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8827	\|\| Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8828	\|\| Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8829	\|\| Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8830	? " - val" : "",
8831	FormatMxcsr(paTests[iTest].fMxcsrIn));
8832	}
8833	}
8834
8835	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64I32[iFn]);
8836	}
8837	}
8838
8839
8840	/*
8841	* Convert SSE operations converting double-precision floating point values to signed double-word values.
8842	*/
8843	TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8844
8845	static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8846	{
8847	ENTRY_BIN(cvtpd2pi_u128),
8848	ENTRY_BIN(cvttpd2pi_u128)
8849	};
8850
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertMmXmm, g_aSseConvertMmXmm)

/**
 * Writes the binary test-vector stream for each worker in g_aSseConvertMmXmm.
 *
 * Each input pair is run through all 16 combinations of rounding mode, DAZ
 * and FZ.  For every combination one record is written with all exceptions
 * masked and one with all unmasked; if any exception flag was raised, further
 * MXCSR variations are recorded as well.  The worker's 64-bit result is copied
 * into the record before it is written.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when GENERATE_BINARY_OPEN
 *          or GenerateBinaryClose reports failure.
 * @param   cTests          Requested number of generated input pairs; raised
 *                          to at least 192.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN unchanged.
 */
static RTEXITCODE SseConvertMmXmmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Fixed input pairs appended after the cTests generated ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aSpecials);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Use the native worker if there is one, otherwise the regular one. */
        PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfnNative;
        if (!pfn)
            pfn = g_aSseConvertMmXmm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmXmm[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalSeen = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            bool const fGenerated = iTest < cTests;
            if (fGenerated)
            {
                TestData.InVal.ar64[0] = RandR64Src(iTest);
                TestData.InVal.ar64[1] = RandR64Src(iTest);
            }
            else
            {
                TestData.InVal.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
            }

            bool const fBothNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                                  && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]);
            if (fBothNormal)
                cNormalSeen++;
            else if (fGenerated && cNormalSeen < cMinNormalPairs && iTest + cMinNormalPairs >= cTests)
            {
                /* Too few all-normal pairs so far and running out of slots: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                /* iCombo bits: [3:2] rounding mode, [1] DAZ, [0] FZ. */
                uint32_t const iRounding = iCombo >> 2;
                uint32_t       fIn       = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                         | (iRounding << X86_MXCSR_RC_SHIFT)
                                         | X86_MXCSR_XCPT_MASK;
                if (iCombo & 2)
                    fIn |= X86_MXCSR_DAZ;
                if (iCombo & 1)
                    fIn |= X86_MXCSR_FZ;

                uint64_t u64Res;

                /* Record 1: all exception mask bits set. */
                uint32_t const fOutMasked = pfn(fIn, &u64Res, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutMasked;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 2: all exception mask bits clear. */
                fIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fOutUnmasked = pfn(fIn, &u64Res, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutUnmasked;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Record 3: mask bits clear, raised flag bits ORed into the input. */
                fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fOutFlagged = pfn(fIn, &u64Res, &TestData.InVal);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutFlagged;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 4: only when record 3 raised flags not yet in fXcpt. */
                uint32_t const fRaised = fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                if ((fRaised & fXcpt) != fRaised)
                {
                    fXcpt |= fRaised;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutAllMasked = pfn(fIn, &u64Res, &TestData.InVal);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutAllMasked;
                    TestData.OutVal.u  = u64Res;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }

                /* With several flags raised, leave each one unmasked in turn. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutOne = pfn(fIn, &u64Res, &TestData.InVal);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutOne;
                    TestData.OutVal.u  = u64Res;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8959
8960	static void SseConvertMmXmmTest(void)
8961	{
8962	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8963	{
8964	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
8965	continue;
8966
8967	SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8968	uint32_t const cTests = g_aSseConvertMmXmm[iFn].cTests;
8969	PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8970	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8971	if (!cTests) RTTestSkipped(g_hTest, "no tests");
8972	for (uint32_t iVar = 0; iVar < cVars; iVar++)
8973	{
8974	for (uint32_t iTest = 0; iTest < cTests; iTest++)
8975	{
8976	RTUINT64U ValOut;
8977	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, &paTests[iTest].InVal);
8978	if ( fMxcsr != paTests[iTest].fMxcsrOut
8979	\|\| ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8980	\|\| ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8981	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8982	"%s -> mxcsr=%#08x %RI32'%RI32\n"
8983	"%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8984	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8985	FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8986	iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8987	iVar ? " " : "", paTests[iTest].fMxcsrOut,
8988	paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8989	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8990	( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8991	\|\| ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8992	? " - val" : "",
8993	FormatMxcsr(paTests[iTest].fMxcsrIn));
8994	}
8995	}
8996
8997	FREE_DECOMPRESSED_TESTS(g_aSseConvertMmXmm[iFn]);
8998	}
8999	}
9000
9001
9002	/*
9003	* Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
9004	*/
9005	TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
9006
9007	static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
9008	{
9009	ENTRY_BIN(cvtpi2pd_u128)
9010	};
9011
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR64Mm, g_aSseConvertXmmR64Mm)

/**
 * Writes the binary test-vector stream for each worker in
 * g_aSseConvertXmmR64Mm.
 *
 * Each pair of signed 32-bit inputs is run through all 16 combinations of
 * rounding mode, DAZ and FZ.  For every combination one record is written with
 * all exceptions masked and one with all unmasked; if any exception flag was
 * raised, further MXCSR variations are recorded as well.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when GENERATE_BINARY_OPEN
 *          or GenerateBinaryClose reports failure.
 * @param   cTests          Requested number of generated input pairs; raised
 *                          to at least 192.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN unchanged.
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Fixed input pairs appended after the cTests generated ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Use the native worker if there is one, otherwise the regular one. */
        PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative;
        if (!pfn)
            pfn = g_aSseConvertXmmR64Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal.ai32[0] = RandI32Src2(iTest);
                TestData.InVal.ai32[1] = RandI32Src2(iTest);
            }
            else
            {
                TestData.InVal.ai32[0] = s_aSpecials[iTest - cTests].aVal[0];
                TestData.InVal.ai32[1] = s_aSpecials[iTest - cTests].aVal[1];
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                /* iCombo bits: [3:2] rounding mode, [1] DAZ, [0] FZ. */
                uint32_t const iRounding = iCombo >> 2;
                uint32_t       fIn       = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                         | (iRounding << X86_MXCSR_RC_SHIFT)
                                         | X86_MXCSR_XCPT_MASK;
                if (iCombo & 2)
                    fIn |= X86_MXCSR_DAZ;
                if (iCombo & 1)
                    fIn |= X86_MXCSR_FZ;

                /* Record 1: all exception mask bits set. */
                uint32_t const fOutMasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutMasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 2: all exception mask bits clear. */
                fIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fOutUnmasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutUnmasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Record 3: mask bits clear, raised flag bits ORed into the input. */
                fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fOutFlagged = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutFlagged;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 4: only when record 3 raised flags not yet in fXcpt. */
                uint32_t const fRaised = fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                if ((fRaised & fXcpt) != fRaised)
                {
                    fXcpt |= fRaised;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutAllMasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutAllMasked;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }

                /* With several flags raised, leave each one unmasked in turn. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutOne = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutOne;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9097
9098	static void SseConvertXmmR64MmTest(void)
9099	{
9100	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
9101	{
9102	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
9103	continue;
9104
9105	SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
9106	uint32_t const cTests = g_aSseConvertXmmR64Mm[iFn].cTests;
9107	PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
9108	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
9109	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9110	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9111	{
9112	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9113	{
9114	X86XMMREG ValOut;
9115	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
9116	if ( fMxcsr != paTests[iTest].fMxcsrOut
9117	\|\| !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
9118	\|\| !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
9119	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
9120	"%s -> mxcsr=%#08x %s'%s\n"
9121	"%s expected %#08x %s'%s%s%s (%s)\n",
9122	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9123	paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
9124	iVar ? " " : "", fMxcsr,
9125	FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
9126	iVar ? " " : "", paTests[iTest].fMxcsrOut,
9127	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
9128	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9129	( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
9130	\|\| !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
9131	? " - val" : "",
9132	FormatMxcsr(paTests[iTest].fMxcsrIn));
9133	}
9134	}
9135
9136	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64Mm[iFn]);
9137	}
9138	}
9139
9140
/*
 * Convert SSE operations converting signed double-word values to single precision floating-point values (probably only cvtpi2ps).
 */
9144	TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
9145
9146	static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
9147	{
9148	ENTRY_BIN(cvtpi2ps_u128)
9149	};
9150
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertXmmR32Mm, g_aSseConvertXmmR32Mm)

/**
 * Writes the binary test-vector stream for each worker in
 * g_aSseConvertXmmR32Mm.
 *
 * Each pair of signed 32-bit inputs is run through all 16 combinations of
 * rounding mode, DAZ and FZ.  For every combination one record is written with
 * all exceptions masked and one with all unmasked; if any exception flag was
 * raised, further MXCSR variations are recorded as well.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when GENERATE_BINARY_OPEN
 *          or GenerateBinaryClose reports failure.
 * @param   cTests          Requested number of generated input pairs; raised
 *                          to at least 192.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN unchanged.
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Fixed input pairs appended after the cTests generated ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    size_t const cTotalInputs = cTests + RT_ELEMENTS(s_aSpecials);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Use the native worker if there is one, otherwise the regular one. */
        PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative;
        if (!pfn)
            pfn = g_aSseConvertXmmR32Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal.ai32[0] = RandI32Src2(iTest);
                TestData.InVal.ai32[1] = RandI32Src2(iTest);
            }
            else
            {
                TestData.InVal.ai32[0] = s_aSpecials[iTest - cTests].aVal[0];
                TestData.InVal.ai32[1] = s_aSpecials[iTest - cTests].aVal[1];
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                /* iCombo bits: [3:2] rounding mode, [1] DAZ, [0] FZ. */
                uint32_t const iRounding = iCombo >> 2;
                uint32_t       fIn       = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                         | (iRounding << X86_MXCSR_RC_SHIFT)
                                         | X86_MXCSR_XCPT_MASK;
                if (iCombo & 2)
                    fIn |= X86_MXCSR_DAZ;
                if (iCombo & 1)
                    fIn |= X86_MXCSR_FZ;

                /* Record 1: all exception mask bits set. */
                uint32_t const fOutMasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutMasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 2: all exception mask bits clear. */
                fIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fOutUnmasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutUnmasked;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Record 3: mask bits clear, raised flag bits ORed into the input. */
                fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fOutFlagged = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutFlagged;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 4: only when record 3 raised flags not yet in fXcpt. */
                uint32_t const fRaised = fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                if ((fRaised & fXcpt) != fRaised)
                {
                    fXcpt |= fRaised;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutAllMasked = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutAllMasked;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }

                /* With several flags raised, leave each one unmasked in turn. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutOne = pfn(fIn, &TestData.OutVal, TestData.InVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutOne;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9236
9237	static void SseConvertXmmR32MmTest(void)
9238	{
9239	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
9240	{
9241	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
9242	continue;
9243
9244	SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
9245	uint32_t const cTests = g_aSseConvertXmmR32Mm[iFn].cTests;
9246	PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
9247	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
9248	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9249	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9250	{
9251	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9252	{
9253	X86XMMREG ValOut;
9254	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
9255	if ( fMxcsr != paTests[iTest].fMxcsrOut
9256	\|\| !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9257	\|\| !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9258	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
9259	"%s -> mxcsr=%#08x %s'%s\n"
9260	"%s expected %#08x %s'%s%s%s (%s)\n",
9261	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9262	paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
9263	iVar ? " " : "", fMxcsr,
9264	FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
9265	iVar ? " " : "", paTests[iTest].fMxcsrOut,
9266	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
9267	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9268	( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9269	\|\| !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9270	? " - val" : "",
9271	FormatMxcsr(paTests[iTest].fMxcsrIn));
9272	}
9273	}
9274
9275	FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32Mm[iFn]);
9276	}
9277	}
9278
9279
9280	/*
9281	* Convert SSE operations converting single-precision floating point values to signed double-word values.
9282	*/
9283	TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
9284
9285	static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
9286	{
9287	ENTRY_BIN(cvtps2pi_u128),
9288	ENTRY_BIN(cvttps2pi_u128)
9289	};
9290
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseConvertMmI32XmmR32, g_aSseConvertMmI32XmmR32)

/**
 * Writes the binary test-vector stream for each worker in
 * g_aSseConvertMmI32XmmR32.
 *
 * The two 32-bit floating point inputs are packed into one 64-bit value that
 * is handed to the worker.  Each pair is run through all 16 combinations of
 * rounding mode, DAZ and FZ.  For every combination one record is written with
 * all exceptions masked and one with all unmasked; if any exception flag was
 * raised, further MXCSR variations are recorded as well.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when GENERATE_BINARY_OPEN
 *          or GenerateBinaryClose reports failure.
 * @param   cTests          Requested number of generated input pairs; raised
 *                          to at least 192.
 * @param   papszNameFmts   Passed on to GENERATE_BINARY_OPEN unchanged.
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Fixed input pairs appended after the cTests generated ones. */
    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aSpecials);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        /* Use the native worker if there is one, otherwise the regular one. */
        PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative;
        if (!pfn)
            pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmI32XmmR32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalSeen = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest++)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            bool const fGenerated = iTest < cTests;
            if (fGenerated)
            {
                TestData.ar32InVal[0] = RandR32Src(iTest);
                TestData.ar32InVal[1] = RandR32Src(iTest);
            }
            else
            {
                TestData.ar32InVal[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.ar32InVal[1] = s_aSpecials[iTest - cTests].aVal1[1];
            }

            bool const fBothNormal = RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                                  && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]);
            if (fBothNormal)
                cNormalSeen++;
            else if (fGenerated && cNormalSeen < cMinNormalPairs && iTest + cMinNormalPairs >= cTests)
            {
                /* Too few all-normal pairs so far and running out of slots: redo this index. */
                iTest -= 1;
                continue;
            }

            /* Pack both 32-bit inputs into the 64-bit operand passed by value. */
            RTFLOAT64U TestVal;
            TestVal.au32[0] = TestData.ar32InVal[0].u;
            TestVal.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint32_t iCombo = 0; iCombo < 16; iCombo++)
            {
                /* iCombo bits: [3:2] rounding mode, [1] DAZ, [0] FZ. */
                uint32_t const iRounding = iCombo >> 2;
                uint32_t       fIn       = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                         | (iRounding << X86_MXCSR_RC_SHIFT)
                                         | X86_MXCSR_XCPT_MASK;
                if (iCombo & 2)
                    fIn |= X86_MXCSR_DAZ;
                if (iCombo & 1)
                    fIn |= X86_MXCSR_FZ;

                uint64_t u64Res;

                /* Record 1: all exception mask bits set. */
                uint32_t const fOutMasked = pfn(fIn, &u64Res, TestVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutMasked;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 2: all exception mask bits clear. */
                fIn &= ~X86_MXCSR_XCPT_MASK;
                uint32_t const fOutUnmasked = pfn(fIn, &u64Res, TestVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutUnmasked;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fOutMasked | fOutUnmasked) & X86_MXCSR_XCPT_FLAGS;
                if (!fXcpt)
                    continue;

                /* Record 3: mask bits clear, raised flag bits ORed into the input. */
                fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                uint32_t const fOutFlagged = pfn(fIn, &u64Res, TestVal.u);
                TestData.fMxcsrIn  = fIn;
                TestData.fMxcsrOut = fOutFlagged;
                TestData.OutVal.u  = u64Res;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Record 4: only when record 3 raised flags not yet in fXcpt. */
                uint32_t const fRaised = fOutFlagged & X86_MXCSR_XCPT_FLAGS;
                if ((fRaised & fXcpt) != fRaised)
                {
                    fXcpt |= fRaised;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutAllMasked = pfn(fIn, &u64Res, TestVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutAllMasked;
                    TestData.OutVal.u  = u64Res;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }

                /* With several flags raised, leave each one unmasked in turn. */
                if (RT_IS_POWER_OF_TWO(fXcpt))
                    continue;
                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                {
                    if (!(fUnmasked & fXcpt))
                        continue;
                    fIn = (fIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                    uint32_t const fOutOne = pfn(fIn, &u64Res, TestVal.u);
                    TestData.fMxcsrIn  = fIn;
                    TestData.fMxcsrOut = fOutOne;
                    TestData.OutVal.u  = u64Res;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9403
9404	static void SseConvertMmI32XmmR32Test(void)
9405	{
9406	X86FXSTATE State;
9407	RT_ZERO(State);
9408
9409	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9410	{
9411	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9412	continue;
9413
9414	SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9415	uint32_t const cTests = g_aSseConvertMmI32XmmR32[iFn].cTests;
9416	PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9417	uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9418	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9419	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9420	{
9421	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9422	{
9423	RTUINT64U ValOut;
9424	RTUINT64U ValIn;
9425
9426	ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9427	ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9428
9429	uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, ValIn.u);
9430	if ( fMxcsr != paTests[iTest].fMxcsrOut
9431	\|\| ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9432	\|\| ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9433	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
9434	"%s -> mxcsr=%#08x %RI32'%RI32\n"
9435	"%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9436	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9437	FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9438	iVar ? " " : "", fMxcsr,
9439	ValOut.ai32[0], ValOut.ai32[1],
9440	iVar ? " " : "", paTests[iTest].fMxcsrOut,
9441	paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9442	MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9443	( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9444	\|\| ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9445	? " - val" : "",
9446	FormatMxcsr(paTests[iTest].fMxcsrIn));
9447	}
9448	}
9449
9450	FREE_DECOMPRESSED_TESTS(g_aSseConvertMmI32XmmR32[iFn]);
9451	}
9452	}
9453
9454
9455	/*
9456	* SSE 4.2 pcmpxstrx instructions.
9457	*/
/* Sub-test descriptor type for pcmpistri: pairs SSE_PCMPISTRI_TEST_T records
   with PFNIEMAIMPLPCMPISTRIU128IMM8 workers.  The users below access the
   pfn, pfnNative, paTests and cTests members of each entry. */
9458	TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
9459
/* The pcmpistri workers covered by SseComparePcmpistriTest and, in generator
   builds, SseComparePcmpistriGenerate: the legacy and the v-prefixed form. */
9460	static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
9461	{
9462	ENTRY_BIN_SSE_OPT(pcmpistri_u128),
9463	ENTRY_BIN_SSE_OPT(vpcmpistri_u128),
9464	};
9465
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpistri, g_aSsePcmpistri)
/**
 * Generates the binary test data for every pcmpistri worker in
 * g_aSsePcmpistri.
 *
 * For each input pair (random ones first, then the entries of s_aSpecials) two
 * passes over all 256 immediate values are recorded: one with the two distinct
 * operands, one with the second operand set equal to the first.  All records
 * of one input pair share the same randomly chosen input EFLAGS.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file could
 *          not be opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Forwarded unmodified to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseComparePcmpistriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
    {
        /* Prefer the native worker as the reference when the entry has one. */
        PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const fEFlagsIn = RandEFlags();
            for (unsigned iPass = 0; iPass < 2; iPass++)
            {
                /* Second pass: repeat the test with the input value being the same.
                   Without this assignment the second pass merely duplicated the
                   records of the first one. */
                if (iPass == 1)
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;

                for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                {
                    uint32_t fEFlagsOut = fEFlagsIn;
                    TestData.u32EcxOut  = pfn(&fEFlagsOut, &TestData.InVal1.uXmm, &TestData.InVal2.uXmm, (uint8_t)u16Imm);
                    TestData.fEFlagsIn  = fEFlagsIn;
                    TestData.fEFlagsOut = fEFlagsOut;
                    TestData.bImm       = (uint8_t)u16Imm;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9520
9521	static void SseComparePcmpistriTest(void)
9522	{
9523	X86FXSTATE State;
9524	RT_ZERO(State);
9525
9526	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9527	{
9528	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9529	continue;
9530
9531	SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9532	uint32_t const cTests = g_aSsePcmpistri[iFn].cTests;
9533	PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9534	uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9535	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9536	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9537	{
9538	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9539	{
9540	uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9541	uint32_t u32EcxOut = pfn(&fEFlags, &paTests[iTest].InVal1.uXmm, &paTests[iTest].InVal2.uXmm, paTests[iTest].bImm);
9542	if ( fEFlags != paTests[iTest].fEFlagsOut
9543	\|\| u32EcxOut != paTests[iTest].u32EcxOut)
9544	RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9545	"%s -> efl=%#08x %RU32\n"
9546	"%s expected %#08x %RU32%s%s\n",
9547	iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9548	FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9549	iVar ? " " : "", fEFlags, u32EcxOut,
9550	iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9551	EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9552	(u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9553	}
9554	}
9555
9556	FREE_DECOMPRESSED_TESTS(g_aSsePcmpistri[iFn]);
9557	}
9558	}
9559
9560
/* Sub-test descriptor type for pcmpistrm: pairs SSE_PCMPISTRM_TEST_T records
   with PFNIEMAIMPLPCMPISTRMU128IMM8 workers.  The users below access the
   pfn, pfnNative, paTests and cTests members of each entry. */
9561	TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
9562
/* The pcmpistrm workers covered by SseComparePcmpistrmTest and, in generator
   builds, SseComparePcmpistrmGenerate: the legacy and the v-prefixed form. */
9563	static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
9564	{
9565	ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
9566	ENTRY_BIN_SSE_OPT(vpcmpistrm_u128),
9567	};
9568
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpistrm, g_aSsePcmpistrm)
/**
 * Generates the binary test data for every pcmpistrm worker in
 * g_aSsePcmpistrm.
 *
 * For each input pair (random ones first, then the entries of s_aSpecials) two
 * passes over all 256 immediate values are recorded: one with the two distinct
 * operands, one with the second operand set equal to the first.  All records
 * of one input pair share the same randomly chosen input EFLAGS.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file could
 *          not be opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Forwarded unmodified to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseComparePcmpistrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
    {
        /* Prefer the native worker as the reference when the entry has one. */
        PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistrm[iFn]), RTEXITCODE_FAILURE);

        uint32_t const cTotal = cTests + RT_ELEMENTS(s_aSpecials);
        for (uint32_t iTest = 0; iTest < cTotal; iTest += 1)
        {
            SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.uXmm = RandU128();
                TestData.InVal2.uXmm = RandU128();
            }
            else
            {
                TestData.InVal1.uXmm = s_aSpecials[iTest - cTests].uSrc1;
                TestData.InVal2.uXmm = s_aSpecials[iTest - cTests].uSrc2;
            }

            uint32_t const fEFlagsIn = RandEFlags();

            /* Pass 0 uses the operands as picked, pass 1 repeats the test
               with the input value being the same. */
            for (unsigned iPass = 0; iPass < 2; iPass++)
            {
                if (iPass == 1)
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;

                IEMPCMPISTRXSRC TestVal;
                TestVal.uSrc1 = TestData.InVal1.uXmm;
                TestVal.uSrc2 = TestData.InVal2.uXmm;

                for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                {
                    uint32_t fEFlagsOut = fEFlagsIn;
                    pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                    TestData.fEFlagsIn  = fEFlagsIn;
                    TestData.fEFlagsOut = fEFlagsOut;
                    TestData.bImm       = (uint8_t)u16Imm;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9631
9632	static void SseComparePcmpistrmTest(void)
9633	{
9634	X86FXSTATE State;
9635	RT_ZERO(State);
9636
9637	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9638	{
9639	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9640	continue;
9641
9642	SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9643	uint32_t const cTests = g_aSsePcmpistrm[iFn].cTests;
9644	PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9645	uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9646	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9647	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9648	{
9649	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9650	{
9651	IEMPCMPISTRXSRC TestVal;
9652	TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9653	TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9654
9655	uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9656	RTUINT128U OutVal;
9657	pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9658	if ( fEFlags != paTests[iTest].fEFlagsOut
9659	\|\| OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9660	\|\| OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9661	RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9662	"%s -> efl=%#08x %s\n"
9663	"%s expected %#08x %s%s%s\n",
9664	iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9665	FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9666	iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9667	iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9668	EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9669	( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9670	\|\| OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9671	}
9672	}
9673
9674	FREE_DECOMPRESSED_TESTS(g_aSsePcmpistrm[iFn]);
9675	}
9676	}
9677
9678
/* Sub-test descriptor type for pcmpestri: pairs SSE_PCMPESTRI_TEST_T records
   with PFNIEMAIMPLPCMPESTRIU128IMM8 workers.  The users below access the
   pfn, pfnNative, paTests and cTests members of each entry. */
9679	TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9680
/* The pcmpestri workers covered by SseComparePcmpestriTest and, in generator
   builds, SseComparePcmpestriGenerate: the legacy and the v-prefixed form. */
9681	static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9682	{
9683	ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9684	ENTRY_BIN_SSE_OPT(vpcmpestri_u128),
9685	};
9686
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpestri, g_aSsePcmpestri)
/**
 * Generates the binary test data for every pcmpestri worker in
 * g_aSsePcmpestri.
 *
 * For each input pair (random ones first, then the entries of s_aSpecials)
 * and each RAX/RDX combination, two passes over all 256 immediate values are
 * recorded: one with the two distinct operands, one with the second operand
 * set equal to the first.  Both passes of a combination share the same
 * randomly chosen input EFLAGS.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file could
 *          not be opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Forwarded unmodified to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseComparePcmpestriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
    {
        /* Prefer the native worker as the reference when the entry has one. */
        PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Remember the distinct second operand: the "same input" pass below
               overwrites TestData.InVal2, and without restoring it every
               RAX/RDX combination after the first would only ever see two
               identical operands. */
            RTUINT128U const uSrc2Distinct = TestData.InVal2.uXmm;

            /* NOTE(review): with a step of 20 and an exclusive bound of 20 these
               loops only visit -20 and 0; confirm whether +20 was intended. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1  = TestData.InVal1.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();
                    for (unsigned iPass = 0; iPass < 2; iPass++)
                    {
                        /* Pass 0: distinct operands.  Pass 1: repeat the test with
                           the input value being the same. */
                        if (iPass == 0)
                            TestData.InVal2.uXmm = uSrc2Distinct;
                        else
                            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                        TestVal.uSrc2 = TestData.InVal2.uXmm;

                        for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9758
9759	static void SseComparePcmpestriTest(void)
9760	{
9761	X86FXSTATE State;
9762	RT_ZERO(State);
9763
9764	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9765	{
9766	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9767	continue;
9768
9769	SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9770	uint32_t const cTests = g_aSsePcmpestri[iFn].cTests;
9771	PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9772	uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9773	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9774	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9775	{
9776	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9777	{
9778	IEMPCMPESTRXSRC TestVal;
9779	TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9780	TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9781	TestVal.u64Rax = paTests[iTest].u64Rax;
9782	TestVal.u64Rdx = paTests[iTest].u64Rdx;
9783
9784	uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9785	uint32_t u32EcxOut = 0;
9786	pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9787	if ( fEFlags != paTests[iTest].fEFlagsOut
9788	\|\| u32EcxOut != paTests[iTest].u32EcxOut)
9789	RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9790	"%s -> efl=%#08x %RU32\n"
9791	"%s expected %#08x %RU32%s%s\n",
9792	iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9793	FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9794	FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9795	paTests[iTest].bImm,
9796	iVar ? " " : "", fEFlags, u32EcxOut,
9797	iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9798	EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9799	(u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9800	}
9801	}
9802
9803	FREE_DECOMPRESSED_TESTS(g_aSsePcmpestri[iFn]);
9804	}
9805	}
9806
9807
/* Sub-test descriptor type for pcmpestrm: pairs SSE_PCMPESTRM_TEST_T records
   with PFNIEMAIMPLPCMPESTRMU128IMM8 workers.  The users below access the
   pfn, pfnNative, paTests and cTests members of each entry. */
9808	TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9809
/* The pcmpestrm workers covered by SseComparePcmpestrmTest and, in generator
   builds, SseComparePcmpestrmGenerate: the legacy and the v-prefixed form. */
9810	static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9811	{
9812	ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9813	ENTRY_BIN_SSE_OPT(vpcmpestrm_u128),
9814	};
9815
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpestrm, g_aSsePcmpestrm)
/**
 * Generates the binary test data for every pcmpestrm worker in
 * g_aSsePcmpestrm.
 *
 * For each input pair (random ones first, then the entries of s_aSpecials)
 * and each RAX/RDX combination, two passes over all 256 immediate values are
 * recorded: one with the two distinct operands, one with the second operand
 * set equal to the first.  Both passes of a combination share the same
 * randomly chosen input EFLAGS.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if an output file could
 *          not be opened or closed.
 * @param   cTests          Number of random input pairs; raised to at least 192.
 * @param   papszNameFmts   Forwarded unmodified to GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseComparePcmpestrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
    {
        /* Prefer the native worker as the reference when the entry has one. */
        PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestrm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* Remember the distinct second operand: the "same input" pass below
               overwrites TestData.InVal2, and without restoring it every
               RAX/RDX combination after the first would only ever see two
               identical operands. */
            RTUINT128U const uSrc2Distinct = TestData.InVal2.uXmm;

            /* NOTE(review): with a step of 20 and an exclusive bound of 20 these
               loops only visit -20 and 0; confirm whether +20 was intended. */
            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1  = TestData.InVal1.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();
                    for (unsigned iPass = 0; iPass < 2; iPass++)
                    {
                        /* Pass 0: distinct operands.  Pass 1: repeat the test with
                           the input value being the same. */
                        if (iPass == 0)
                            TestData.InVal2.uXmm = uSrc2Distinct;
                        else
                            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                        TestVal.uSrc2 = TestData.InVal2.uXmm;

                        for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                        {
                            uint32_t fEFlagsOut = fEFlagsIn;
                            pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                            TestData.fEFlagsIn  = fEFlagsIn;
                            TestData.fEFlagsOut = fEFlagsOut;
                            TestData.bImm       = (uint8_t)u16Imm;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                        }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9887
9888	static void SseComparePcmpestrmTest(void)
9889	{
9890	X86FXSTATE State;
9891	RT_ZERO(State);
9892
9893	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9894	{
9895	if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9896	continue;
9897
9898	SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9899	uint32_t const cTests = g_aSsePcmpestrm[iFn].cTests;
9900	PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9901	uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9902	if (!cTests) RTTestSkipped(g_hTest, "no tests");
9903	for (uint32_t iVar = 0; iVar < cVars; iVar++)
9904	{
9905	for (uint32_t iTest = 0; iTest < cTests; iTest++)
9906	{
9907	IEMPCMPESTRXSRC TestVal;
9908	TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9909	TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9910	TestVal.u64Rax = paTests[iTest].u64Rax;
9911	TestVal.u64Rdx = paTests[iTest].u64Rdx;
9912
9913	uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9914	RTUINT128U OutVal;
9915	pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9916	if ( fEFlags != paTests[iTest].fEFlagsOut
9917	\|\| OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9918	\|\| OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9919	RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9920	"%s -> efl=%#08x %s\n"
9921	"%s expected %#08x %s%s%s\n",
9922	iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9923	FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9924	FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9925	paTests[iTest].bImm,
9926	iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9927	iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9928	EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9929	( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9930	\|\| OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9931	}
9932	}
9933
9934	FREE_DECOMPRESSED_TESTS(g_aSsePcmpestrm[iFn]);
9935	}
9936	}
9937
9938
9939
9940	int main(int argc, char **argv)
9941	{
9942	int rc = RTR3InitExe(argc, &argv, 0);
9943	if (RT_FAILURE(rc))
9944	return RTMsgInitFailure(rc);
9945
9946	/*
9947	* Determine the host CPU.
9948	* If not using the IEMAllAImpl.asm code, this will be set to Intel.
9949	*/
9950	#if (defined(RT_ARCH_X86) \|\| defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9951	g_idxCpuEflFlavour = ASMIsAmdCpu() \|\| ASMIsHygonCpu()
9952	? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9953	: IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9954	#else
9955	g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9956	#endif
9957
9958	/*
9959	* Parse arguments.
9960	*/
9961	enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9962	enmMode = kModeNotSet;
9963	#define CATEGORY_INT RT_BIT_32(0)
9964	#define CATEGORY_FPU_LD_ST RT_BIT_32(1)
9965	#define CATEGORY_FPU_BINARY_1 RT_BIT_32(2)
9966	#define CATEGORY_FPU_BINARY_2 RT_BIT_32(3)
9967	#define CATEGORY_FPU_OTHER RT_BIT_32(4)
9968	#define CATEGORY_SSE_FP_BINARY RT_BIT_32(5)
9969	#define CATEGORY_SSE_FP_OTHER RT_BIT_32(6)
9970	#define CATEGORY_SSE_PCMPXSTRX RT_BIT_32(7)
9971	uint32_t fCategories = UINT32_MAX;
9972	bool fCpuData = true;
9973	bool fCommonData = true;
9974	uint32_t const cDefaultTests = 96;
9975	uint32_t cTests = cDefaultTests;
9976
9977	RTGETOPTDEF const s_aOptions[] =
9978	{
9979	// mode:
9980	{ "--generate", 'g', RTGETOPT_REQ_NOTHING },
9981	{ "--dump", 'G', RTGETOPT_REQ_NOTHING },
9982	{ "--test", 't', RTGETOPT_REQ_NOTHING },
9983	{ "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9984	// test selection (both)
9985	{ "--all", 'a', RTGETOPT_REQ_NOTHING },
9986	{ "--none", 'z', RTGETOPT_REQ_NOTHING },
9987	{ "--zap", 'z', RTGETOPT_REQ_NOTHING },
9988	{ "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9989	{ "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9990	{ "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9991	{ "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9992	{ "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9993	{ "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9994	{ "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9995	{ "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9996	{ "--int", 'i', RTGETOPT_REQ_NOTHING },
9997	{ "--include", 'I', RTGETOPT_REQ_STRING },
9998	{ "--exclude", 'X', RTGETOPT_REQ_STRING },
9999	// generation parameters
10000	{ "--common", 'm', RTGETOPT_REQ_NOTHING },
10001	{ "--cpu", 'c', RTGETOPT_REQ_NOTHING },
10002	{ "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
10003	{ "--verbose", 'v', RTGETOPT_REQ_NOTHING },
10004	{ "--quiet", 'q', RTGETOPT_REQ_NOTHING },
10005	{ "--quiet-skipping", 'Q', RTGETOPT_REQ_NOTHING },
10006	};
10007
10008	RTGETOPTSTATE State;
10009	rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
10010	AssertRCReturn(rc, RTEXITCODE_FAILURE);
10011
10012	RTGETOPTUNION ValueUnion;
10013	while ((rc = RTGetOpt(&State, &ValueUnion)))
10014	{
10015	switch (rc)
10016	{
10017	case 'g':
10018	enmMode = kModeGenerate;
10019	g_cPicoSecBenchmark = 0;
10020	break;
10021	case 'G':
10022	enmMode = kModeDump;
10023	g_cPicoSecBenchmark = 0;
10024	break;
10025	case 't':
10026	enmMode = kModeTest;
10027	g_cPicoSecBenchmark = 0;
10028	break;
10029	case 'b':
10030	enmMode = kModeTest;
10031	g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
10032	break;
10033
10034	case 'a':
10035	fCpuData = true;
10036	fCommonData = true;
10037	fCategories = UINT32_MAX;
10038	break;
10039	case 'z':
10040	fCpuData = false;
10041	fCommonData = false;
10042	fCategories = 0;
10043	break;
10044
10045	case 'F':
10046	fCategories \|= CATEGORY_FPU_LD_ST;
10047	break;
10048	case 'O':
10049	fCategories \|= CATEGORY_FPU_OTHER;
10050	break;
10051	case 'B':
10052	fCategories \|= CATEGORY_FPU_BINARY_1;
10053	break;
10054	case 'P':
10055	fCategories \|= CATEGORY_FPU_BINARY_2;
10056	break;
10057	case 'S':
10058	fCategories \|= CATEGORY_SSE_FP_BINARY;
10059	break;
10060	case 'T':
10061	fCategories \|= CATEGORY_SSE_FP_OTHER;
10062	break;
10063	case 'C':
10064	fCategories \|= CATEGORY_SSE_PCMPXSTRX;
10065	break;
10066	case 'i':
10067	fCategories \|= CATEGORY_INT;
10068	break;
10069
10070	case 'I':
10071	if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
10072	return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
10073	RT_ELEMENTS(g_apszIncludeTestPatterns));
10074	g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
10075	break;
10076	case 'X':
10077	if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
10078	return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
10079	RT_ELEMENTS(g_apszExcludeTestPatterns));
10080	g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
10081	break;
10082
10083	case 'm':
10084	fCommonData = true;
10085	break;
10086	case 'c':
10087	fCpuData = true;
10088	break;
10089	case 'n':
10090	cTests = ValueUnion.u32;
10091	break;
10092
10093	case 'q':
10094	g_cVerbosity = 0;
10095	break;
10096	case 'v':
10097	g_cVerbosity++;
10098	break;
10099	case 'Q':
10100	g_fVerboseSkipping = false;
10101	break;
10102
10103	case 'h':
10104	RTPrintf("usage: %Rbn <-g\|-t> [options]\n"
10105	"\n"
10106	"Mode:\n"
10107	" -g, --generate\n"
10108	" Generate test data.\n"
10109	" -t, --test\n"
10110	" Execute tests.\n"
10111	" -b, --benchmark\n"
10112	" Execute tests and do 1/2 seconds of benchmarking.\n"
10113	" Repeating the option increases the benchmark duration by 0.5 seconds.\n"
10114	"\n"
10115	"Test selection (both modes):\n"
10116	" -a, --all\n"
10117	" Enable all tests and generated test data. (default)\n"
10118	" -z, --zap, --none\n"
10119	" Disable all tests and test data types.\n"
10120	" -i, --int\n"
10121	" Enable non-FPU tests.\n"
10122	" -F, --fpu-ld-st\n"
10123	" Enable FPU load and store tests.\n"
10124	" -B, --fpu-binary-1\n"
10125	" Enable FPU binary 80-bit FP tests.\n"
10126	" -P, --fpu-binary-2\n"
10127	" Enable FPU binary 64- and 32-bit FP tests.\n"
10128	" -O, --fpu-other\n"
10129	" Enable FPU binary 64- and 32-bit FP tests.\n"
10130	" -S, --sse-fp-binary\n"
10131	" Enable SSE binary 64- and 32-bit FP tests.\n"
10132	" -T, --sse-fp-other\n"
10133	" Enable misc SSE 64- and 32-bit FP tests.\n"
10134	" -C, --sse-pcmpxstrx\n"
10135	" Enable SSE pcmpxstrx tests.\n"
10136	" -I,--include=<test-patter>\n"
10137	" Enable tests matching the given pattern.\n"
10138	" -X,--exclude=<test-patter>\n"
10139	" Skip tests matching the given pattern (overrides --include).\n"
10140	"\n"
10141	"Generation:\n"
10142	" -m, --common\n"
10143	" Enable generating common test data.\n"
10144	" -c, --only-cpu\n"
10145	" Enable generating CPU specific test data.\n"
10146	" -n, --number-of-test <count>\n"
10147	" Number of tests to generate. Default: %u\n"
10148	"\n"
10149	"Other:\n"
10150	" -v, --verbose\n"
10151	" -q, --quiet\n"
10152	" Noise level. Default: --quiet\n"
10153	" -Q, --quiet-skipping\n"
10154	" Don't display skipped tests.\n"
10155	"\n"
10156	"Tip! When working on a single instruction, use the the -I and -Q options to\n"
10157	" restrict the testing: %Rbn -tiQI \"shr_*\"\n"
10158	, argv[0], cDefaultTests, argv[0]);
10159	return RTEXITCODE_SUCCESS;
10160	default:
10161	return RTGetOptPrintError(rc, &ValueUnion);
10162	}
10163	}
10164
10165	static const struct
10166	{
10167	uint32_t fCategory;
10168	void (*pfnTest)(void);
10169	#ifdef TSTIEMAIMPL_WITH_GENERATOR
10170	const char *pszFilenameFmt;
10171	RTEXITCODE (pfnGenerate)(uint32_t cTests, const char const *papszNameFmts);
10172	RTEXITCODE (pfnDumpAll)(const char const *papszNameFmts);
10173	uint32_t cMinTests;
10174	# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10175	{ a_fCategory, a_BaseNm ## Test, a_szFilenameFmt, a_BaseNm ## Generate, a_BaseNm ## DumpAll, a_cMinTests }
10176	#else
10177	# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10178	{ a_fCategory, a_BaseNm ## Test }
10179	#endif
10180	#define GROUP_ENTRY_MANUAL(a_fCategory, a_BaseNm) \
10181	{ a_fCategory, a_BaseNm ## Test }
10182	} s_aGroups[] =
10183	{
10184	GROUP_ENTRY(CATEGORY_INT, BinU8, "tstIEMAImplDataInt-%s.bin.gz", 0),
10185	GROUP_ENTRY(CATEGORY_INT, BinU16, "tstIEMAImplDataInt-%s.bin.gz", 0),
10186	GROUP_ENTRY(CATEGORY_INT, BinU32, "tstIEMAImplDataInt-%s.bin.gz", 0),
10187	GROUP_ENTRY(CATEGORY_INT, BinU64, "tstIEMAImplDataInt-%s.bin.gz", 0),
10188	GROUP_ENTRY(CATEGORY_INT, ShiftDbl, "tstIEMAImplDataInt-%s.bin.gz", 128),
10189	GROUP_ENTRY(CATEGORY_INT, Unary, "tstIEMAImplDataInt-%s.bin.gz", 0),
10190	GROUP_ENTRY(CATEGORY_INT, Shift, "tstIEMAImplDataInt-%s.bin.gz", 0),
10191	GROUP_ENTRY(CATEGORY_INT, MulDiv, "tstIEMAImplDataInt-%s.bin.gz", 0),
10192	GROUP_ENTRY_MANUAL(CATEGORY_INT, Xchg),
10193	GROUP_ENTRY_MANUAL(CATEGORY_INT, Xadd),
10194	GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg),
10195	GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg8b),
10196	GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg16b),
10197	GROUP_ENTRY_MANUAL(CATEGORY_INT, Bswap),
10198
10199	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdConst, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10200	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10201	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10202	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10203
10204	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10205	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10206	GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10207
10208	GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10209	GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryFswR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10210	GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryEflR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10211
10212	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10213	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10214	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10215	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10216
10217	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10218	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10219	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10220	GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10221
10222	GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10223	GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryFswR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10224	GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryTwoR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10225
10226	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10227	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10228	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10229	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10230
10231	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10232	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10233	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10234	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10235
10236	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10237	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10238	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10239	GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10240
10241	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR32R32, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10242	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR64R64, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10243	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF3XmmR32Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10244	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF3XmmR64Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10245
10246	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10247	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10248	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R64, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10249	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10250	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmXmm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10251	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10252	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10253	GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmI32XmmR32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10254
10255	GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10256	GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10257	GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10258	GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10259	};
10260
10261	/*
10262	* Generate data?
10263	*/
10264	if (enmMode == kModeGenerate)
10265	{
10266	#ifdef TSTIEMAIMPL_WITH_GENERATOR
10267	if (cTests == 0)
10268	cTests = cDefaultTests;
10269	g_cZeroDstTests = RT_MIN(cTests / 16, 32);
10270	g_cZeroSrcTests = g_cZeroDstTests * 2;
10271
10272	RTMpGetDescription(NIL_RTCPUID, g_szCpuDesc, sizeof(g_szCpuDesc));
10273
10274	/* For the revision, use the highest for this file and VBoxRT. */
10275	static const char s_szRev[] = "$Revision: 106179 $";
10276	const char *pszRev = s_szRev;
10277	while (pszRev && !RT_C_IS_DIGIT(pszRev))
10278	pszRev++;
10279	g_uSvnRev = RTStrToUInt32(pszRev);
10280	g_uSvnRev = RT_MAX(g_uSvnRev, RTBldCfgRevision());
10281
10282	/* Loop thru the groups and call the generate for any that's enabled. */
10283	for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10284	if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnGenerate)
10285	{
10286	const char * const apszNameFmts[] =
10287	{
10288	/[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10289	/[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10290	/[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10291	};
10292	RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10293	if (rcExit != RTEXITCODE_SUCCESS)
10294	return rcExit;
10295	}
10296	return RTEXITCODE_SUCCESS;
10297	#else
10298	return RTMsgErrorExitFailure("Test data generator not compiled in!");
10299	#endif
10300	}
10301
10302	/*
10303	* Dump tables (used for the conversion, mostly useless now).
10304	*/
10305	if (enmMode == kModeDump)
10306	{
10307	#ifdef TSTIEMAIMPL_WITH_GENERATOR
10308	/* Loop thru the groups and call the generate for any that's enabled. */
10309	for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10310	if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnDumpAll)
10311	{
10312	const char * const apszNameFmts[] =
10313	{
10314	/[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10315	/[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10316	/[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10317	};
10318	RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10319	if (rcExit != RTEXITCODE_SUCCESS)
10320	return rcExit;
10321	}
10322	return RTEXITCODE_SUCCESS;
10323	#else
10324	return RTMsgErrorExitFailure("Test data generator not compiled in!");
10325	#endif
10326	}
10327
10328
10329	/*
10330	* Do testing. Currently disabled by default as data needs to be checked
10331	* on both intel and AMD systems first.
10332	*/
10333	rc = RTTestCreate("tstIEMAImpl", &g_hTest);
10334	AssertRCReturn(rc, RTEXITCODE_FAILURE);
10335	if (enmMode == kModeTest)
10336	{
10337	RTTestBanner(g_hTest);
10338
10339	/* Allocate guarded memory for use in the tests. */
10340	#define ALLOC_GUARDED_VAR(a_puVar) do { \
10341	rc = RTTestGuardedAlloc(g_hTest, sizeof(a_puVar), sizeof(a_puVar), false /fHead/, (void **)&a_puVar); \
10342	if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
10343	} while (0)
10344	ALLOC_GUARDED_VAR(g_pu8);
10345	ALLOC_GUARDED_VAR(g_pu16);
10346	ALLOC_GUARDED_VAR(g_pu32);
10347	ALLOC_GUARDED_VAR(g_pu64);
10348	ALLOC_GUARDED_VAR(g_pu128);
10349	ALLOC_GUARDED_VAR(g_pu8Two);
10350	ALLOC_GUARDED_VAR(g_pu16Two);
10351	ALLOC_GUARDED_VAR(g_pu32Two);
10352	ALLOC_GUARDED_VAR(g_pu64Two);
10353	ALLOC_GUARDED_VAR(g_pu128Two);
10354	ALLOC_GUARDED_VAR(g_pfEfl);
10355	if (RTTestErrorCount(g_hTest) == 0)
10356	{
10357	/* Loop thru the groups and call test function for anything that's enabled. */
10358	for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10359	if ((s_aGroups[i].fCategory & fCategories))
10360	s_aGroups[i].pfnTest();
10361	}
10362	return RTTestSummaryAndDestroy(g_hTest);
10363	}
10364	return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10365	}
10366

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp

Download in other formats: