tstIEMAImpl.cpp@ 96348

Last change on this file since 96348 was 96348, checked in by vboxsync, 2 years ago
VMM/testcase/tstIEMAImpl: Implement basic minss/minsd testcases, bugref:9898
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 254.9 KB

Line
1	/* $Id: tstIEMAImpl.cpp 96348 2022-08-19 17:00:45Z vboxsync $ */
2	/** @file
3	* IEM Assembly Instruction Helper Testcase.
4	*/
5
6	/*
7	* Copyright (C) 2022 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/*********************************************************************************************************************************
20	* Header Files *
21	*********************************************************************************************************************************/
22	#include "../include/IEMInternal.h"
23
24	#include <iprt/errcore.h>
25	#include <VBox/log.h>
26	#include <iprt/assert.h>
27	#include <iprt/ctype.h>
28	#include <iprt/getopt.h>
29	#include <iprt/initterm.h>
30	#include <iprt/message.h>
31	#include <iprt/mp.h>
32	#include <iprt/rand.h>
33	#include <iprt/stream.h>
34	#include <iprt/string.h>
35	#include <iprt/test.h>
36
37	#include "tstIEMAImpl.h"
38
39
40	/*********************************************************************************************************************************
41	* Defined Constants And Macros *
42	*********************************************************************************************************************************/
/** @name Subtest table entry initializers.
 *
 * Every macro expands to a brace initializer listing, in order: the subtest
 * name string, the worker to test, the native worker (or NULL), the test data
 * array, a pointer to its size variable, the extra value and the EFLAGS
 * behaviour flavour.  This is the member order of the structures declared by
 * TYPEDEF_SUBTEST_TYPE.
 * @{ */

/** Flavour-neutral entry with an extra value; size taken from g_cTests_<name>. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
        g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** Flavour-neutral entry with the extra value set to zero. */
#define ENTRY(a_Name)                       ENTRY_EX(a_Name, 0)

/** Like ENTRY_EX, but the size variable is g_cbTests_<name>. */
#define ENTRY_EX_BIN(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
        g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** Like ENTRY, but the size variable is g_cbTests_<name>. */
#define ENTRY_BIN(a_Name)                   ENTRY_EX_BIN(a_Name, 0)

/** Entry for the "_intel" worker variant, pairing it with the unsuffixed
 *  worker as the native one.  The a_fEflUndef argument is not used by the
 *  expansion. */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL \
    }
/** ENTRY_INTEL_EX with the extra value set to zero. */
#define ENTRY_INTEL(a_Name, a_fEflUndef)    ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)

/** Entry for the "_amd" worker variant, pairing it with the unsuffixed worker
 *  as the native one.  The a_fEflUndef argument is not used by the expansion. */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD \
    }
/** ENTRY_AMD_EX with the extra value set to zero. */
#define ENTRY_AMD(a_Name, a_fEflUndef)      ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)

/** @} */
66
/**
 * Declares a subtest descriptor structure type.
 *
 * @param   a_TypeName          Name of the structure and typedef to declare.
 * @param   a_TestType          Element type of the test data array.
 * @param   a_FunctionPtrType   Function pointer type of the workers.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char         *pszName;            /* subtest name */ \
        a_FunctionPtrType   pfn;                /* worker under test */ \
        a_FunctionPtrType   pfnNative;          /* native worker, may be NULL */ \
        a_TestType const   *paTests;            /* test data array */ \
        uint32_t const     *pcTests;            /* pointer to the size variable of paTests */ \
        uint32_t            uExtra;             /* extra value from the ENTRY_*_EX macros */ \
        uint8_t             idxCpuEflFlavour;   /* IEMTARGETCPU_EFL_BEHAVIOR_XXX */ \
    } a_TypeName
78
/**
 * Number of worker variations to run for a subtest: two when the subtest's
 * EFLAGS flavour equals g_idxCpuEflFlavour and it has a native worker,
 * otherwise one.
 */
#define COUNT_VARIATIONS(a_SubTest) \
    ( ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) ? 2 : 1 )
81
82
83	/*********************************************************************************************************************************
84	* Global Variables *
85	*********************************************************************************************************************************/
86	static RTTEST g_hTest;
87	static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
88	#ifdef TSTIEMAIMPL_WITH_GENERATOR
89	static uint32_t g_cZeroDstTests = 2;
90	static uint32_t g_cZeroSrcTests = 4;
91	#endif
92	static uint8_t g_pu8, g_pu8Two;
93	static uint16_t g_pu16, g_pu16Two;
94	static uint32_t g_pu32, g_pu32Two, *g_pfEfl;
95	static uint64_t g_pu64, g_pu64Two;
96	static RTUINT128U g_pu128, g_pu128Two;
97
98	static char g_aszBuf[32][256];
99	static unsigned g_idxBuf = 0;
100
101	static uint32_t g_cIncludeTestPatterns;
102	static uint32_t g_cExcludeTestPatterns;
103	static const char *g_apszIncludeTestPatterns[64];
104	static const char *g_apszExcludeTestPatterns[64];
105
106	static unsigned g_cVerbosity = 0;
107
108
109	/*********************************************************************************************************************************
110	* Internal Functions *
111	*********************************************************************************************************************************/
112	static const char *FormatR80(PCRTFLOAT80U pr80);
113	static const char *FormatR64(PCRTFLOAT64U pr64);
114	static const char *FormatR32(PCRTFLOAT32U pr32);
115
116
117	/*
118	* Random helpers.
119	*/
120
121	static uint32_t RandEFlags(void)
122	{
123	uint32_t fEfl = RTRandU32();
124	return (fEfl & X86_EFL_LIVE_MASK) \| X86_EFL_RA1_MASK;
125	}
126
127	#ifdef TSTIEMAIMPL_WITH_GENERATOR
128
129	static uint8_t RandU8(void)
130	{
131	return RTRandU32Ex(0, 0xff);
132	}
133
134
135	static uint16_t RandU16(void)
136	{
137	return RTRandU32Ex(0, 0xffff);
138	}
139
140
141	static uint32_t RandU32(void)
142	{
143	return RTRandU32();
144	}
145
146	#endif
147
148	static uint64_t RandU64(void)
149	{
150	return RTRandU64();
151	}
152
153
154	static RTUINT128U RandU128(void)
155	{
156	RTUINT128U Ret;
157	Ret.s.Hi = RTRandU64();
158	Ret.s.Lo = RTRandU64();
159	return Ret;
160	}
161
162	#ifdef TSTIEMAIMPL_WITH_GENERATOR
163
164	static uint8_t RandU8Dst(uint32_t iTest)
165	{
166	if (iTest < g_cZeroDstTests)
167	return 0;
168	return RandU8();
169	}
170
171
172	static uint8_t RandU8Src(uint32_t iTest)
173	{
174	if (iTest < g_cZeroSrcTests)
175	return 0;
176	return RandU8();
177	}
178
179
180	static uint16_t RandU16Dst(uint32_t iTest)
181	{
182	if (iTest < g_cZeroDstTests)
183	return 0;
184	return RandU16();
185	}
186
187
188	static uint16_t RandU16Src(uint32_t iTest)
189	{
190	if (iTest < g_cZeroSrcTests)
191	return 0;
192	return RandU16();
193	}
194
195
196	static uint32_t RandU32Dst(uint32_t iTest)
197	{
198	if (iTest < g_cZeroDstTests)
199	return 0;
200	return RandU32();
201	}
202
203
204	static uint32_t RandU32Src(uint32_t iTest)
205	{
206	if (iTest < g_cZeroSrcTests)
207	return 0;
208	return RandU32();
209	}
210
211
212	static uint64_t RandU64Dst(uint32_t iTest)
213	{
214	if (iTest < g_cZeroDstTests)
215	return 0;
216	return RandU64();
217	}
218
219
220	static uint64_t RandU64Src(uint32_t iTest)
221	{
222	if (iTest < g_cZeroSrcTests)
223	return 0;
224	return RandU64();
225	}
226
227
228	/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
229	static int16_t RandI16Src2(uint32_t iTest)
230	{
231	if (iTest < 18 * 4)
232	switch (iTest % 4)
233	{
234	case 0: return 0;
235	case 1: return INT16_MAX;
236	case 2: return INT16_MIN;
237	case 3: break;
238	}
239	return (int16_t)RandU16();
240	}
241
242
243	/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
244	static int32_t RandI32Src2(uint32_t iTest)
245	{
246	if (iTest < 18 * 4)
247	switch (iTest % 4)
248	{
249	case 0: return 0;
250	case 1: return INT32_MAX;
251	case 2: return INT32_MIN;
252	case 3: break;
253	}
254	return (int32_t)RandU32();
255	}
256
257
#if 0 /* Currently compiled out. */
/**
 * Gets a random 64-bit signed source operand.
 *
 * @returns Random value.
 * @param   iTest   The test number (ignored).
 */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    uint64_t const uRnd = RandU64();
    return (int64_t)uRnd;
}
#endif
265
266
267	static uint16_t RandFcw(void)
268	{
269	return RandU16() & ~X86_FCW_ZERO_MASK;
270	}
271
272
273	static uint16_t RandFsw(void)
274	{
275	AssertCompile((X86_FSW_C_MASK \| X86_FSW_XCPT_ES_MASK \| X86_FSW_TOP_MASK \| X86_FSW_B) == 0xffff);
276	return RandU16();
277	}
278
279
280	static uint32_t RandMxcsr(void)
281	{
282	return RandU32() & ~X86_MXCSR_ZERO_MASK;
283	}
284
285
286	static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
287	{
288	if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
289	pr80->sj64.uFraction >>= cShift;
290	else
291	pr80->sj64.uFraction = (cShift % 19) + 1;
292	}
293
294
295
296	static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
297	{
298	Assert(cTarget == (!fIntTarget ? 80U : 16U) \|\| cTarget == 64U \|\| cTarget == 32U \|\| (cTarget == 59U && fIntTarget));
299
300	RTFLOAT80U r80;
301	r80.au64[0] = RandU64();
302	r80.au16[4] = RandU16();
303
304	/*
305	* Adjust the random stuff according to bType.
306	*/
307	bType &= 0x1f;
308	if (bType == 0 \|\| bType == 1 \|\| bType == 2 \|\| bType == 3)
309	{
310	/* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
311	r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
312	r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
313	r80.sj64.fInteger = bType >= 2 ? 1 : 0;
314	AssertMsg(bType != 0 \|\| RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
315	AssertMsg(bType != 1 \|\| RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
316	Assert( bType != 1 \|\| RTFLOAT80U_IS_387_INVALID(&r80));
317	AssertMsg(bType != 2 \|\| RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
318	AssertMsg(bType != 3 \|\| RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
319	}
320	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
321	{
322	/* Denormals (4,5) and Pseudo denormals (6,7) */
323	if (bType & 1)
324	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
325	else if (r80.sj64.uFraction == 0 && bType < 6)
326	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
327	r80.sj64.uExponent = 0;
328	r80.sj64.fInteger = bType >= 6;
329	AssertMsg(bType >= 6 \|\| RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
330	AssertMsg(bType < 6 \|\| RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
331	}
332	else if (bType == 8 \|\| bType == 9)
333	{
334	/* Pseudo NaN. */
335	if (bType & 1)
336	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
337	else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
338	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
339	r80.sj64.uExponent = 0x7fff;
340	if (r80.sj64.fInteger)
341	r80.sj64.uFraction \|= RT_BIT_64(62);
342	else
343	r80.sj64.uFraction &= ~RT_BIT_64(62);
344	r80.sj64.fInteger = 0;
345	AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
346	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
347	Assert(RTFLOAT80U_IS_387_INVALID(&r80));
348	}
349	else if (bType == 10 \|\| bType == 11 \|\| bType == 12 \|\| bType == 13)
350	{
351	/* Quiet and signalling NaNs. */
352	if (bType & 1)
353	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
354	else if (r80.sj64.uFraction == 0)
355	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
356	r80.sj64.uExponent = 0x7fff;
357	if (bType < 12)
358	r80.sj64.uFraction \|= RT_BIT_64(62); /* quiet */
359	else
360	r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
361	r80.sj64.fInteger = 1;
362	AssertMsg(bType >= 12 \|\| RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
363	AssertMsg(bType < 12 \|\| RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
364	AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) \|\| RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
365	AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
366	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
367	}
368	else if (bType == 14 \|\| bType == 15)
369	{
370	/* Unnormals */
371	if (bType & 1)
372	SafeR80FractionShift(&r80, RandU8() % 62);
373	r80.sj64.fInteger = 0;
374	if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX \|\| r80.sj64.uExponent == 0)
375	r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
376	AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
377	Assert(RTFLOAT80U_IS_387_INVALID(&r80));
378	}
379	else if (bType < 26)
380	{
381	/* Make sure we have lots of normalized values. */
382	if (!fIntTarget)
383	{
384	const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
385	: cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
386	const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
387	: cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
388	r80.sj64.fInteger = 1;
389	if (r80.sj64.uExponent <= uMinExp)
390	r80.sj64.uExponent = uMinExp + 1;
391	else if (r80.sj64.uExponent >= uMaxExp)
392	r80.sj64.uExponent = uMaxExp - 1;
393
394	if (bType == 16)
395	{ /* All 1s is useful to testing rounding. Also try trigger special
396	behaviour by sometimes rounding out of range, while we're at it. */
397	r80.sj64.uFraction = RT_BIT_64(63) - 1;
398	uint8_t bExp = RandU8();
399	if ((bExp & 3) == 0)
400	r80.sj64.uExponent = uMaxExp - 1;
401	else if ((bExp & 3) == 1)
402	r80.sj64.uExponent = uMinExp + 1;
403	else if ((bExp & 3) == 2)
404	r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
405	}
406	}
407	else
408	{
409	/* integer target: */
410	const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
411	const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
412	r80.sj64.fInteger = 1;
413	if (r80.sj64.uExponent < uMinExp)
414	r80.sj64.uExponent = uMinExp;
415	else if (r80.sj64.uExponent > uMaxExp)
416	r80.sj64.uExponent = uMaxExp;
417
418	if (bType == 16)
419	{ /* All 1s is useful to testing rounding. Also try trigger special
420	behaviour by sometimes rounding out of range, while we're at it. */
421	r80.sj64.uFraction = RT_BIT_64(63) - 1;
422	uint8_t bExp = RandU8();
423	if ((bExp & 3) == 0)
424	r80.sj64.uExponent = uMaxExp;
425	else if ((bExp & 3) == 1)
426	r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
427	}
428	}
429
430	AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
431	}
432	return r80;
433	}
434
435
436	static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
437	{
438	/*
439	* Make it more likely that we get a good selection of special values.
440	*/
441	return RandR80Ex(RandU8(), cTarget, fIntTarget);
442
443	}
444
445
446	static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
447	{
448	/* Make sure we cover all the basic types first before going for random selection: */
449	if (iTest <= 18)
450	return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
451	return RandR80(cTarget, fIntTarget);
452	}
453
454
455	/**
456	* Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
457	* to a 0..17, covering all basic value types.
458	*/
459	static uint8_t RandR80Src12RemapType(uint8_t bType)
460	{
461	switch (bType)
462	{
463	case 0: return 18; /* normal */
464	case 1: return 16; /* normal extreme rounding */
465	case 2: return 14; /* unnormal */
466	case 3: return 12; /* Signalling NaN */
467	case 4: return 10; /* Quiet NaN */
468	case 5: return 8; /* PseudoNaN */
469	case 6: return 6; /* Pseudo Denormal */
470	case 7: return 4; /* Denormal */
471	case 8: return 3; /* Indefinite */
472	case 9: return 2; /* Infinity */
473	case 10: return 1; /* Pseudo-Infinity */
474	case 11: return 0; /* Zero */
475	default: AssertFailedReturn(18);
476	}
477	}
478
479
480	/**
481	* This works in tandem with RandR80Src2 to make sure we cover all operand
482	* type mixes first before we venture into regular random testing.
483	*
484	* There are 11 basic variations, when we leave out the five odd ones using
485	* SafeR80FractionShift. Because of the special normalized value targetting at
486	* rounding, we make it an even 12. So 144 combinations for two operands.
487	*/
488	static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
489	{
490	if (cPartnerBits == 80)
491	{
492	Assert(!fPartnerInt);
493	if (iTest < 12 * 12)
494	return RandR80Ex(RandR80Src12RemapType(iTest / 12));
495	}
496	else if ((cPartnerBits == 64 \|\| cPartnerBits == 32) && !fPartnerInt)
497	{
498	if (iTest < 12 * 10)
499	return RandR80Ex(RandR80Src12RemapType(iTest / 10));
500	}
501	else if (iTest < 18 * 4 && fPartnerInt)
502	return RandR80Ex(iTest / 4);
503	return RandR80();
504	}
505
506
507	/** Partner to RandR80Src1. */
508	static RTFLOAT80U RandR80Src2(uint32_t iTest)
509	{
510	if (iTest < 12 * 12)
511	return RandR80Ex(RandR80Src12RemapType(iTest % 12));
512	return RandR80();
513	}
514
515
516	static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
517	{
518	if (pr64->s64.uFraction >= RT_BIT_64(cShift))
519	pr64->s64.uFraction >>= cShift;
520	else
521	pr64->s64.uFraction = (cShift % 19) + 1;
522	}
523
524
525	static RTFLOAT64U RandR64Ex(uint8_t bType)
526	{
527	RTFLOAT64U r64;
528	r64.u = RandU64();
529
530	/*
531	* Make it more likely that we get a good selection of special values.
532	* On average 6 out of 16 calls should return a special value.
533	*/
534	bType &= 0xf;
535	if (bType == 0 \|\| bType == 1)
536	{
537	/* 0 or Infinity. We only keep fSign here. */
538	r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
539	r64.s.uFractionHigh = 0;
540	r64.s.uFractionLow = 0;
541	AssertMsg(bType != 0 \|\| RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
542	AssertMsg(bType != 1 \|\| RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
543	}
544	else if (bType == 2 \|\| bType == 3)
545	{
546	/* Subnormals */
547	if (bType == 3)
548	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
549	else if (r64.s64.uFraction == 0)
550	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
551	r64.s64.uExponent = 0;
552	AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
553	}
554	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
555	{
556	/* NaNs */
557	if (bType & 1)
558	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
559	else if (r64.s64.uFraction == 0)
560	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
561	r64.s64.uExponent = 0x7ff;
562	if (bType < 6)
563	r64.s64.uFraction \|= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
564	else
565	r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
566	AssertMsg(bType >= 6 \|\| RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
567	AssertMsg(bType < 6 \|\| RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
568	AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
569	}
570	else if (bType < 12)
571	{
572	/* Make sure we have lots of normalized values. */
573	if (r64.s.uExponent == 0)
574	r64.s.uExponent = 1;
575	else if (r64.s.uExponent == 0x7ff)
576	r64.s.uExponent = 0x7fe;
577	AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
578	}
579	return r64;
580	}
581
582
583	static RTFLOAT64U RandR64Src(uint32_t iTest)
584	{
585	if (iTest < 16)
586	return RandR64Ex(iTest);
587	return RandR64Ex(RandU8());
588	}
589
590
591	/** Pairing with a 80-bit floating point arg. */
592	static RTFLOAT64U RandR64Src2(uint32_t iTest)
593	{
594	if (iTest < 12 * 10)
595	return RandR64Ex(9 - iTest % 10); /* start with normal values */
596	return RandR64Ex(RandU8());
597	}
598
599
600	static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
601	{
602	if (pr32->s.uFraction >= RT_BIT_32(cShift))
603	pr32->s.uFraction >>= cShift;
604	else
605	pr32->s.uFraction = (cShift % 19) + 1;
606	}
607
608
609	static RTFLOAT32U RandR32Ex(uint8_t bType)
610	{
611	RTFLOAT32U r32;
612	r32.u = RandU32();
613
614	/*
615	* Make it more likely that we get a good selection of special values.
616	* On average 6 out of 16 calls should return a special value.
617	*/
618	bType &= 0xf;
619	if (bType == 0 \|\| bType == 1)
620	{
621	/* 0 or Infinity. We only keep fSign here. */
622	r32.s.uExponent = bType == 0 ? 0 : 0xff;
623	r32.s.uFraction = 0;
624	AssertMsg(bType != 0 \|\| RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
625	AssertMsg(bType != 1 \|\| RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
626	}
627	else if (bType == 2 \|\| bType == 3)
628	{
629	/* Subnormals */
630	if (bType == 3)
631	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
632	else if (r32.s.uFraction == 0)
633	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
634	r32.s.uExponent = 0;
635	AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
636	}
637	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
638	{
639	/* NaNs */
640	if (bType & 1)
641	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
642	else if (r32.s.uFraction == 0)
643	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
644	r32.s.uExponent = 0xff;
645	if (bType < 6)
646	r32.s.uFraction \|= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
647	else
648	r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
649	AssertMsg(bType >= 6 \|\| RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
650	AssertMsg(bType < 6 \|\| RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
651	AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
652	}
653	else if (bType < 12)
654	{
655	/* Make sure we have lots of normalized values. */
656	if (r32.s.uExponent == 0)
657	r32.s.uExponent = 1;
658	else if (r32.s.uExponent == 0xff)
659	r32.s.uExponent = 0xfe;
660	AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
661	}
662	return r32;
663	}
664
665
666	static RTFLOAT32U RandR32Src(uint32_t iTest)
667	{
668	if (iTest < 16)
669	return RandR32Ex(iTest);
670	return RandR32Ex(RandU8());
671	}
672
673
674	/** Pairing with a 80-bit floating point arg. */
675	static RTFLOAT32U RandR32Src2(uint32_t iTest)
676	{
677	if (iTest < 12 * 10)
678	return RandR32Ex(9 - iTest % 10); /* start with normal values */
679	return RandR32Ex(RandU8());
680	}
681
682
683	static RTPBCD80U RandD80Src(uint32_t iTest)
684	{
685	if (iTest < 3)
686	{
687	RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
688	return d80Zero;
689	}
690	if (iTest < 5)
691	{
692	RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
693	return d80Ind;
694	}
695
696	RTPBCD80U d80;
697	uint8_t b = RandU8();
698	d80.s.fSign = b & 1;
699
700	if ((iTest & 7) >= 6)
701	{
702	/* Illegal */
703	d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
704	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
705	d80.s.abPairs[iPair] = RandU8();
706	}
707	else
708	{
709	/* Normal */
710	d80.s.uPad = 0;
711	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
712	{
713	uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
714	uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
715	d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
716	}
717	}
718	return d80;
719	}
720
721
722	const char *GenFormatR80(PCRTFLOAT80U plrd)
723	{
724	if (RTFLOAT80U_IS_ZERO(plrd))
725	return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
726	if (RTFLOAT80U_IS_INF(plrd))
727	return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
728	if (RTFLOAT80U_IS_INDEFINITE(plrd))
729	return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
730	if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
731	return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
732	if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
733	return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
734
735	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
736	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
737	plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
738	return pszBuf;
739	}
740
741	const char *GenFormatR64(PCRTFLOAT64U prd)
742	{
743	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
744	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
745	prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
746	return pszBuf;
747	}
748
749
750	const char *GenFormatR32(PCRTFLOAT32U pr)
751	{
752	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
753	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
754	return pszBuf;
755	}
756
757
758	const char *GenFormatD80(PCRTPBCD80U pd80)
759	{
760	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
761	size_t off;
762	if (pd80->s.uPad == 0)
763	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
764	else
765	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
766	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
767	while (iPair-- > 0)
768	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
769	RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
770	RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
771	pszBuf[off++] = ')';
772	pszBuf[off++] = '\0';
773	return pszBuf;
774	}
775
776
777	const char *GenFormatI64(int64_t i64)
778	{
779	if (i64 == INT64_MIN) /* This one is problematic */
780	return "INT64_MIN";
781	if (i64 == INT64_MAX)
782	return "INT64_MAX";
783	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
784	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
785	return pszBuf;
786	}
787
788
789	const char GenFormatI64(int64_t const pi64)
790	{
791	return GenFormatI64(*pi64);
792	}
793
794
795	const char *GenFormatI32(int32_t i32)
796	{
797	if (i32 == INT32_MIN) /* This one is problematic */
798	return "INT32_MIN";
799	if (i32 == INT32_MAX)
800	return "INT32_MAX";
801	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
802	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
803	return pszBuf;
804	}
805
806
807	const char GenFormatI32(int32_t const pi32)
808	{
809	return GenFormatI32(*pi32);
810	}
811
812
813	const char *GenFormatI16(int16_t i16)
814	{
815	if (i16 == INT16_MIN) /* This one is problematic */
816	return "INT16_MIN";
817	if (i16 == INT16_MAX)
818	return "INT16_MAX";
819	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
820	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
821	return pszBuf;
822	}
823
824
825	const char GenFormatI16(int16_t const pi16)
826	{
827	return GenFormatI16(*pi16);
828	}
829
830
831	static void GenerateHeader(PRTSTREAM pOut, const char pszCpuDesc, const char pszCpuType)
832	{
833	/* We want to tag the generated source code with the revision that produced it. */
834	static char s_szRev[] = "$Revision: 96348 $";
835	const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
836	size_t cchRev = 0;
837	while (RT_C_IS_DIGIT(pszRev[cchRev]))
838	cchRev++;
839
840	RTStrmPrintf(pOut,
841	"/* $Id: tstIEMAImpl.cpp 96348 2022-08-19 17:00:45Z vboxsync $ */\n"
842	"/** @file\n"
843	" * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
844	" */\n"
845	"\n"
846	"/*\n"
847	" * Copyright (C) 2022 Oracle Corporation\n"
848	" *\n"
849	" * This file is part of VirtualBox Open Source Edition (OSE), as\n"
850	" * available from http://www.virtualbox.org. This file is free software;\n"
851	" * you can redistribute it and/or modify it under the terms of the GNU\n"
852	" * General Public License (GPL) as published by the Free Software\n"
853	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
854	" * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
855	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
856	" */\n"
857	"\n"
858	"#include \"tstIEMAImpl.h\"\n"
859	"\n"
860	,
861	pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
862	}
863
864
865	static PRTSTREAM GenerateOpenWithHdr(const char pszFilename, const char pszCpuDesc, const char *pszCpuType)
866	{
867	PRTSTREAM pOut = NULL;
868	int rc = RTStrmOpen(pszFilename, "w", &pOut);
869	if (RT_SUCCESS(rc))
870	{
871	GenerateHeader(pOut, pszCpuDesc, pszCpuType);
872	return pOut;
873	}
874	RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
875	return NULL;
876	}
877
878
879	static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
880	{
881	RTStrmPrintf(pOut,
882	"\n"
883	"/* end of file */\n");
884	int rc = RTStrmClose(pOut);
885	if (RT_SUCCESS(rc))
886	return rcExit;
887	return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
888	}
889
890
891	static void GenerateArrayStart(PRTSTREAM pOut, const char pszName, const char pszType)
892	{
893	RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
894	}
895
896
897	static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
898	{
899	RTStrmPrintf(pOut,
900	"};\n"
901	"uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
902	"\n",
903	pszName, pszName);
904	}
905
906	#endif /* TSTIEMAIMPL_WITH_GENERATOR */
907
908
909	/*
910	* Test helpers.
911	*/
912	static bool IsTestEnabled(const char *pszName)
913	{
914	/* Process excludes first: */
915	uint32_t i = g_cExcludeTestPatterns;
916	while (i-- > 0)
917	if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
918	return false;
919
920	/* If no include patterns, everything is included: */
921	i = g_cIncludeTestPatterns;
922	if (!i)
923	return true;
924
925	/* Otherwise only tests in the include patters gets tested: */
926	while (i-- > 0)
927	if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
928	return true;
929
930	return false;
931	}
932
933
934	static bool SubTestAndCheckIfEnabled(const char *pszName)
935	{
936	RTTestSub(g_hTest, pszName);
937	if (IsTestEnabled(pszName))
938	return true;
939	RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
940	return false;
941	}
942
943
944	static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
945	{
946	if (fActual == fExpected)
947	return "";
948
949	uint32_t const fXor = fActual ^ fExpected;
950	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
951	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
952
953	static struct
954	{
955	const char *pszName;
956	uint32_t fFlag;
957	} const s_aFlags[] =
958	{
959	#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
960	EFL_ENTRY(CF),
961	EFL_ENTRY(PF),
962	EFL_ENTRY(AF),
963	EFL_ENTRY(ZF),
964	EFL_ENTRY(SF),
965	EFL_ENTRY(TF),
966	EFL_ENTRY(IF),
967	EFL_ENTRY(DF),
968	EFL_ENTRY(OF),
969	EFL_ENTRY(IOPL),
970	EFL_ENTRY(NT),
971	EFL_ENTRY(RF),
972	EFL_ENTRY(VM),
973	EFL_ENTRY(AC),
974	EFL_ENTRY(VIF),
975	EFL_ENTRY(VIP),
976	EFL_ENTRY(ID),
977	};
978	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
979	if (s_aFlags[i].fFlag & fXor)
980	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
981	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
982	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
983	return pszBuf;
984	}
985
986
987	static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
988	{
989	if (fActual == fExpected)
990	return "";
991
992	uint16_t const fXor = fActual ^ fExpected;
993	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
994	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
995
996	static struct
997	{
998	const char *pszName;
999	uint32_t fFlag;
1000	} const s_aFlags[] =
1001	{
1002	#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1003	FSW_ENTRY(IE),
1004	FSW_ENTRY(DE),
1005	FSW_ENTRY(ZE),
1006	FSW_ENTRY(OE),
1007	FSW_ENTRY(UE),
1008	FSW_ENTRY(PE),
1009	FSW_ENTRY(SF),
1010	FSW_ENTRY(ES),
1011	FSW_ENTRY(C0),
1012	FSW_ENTRY(C1),
1013	FSW_ENTRY(C2),
1014	FSW_ENTRY(C3),
1015	FSW_ENTRY(B),
1016	};
1017	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1018	if (s_aFlags[i].fFlag & fXor)
1019	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1020	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1021	if (fXor & X86_FSW_TOP_MASK)
1022	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1023	X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1024	#if 0 /* For debugging fprem & fprem1 */
1025	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1026	X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1027	#endif
1028	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1029	return pszBuf;
1030	}
1031
1032
1033	static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1034	{
1035	if (fActual == fExpected)
1036	return "";
1037
1038	uint16_t const fXor = fActual ^ fExpected;
1039	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1040	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1041
1042	static struct
1043	{
1044	const char *pszName;
1045	uint32_t fFlag;
1046	} const s_aFlags[] =
1047	{
1048	#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1049	MXCSR_ENTRY(IE),
1050	MXCSR_ENTRY(DE),
1051	MXCSR_ENTRY(ZE),
1052	MXCSR_ENTRY(OE),
1053	MXCSR_ENTRY(UE),
1054	MXCSR_ENTRY(PE),
1055
1056	MXCSR_ENTRY(IM),
1057	MXCSR_ENTRY(DM),
1058	MXCSR_ENTRY(ZM),
1059	MXCSR_ENTRY(OM),
1060	MXCSR_ENTRY(UM),
1061	MXCSR_ENTRY(PM),
1062
1063	MXCSR_ENTRY(DAZ),
1064	MXCSR_ENTRY(FZ),
1065	#undef MXCSR_ENTRY
1066	};
1067	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1068	if (s_aFlags[i].fFlag & fXor)
1069	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1070	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1071	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1072	return pszBuf;
1073	}
1074
1075
1076	static const char *FormatFcw(uint16_t fFcw)
1077	{
1078	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1079
1080	const char pszPC = NULL; / (msc+gcc are too stupid) */
1081	switch (fFcw & X86_FCW_PC_MASK)
1082	{
1083	case X86_FCW_PC_24: pszPC = "PC24"; break;
1084	case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1085	case X86_FCW_PC_53: pszPC = "PC53"; break;
1086	case X86_FCW_PC_64: pszPC = "PC64"; break;
1087	}
1088
1089	const char pszRC = NULL; / (msc+gcc are too stupid) */
1090	switch (fFcw & X86_FCW_RC_MASK)
1091	{
1092	case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1093	case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1094	case X86_FCW_RC_UP: pszRC = "UP"; break;
1095	case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1096	}
1097	size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1098
1099	static struct
1100	{
1101	const char *pszName;
1102	uint32_t fFlag;
1103	} const s_aFlags[] =
1104	{
1105	#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1106	FCW_ENTRY(IM),
1107	FCW_ENTRY(DM),
1108	FCW_ENTRY(ZM),
1109	FCW_ENTRY(OM),
1110	FCW_ENTRY(UM),
1111	FCW_ENTRY(PM),
1112	{ "6M", 64 },
1113	};
1114	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1115	if (fFcw & s_aFlags[i].fFlag)
1116	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1117
1118	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1119	return pszBuf;
1120	}
1121
1122
1123	static const char *FormatMxcsr(uint32_t fMxcsr)
1124	{
1125	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1126
1127	const char pszRC = NULL; / (msc+gcc are too stupid) */
1128	switch (fMxcsr & X86_MXCSR_RC_MASK)
1129	{
1130	case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1131	case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1132	case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1133	case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1134	}
1135
1136	const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1137	const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1138	size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1139
1140	static struct
1141	{
1142	const char *pszName;
1143	uint32_t fFlag;
1144	} const s_aFlags[] =
1145	{
1146	#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1147	MXCSR_ENTRY(IE),
1148	MXCSR_ENTRY(DE),
1149	MXCSR_ENTRY(ZE),
1150	MXCSR_ENTRY(OE),
1151	MXCSR_ENTRY(UE),
1152	MXCSR_ENTRY(PE),
1153
1154	MXCSR_ENTRY(IM),
1155	MXCSR_ENTRY(DM),
1156	MXCSR_ENTRY(ZM),
1157	MXCSR_ENTRY(OM),
1158	MXCSR_ENTRY(UM),
1159	MXCSR_ENTRY(PM),
1160	{ "6M", 64 },
1161	};
1162	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1163	if (fMxcsr & s_aFlags[i].fFlag)
1164	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1165
1166	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1167	return pszBuf;
1168	}
1169
1170
1171	static const char *FormatR80(PCRTFLOAT80U pr80)
1172	{
1173	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1174	RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1175	return pszBuf;
1176	}
1177
1178
1179	static const char *FormatR64(PCRTFLOAT64U pr64)
1180	{
1181	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1182	RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1183	return pszBuf;
1184	}
1185
1186
1187	static const char *FormatR32(PCRTFLOAT32U pr32)
1188	{
1189	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1190	RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1191	return pszBuf;
1192	}
1193
1194
1195	static const char *FormatD80(PCRTPBCD80U pd80)
1196	{
1197	/* There is only one indefinite endcoding (same as for 80-bit
1198	floating point), so get it out of the way first: */
1199	if (RTPBCD80U_IS_INDEFINITE(pd80))
1200	return "Ind";
1201
1202	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1203	size_t off = 0;
1204	pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1205	unsigned cBadDigits = 0;
1206	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1207	while (iPair-- > 0)
1208	{
1209	static const char s_szDigits[] = "0123456789abcdef";
1210	static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1211	pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1212	pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1213	cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1214	+ s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1215	}
1216	if (cBadDigits \|\| pd80->s.uPad != 0)
1217	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1218	pszBuf[off] = '\0';
1219	return pszBuf;
1220	}
1221
1222
#if 0
/**
 * Formats a signed 64-bit integer in base 16 via RTStrFormatU64
 * (RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED).  Currently compiled out.
 *
 * @returns Pointer to the next slot of the rotating g_aszBuf array.
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1231
1232
1233	static const char FormatI32(int32_t const piVal)
1234	{
1235	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1236	RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
1237	return pszBuf;
1238	}
1239
1240
1241	static const char FormatI16(int16_t const piVal)
1242	{
1243	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1244	RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
1245	return pszBuf;
1246	}
1247
1248
1249	/*
1250	* Binary operations.
1251	*/
1252	TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1253	TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1254	TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1255	TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1256
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits BinU<bits>Generate(), which writes one a_TestType table per entry in
   g_aBinU<bits>, each holding cTests randomly generated records.  Expands to
   nothing when the generator is not compiled in. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aBinU ## a_cBits); idxFn++) \
    { \
        /* Entries with a specific EFLAGS flavour are only generated when it is the current one, and go to pOutCpu. */ \
        bool const fNativeFlavour = g_aBinU ## a_cBits[idxFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE; \
        if (   !fNativeFlavour \
            && g_aBinU ## a_cBits[idxFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        PRTSTREAM const pStrm = fNativeFlavour ? pOut : pOutCpu; \
        \
        /* Use the native worker when the entry has one, the regular worker otherwise. */ \
        PFNIEMAIMPLBINU ## a_cBits pfnWorker = g_aBinU ## a_cBits[idxFn].pfnNative; \
        if (!pfnWorker) \
            pfnWorker = g_aBinU ## a_cBits[idxFn].pfn; \
        \
        GenerateArrayStart(pStrm, g_aBinU ## a_cBits[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(idxTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(idxTest); \
            if (g_aBinU ## a_cBits[idxFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfnWorker(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pStrm, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, idxTest); \
        } \
        GenerateArrayEnd(pStrm, g_aBinU ## a_cBits[idxFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1295
/* Emits the optional generator plus BinU<bits>Test(), which replays the
   recorded a_TestType tables against every worker in a_aSubTests.  Each
   record is run once on stack variables and, if that matched, once more on
   the global g_pu<bits> / g_pfEfl buffers.  The loop runs COUNT_VARIATIONS()
   times per entry, switching to the entry's pfnNative worker after the first
   pass. */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    /* Repeat on the global buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1336
1337
1338	/*
1339	* 8-bit binary operations.
1340	*/
1341	static const BINU8_T g_aBinU8[] =
1342	{
1343	ENTRY(add_u8),
1344	ENTRY(add_u8_locked),
1345	ENTRY(adc_u8),
1346	ENTRY(adc_u8_locked),
1347	ENTRY(sub_u8),
1348	ENTRY(sub_u8_locked),
1349	ENTRY(sbb_u8),
1350	ENTRY(sbb_u8_locked),
1351	ENTRY(or_u8),
1352	ENTRY(or_u8_locked),
1353	ENTRY(xor_u8),
1354	ENTRY(xor_u8_locked),
1355	ENTRY(and_u8),
1356	ENTRY(and_u8_locked),
1357	ENTRY(cmp_u8),
1358	ENTRY(test_u8),
1359	};
1360	TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1361
1362
1363	/*
1364	* 16-bit binary operations.
1365	*/
1366	static const BINU16_T g_aBinU16[] =
1367	{
1368	ENTRY(add_u16),
1369	ENTRY(add_u16_locked),
1370	ENTRY(adc_u16),
1371	ENTRY(adc_u16_locked),
1372	ENTRY(sub_u16),
1373	ENTRY(sub_u16_locked),
1374	ENTRY(sbb_u16),
1375	ENTRY(sbb_u16_locked),
1376	ENTRY(or_u16),
1377	ENTRY(or_u16_locked),
1378	ENTRY(xor_u16),
1379	ENTRY(xor_u16_locked),
1380	ENTRY(and_u16),
1381	ENTRY(and_u16_locked),
1382	ENTRY(cmp_u16),
1383	ENTRY(test_u16),
1384	ENTRY_EX(bt_u16, 1),
1385	ENTRY_EX(btc_u16, 1),
1386	ENTRY_EX(btc_u16_locked, 1),
1387	ENTRY_EX(btr_u16, 1),
1388	ENTRY_EX(btr_u16_locked, 1),
1389	ENTRY_EX(bts_u16, 1),
1390	ENTRY_EX(bts_u16_locked, 1),
1391	ENTRY_AMD( bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1392	ENTRY_INTEL(bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1393	ENTRY_AMD( bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1394	ENTRY_INTEL(bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1395	ENTRY_AMD( imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1396	ENTRY_INTEL(imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1397	ENTRY(arpl),
1398	};
1399	TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1400
1401
1402	/*
1403	* 32-bit binary operations.
1404	*/
1405	static const BINU32_T g_aBinU32[] =
1406	{
1407	ENTRY(add_u32),
1408	ENTRY(add_u32_locked),
1409	ENTRY(adc_u32),
1410	ENTRY(adc_u32_locked),
1411	ENTRY(sub_u32),
1412	ENTRY(sub_u32_locked),
1413	ENTRY(sbb_u32),
1414	ENTRY(sbb_u32_locked),
1415	ENTRY(or_u32),
1416	ENTRY(or_u32_locked),
1417	ENTRY(xor_u32),
1418	ENTRY(xor_u32_locked),
1419	ENTRY(and_u32),
1420	ENTRY(and_u32_locked),
1421	ENTRY(cmp_u32),
1422	ENTRY(test_u32),
1423	ENTRY_EX(bt_u32, 1),
1424	ENTRY_EX(btc_u32, 1),
1425	ENTRY_EX(btc_u32_locked, 1),
1426	ENTRY_EX(btr_u32, 1),
1427	ENTRY_EX(btr_u32_locked, 1),
1428	ENTRY_EX(bts_u32, 1),
1429	ENTRY_EX(bts_u32_locked, 1),
1430	ENTRY_AMD( bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1431	ENTRY_INTEL(bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1432	ENTRY_AMD( bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1433	ENTRY_INTEL(bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1434	ENTRY_AMD( imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1435	ENTRY_INTEL(imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1436	};
1437	TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1438
1439
1440	/*
1441	* 64-bit binary operations.
1442	*/
1443	static const BINU64_T g_aBinU64[] =
1444	{
1445	ENTRY(add_u64),
1446	ENTRY(add_u64_locked),
1447	ENTRY(adc_u64),
1448	ENTRY(adc_u64_locked),
1449	ENTRY(sub_u64),
1450	ENTRY(sub_u64_locked),
1451	ENTRY(sbb_u64),
1452	ENTRY(sbb_u64_locked),
1453	ENTRY(or_u64),
1454	ENTRY(or_u64_locked),
1455	ENTRY(xor_u64),
1456	ENTRY(xor_u64_locked),
1457	ENTRY(and_u64),
1458	ENTRY(and_u64_locked),
1459	ENTRY(cmp_u64),
1460	ENTRY(test_u64),
1461	ENTRY_EX(bt_u64, 1),
1462	ENTRY_EX(btc_u64, 1),
1463	ENTRY_EX(btc_u64_locked, 1),
1464	ENTRY_EX(btr_u64, 1),
1465	ENTRY_EX(btr_u64_locked, 1),
1466	ENTRY_EX(bts_u64, 1),
1467	ENTRY_EX(bts_u64_locked, 1),
1468	ENTRY_AMD( bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1469	ENTRY_INTEL(bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1470	ENTRY_AMD( bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1471	ENTRY_INTEL(bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1472	ENTRY_AMD( imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1473	ENTRY_INTEL(imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1474	};
1475	TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1476
1477
1478	/*
1479	* XCHG
1480	*/
1481	static void XchgTest(void)
1482	{
1483	if (!SubTestAndCheckIfEnabled("xchg"))
1484	return;
1485	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t pu8Mem, uint8_t pu8Reg));
1486	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t pu16Mem, uint16_t pu16Reg));
1487	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t pu32Mem, uint32_t pu32Reg));
1488	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t pu64Mem, uint64_t pu64Reg));
1489
1490	static struct
1491	{
1492	uint8_t cb; uint64_t fMask;
1493	union
1494	{
1495	uintptr_t pfn;
1496	FNIEMAIMPLXCHGU8 *pfnU8;
1497	FNIEMAIMPLXCHGU16 *pfnU16;
1498	FNIEMAIMPLXCHGU32 *pfnU32;
1499	FNIEMAIMPLXCHGU64 *pfnU64;
1500	} u;
1501	}
1502	s_aXchgWorkers[] =
1503	{
1504	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1505	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1506	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1507	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1508	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1509	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1510	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1511	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1512	};
1513	for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1514	{
1515	RTUINT64U uIn1, uIn2, uMem, uDst;
1516	uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1517	uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1518	if (uIn1.u == uIn2.u)
1519	uDst.u = uIn2.u = ~uIn2.u;
1520
1521	switch (s_aXchgWorkers[i].cb)
1522	{
1523	case 1:
1524	s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1525	s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1526	break;
1527	case 2:
1528	s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1529	s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1530	break;
1531	case 4:
1532	s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1533	s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1534	break;
1535	case 8:
1536	s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1537	s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1538	break;
1539	default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1540	}
1541
1542	if (uMem.u != uIn2.u \|\| uDst.u != uIn1.u)
1543	RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1544	}
1545	}
1546
1547
1548	/*
1549	* XADD
1550	*/
1551	static void XaddTest(void)
1552	{
1553	#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1554	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type , a_Type , uint32_t *)); \
1555	static struct \
1556	{ \
1557	const char *pszName; \
1558	FNIEMAIMPLXADDU ## a_cBits *pfn; \
1559	BINU ## a_cBits ## _TEST_T const *paTests; \
1560	uint32_t const *pcTests; \
1561	} const s_aFuncs[] = \
1562	{ \
1563	{ "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1564	g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1565	{ "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1566	g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1567	}; \
1568	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1569	{ \
1570	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1571	uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1572	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1573	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1574	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1575	{ \
1576	uint32_t fEfl = paTests[iTest].fEflIn; \
1577	a_Type uSrc = paTests[iTest].uSrcIn; \
1578	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1579	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1580	if ( fEfl != paTests[iTest].fEflOut \
1581	\|\| *g_pu ## a_cBits != paTests[iTest].uDstOut \
1582	\|\| uSrc != paTests[iTest].uDstIn) \
1583	RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1584	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1585	fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1586	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1587	} \
1588	} \
1589	} while(0)
1590	TEST_XADD(8, uint8_t, "%#04x");
1591	TEST_XADD(16, uint16_t, "%#06x");
1592	TEST_XADD(32, uint32_t, "%#010RX32");
1593	TEST_XADD(64, uint64_t, "%#010RX64");
1594	}
1595
1596
1597	/*
1598	* CMPXCHG
1599	*/
1600
1601	static void CmpXchgTest(void)
1602	{
1603	#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1604	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type , a_Type , a_Type, uint32_t *)); \
1605	static struct \
1606	{ \
1607	const char *pszName; \
1608	FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1609	PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1610	BINU ## a_cBits ## _TEST_T const *paTests; \
1611	uint32_t const *pcTests; \
1612	} const s_aFuncs[] = \
1613	{ \
1614	{ "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1615	g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1616	{ "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1617	g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1618	}; \
1619	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1620	{ \
1621	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1622	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1623	uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1624	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1625	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1626	{ \
1627	/* as is (99% likely to be negative). */ \
1628	uint32_t fEfl = paTests[iTest].fEflIn; \
1629	a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1630	a_Type uA = paTests[iTest].uDstIn; \
1631	*g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1632	a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1633	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1634	if ( fEfl != paTests[iTest].fEflOut \
1635	\|\| *g_pu ## a_cBits != uExpect \
1636	\|\| uA != paTests[iTest].uSrcIn) \
1637	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1638	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1639	uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1640	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1641	/* positive */ \
1642	uint32_t fEflExpect = paTests[iTest].fEflIn; \
1643	uA = paTests[iTest].uDstIn; \
1644	s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1645	fEfl = paTests[iTest].fEflIn; \
1646	uA = paTests[iTest].uDstIn; \
1647	*g_pu ## a_cBits = uA; \
1648	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1649	if ( fEfl != fEflExpect \
1650	\|\| *g_pu ## a_cBits != uNew \
1651	\|\| uA != paTests[iTest].uDstIn) \
1652	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1653	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1654	uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1655	EFlagsDiff(fEfl, fEflExpect)); \
1656	} \
1657	} \
1658	} while(0)
1659	TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1660	TEST_CMPXCHG(16, uint16_t, "%#06x");
1661	TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1662	#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1663	TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1664	#endif
1665	}
1666
1667	static void CmpXchg8bTest(void)
1668	{
1669	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t , PRTUINT64U, PRTUINT64U, uint32_t ));
1670	static struct
1671	{
1672	const char *pszName;
1673	FNIEMAIMPLCMPXCHG8B *pfn;
1674	} const s_aFuncs[] =
1675	{
1676	{ "cmpxchg8b", iemAImpl_cmpxchg8b },
1677	{ "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1678	};
1679	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1680	{
1681	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1682	continue;
1683	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1684	{
1685	uint64_t const uOldValue = RandU64();
1686	uint64_t const uNewValue = RandU64();
1687
1688	/* positive test. */
1689	RTUINT64U uA, uB;
1690	uB.u = uNewValue;
1691	uA.u = uOldValue;
1692	*g_pu64 = uOldValue;
1693	uint32_t fEflIn = RandEFlags();
1694	uint32_t fEfl = fEflIn;
1695	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1696	if ( fEfl != (fEflIn \| X86_EFL_ZF)
1697	\|\| *g_pu64 != uNewValue
1698	\|\| uA.u != uOldValue)
1699	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1700	iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1701	fEfl, *g_pu64, uA.u,
1702	(fEflIn \| X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
1703	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1704
1705	/* negative */
1706	uint64_t const uExpect = ~uOldValue;
1707	*g_pu64 = uExpect;
1708	uA.u = uOldValue;
1709	uB.u = uNewValue;
1710	fEfl = fEflIn = RandEFlags();
1711	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1712	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1713	\|\| *g_pu64 != uExpect
1714	\|\| uA.u != uExpect)
1715	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1716	iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1717	fEfl, *g_pu64, uA.u,
1718	(fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1719	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1720	}
1721	}
1722	}
1723
1724	static void CmpXchg16bTest(void)
1725	{
1726	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1727	static struct
1728	{
1729	const char *pszName;
1730	FNIEMAIMPLCMPXCHG16B *pfn;
1731	} const s_aFuncs[] =
1732	{
1733	{ "cmpxchg16b", iemAImpl_cmpxchg16b },
1734	{ "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1735	#if !defined(RT_ARCH_ARM64)
1736	{ "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1737	#endif
1738	};
1739	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1740	{
1741	if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1742	continue;
1743	#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1744	if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1745	{
1746	RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1747	continue;
1748	}
1749	#endif
1750	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1751	{
1752	RTUINT128U const uOldValue = RandU128();
1753	RTUINT128U const uNewValue = RandU128();
1754
1755	/* positive test. */
1756	RTUINT128U uA, uB;
1757	uB = uNewValue;
1758	uA = uOldValue;
1759	*g_pu128 = uOldValue;
1760	uint32_t fEflIn = RandEFlags();
1761	uint32_t fEfl = fEflIn;
1762	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1763	if ( fEfl != (fEflIn \| X86_EFL_ZF)
1764	\|\| g_pu128->s.Lo != uNewValue.s.Lo
1765	\|\| g_pu128->s.Hi != uNewValue.s.Hi
1766	\|\| uA.s.Lo != uOldValue.s.Lo
1767	\|\| uA.s.Hi != uOldValue.s.Hi)
1768	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1769	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1770	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1771	iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1772	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1773	(fEflIn \| X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1774	EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
1775	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1776
1777	/* negative */
1778	RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1779	*g_pu128 = uExpect;
1780	uA = uOldValue;
1781	uB = uNewValue;
1782	fEfl = fEflIn = RandEFlags();
1783	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1784	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1785	\|\| g_pu128->s.Lo != uExpect.s.Lo
1786	\|\| g_pu128->s.Hi != uExpect.s.Hi
1787	\|\| uA.s.Lo != uExpect.s.Lo
1788	\|\| uA.s.Hi != uExpect.s.Hi)
1789	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1790	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1791	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1792	iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1793	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1794	(fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1795	EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1796	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1797	}
1798	}
1799	}
1800
1801
1802	/*
1803	* Double shifts.
1804	*
1805	* Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1806	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Emits ShiftDblU<bits>Generate(), which writes one a_TestType table per
   entry in a_aSubTests using the entry's pfnNative worker; the random shift
   count is stored in uMisc.  Expands to nothing when the generator is not
   compiled in. */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        /* Only native entries and entries for the current EFLAGS flavour are generated. */ \
        bool const fWanted =    a_aSubTests[idxFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
                             || a_aSubTests[idxFn].idxCpuEflFlavour == g_idxCpuEflFlavour; \
        if (fWanted) \
        { \
            GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
            for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
            { \
                a_TestType Test; \
                Test.fEflIn    = RandEFlags(); \
                Test.fEflOut   = Test.fEflIn; \
                Test.uDstIn    = RandU ## a_cBits ## Dst(idxTest); \
                Test.uDstOut   = Test.uDstIn; \
                Test.uSrcIn    = RandU ## a_cBits ## Src(idxTest); \
                Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
                a_aSubTests[idxFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
                RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                             Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, idxTest); \
            } \
            GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
        } \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1836
/*
 * TEST_SHIFT_DBL - instantiates the double shift (shld/shrd) tests for one operand width.
 *
 * Parameters:
 *      a_cBits        Operand width; pasted into worker, type and global names.
 *      a_Type         C type of the destination operand.
 *      a_Fmt          Format string used for printing operand values.
 *      a_TestType     Test record type (fEflIn, fEflOut, uDstIn, uDstOut, uSrcIn, uMisc).
 *      a_SubTestType  Name of the sub-test entry type declared via TYPEDEF_SUBTEST_TYPE.
 *      a_aSubTests    Name of the static sub-test table defined here.
 *
 * Expands to: the sub-test typedef, a table holding AMD and Intel entries for shld and
 * shrd, the generator (GEN_SHIFT_DBL) and ShiftDblU<a_cBits>Test().
 *
 * The test function skips entries rejected by SubTestAndCheckIfEnabled().  For each of
 * the COUNT_VARIATIONS() variations it runs every record: the worker is called on local
 * copies of the inputs and the resulting destination and EFLAGS are compared with the
 * recorded outputs.  A mismatch is reported via RTTestFailed(); on a match the call is
 * repeated through the g_pu<a_cBits> / g_pfEfl pointers and checked again.  After each
 * variation the worker pointer is switched to the entry's pfnNative.
 */
1837	#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1838	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1839	\
1840	static a_SubTestType const a_aSubTests[] = \
1841	{ \
1842	ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1843	ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1844	ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1845	ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1846	}; \
1847	\
1848	GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1849	\
1850	static void ShiftDblU ## a_cBits ## Test(void) \
1851	{ \
1852	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1853	{ \
1854	if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
1855	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1856	PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1857	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1858	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1859	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1860	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1861	{ \
1862	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1863	{ \
1864	uint32_t fEfl = paTests[iTest].fEflIn; \
1865	a_Type uDst = paTests[iTest].uDstIn; \
1866	pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1867	if ( uDst != paTests[iTest].uDstOut \
1868	\|\| fEfl != paTests[iTest].fEflOut) \
1869	RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1870	iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1871	paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1872	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1873	EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1874	else \
1875	{ \
1876	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1877	*g_pfEfl = paTests[iTest].fEflIn; \
1878	pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1879	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1880	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1881	} \
1882	} \
1883	pfn = a_aSubTests[iFn].pfnNative; \
1884	} \
1885	} \
1886	}
/* Instantiate the double shift tests for 16-, 32- and 64-bit operands. */
1887	TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1888	TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1889	TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1890
1891	#ifdef TSTIEMAIMPL_WITH_GENERATOR
1892	static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
1893	{
1894	ShiftDblU16Generate(pOut, cTests);
1895	ShiftDblU32Generate(pOut, cTests);
1896	ShiftDblU64Generate(pOut, cTests);
1897	}
1898	#endif
1899
1900	static void ShiftDblTest(void)
1901	{
1902	ShiftDblU16Test();
1903	ShiftDblU32Test();
1904	ShiftDblU64Test();
1905	}
1906
1907
1908	/*
1909	* Unary operators.
1910	*
1911	* Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
1912	*/
/*
 * GEN_UNARY - with TSTIEMAIMPL_WITH_GENERATOR defined this expands to
 * UnaryU<a_cBits>Generate(), otherwise to nothing.
 *
 * The generator writes one array of a_TestType records per g_aUnaryU<a_cBits> entry
 * to pOut.  Each record uses random EFLAGS and a random destination value as input;
 * the expected outputs are produced by calling the entry's pfn worker.  uSrcIn and
 * uMisc are always emitted as 0.
 *
 * a_Type and a_SubTestType are accepted but not referenced by the generator body.
 */
1913	#ifdef TSTIEMAIMPL_WITH_GENERATOR
1914	# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1915	void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1916	{ \
1917	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1918	{ \
1919	GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1920	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1921	{ \
1922	a_TestType Test; \
1923	Test.fEflIn = RandEFlags(); \
1924	Test.fEflOut = Test.fEflIn; \
1925	Test.uDstIn = RandU ## a_cBits(); \
1926	Test.uDstOut = Test.uDstIn; \
1927	Test.uSrcIn = 0; \
1928	Test.uMisc = 0; \
1929	g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1930	RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1931	Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1932	} \
1933	GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1934	} \
1935	}
1936	#else
1937	# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1938	#endif
1939
/*
 * TEST_UNARY - instantiates the unary operator tests for one operand width.
 *
 * Parameters:
 *      a_cBits        Operand width; pasted into worker, table and global names.
 *      a_Type         C type of the destination operand.
 *      a_Fmt          Format string used for printing operand values.
 *      a_TestType     Test record type (only fEflIn, fEflOut, uDstIn, uDstOut are read).
 *      a_SubTestType  Name of the sub-test entry type declared via TYPEDEF_SUBTEST_TYPE.
 *
 * Expands to: the sub-test typedef, the g_aUnaryU<a_cBits> table with plain and
 * _locked entries for inc, dec, not and neg, the generator (GEN_UNARY) and
 * UnaryU<a_cBits>Test().
 *
 * The test function makes a single pass over the records of each enabled entry,
 * always calling the entry's pfn.  The worker is first run on local copies; a
 * mismatch in destination or EFLAGS is reported via RTTestFailed(), otherwise the
 * call is repeated through the g_pu<a_cBits> / g_pfEfl pointers and checked again.
 */
1940	#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1941	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1942	static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1943	{ \
1944	ENTRY(inc_u ## a_cBits), \
1945	ENTRY(inc_u ## a_cBits ## _locked), \
1946	ENTRY(dec_u ## a_cBits), \
1947	ENTRY(dec_u ## a_cBits ## _locked), \
1948	ENTRY(not_u ## a_cBits), \
1949	ENTRY(not_u ## a_cBits ## _locked), \
1950	ENTRY(neg_u ## a_cBits), \
1951	ENTRY(neg_u ## a_cBits ## _locked), \
1952	}; \
1953	\
1954	GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1955	\
1956	static void UnaryU ## a_cBits ## Test(void) \
1957	{ \
1958	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1959	{ \
1960	if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1961	a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1962	uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1963	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1964	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1965	{ \
1966	uint32_t fEfl = paTests[iTest].fEflIn; \
1967	a_Type uDst = paTests[iTest].uDstIn; \
1968	g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1969	if ( uDst != paTests[iTest].uDstOut \
1970	\|\| fEfl != paTests[iTest].fEflOut) \
1971	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1972	iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1973	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1974	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1975	else \
1976	{ \
1977	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1978	*g_pfEfl = paTests[iTest].fEflIn; \
1979	g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1980	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1981	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1982	} \
1983	} \
1984	} \
1985	}
/* Instantiate the unary operator tests for 8-, 16-, 32- and 64-bit operands. */
1986	TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1987	TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1988	TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1989	TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1990
1991	#ifdef TSTIEMAIMPL_WITH_GENERATOR
1992	static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
1993	{
1994	UnaryU8Generate(pOut, cTests);
1995	UnaryU16Generate(pOut, cTests);
1996	UnaryU32Generate(pOut, cTests);
1997	UnaryU64Generate(pOut, cTests);
1998	}
1999	#endif
2000
2001	static void UnaryTest(void)
2002	{
2003	UnaryU8Test();
2004	UnaryU16Test();
2005	UnaryU32Test();
2006	UnaryU64Test();
2007	}
2008
2009
2010	/*
2011	* Shifts.
2012	*
2013	* Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2014	*/
/*
 * GEN_SHIFT - with TSTIEMAIMPL_WITH_GENERATOR defined this expands to
 * ShiftU<a_cBits>Generate(), otherwise to nothing.
 *
 * The generator walks a_aSubTests and skips entries whose idxCpuEflFlavour is neither
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE nor g_idxCpuEflFlavour.  For the remaining entries
 * it writes an array of a_TestType records to pOut, with expected outputs taken from
 * the entry's pfnNative worker.
 *
 * Two records are emitted per iTest, sharing destination value and shift count:
 *      - one with random input EFLAGS,
 *      - one ("b") with the live EFLAGS bits inverted and X86_EFL_RA1_MASK set.
 * The shift count is a random byte masked with (a_cBits * 4 - 1), so it can exceed
 * the operand width.  uSrcIn is always emitted as 0.
 */
2015	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2016	# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2017	void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2018	{ \
2019	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2020	{ \
2021	if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2022	&& a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2023	continue; \
2024	GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2025	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2026	{ \
2027	a_TestType Test; \
2028	Test.fEflIn = RandEFlags(); \
2029	Test.fEflOut = Test.fEflIn; \
2030	Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2031	Test.uDstOut = Test.uDstIn; \
2032	Test.uSrcIn = 0; \
2033	Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2034	a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2035	RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
2036	Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2037	\
2038	Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) \| X86_EFL_RA1_MASK; \
2039	Test.fEflOut = Test.fEflIn; \
2040	Test.uDstOut = Test.uDstIn; \
2041	a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2042	RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
2043	Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2044	} \
2045	GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2046	} \
2047	}
2048	#else
2049	# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2050	#endif
2051
/*
 * TEST_SHIFT - instantiates the shift and rotate tests for one operand width.
 *
 * Parameters:
 *      a_cBits        Operand width; pasted into worker, type and global names.
 *      a_Type         C type of the destination operand.
 *      a_Fmt          Format string used for printing operand values.
 *      a_TestType     Test record type; uMisc carries the shift count.
 *      a_SubTestType  Name of the sub-test entry type declared via TYPEDEF_SUBTEST_TYPE.
 *      a_aSubTests    Name of the static sub-test table defined here.
 *
 * Expands to: the sub-test typedef, a table holding AMD and Intel entries for rol, ror,
 * rcl, rcr, shl, shr and sar, the generator (GEN_SHIFT) and ShiftU<a_cBits>Test().
 *
 * The test function skips entries rejected by SubTestAndCheckIfEnabled().  For each of
 * the COUNT_VARIATIONS() variations it runs every record: the worker is called on local
 * copies and the resulting destination and EFLAGS are compared with the recorded
 * outputs.  A mismatch is reported via RTTestFailed(); on a match the call is repeated
 * through the g_pu<a_cBits> / g_pfEfl pointers and checked again.  After each variation
 * the worker pointer is switched to the entry's pfnNative.
 */
2052	#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2053	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2054	static a_SubTestType const a_aSubTests[] = \
2055	{ \
2056	ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2057	ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2058	ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2059	ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2060	ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2061	ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2062	ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2063	ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2064	ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2065	ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2066	ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2067	ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2068	ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2069	ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
2070	}; \
2071	\
2072	GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2073	\
2074	static void ShiftU ## a_cBits ## Test(void) \
2075	{ \
2076	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2077	{ \
2078	if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2079	PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2080	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2081	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2082	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2083	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2084	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2085	{ \
2086	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2087	{ \
2088	uint32_t fEfl = paTests[iTest].fEflIn; \
2089	a_Type uDst = paTests[iTest].uDstIn; \
2090	pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2091	if ( uDst != paTests[iTest].uDstOut \
2092	\|\| fEfl != paTests[iTest].fEflOut ) \
2093	RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2094	iTest, iVar == 0 ? "" : "/n", \
2095	paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2096	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2097	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2098	else \
2099	{ \
2100	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
2101	*g_pfEfl = paTests[iTest].fEflIn; \
2102	pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2103	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2104	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2105	} \
2106	} \
2107	pfn = a_aSubTests[iFn].pfnNative; \
2108	} \
2109	} \
2110	}
/* Instantiate the shift and rotate tests for 8-, 16-, 32- and 64-bit operands. */
2111	TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2112	TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2113	TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2114	TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2115
2116	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2117	static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
2118	{
2119	ShiftU8Generate(pOut, cTests);
2120	ShiftU16Generate(pOut, cTests);
2121	ShiftU32Generate(pOut, cTests);
2122	ShiftU64Generate(pOut, cTests);
2123	}
2124	#endif
2125
2126	static void ShiftTest(void)
2127	{
2128	ShiftU8Test();
2129	ShiftU16Test();
2130	ShiftU32Test();
2131	ShiftU64Test();
2132	}
2133
2134
2135	/*
2136	* Multiplication and division.
2137	*
2138	* Note! The 8-bit functions have a different format, so we need to duplicate things.
2139	* Note! Currently ignoring undefined bits.
2140	*/
2141
2142	/* U8 */
/*
 * Sub-test table for the 8-bit mul, imul, div and idiv workers, with one AMD and one
 * Intel entry per instruction.
 *
 * NOTE(review): the second ENTRY_*_EX argument looks like a mask of EFLAGS bits with
 * undefined results, and the third presumably ends up in the entry's uExtra member,
 * which MulDivU8Test() uses as a mask of EFLAGS bits to ignore when comparing.
 * Confirm both against the ENTRY_AMD_EX / ENTRY_INTEL_EX definitions.
 */
2143	TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2144	static INT_MULDIV_U8_T const g_aMulDivU8[] =
2145	{
2146	ENTRY_AMD_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
2147	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
2148	ENTRY_INTEL_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
2149	ENTRY_AMD_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
2150	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
2151	ENTRY_INTEL_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
2152	ENTRY_AMD_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2153	ENTRY_INTEL_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2154	ENTRY_AMD_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2155	ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
2156	};
2157
2158	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2159	static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
2160	{
2161	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2162	{
2163	if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2164	&& g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2165	continue;
2166	GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T"); \
2167	for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2168	{
2169	MULDIVU8_TEST_T Test;
2170	Test.fEflIn = RandEFlags();
2171	Test.fEflOut = Test.fEflIn;
2172	Test.uDstIn = RandU16Dst(iTest);
2173	Test.uDstOut = Test.uDstIn;
2174	Test.uSrcIn = RandU8Src(iTest);
2175	Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2176	RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
2177	Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
2178	}
2179	GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
2180	}
2181	}
2182	#endif
2183
2184	static void MulDivU8Test(void)
2185	{
2186	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2187	{
2188	if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2189	MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2190	uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2191	uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2192	PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2193	uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2194	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2195	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2196	{
2197	for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2198	{
2199	uint32_t fEfl = paTests[iTest].fEflIn;
2200	uint16_t uDst = paTests[iTest].uDstIn;
2201	int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2202	if ( uDst != paTests[iTest].uDstOut
2203	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)
2204	\|\| rc != paTests[iTest].rc)
2205	RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2206	" %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2207	"%sexpected %#08x %#06RX16 %d%s\n",
2208	iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2209	iVar ? " " : "", fEfl, uDst, rc,
2210	iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2211	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn));
2212	else
2213	{
2214	*g_pu16 = paTests[iTest].uDstIn;
2215	*g_pfEfl = paTests[iTest].fEflIn;
2216	rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2217	RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2218	RTTEST_CHECK(g_hTest, (*g_pfEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn));
2219	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2220	}
2221	}
2222	pfn = g_aMulDivU8[iFn].pfnNative;
2223	}
2224	}
2225	}
2226
/*
 * GEN_MULDIV - with TSTIEMAIMPL_WITH_GENERATOR defined this expands to
 * MulDivU<a_cBits>Generate(), otherwise to nothing.
 *
 * The generator walks a_aSubTests and skips entries whose idxCpuEflFlavour is neither
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE nor g_idxCpuEflFlavour.  For the remaining entries
 * it writes an array of cTests a_TestType records to pOut.  Each record has random
 * EFLAGS, two random destination values and a random source value as inputs; the
 * expected outputs and the status code come from the entry's pfnNative worker.
 */
2227	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2228	# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2229	void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2230	{ \
2231	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2232	{ \
2233	if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2234	&& a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2235	continue; \
2236	GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2237	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2238	{ \
2239	a_TestType Test; \
2240	Test.fEflIn = RandEFlags(); \
2241	Test.fEflOut = Test.fEflIn; \
2242	Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2243	Test.uDst1Out = Test.uDst1In; \
2244	Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2245	Test.uDst2Out = Test.uDst2In; \
2246	Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2247	Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2248	RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
2249	Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
2250	Test.rc, iTest); \
2251	} \
2252	GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2253	} \
2254	}
2255	#else
2256	# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2257	#endif
2258
/*
 * TEST_MULDIV - instantiates the multiplication and division tests for one operand width.
 *
 * Parameters:
 *      a_cBits        Operand width; pasted into worker, type and global names.
 *      a_Type         C type of the two destination operands.
 *      a_Fmt          Format string used for printing operand values.
 *      a_TestType     Test record type (fEflIn/Out, uDst1In/Out, uDst2In/Out, uSrcIn, rc).
 *      a_SubTestType  Name of the sub-test entry type declared via TYPEDEF_SUBTEST_TYPE.
 *      a_aSubTests    Name of the static sub-test table defined here.
 *
 * Expands to: the sub-test typedef, a table holding AMD and Intel entries for mul, imul,
 * div and idiv, the generator (GEN_MULDIV) and MulDivU<a_cBits>Test().
 *
 * The test function skips entries rejected by SubTestAndCheckIfEnabled().  For each of
 * the COUNT_VARIATIONS() variations it runs every record: the worker is called on local
 * copies and both destinations, EFLAGS and the status code are compared with the
 * recorded outputs.  EFLAGS bits set in the entry's uExtra are ignored.  A mismatch is
 * reported via RTTestFailed(); on a match the call is repeated through the
 * g_pu<a_cBits>, g_pu<a_cBits>Two and g_pfEfl pointers and checked again.  After each
 * variation the worker pointer is switched to the entry's pfnNative.
 */
2259	#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2260	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2261	static a_SubTestType const a_aSubTests [] = \
2262	{ \
2263	ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
2264	ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
2265	ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
2266	ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
2267	ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
2268	ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
2269	ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
2270	ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
2271	}; \
2272	\
2273	GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2274	\
2275	static void MulDivU ## a_cBits ## Test(void) \
2276	{ \
2277	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2278	{ \
2279	if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2280	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2281	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2282	uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2283	PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2284	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2285	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2286	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2287	{ \
2288	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2289	{ \
2290	uint32_t fEfl = paTests[iTest].fEflIn; \
2291	a_Type uDst1 = paTests[iTest].uDst1In; \
2292	a_Type uDst2 = paTests[iTest].uDst2In; \
2293	int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2294	if ( uDst1 != paTests[iTest].uDst1Out \
2295	\|\| uDst2 != paTests[iTest].uDst2Out \
2296	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)\
2297	\|\| rc != paTests[iTest].rc) \
2298	RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2299	" -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2300	"expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2301	iTest, iVar == 0 ? "" : "/n", \
2302	paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2303	fEfl, uDst1, uDst2, rc, \
2304	paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2305	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn), \
2306	uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2307	(fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn) ? " eflags" : ""); \
2308	else \
2309	{ \
2310	*g_pu ## a_cBits = paTests[iTest].uDst1In; \
2311	*g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2312	*g_pfEfl = paTests[iTest].fEflIn; \
2313	rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2314	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2315	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2316	RTTEST_CHECK(g_hTest, (*g_pfEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn)); \
2317	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2318	} \
2319	} \
2320	pfn = a_aSubTests[iFn].pfnNative; \
2321	} \
2322	} \
2323	}
/* Instantiate the multiplication and division tests for 16-, 32- and 64-bit operands. */
2324	TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2325	TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2326	TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2327
2328	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2329	static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
2330	{
2331	MulDivU8Generate(pOut, cTests);
2332	MulDivU16Generate(pOut, cTests);
2333	MulDivU32Generate(pOut, cTests);
2334	MulDivU64Generate(pOut, cTests);
2335	}
2336	#endif
2337
2338	static void MulDivTest(void)
2339	{
2340	MulDivU8Test();
2341	MulDivU16Test();
2342	MulDivU32Test();
2343	MulDivU64Test();
2344	}
2345
2346
2347	/*
2348	* BSWAP
2349	*/
2350	static void BswapTest(void)
2351	{
2352	if (SubTestAndCheckIfEnabled("bswap_u16"))
2353	{
2354	*g_pu32 = UINT32_C(0x12345678);
2355	iemAImpl_bswap_u16(g_pu32);
2356	#if 0
2357	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12347856), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2358	#else
2359	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12340000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2360	#endif
2361	*g_pu32 = UINT32_C(0xffff1122);
2362	iemAImpl_bswap_u16(g_pu32);
2363	#if 0
2364	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff2211), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2365	#else
2366	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff0000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2367	#endif
2368	}
2369
2370	if (SubTestAndCheckIfEnabled("bswap_u32"))
2371	{
2372	*g_pu32 = UINT32_C(0x12345678);
2373	iemAImpl_bswap_u32(g_pu32);
2374	RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2375	}
2376
2377	if (SubTestAndCheckIfEnabled("bswap_u64"))
2378	{
2379	*g_pu64 = UINT64_C(0x0123456789abcdef);
2380	iemAImpl_bswap_u64(g_pu64);
2381	RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2382	}
2383	}
2384
2385
2386
2387	/*********************************************************************************************************************************
2388	* Floating point (x87 style) *
2389	*********************************************************************************************************************************/
2390
2391	/*
2392	* FPU constant loading.
2393	*/
/* Sub-test entry type for the constant loaders: FPU_LD_CONST_TEST_T records and
 * PFNIEMAIMPLFPUR80LDCONST workers. */
2394	TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2395
/* The constant-load sub-tests, one entry per worker.  Both FpuLdConstGenerate() and
 * FpuLoadConstTest() iterate this table. */
2396	static const FPU_LD_CONST_T g_aFpuLdConst[] =
2397	{
2398	ENTRY(fld1),
2399	ENTRY(fldl2t),
2400	ENTRY(fldl2e),
2401	ENTRY(fldpi),
2402	ENTRY(fldlg2),
2403	ENTRY(fldln2),
2404	ENTRY(fldz),
2405	};
2406
2407	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2408	static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2409	{
2410	X86FXSTATE State;
2411	RT_ZERO(State);
2412	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2413	{
2414	GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2415	for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2416	{
2417	State.FCW = RandFcw();
2418	State.FSW = RandFsw();
2419
2420	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2421	{
2422	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2423	State.FCW = (State.FCW & ~X86_FCW_RC_MASK) \| (iRounding << X86_FCW_RC_SHIFT);
2424	g_aFpuLdConst[iFn].pfn(&State, &Res);
2425	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2426	State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2427	}
2428	}
2429	GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2430	}
2431	}
2432	#endif
2433
2434	static void FpuLoadConstTest(void)
2435	{
2436	/*
2437	* Inputs:
2438	* - FSW: C0, C1, C2, C3
2439	* - FCW: Exception masks, Precision control, Rounding control.
2440	*
2441	* C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2442	*/
2443	X86FXSTATE State;
2444	RT_ZERO(State);
2445	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2446	{
2447	if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2448	continue;
2449
2450	uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2451	FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2452	PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2453	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2454	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2455	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2456	{
2457	for (uint32_t iTest = 0; iTest < cTests; iTest++)
2458	{
2459	State.FCW = paTests[iTest].fFcw;
2460	State.FSW = paTests[iTest].fFswIn;
2461	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2462	pfn(&State, &Res);
2463	if ( Res.FSW != paTests[iTest].fFswOut
2464	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2465	RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2466	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2467	Res.FSW, FormatR80(&Res.r80Result),
2468	paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2469	FswDiff(Res.FSW, paTests[iTest].fFswOut),
2470	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2471	FormatFcw(paTests[iTest].fFcw) );
2472	}
2473	pfn = g_aFpuLdConst[iFn].pfnNative;
2474	}
2475	}
2476	}
2477
2478
2479	/*
2480	* Load floating point values from memory.
2481	*/
/*
 * GEN_FPU_LOAD - with TSTIEMAIMPL_WITH_GENERATOR defined this expands to
 * FpuLdR<a_cBits>Generate(), otherwise to nothing.
 *
 * The generator writes one array of a_TestType records per a_aSubTests entry to pOut.
 * For each iTest it draws a random FCW, FSW and input value of type a_rdTypeIn, then
 * emits four records, one per rounding control value (0 thru 3), with the results
 * taken from the entry's pfn worker.  This yields cTests * 4 records per entry.
 */
2482	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2483	# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2484	static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2485	{ \
2486	X86FXSTATE State; \
2487	RT_ZERO(State); \
2488	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2489	{ \
2490	GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2491	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2492	{ \
2493	State.FCW = RandFcw(); \
2494	State.FSW = RandFsw(); \
2495	a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
2496	\
2497	for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2498	{ \
2499	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2500	State.FCW = (State.FCW & ~X86_FCW_RC_MASK) \| (iRounding << X86_FCW_RC_SHIFT); \
2501	a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2502	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
2503	State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
2504	GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
2505	} \
2506	} \
2507	GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2508	} \
2509	}
2510	#else
2511	# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
2512	#endif
2513
/*
 * TEST_FPU_LOAD - instantiates the "load floating point value from memory" tests for
 * one source format.
 *
 * Parameters:
 *      a_cBits        Width of the source value; pasted into worker and function names.
 *      a_rdTypeIn     Type of the source value.
 *      a_SubTestType  Name of the sub-test entry type declared via TYPEDEF_SUBTEST_TYPE.
 *      a_aSubTests    Name of the static sub-test table defined here.
 *      a_TestType     Test record type (fFcw, fFswIn, fFswOut, rdResult, InVal).
 *
 * Expands to: the worker function typedefs, a table with the single entry
 * fld_r80_from_r<a_cBits>, the generator (GEN_FPU_LOAD) and FpuLdR<a_cBits>Test().
 *
 * The test function skips entries rejected by SubTestAndCheckIfEnabled().  For each of
 * the COUNT_VARIATIONS() variations it runs every record: FCW and FSW are loaded from
 * the record, the worker is called on a copy of the input value, and the resulting FSW
 * and 80-bit value are compared with the recorded ones.  A mismatch is reported via
 * RTTestFailed().  After each variation the worker pointer is switched to pfnNative.
 */
2514	#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2515	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2516	typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2517	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2518	\
2519	static const a_SubTestType a_aSubTests[] = \
2520	{ \
2521	ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2522	}; \
2523	GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2524	\
2525	static void FpuLdR ## a_cBits ## Test(void) \
2526	{ \
2527	X86FXSTATE State; \
2528	RT_ZERO(State); \
2529	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2530	{ \
2531	if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2532	\
2533	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2534	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2535	PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2536	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2537	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2538	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2539	{ \
2540	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2541	{ \
2542	a_rdTypeIn const InVal = paTests[iTest].InVal; \
2543	State.FCW = paTests[iTest].fFcw; \
2544	State.FSW = paTests[iTest].fFswIn; \
2545	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2546	pfn(&State, &Res, &InVal); \
2547	if ( Res.FSW != paTests[iTest].fFswOut \
2548	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2549	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2550	"%s -> fsw=%#06x %s\n" \
2551	"%s expected %#06x %s%s%s (%s)\n", \
2552	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2553	FormatR ## a_cBits(&paTests[iTest].InVal), \
2554	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2555	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2556	FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2557	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2558	FormatFcw(paTests[iTest].fFcw) ); \
2559	} \
2560	pfn = a_aSubTests[iFn].pfnNative; \
2561	} \
2562	} \
2563	}
2564
/* Instantiate the floating point load tests for 80-, 64- and 32-bit source values. */
2565	TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2566	TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2567	TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2568
2569	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2570	static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2571	{
2572	FpuLdR80Generate(pOut, cTests);
2573	FpuLdR64Generate(pOut, cTests);
2574	FpuLdR32Generate(pOut, cTests);
2575	}
2576	#endif
2577
2578	static void FpuLdMemTest(void)
2579	{
2580	FpuLdR80Test();
2581	FpuLdR64Test();
2582	FpuLdR32Test();
2583	}
2584
2585
2586	/*
2587	* Load integer values from memory.
2588	*/
/*
 * GEN_FPU_LOAD_INT - with TSTIEMAIMPL_WITH_GENERATOR defined this expands to
 * FpuLdI<a_cBits>Generate(), otherwise to nothing.
 *
 * The generator writes one array of a_TestType records per a_aSubTests entry to pOut.
 * For each iTest it draws a random FCW and FSW plus a random input value, obtained
 * from RandU<a_cBits>Src() and cast to a_iTypeIn.  It then emits four records, one per
 * rounding control value (0 thru 3), with the results taken from the entry's pfn
 * worker.  The input value is printed using a_szFmtIn.
 */
2589	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2590	# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2591	static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2592	{ \
2593	X86FXSTATE State; \
2594	RT_ZERO(State); \
2595	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2596	{ \
2597	GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2598	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2599	{ \
2600	State.FCW = RandFcw(); \
2601	State.FSW = RandFsw(); \
2602	a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
2603	\
2604	for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2605	{ \
2606	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2607	State.FCW = (State.FCW & ~X86_FCW_RC_MASK) \| (iRounding << X86_FCW_RC_SHIFT); \
2608	a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2609	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
2610	State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
2611	} \
2612	} \
2613	GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2614	} \
2615	}
2616	#else
2617	# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
2618	#endif
2619
/**
 * Instantiates the 'fild' load test for one integer width.
 *
 * Expands to the worker function typedefs, the subtest entry type, the subtest
 * table @a a_aSubTests (a single fild_r80_from_i<a_cBits> entry), the generator
 * (through GEN_FPU_LOAD_INT) and FpuLdI<a_cBits>Test().
 *
 * The test function replays every stored vector through the worker and reports
 * a failure when either the output FSW or the 80-bit result differs from the
 * stored expectation.  The vectors are run COUNT_VARIATIONS() times; after the
 * first pass the worker pointer is replaced by the entry's pfnNative and
 * failures are tagged with "/n".
 *
 * @param   a_cBits         Integer width; pasted into identifiers.
 * @param   a_iTypeIn       C type of the input value.
 * @param   a_szFmtIn       String literal with the format for printing the input.
 * @param   a_SubTestType   Name of the subtest entry type to define.
 * @param   a_aSubTests     Name of the subtest table to define.
 * @param   a_TestType      Test vector structure type.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2669
2670	TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2671	TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2672	TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2673
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the integer load generators for all three widths, widest first.
 *
 * @param   pOut    Output stream, handed on unchanged to each generator.
 * @param   cTests  Test count, handed on unchanged to each generator.
 */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2682
2683	static void FpuLdIntTest(void)
2684	{
2685	FpuLdI64Test();
2686	FpuLdI32Test();
2687	FpuLdI16Test();
2688	}
2689
2690
2691	/*
2692	* Load binary coded decimal values from memory.
2693	*/
2694	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2695	typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2696	TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2697
2698	static const FPU_LD_D80_T g_aFpuLdD80[] =
2699	{
2700	ENTRY(fld_r80_from_d80)
2701	};
2702
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes one FPU_D80_IN_TEST_T array per entry of g_aFpuLdD80.
 *
 * Every iteration draws a random FCW, FSW and packed BCD input, then runs the
 * worker once per rounding control value (0..3) and prints one initializer
 * line per run.
 *
 * @param   pOut    The stream to write the arrays to.
 * @param   cTests  Number of random inputs per subtest (each yields four lines).
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Keep the random FCW, only swap in the rounding control under test. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2731
2732	static void FpuLdD80Test(void)
2733	{
2734	X86FXSTATE State;
2735	RT_ZERO(State);
2736	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2737	{
2738	if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2739	continue;
2740
2741	uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2742	FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2743	PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2744	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2745	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2746	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2747	{
2748	for (uint32_t iTest = 0; iTest < cTests; iTest++)
2749	{
2750	RTPBCD80U const InVal = paTests[iTest].InVal;
2751	State.FCW = paTests[iTest].fFcw;
2752	State.FSW = paTests[iTest].fFswIn;
2753	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2754	pfn(&State, &Res, &InVal);
2755	if ( Res.FSW != paTests[iTest].fFswOut
2756	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2757	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2758	"%s -> fsw=%#06x %s\n"
2759	"%s expected %#06x %s%s%s (%s)\n",
2760	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2761	FormatD80(&paTests[iTest].InVal),
2762	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2763	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2764	FswDiff(Res.FSW, paTests[iTest].fFswOut),
2765	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2766	FormatFcw(paTests[iTest].fFcw) );
2767	}
2768	pfn = g_aFpuLdD80[iFn].pfnNative;
2769	}
2770	}
2771	}
2772
2773
/*
 * Store floating point values to memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed inputs appended after the random ones when generating the 32-bit store vectors. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Fixed inputs appended after the random ones when generating the 64-bit store vectors. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Fixed inputs appended after the random ones when generating the 80-bit store vectors. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};

/**
 * Defines FpuStR<a_cBits>Generate(), which writes one test vector array per
 * entry of @a a_aSubTests.
 *
 * The inputs are @a cTests random values followed by the entries of
 * g_aFpuStR<a_cBits>Specials.  Each input is run for all four rounding control
 * values combined with all eight OM/UM/PM mask bit combinations, one
 * initializer line per run.
 *
 * @param   a_cBits      Output width; pasted into identifiers and passed to RandR80Src().
 * @param   a_rdType     C type of the output value.
 * @param   a_aSubTests  The subtest table to iterate.
 * @param   a_TestType   Test vector type; stringified for GenerateArrayStart().
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Only even iMask values are visited, so (iMask >> 1) runs 0..7 over the OM/UM/PM bits. */ \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    /* Start from a 0xfe byte pattern rather than zeros (same pattern the test function uses). */ \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing. */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2840
/**
 * Instantiates the 'fst' store test for one floating point output width.
 *
 * Expands to the worker function typedefs, the subtest entry type, the subtest
 * table @a a_aSubTests (a single fst_r80_to_r<a_cBits> entry), the generator
 * (through GEN_FPU_STORE) and FpuStR<a_cBits>Test().
 *
 * The test function replays every stored vector through the worker and reports
 * a failure when either the output FSW or the stored value differs from the
 * expectation.  The vectors are run COUNT_VARIATIONS() times; after the first
 * pass the worker pointer is replaced by the entry's pfnNative and failures
 * are tagged with "/n".
 *
 * @param   a_cBits         Output width; pasted into identifiers.
 * @param   a_rdType        C type of the output value.
 * @param   a_SubTestType   Name of the subtest entry type to define.
 * @param   a_aSubTests     Name of the subtest table to define.
 * @param   a_TestType      Test vector structure type.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_rdType         OutVal; \
                /* Start from a 0xfe byte pattern rather than zeros (same pattern the generator uses). */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2895
2896	TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
2897	TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
2898	TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2899
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the floating point store generators for all three widths, widest first.
 *
 * @param   pOut    Output stream, handed on unchanged to each generator.
 * @param   cTests  Test count, handed on unchanged to each generator.
 */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2908
2909	static void FpuStMemTest(void)
2910	{
2911	FpuStR80Test();
2912	FpuStR64Test();
2913	FpuStR32Test();
2914	}
2915
2916
2917	/*
2918	* Store integer values to memory or register.
2919	*/
2920	TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2921	TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2922	TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2923
2924	static const FPU_ST_I16_T g_aFpuStI16[] =
2925	{
2926	ENTRY(fist_r80_to_i16),
2927	ENTRY_AMD( fistt_r80_to_i16, 0),
2928	ENTRY_INTEL(fistt_r80_to_i16, 0),
2929	};
2930	static const FPU_ST_I32_T g_aFpuStI32[] =
2931	{
2932	ENTRY(fist_r80_to_i32),
2933	ENTRY(fistt_r80_to_i32),
2934	};
2935	static const FPU_ST_I64_T g_aFpuStI64[] =
2936	{
2937	ENTRY(fist_r80_to_i64),
2938	ENTRY(fistt_r80_to_i64),
2939	};
2940
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed inputs appended after the random ones when generating the 16-bit integer store vectors. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Fixed inputs appended after the random ones when generating the 32-bit integer store vectors. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Fixed inputs appended after the random ones when generating the 64-bit integer store vectors. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};

/**
 * Defines FpuStI<a_cBits>Generate(), which writes one test vector array per
 * applicable entry of @a a_aSubTests.
 *
 * An entry's pfnNative is used when it is set, otherwise its pfn.  Entries
 * whose idxCpuEflFlavour is not IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are written
 * to @a pOutCpu, and only when that flavour equals g_idxCpuEflFlavour; all
 * other entries are written to @a pOut.
 *
 * The inputs are @a cTests random values followed by the entries of
 * g_aFpuStI<a_cBits>Specials.  Each input is run for all four rounding control
 * values combined with all eight OM/UM/PM mask bit combinations, one
 * initializer line per run.
 *
 * @param   a_cBits      Integer width; pasted into identifiers and passed to RandR80Src().
 * @param   a_iType      C type of the output integer.
 * @param   a_szFmt      Unused by the generator; kept for symmetry with TEST_FPU_STORE_INT.
 * @param   a_aSubTests  The subtest table to iterate.
 * @param   a_TestType   Test vector type; stringified for GenerateArrayStart().
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Only even iMask values are visited, so (iMask >> 1) runs 0..7 over the OM/UM/PM bits. */ \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType  iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing. */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
3083
/**
 * Instantiates the integer store test for one width.
 *
 * Expands to the generator (through GEN_FPU_STORE_INT) and
 * FpuStI<a_cBits>Test().  The subtest table @a a_aSubTests must already be
 * defined.
 *
 * The test function replays every stored vector through the worker and reports
 * a failure when either the output FSW or the stored integer differs from the
 * expectation.  The vectors are run COUNT_VARIATIONS() times; after the first
 * pass the worker pointer is replaced by the entry's pfnNative and failures
 * are tagged with "/n".
 *
 * @param   a_cBits         Integer width; pasted into identifiers.
 * @param   a_iType         C type of the output integer.
 * @param   a_szFmt         String literal with the format for printing the output.
 * @param   a_SubTestType   Subtest entry type (not referenced by the expansion).
 * @param   a_aSubTests     The subtest table to iterate.
 * @param   a_TestType      Test vector structure type.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_iType          iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3126
3127	//fistt_r80_to_i16 diffs for AMD, of course :-)
3128
3129	TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3130	TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3131	TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3132
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the integer store generators for all three widths, widest first.
 *
 * @param   pOut     First output stream, handed on unchanged to each generator.
 * @param   pOutCpu  Second output stream, handed on unchanged to each generator.
 * @param   cTests   Test count, handed on unchanged to each generator.
 */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3141
3142	static void FpuStIntTest(void)
3143	{
3144	FpuStI64Test();
3145	FpuStI32Test();
3146	FpuStI16Test();
3147	}
3148
3149
3150	/*
3151	* Store as packed BCD value (memory).
3152	*/
3153	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3154	typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3155	TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3156
3157	static const FPU_ST_D80_T g_aFpuStD80[] =
3158	{
3159	ENTRY(fst_r80_to_d80),
3160	};
3161
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes one FPU_ST_D80_TEST_T array per entry of g_aFpuStD80.
 *
 * The inputs are @a cTests random values followed by the fixed values in
 * s_aSpecials.  Each input is run for all four rounding control values
 * combined with all eight OM/UM/PM mask bit combinations, one initializer line
 * per run.
 *
 * @param   pOut    The stream to write the arrays to.
 * @param   cTests  Number of random inputs per subtest.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                /* Only even iMask values are visited, so (iMask >> 1) runs 0..7 over the OM/UM/PM bits. */
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3215
3216
3217	static void FpuStD80Test(void)
3218	{
3219	X86FXSTATE State;
3220	RT_ZERO(State);
3221	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3222	{
3223	if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3224	continue;
3225
3226	uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3227	FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3228	PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3229	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3230	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3231	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3232	{
3233	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3234	{
3235	RTFLOAT80U const InVal = paTests[iTest].InVal;
3236	uint16_t uFswOut = 0;
3237	RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3238	State.FCW = paTests[iTest].fFcw;
3239	State.FSW = paTests[iTest].fFswIn;
3240	pfn(&State, &uFswOut, &OutVal, &InVal);
3241	if ( uFswOut != paTests[iTest].fFswOut
3242	\|\| !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3243	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3244	"%s -> fsw=%#06x %s\n"
3245	"%s expected %#06x %s%s%s (%s)\n",
3246	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3247	FormatR80(&paTests[iTest].InVal),
3248	iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3249	iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3250	FswDiff(uFswOut, paTests[iTest].fFswOut),
3251	RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3252	FormatFcw(paTests[iTest].fFcw) );
3253	}
3254	pfn = g_aFpuStD80[iFn].pfnNative;
3255	}
3256	}
3257	}
3258
3259
3260
3261	/*********************************************************************************************************************************
3262	* x87 FPU Binary Operations *
3263	*********************************************************************************************************************************/
3264
3265	/*
3266	* Binary FPU operations on two 80-bit floating point values.
3267	*/
3268	TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3269	enum { kFpuBinaryHint_fprem = 1, };
3270
3271	static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3272	{
3273	ENTRY(fadd_r80_by_r80),
3274	ENTRY(fsub_r80_by_r80),
3275	ENTRY(fsubr_r80_by_r80),
3276	ENTRY(fmul_r80_by_r80),
3277	ENTRY(fdiv_r80_by_r80),
3278	ENTRY(fdivr_r80_by_r80),
3279	ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3280	ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3281	ENTRY(fscale_r80_by_r80),
3282	ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3283	ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3284	ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3285	ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3286	ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3287	ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3288	};
3289
3290	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3291	static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3292	{
3293	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3294
3295	static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3296	{
3297	{ RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3298	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3299	{ RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3300	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3301	{ RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3302	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3303	{ RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3304	RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3305	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3306	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3307	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3308	RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3309	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3310	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3311	/* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3312	once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3313	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3314	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3315	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3316	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3317	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3318	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3319	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3320	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3321	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3322	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3323	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3324	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3325	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3326	RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3327	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3328	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3329	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3330	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3331	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3332	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3333	{ RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3334	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3335	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3336	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3337	{ RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3338	RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3339	/* fscale: Negative variants for the essentials of the above. */
3340	{ RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3341	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3342	{ RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3343	RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3344	{ RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3345	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3346	{ RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3347	RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3348	/* fscale: Some fun with denormals and pseudo-denormals. */
3349	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3350	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3351	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3352	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3353	{ RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3354	{ RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3355	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3356	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3357	{ RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3358	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3359	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3360	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3361	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3362	{ RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3363	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3364	};
3365
3366	X86FXSTATE State;
3367	RT_ZERO(State);
3368	uint32_t cMinNormalPairs = (cTests - 144) / 4;
3369	uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3370	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3371	{
3372	PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3373	PRTSTREAM pOutFn = pOut;
3374	if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3375	{
3376	if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3377	continue;
3378	pOutFn = pOutCpu;
3379	}
3380
3381	GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3382	uint32_t iTestOutput = 0;
3383	uint32_t cNormalInputPairs = 0;
3384	uint32_t cTargetRangeInputs = 0;
3385	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3386	{
3387	RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3388	RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3389	bool fTargetRange = false;
3390	if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3391	{
3392	cNormalInputPairs++;
3393	if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3394	&& (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3395	cTargetRangeInputs += fTargetRange = true;
3396	else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3397	if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3398	{ /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3399	InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3400	InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3401	cTargetRangeInputs += fTargetRange = true;
3402	}
3403	}
3404	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3405	{
3406	iTest -= 1;
3407	continue;
3408	}
3409
3410	uint16_t const fFcwExtra = 0;
3411	uint16_t const fFcw = RandFcw();
3412	State.FSW = RandFsw();
3413
3414	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3415	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3416	{
3417	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
3418	\| (iRounding << X86_FCW_RC_SHIFT)
3419	\| (iPrecision << X86_FCW_PC_SHIFT)
3420	\| X86_FCW_MASK_ALL;
3421	IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3422	pfn(&State, &ResM, &InVal1, &InVal2);
3423	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3424	State.FCW \| fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3425	GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3426
3427	State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3428	IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3429	pfn(&State, &ResU, &InVal1, &InVal2);
3430	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3431	State.FCW \| fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3432	GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3433
3434	uint16_t fXcpt = (ResM.FSW \| ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3435	if (fXcpt)
3436	{
3437	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
3438	IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3439	pfn(&State, &Res1, &InVal1, &InVal2);
3440	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3441	State.FCW \| fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3442	GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3443	if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3444	{
3445	fXcpt \|= Res1.FSW & X86_FSW_XCPT_MASK;
3446	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
3447	IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3448	pfn(&State, &Res2, &InVal1, &InVal2);
3449	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3450	State.FCW \| fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3451	GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3452	}
3453	if (!RT_IS_POWER_OF_TWO(fXcpt))
3454	for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3455	if (fUnmasked & fXcpt)
3456	{
3457	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| (fXcpt & ~fUnmasked);
3458	IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3459	pfn(&State, &Res3, &InVal1, &InVal2);
3460	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3461	State.FCW \| fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3462	GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3463	}
3464	}
3465
3466	/* If the values are in range and caused no exceptions, do the whole series of
3467	partial reminders till we get the non-partial one or run into an exception. */
3468	if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3469	{
3470	IEMFPURESULT ResPrev = ResM;
3471	for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 \| X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3472	{
3473	State.FCW = State.FCW \| X86_FCW_MASK_ALL;
3474	State.FSW = ResPrev.FSW;
3475	IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3476	pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3477	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3478	State.FCW \| fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3479	GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3480	iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3481	ResPrev = ResSeq;
3482	}
3483	}
3484	}
3485	}
3486	GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3487	}
3488	}
3489	#endif
3490
3491
3492	static void FpuBinaryR80Test(void)
3493	{
3494	X86FXSTATE State;
3495	RT_ZERO(State);
3496	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3497	{
3498	if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3499	continue;
3500
3501	uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3502	FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3503	PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3504	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3505	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3506	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3507	{
3508	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3509	{
3510	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3511	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3512	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3513	State.FCW = paTests[iTest].fFcw;
3514	State.FSW = paTests[iTest].fFswIn;
3515	pfn(&State, &Res, &InVal1, &InVal2);
3516	if ( Res.FSW != paTests[iTest].fFswOut
3517	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3518	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3519	"%s -> fsw=%#06x %s\n"
3520	"%s expected %#06x %s%s%s (%s)\n",
3521	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3522	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3523	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3524	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3525	FswDiff(Res.FSW, paTests[iTest].fFswOut),
3526	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3527	FormatFcw(paTests[iTest].fFcw) );
3528	}
3529	pfn = g_aFpuBinaryR80[iFn].pfnNative;
3530	}
3531	}
3532	}
3533
3534
/*
 * Binary FPU operations on one 80-bit floating point value and either a 64-bit
 * or 32-bit floating point value, or a 32-bit or 16-bit signed integer.
 */
/* Counterparts of RTFLOAT80U_IS_NORMAL & friends for integer second operands,
   selected via 'a_Type2 ## _IS_NORMAL' in the generator macros.  The argument
   is ignored and the result is always 1. */
#define int64_t_IS_NORMAL(a_pValue)     1
#define int32_t_IS_NORMAL(a_pValue)     1
#define int16_t_IS_NORMAL(a_pValue)     1
3541
3542	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3543	static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
3544	{
3545	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3546	RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3547	};
3548	static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
3549	{
3550	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3551	RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3552	};
3553	static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
3554	{
3555	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3556	};
3557	static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
3558	{
3559	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3560	};
3561
/**
 * Defines FpuBinary<a_UpBits>Generate(), the test data generator for the
 * workers listed in a_aSubTests.
 *
 * For each worker one a_TestType array is written to pOut.  The inputs are
 * cTests (raised to at least 160) pairs from RandR80Src1() and
 * Rand<a_UpBits>Src2(), followed by the s_aFpuBinary<a_UpBits>Specials entries.
 * Towards the end of the generated range, pairs that are not both normal are
 * drawn again until (cTests - 144) / 4 normal pairs have been seen.  Each pair
 * is run for all 4 rounding and 4 precision control values, once with the
 * X86_FCW_MASK_ALL bits clear and once with them set.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    X86FXSTATE     FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        uint32_t idxTest           = 0; \
        while (idxTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials)) \
        { \
            bool const       fGenerated = idxTest < cTests; \
            RTFLOAT80U const InVal1     = fGenerated ? RandR80Src1(idxTest, a_cBits, a_fIntType) \
                                        : s_aFpuBinary ## a_UpBits ## Specials[idxTest - cTests].Val1; \
            a_Type2 const    InVal2     = fGenerated ? Rand ## a_UpBits ## Src2(idxTest) \
                                        : s_aFpuBinary ## a_UpBits ## Specials[idxTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (fGenerated && cNormalInputPairs < cMinNormalPairs && idxTest + cMinNormalPairs >= cTests) \
                continue; /* not enough normal pairs yet: redo this index with new inputs */ \
            \
            uint16_t const fFcw = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            /* Drop the fields which are iterated over below. */ \
            uint16_t const fFcwBase = (uint16_t)(fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                    for (uint16_t fMask = 0; fMask <= X86_FCW_MASK_ALL; fMask += X86_FCW_MASK_ALL) \
                    { \
                        FpuState.FCW = fFcwBase \
                                     | (iRounding  << X86_FCW_RC_SHIFT) \
                                     | (iPrecision << X86_FCW_PC_SHIFT) \
                                     | fMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[idxFn].pfn(&FpuState, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     FpuState.FCW, FpuState.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), idxTest, iRounding, iPrecision, fMask ? 'c' : 'u'); \
                    } \
            \
            idxTest++; \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
3613	#else
/* Without TSTIEMAIMPL_WITH_GENERATOR no generator function is emitted. */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) /* nothing */
3615	#endif
3616
/**
 * Instantiates everything needed for one "r80 by <a_LoBits>" operand flavour:
 * the sub-test type, the a_aSubTests table (add, mul, sub, subr, div and divr
 * workers, with a_I inserted after the leading 'f'), the generator via
 * GEN_FPU_BINARY_SMALL, and FpuBinary<a_UpBits>Test().
 *
 * The test function replays every recorded vector of each enabled sub-test,
 * first through pfn and then, for further variations, through pfnNative, and
 * reports mismatching FSW or result values through RTTestFailed().
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[idxFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[idxFn].pcTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
        if (!cTests) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t idxVar = 0; idxVar < cVars; idxVar++) \
        { \
            /* Pass #0 uses the regular worker, every later pass the native one. */ \
            PFNIEMAIMPLFPU ## a_UpBits const pfnWorker = idxVar == 0 \
                                                       ? a_aSubTests[idxFn].pfn \
                                                       : a_aSubTests[idxFn].pfnNative; \
            const char * const pszTag = idxVar ? "/n" : ""; \
            const char * const pszPad = idxVar ? " " : ""; \
            \
            for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
            { \
                a_TestType const * const pTest  = &paTests[idxTest]; \
                RTFLOAT80U const         InVal1 = pTest->InVal1; \
                a_Type2 const            InVal2 = pTest->InVal2; \
                IEMFPURESULT             Res    = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                \
                FpuState.FCW = pTest->fFcw; \
                FpuState.FSW = pTest->fFswIn; \
                pfnWorker(&FpuState, &Res, &InVal1, &InVal2); \
                \
                bool const fSameValue = RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &pTest->OutVal); \
                if (Res.FSW == pTest->fFswOut && fSameValue) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x %s\n" \
                                      "%s expected %#06x %s%s%s (%s)\n", \
                             idxTest, pszTag, pTest->fFcw, pTest->fFswIn, \
                             FormatR80(&pTest->InVal1), Format ## a_UpBits(&pTest->InVal2), \
                             pszPad, Res.FSW, FormatR80(&Res.r80Result), \
                             pszPad, pTest->fFswOut, FormatR80(&pTest->OutVal), \
                             FswDiff(Res.FSW, pTest->fFswOut), \
                             fSameValue ? "" : " - val", \
                             FormatFcw(pTest->fFcw) ); \
            } \
        } \
    } \
}
3672
3673	TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
3674	TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
3675	TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
3676	TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3677
3678
/*
 * Binary operations on an 80-bit floating point value and a second operand
 * (80-, 64- or 32-bit floating point, or 32- or 16-bit integer) only affecting FSW.
 */
3682	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3683	static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3684	{
3685	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3686	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3687	};
3688	static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3689	{
3690	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3691	RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3692	};
3693	static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3694	{
3695	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3696	RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3697	};
3698	static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3699	{
3700	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3701	};
3702	static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3703	{
3704	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3705	};
3706
/**
 * Defines FpuBinaryFsw<a_UpBits>Generate(), the test data generator for the
 * FSW-only workers listed in a_aSubTests.
 *
 * For each worker one a_TestType array is written to pOut.  The inputs are
 * cTests (raised to at least 160) pairs from RandR80Src1() and
 * Rand<a_UpBits>Src2(), followed by the s_aFpuBinaryFsw<a_UpBits>Specials
 * entries.  Towards the end of the generated range, pairs that are not both
 * normal are drawn again until (cTests - 144) / 4 normal pairs have been seen.
 * Each pair is run twice: with the X86_FCW_MASK_ALL bits clear and with them set.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    uint32_t const cMinNormalPairs = (cTests - 144) / 4; \
    X86FXSTATE     FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        uint32_t idxTest           = 0; \
        while (idxTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials)) \
        { \
            bool const       fGenerated = idxTest < cTests; \
            RTFLOAT80U const InVal1     = fGenerated ? RandR80Src1(idxTest, a_cBits, a_fIntType) \
                                        : s_aFpuBinaryFsw ## a_UpBits ## Specials[idxTest - cTests].Val1; \
            a_Type2 const    InVal2     = fGenerated ? Rand ## a_UpBits ## Src2(idxTest) \
                                        : s_aFpuBinaryFsw ## a_UpBits ## Specials[idxTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (fGenerated && cNormalInputPairs < cMinNormalPairs && idxTest + cMinNormalPairs >= cTests) \
                continue; /* not enough normal pairs yet: redo this index with new inputs */ \
            \
            uint16_t const fFcw = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t fMask = 0; fMask <= X86_FCW_MASK_ALL; fMask += X86_FCW_MASK_ALL) \
            { \
                uint16_t fFswOut = 0; \
                FpuState.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | fMask; \
                a_aSubTests[idxFn].pfn(&FpuState, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             FpuState.FCW, FpuState.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             idxTest, fMask ? 'c' : 'u'); \
            } \
            \
            idxTest++; \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
3750	#else
/* Without TSTIEMAIMPL_WITH_GENERATOR no generator function is emitted. */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) /* nothing */
3752	#endif
3753
/**
 * Instantiates everything needed for one FSW-only operand flavour: the
 * sub-test type, the a_aSubTests table (entries given as the variadic
 * arguments), the generator via GEN_FPU_BINARY_FSW, and
 * FpuBinaryFsw<a_UpBits>Test().
 *
 * The test function replays every recorded vector of each enabled sub-test,
 * first through pfn and then, for further variations, through pfnNative, and
 * reports a mismatching output FSW through RTTestFailed().
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[idxFn].pszName)) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[idxFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[idxFn].pcTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[idxFn]); \
        if (!cTests) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t idxVar = 0; idxVar < cVars; idxVar++) \
        { \
            /* Pass #0 uses the regular worker, every later pass the native one. */ \
            PFNIEMAIMPLFPU ## a_UpBits ## FSW const pfnWorker = idxVar == 0 \
                                                              ? a_aSubTests[idxFn].pfn \
                                                              : a_aSubTests[idxFn].pfnNative; \
            const char * const pszTag = idxVar ? "/n" : ""; \
            const char * const pszPad = idxVar ? " " : ""; \
            \
            for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
            { \
                a_TestType const * const pTest   = &paTests[idxTest]; \
                uint16_t                 fFswOut = 0; \
                RTFLOAT80U const         InVal1  = pTest->InVal1; \
                a_Type2 const            InVal2  = pTest->InVal2; \
                \
                FpuState.FCW = pTest->fFcw; \
                FpuState.FSW = pTest->fFswIn; \
                pfnWorker(&FpuState, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut == pTest->fFswOut) \
                    continue; \
                \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x\n" \
                                      "%s expected %#06x %s (%s)\n", \
                             idxTest, pszTag, pTest->fFcw, pTest->fFswIn, \
                             FormatR80(&pTest->InVal1), Format ## a_UpBits(&pTest->InVal2), \
                             pszPad, fFswOut, \
                             pszPad, pTest->fFswOut, \
                             FswDiff(fFswOut, pTest->fFswOut), FormatFcw(pTest->fFcw) ); \
            } \
        } \
    } \
}
3801
3802	TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
3803	TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
3804	TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
3805	TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
3806	TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3807
3808
/*
 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
 */
3812	TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
3813
3814	static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
3815	{
3816	ENTRY(fcomi_r80_by_r80),
3817	ENTRY(fucomi_r80_by_r80),
3818	};
3819
3820	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3821	static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
3822	{
3823	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3824	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3825	};
3826
3827	static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
3828	{
3829	cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
3830
3831	X86FXSTATE State;
3832	RT_ZERO(State);
3833	uint32_t cMinNormalPairs = (cTests - 144) / 4;
3834	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3835	{
3836	GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
3837	uint32_t cNormalInputPairs = 0;
3838	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
3839	{
3840	RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
3841	RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
3842	if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3843	cNormalInputPairs++;
3844	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3845	{
3846	iTest -= 1;
3847	continue;
3848	}
3849
3850	uint16_t const fFcw = RandFcw();
3851	State.FSW = RandFsw();
3852
3853	/* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
3854	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3855	{
3856	State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) \| iMask;
3857	uint16_t uFswOut = 0;
3858	uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
3859	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
3860	State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
3861	iTest, iMask ? 'c' : 'u');
3862	}
3863	}
3864	GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
3865	}
3866	}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
3868
3869	static void FpuBinaryEflR80Test(void)
3870	{
3871	X86FXSTATE State;
3872	RT_ZERO(State);
3873	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3874	{
3875	if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3876	continue;
3877
3878	uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3879	FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3880	PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3881	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3882	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3883	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3884	{
3885	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3886	{
3887	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3888	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3889	State.FCW = paTests[iTest].fFcw;
3890	State.FSW = paTests[iTest].fFswIn;
3891	uint16_t uFswOut = 0;
3892	uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3893	if ( uFswOut != paTests[iTest].fFswOut
3894	\|\| fEflOut != paTests[iTest].fEflOut)
3895	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3896	"%s -> fsw=%#06x efl=%#08x\n"
3897	"%s expected %#06x %#08x %s%s (%s)\n",
3898	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3899	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3900	iVar ? " " : "", uFswOut, fEflOut,
3901	iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3902	FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3903	FormatFcw(paTests[iTest].fFcw));
3904	}
3905	pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3906	}
3907	}
3908	}
3909
3910
3911	/*********************************************************************************************************************************
3912	* x87 FPU Unary Operations *
3913	*********************************************************************************************************************************/
3914
3915	/*
3916	* Unary FPU operations on one 80-bit floating point value.
3917	*
3918	* Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3919	* a rounding error or not.
3920	*/
3921	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
3922
3923	enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /probably not accurate, but need impl to know/, kUnary_Rounding_F2xm1 };
3924	static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
3925	{
3926	ENTRY_EX( fabs_r80, kUnary_Accurate),
3927	ENTRY_EX( fchs_r80, kUnary_Accurate),
3928	ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
3929	ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
3930	ENTRY_EX( fsqrt_r80, kUnary_Accurate),
3931	ENTRY_EX( frndint_r80, kUnary_Accurate),
3932	ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
3933	ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
3934	ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
3935	ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
3936	};
3937
3938	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3939
3940	static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3941	{
3942	if ( enmKind == kUnary_Rounding_F2xm1
3943	&& RTFLOAT80U_IS_NORMAL(pr80Val)
3944	&& pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3945	&& pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3946	return true;
3947	return false;
3948	}
3949
3950	static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3951	{
3952	static RTFLOAT80U const s_aSpecials[] =
3953	{
3954	RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
3955	RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
3956	RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
3957	RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
3958	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
3959	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
3960	RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
3961	RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
3962	RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
3963	RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
3964	};
3965	X86FXSTATE State;
3966	RT_ZERO(State);
3967	uint32_t cMinNormals = cTests / 4;
3968	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3969	{
3970	PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
3971	PRTSTREAM pOutFn = pOut;
3972	if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3973	{
3974	if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3975	continue;
3976	pOutFn = pOutCpu;
3977	}
3978
3979	GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
3980	uint32_t iTestOutput = 0;
3981	uint32_t cNormalInputs = 0;
3982	uint32_t cTargetRangeInputs = 0;
3983	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3984	{
3985	RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
3986	if (RTFLOAT80U_IS_NORMAL(&InVal))
3987	{
3988	if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
3989	{
3990	unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
3991	? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 / : RTFLOAT80U_EXP_BIAS + 63 + 1 / 2^64..2^-64 */;
3992	unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
3993	if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
3994	cTargetRangeInputs++;
3995	else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
3996	{
3997	InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
3998	cTargetRangeInputs++;
3999	}
4000	}
4001	cNormalInputs++;
4002	}
4003	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4004	{
4005	iTest -= 1;
4006	continue;
4007	}
4008
4009	uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4010	uint16_t const fFcw = RandFcw();
4011	State.FSW = RandFsw();
4012
4013	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4014	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4015	{
4016	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
4017	\| (iRounding << X86_FCW_RC_SHIFT)
4018	\| (iPrecision << X86_FCW_PC_SHIFT)
4019	\| X86_FCW_MASK_ALL;
4020	IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4021	pfn(&State, &ResM, &InVal);
4022	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4023	State.FCW \| fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4024	GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4025
4026	State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4027	IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4028	pfn(&State, &ResU, &InVal);
4029	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4030	State.FCW \| fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4031	GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4032
4033	uint16_t fXcpt = (ResM.FSW \| ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4034	if (fXcpt)
4035	{
4036	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4037	IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4038	pfn(&State, &Res1, &InVal);
4039	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4040	State.FCW \| fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4041	GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4042	if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4043	{
4044	fXcpt \|= Res1.FSW & X86_FSW_XCPT_MASK;
4045	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4046	IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4047	pfn(&State, &Res2, &InVal);
4048	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4049	State.FCW \| fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4050	GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4051	}
4052	if (!RT_IS_POWER_OF_TWO(fXcpt))
4053	for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4054	if (fUnmasked & fXcpt)
4055	{
4056	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| (fXcpt & ~fUnmasked);
4057	IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4058	pfn(&State, &Res3, &InVal);
4059	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4060	State.FCW \| fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4061	GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4062	}
4063	}
4064	}
4065	}
4066	GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4067	}
4068	}
4069	#endif
4070
4071	static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4072	{
4073	if (fFcw1 == fFcw2)
4074	return true;
4075	if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4076	{
4077	*pfRndErr = true;
4078	return true;
4079	}
4080	return false;
4081	}
4082
4083	static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4084	{
4085	if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4086	return true;
4087	if ( fRndErrOk
4088	&& pr80Val1->s.fSign == pr80Val2->s.fSign)
4089	{
4090	if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4091	&& ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4092	? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4093	: pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4094	\|\|
4095	( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4096	&& pr80Val1->s.uMantissa == UINT64_MAX
4097	&& pr80Val2->s.uMantissa == RT_BIT_64(63))
4098	\|\|
4099	( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4100	&& pr80Val2->s.uMantissa == UINT64_MAX
4101	&& pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4102	{
4103	*pfRndErr = true;
4104	return true;
4105	}
4106	}
4107	return false;
4108	}
4109
4110
4111	static void FpuUnaryR80Test(void)
4112	{
4113	X86FXSTATE State;
4114	RT_ZERO(State);
4115	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4116	{
4117	if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4118	continue;
4119
4120	uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4121	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4122	PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4123	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4124	uint32_t cRndErrs = 0;
4125	uint32_t cPossibleRndErrs = 0;
4126	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4127	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4128	{
4129	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4130	{
4131	RTFLOAT80U const InVal = paTests[iTest].InVal;
4132	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4133	bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4134	State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4135	State.FSW = paTests[iTest].fFswIn;
4136	pfn(&State, &Res, &InVal);
4137	bool fRndErr = false;
4138	if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4139	\|\| !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4140	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4141	"%s -> fsw=%#06x %s\n"
4142	"%s expected %#06x %s%s%s%s (%s)\n",
4143	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4144	FormatR80(&paTests[iTest].InVal),
4145	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4146	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4147	FswDiff(Res.FSW, paTests[iTest].fFswOut),
4148	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4149	fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4150	cRndErrs += fRndErr;
4151	cPossibleRndErrs += fRndErrOk;
4152	}
4153	pfn = g_aFpuUnaryR80[iFn].pfnNative;
4154	}
4155	if (cPossibleRndErrs > 0)
4156	RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4157	}
4158	}
4159
4160
4161	/*
4162	* Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4163	*/
4164	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4165
4166	static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4167	{
4168	ENTRY(ftst_r80),
4169	ENTRY_EX(fxam_r80, 1),
4170	};
4171
4172	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4173	static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4174	{
4175	static RTFLOAT80U const s_aSpecials[] =
4176	{
4177	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4178	};
4179
4180	X86FXSTATE State;
4181	RT_ZERO(State);
4182	uint32_t cMinNormals = cTests / 4;
4183	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4184	{
4185	bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4186	PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4187	PRTSTREAM pOutFn = pOut;
4188	if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4189	{
4190	if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4191	continue;
4192	pOutFn = pOutCpu;
4193	}
4194	State.FTW = 0;
4195
4196	GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4197	uint32_t cNormalInputs = 0;
4198	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4199	{
4200	RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4201	if (RTFLOAT80U_IS_NORMAL(&InVal))
4202	cNormalInputs++;
4203	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4204	{
4205	iTest -= 1;
4206	continue;
4207	}
4208
4209	uint16_t const fFcw = RandFcw();
4210	State.FSW = RandFsw();
4211	if (!fIsFxam)
4212	{
4213	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4214	{
4215	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4216	{
4217	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4218	{
4219	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
4220	\| (iRounding << X86_FCW_RC_SHIFT)
4221	\| (iPrecision << X86_FCW_PC_SHIFT)
4222	\| iMask;
4223	uint16_t fFswOut = 0;
4224	pfn(&State, &fFswOut, &InVal);
4225	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4226	State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4227	iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4228	}
4229	}
4230	}
4231	}
4232	else
4233	{
4234	uint16_t fFswOut = 0;
4235	uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4236	State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4237	State.FCW = fFcw;
4238	pfn(&State, &fFswOut, &InVal);
4239	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4240	fFcw \| fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4241	}
4242	}
4243	GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4244	}
4245	}
4246	#endif
4247
4248
4249	static void FpuUnaryFswR80Test(void)
4250	{
4251	X86FXSTATE State;
4252	RT_ZERO(State);
4253	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4254	{
4255	if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4256	continue;
4257
4258	uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4259	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4260	PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4261	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4262	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4263	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4264	{
4265	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4266	{
4267	RTFLOAT80U const InVal = paTests[iTest].InVal;
4268	uint16_t fFswOut = 0;
4269	State.FSW = paTests[iTest].fFswIn;
4270	State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4271	State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4272	pfn(&State, &fFswOut, &InVal);
4273	if (fFswOut != paTests[iTest].fFswOut)
4274	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4275	"%s -> fsw=%#06x\n"
4276	"%s expected %#06x %s (%s%s)\n",
4277	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4278	FormatR80(&paTests[iTest].InVal),
4279	iVar ? " " : "", fFswOut,
4280	iVar ? " " : "", paTests[iTest].fFswOut,
4281	FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4282	paTests[iTest].fFcw & 0x80 ? " empty" : "");
4283	}
4284	pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4285	}
4286	}
4287	}
4288
4289	/*
4290	* Unary FPU operations on one 80-bit floating point value, but with two outputs.
4291	*/
4292	TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4293
4294	static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4295	{
4296	ENTRY(fxtract_r80_r80),
4297	ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4298	ENTRY_INTEL(fptan_r80_r80, 0),
4299	ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4300	ENTRY_INTEL(fsincos_r80_r80, 0),
4301	};
4302
4303	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4304	static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4305	{
4306	static RTFLOAT80U const s_aSpecials[] =
4307	{
4308	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4309	};
4310
4311	X86FXSTATE State;
4312	RT_ZERO(State);
4313	uint32_t cMinNormals = cTests / 4;
4314	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4315	{
4316	PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4317	PRTSTREAM pOutFn = pOut;
4318	if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4319	{
4320	if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4321	continue;
4322	pOutFn = pOutCpu;
4323	}
4324
4325	GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4326	uint32_t iTestOutput = 0;
4327	uint32_t cNormalInputs = 0;
4328	uint32_t cTargetRangeInputs = 0;
4329	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4330	{
4331	RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4332	if (RTFLOAT80U_IS_NORMAL(&InVal))
4333	{
4334	if (iFn != 0)
4335	{
4336	unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4337	unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4338	if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4339	cTargetRangeInputs++;
4340	else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4341	{
4342	InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4343	cTargetRangeInputs++;
4344	}
4345	}
4346	cNormalInputs++;
4347	}
4348	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4349	{
4350	iTest -= 1;
4351	continue;
4352	}
4353
4354	uint16_t const fFcwExtra = 0; /* for rounding error indication */
4355	uint16_t const fFcw = RandFcw();
4356	State.FSW = RandFsw();
4357
4358	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4359	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4360	{
4361	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
4362	\| (iRounding << X86_FCW_RC_SHIFT)
4363	\| (iPrecision << X86_FCW_PC_SHIFT)
4364	\| X86_FCW_MASK_ALL;
4365	IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4366	pfn(&State, &ResM, &InVal);
4367	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4368	State.FCW \| fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4369	GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4370
4371	State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4372	IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4373	pfn(&State, &ResU, &InVal);
4374	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4375	State.FCW \| fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4376	GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4377
4378	uint16_t fXcpt = (ResM.FSW \| ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4379	if (fXcpt)
4380	{
4381	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4382	IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4383	pfn(&State, &Res1, &InVal);
4384	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4385	State.FCW \| fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4386	GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4387	if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4388	{
4389	fXcpt \|= Res1.FSW & X86_FSW_XCPT_MASK;
4390	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| fXcpt;
4391	IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4392	pfn(&State, &Res2, &InVal);
4393	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4394	State.FCW \| fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4395	GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4396	}
4397	if (!RT_IS_POWER_OF_TWO(fXcpt))
4398	for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4399	if (fUnmasked & fXcpt)
4400	{
4401	State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) \| (fXcpt & ~fUnmasked);
4402	IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4403	pfn(&State, &Res3, &InVal);
4404	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4405	State.FCW \| fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4406	GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4407	}
4408	}
4409	}
4410	}
4411	GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4412	}
4413	}
4414	#endif
4415
4416
4417	static void FpuUnaryTwoR80Test(void)
4418	{
4419	X86FXSTATE State;
4420	RT_ZERO(State);
4421	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4422	{
4423	if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4424	continue;
4425
4426	uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4427	FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4428	PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4429	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4430	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4431	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4432	{
4433	for (uint32_t iTest = 0; iTest < cTests; iTest++)
4434	{
4435	IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4436	RTFLOAT80U const InVal = paTests[iTest].InVal;
4437	State.FCW = paTests[iTest].fFcw;
4438	State.FSW = paTests[iTest].fFswIn;
4439	pfn(&State, &Res, &InVal);
4440	if ( Res.FSW != paTests[iTest].fFswOut
4441	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4442	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4443	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4444	"%s -> fsw=%#06x %s %s\n"
4445	"%s expected %#06x %s %s %s%s%s (%s)\n",
4446	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4447	FormatR80(&paTests[iTest].InVal),
4448	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4449	iVar ? " " : "", paTests[iTest].fFswOut,
4450	FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4451	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4452	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4453	FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4454	}
4455	pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4456	}
4457	}
4458	}
4459
4460
4461	/*********************************************************************************************************************************
4462	* SSE floating point Binary Operations *
4463	*********************************************************************************************************************************/
4464
4465	/*
4466	* Binary SSE operations on packed single precision floating point values.
4467	*/
4468	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4469
4470	static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4471	{
4472	ENTRY_BIN(addps_u128),
4473	ENTRY_BIN(mulps_u128),
4474	ENTRY_BIN(subps_u128),
4475	ENTRY_BIN(minps_u128),
4476	ENTRY_BIN(divps_u128),
4477	ENTRY_BIN(maxps_u128),
4478	};
4479
4480	#ifdef TSTIEMAIMPL_WITH_GENERATOR
4481	static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4482	{
4483	cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4484
4485	static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4486	{
4487	{ { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4488	{ RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4489	/** @todo More specials. */
4490	};
4491
4492	X86FXSTATE State;
4493	RT_ZERO(State);
4494	uint32_t cMinNormalPairs = (cTests - 144) / 4;
4495	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4496	{
4497	PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4498
4499	PRTSTREAM pStrmOut = NULL;
4500	int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4501	if (RT_FAILURE(rc))
4502	{
4503	RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4504	return RTEXITCODE_FAILURE;
4505	}
4506
4507	uint32_t cNormalInputPairs = 0;
4508	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4509	{
4510	SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4511
4512	TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4513	TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4514	TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4515	TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4516
4517	TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4518	TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4519	TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4520	TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4521
4522	if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4523	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4524	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4525	&& RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4526	cNormalInputPairs++;
4527	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4528	{
4529	iTest -= 1;
4530	continue;
4531	}
4532
4533	uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4534	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4535	for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4536	for (uint8_t iFz = 0; iFz < 2; iFz++)
4537	{
4538	State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4539	\| (iRounding << X86_MXCSR_RC_SHIFT)
4540	\| (iDaz ? X86_MXCSR_DAZ : 0)
4541	\| (iFz ? X86_MXCSR_FZ : 0)
4542	\| X86_MXCSR_XCPT_MASK;
4543	IEMSSERESULT ResM; RT_ZERO(ResM);
4544	pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4545	TestData.fMxcsrIn = State.MXCSR;
4546	TestData.fMxcsrOut = ResM.MXCSR;
4547	TestData.OutVal = ResM.uResult;
4548	RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4549
4550	State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4551	IEMSSERESULT ResU; RT_ZERO(ResU);
4552	pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4553	TestData.fMxcsrIn = State.MXCSR;
4554	TestData.fMxcsrOut = ResU.MXCSR;
4555	TestData.OutVal = ResU.uResult;
4556	RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4557
4558	uint16_t fXcpt = (ResM.MXCSR \| ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4559	if (fXcpt)
4560	{
4561	State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) \| fXcpt;
4562	IEMSSERESULT Res1; RT_ZERO(Res1);
4563	pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4564	TestData.fMxcsrIn = State.MXCSR;
4565	TestData.fMxcsrOut = Res1.MXCSR;
4566	TestData.OutVal = Res1.uResult;
4567	RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4568
4569	if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4570	{
4571	fXcpt \|= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4572	State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) \| (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4573	IEMSSERESULT Res2; RT_ZERO(Res2);
4574	pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4575	TestData.fMxcsrIn = State.MXCSR;
4576	TestData.fMxcsrOut = Res2.MXCSR;
4577	TestData.OutVal = Res2.uResult;
4578	RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4579	}
4580	if (!RT_IS_POWER_OF_TWO(fXcpt))
4581	for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4582	if (fUnmasked & fXcpt)
4583	{
4584	State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) \| ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4585	IEMSSERESULT Res3; RT_ZERO(Res3);
4586	pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4587	TestData.fMxcsrIn = State.MXCSR;
4588	TestData.fMxcsrOut = Res3.MXCSR;
4589	TestData.OutVal = Res3.uResult;
4590	RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4591	}
4592	}
4593	}
4594	}
4595	rc = RTStrmClose(pStrmOut);
4596	if (RT_FAILURE(rc))
4597	{
4598	RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4599	return RTEXITCODE_FAILURE;
4600	}
4601	}
4602
4603	return RTEXITCODE_SUCCESS;
4604	}
4605	#endif
4606
4607	static void SseBinaryR32Test(void)
4608	{
4609	X86FXSTATE State;
4610	RT_ZERO(State);
4611	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4612	{
4613	if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4614	continue;
4615
4616	uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4617	SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4618	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4619	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4620	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4621	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4622	{
4623	for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4624	{
4625	IEMSSERESULT Res; RT_ZERO(Res);
4626
4627	State.MXCSR = paTests[iTest].fMxcsrIn;
4628	pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4629	bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4630	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4631	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4632	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4633	if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4634	\|\| !fValsIdentical)
4635	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4636	"%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4637	"%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4638	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4639	FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4640	FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4641	FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4642	FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4643	iVar ? " " : "", Res.MXCSR,
4644	FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4645	FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4646	iVar ? " " : "", paTests[iTest].fMxcsrOut,
4647	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4648	FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4649	MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4650	!fValsIdentical ? " - val" : "",
4651	FormatMxcsr(paTests[iTest].fMxcsrIn) );
4652	}
4653	pfn = g_aSseBinaryR32[iFn].pfnNative;
4654	}
4655	}
4656	}
4657
4658
/*
 * Binary SSE operations on packed double precision floating point values.
 */
4662	TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4663
4664	static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4665	{
4666	ENTRY_BIN(addpd_u128),
4667	ENTRY_BIN(mulpd_u128),
4668	ENTRY_BIN(subpd_u128),
4669	ENTRY_BIN(minpd_u128),
4670	ENTRY_BIN(divpd_u128),
4671	ENTRY_BIN(maxpd_u128),
4672	};
4673
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the workers in g_aSseBinaryR64.
 *
 * One file is written per worker; its name is produced by formatting
 * @a pszDataFileFmt with the worker name.  Each file is a sequence of raw
 * SSE_BINARY_TEST_T records.  The first cTests input sets come from
 * RandR64Src() / RandR64Src2(), the remaining ones from s_aSpecials.  Every
 * input set is run through all rounding mode / DAZ / FZ combinations, each
 * with several exception mask settings.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; it is given
 *                          the worker name as its only argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Prefer the native worker when the table entry has one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Each lane of a special takes its own array element (the upper
               lane used to be read from element 0 as well). */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest)  : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest)  : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];

            /* Towards the end of the random range, redo the current index
               until enough all-normal input sets have been produced. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits cleared, the flags seen so far OR'ed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If that run reported flags not yet in fXcpt, add them and
                               rerun with the mask bits for the accumulated set. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* With more than one flag in the set: one record per flag,
                               masking every flag of the set except that one. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4793
4794
4795	static void SseBinaryR64Test(void)
4796	{
4797	X86FXSTATE State;
4798	RT_ZERO(State);
4799	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4800	{
4801	if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4802	continue;
4803
4804	uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4805	SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4806	PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4807	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4808	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4809	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4810	{
4811	for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4812	{
4813	IEMSSERESULT Res; RT_ZERO(Res);
4814
4815	State.MXCSR = paTests[iTest].fMxcsrIn;
4816	pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4817	if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4818	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4819	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4820	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
4821	"%s -> mxcsr=%#08x %s'%s\n"
4822	"%s expected %#08x %s'%s%s%s (%s)\n",
4823	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4824	FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
4825	FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
4826	iVar ? " " : "", Res.MXCSR,
4827	FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
4828	iVar ? " " : "", paTests[iTest].fMxcsrOut,
4829	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
4830	MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4831	( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4832	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4833	? " - val" : "",
4834	FormatMxcsr(paTests[iTest].fMxcsrIn) );
4835	}
4836	pfn = g_aSseBinaryR64[iFn].pfnNative;
4837	}
4838	}
4839	}
4840
4841
4842	/*
4843	* Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
4844	*/
/** Sub-test descriptor type for the table below, declared via
 *  TYPEDEF_SUBTEST_TYPE from the SSE_BINARY_U128_R32_TEST_T record type and
 *  the PFNIEMAIMPLFPSSEF2U128R32 worker pointer type. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/** Workers processed, in table order, by SseBinaryU128R32Generate() and
 *  SseBinaryU128R32Test().  Each worker is called with a 128-bit first operand
 *  (InVal1) and a single 32-bit float second operand (r32Val2). */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
};
4854
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfnWorker with the MXCSR currently held in @a pFxState, stores the
 * MXCSR in/out values and the result in @a pRecord and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value reported by the worker.
 * @param   pStrmOut    The data file stream being written.
 * @param   pfnWorker   The worker to call.
 * @param   pFxState    State supplying the input MXCSR.
 * @param   pRecord     Record with the inputs already filled in; the MXCSR
 *                      and output value fields are overwritten.
 */
static uint32_t SseBinaryU128R32GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R32 pfnWorker,
                                               X86FXSTATE *pFxState, SSE_BINARY_U128_R32_TEST_T *pRecord)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfnWorker(pFxState, &Res, &pRecord->InVal1, &pRecord->r32Val2);
    pRecord->fMxcsrIn  = pFxState->MXCSR;
    pRecord->fMxcsrOut = Res.MXCSR;
    pRecord->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the workers in g_aSseBinaryU128R32.
 *
 * One file per worker is written, named by formatting @a pszDataFileFmt with
 * the worker name, and filled with raw SSE_BINARY_U128_R32_TEST_T records.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; it is given
 *                          the worker name as its only argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    uint32_t const cMinNormal = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryU128R32); idxFn++)
    {
        /* Use the native worker if the entry has one, the regular one otherwise. */
        PFNIEMAIMPLFPSSEF2U128R32 pfnWorker = g_aSseBinaryU128R32[idxFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryU128R32[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormal = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T Record; RT_ZERO(Record);

            /* Random inputs for the first cTests rounds, the specials after that. */
            bool const fRandom = iTest < cTests;
            if (fRandom)
            {
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    Record.InVal1.ar32[iLane] = RandR32Src(iTest);
                Record.r32Val2 = RandR32Src2(iTest);
            }
            else
            {
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    Record.InVal1.ar32[iLane] = s_aSpecials[iTest - cTests].aVal1[iLane];
                Record.r32Val2 = s_aSpecials[iTest - cTests].Val2;
            }

            /* Near the end of the random range, redo the current index until
               enough all-normal input sets have been produced. */
            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[3])
                                 && RTFLOAT32U_IS_NORMAL(&Record.r32Val2);
            if (fAllNormal)
                cNormal++;
            else if (cNormal < cMinNormal && iTest + cMinNormal >= cTests && fRandom)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsrFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        FxState.MXCSR = (fMxcsrFlags & ~X86_MXCSR_RC_MASK)
                                      | (iRounding << X86_MXCSR_RC_SHIFT)
                                      | (iDaz ? X86_MXCSR_DAZ : 0)
                                      | (iFz  ? X86_MXCSR_FZ  : 0)
                                      | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);

                        /* All exception mask bits cleared. */
                        FxState.MXCSR = FxState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Mask bits cleared, the flags seen so far OR'ed into the input MXCSR. */
                        FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                        uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* New flags showed up: add them and rerun with the mask bits of the whole set. */
                        if ((fFlags1 & fXcpt) != fFlags1)
                        {
                            fXcpt |= fFlags1;
                            FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                        }

                        /* More than one flag in the set: one run per flag, masking all the others. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R32GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                            }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4978
4979	static void SseBinaryU128R32Test(void)
4980	{
4981	X86FXSTATE State;
4982	RT_ZERO(State);
4983	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
4984	{
4985	if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
4986	continue;
4987
4988	uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
4989	SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
4990	PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
4991	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
4992	if (!cTests) RTTestSkipped(g_hTest, "no tests");
4993	for (uint32_t iVar = 0; iVar < cVars; iVar++)
4994	{
4995	for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4996	{
4997	IEMSSERESULT Res; RT_ZERO(Res);
4998
4999	State.MXCSR = paTests[iTest].fMxcsrIn;
5000	pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5001	bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5002	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5003	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5004	&& RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5005	if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5006	\|\| !fValsIdentical)
5007	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5008	"%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5009	"%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5010	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5011	FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5012	FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5013	FormatR32(&paTests[iTest].r32Val2),
5014	iVar ? " " : "", Res.MXCSR,
5015	FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5016	FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5017	iVar ? " " : "", paTests[iTest].fMxcsrOut,
5018	FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5019	FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5020	MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5021	!fValsIdentical ? " - val" : "",
5022	FormatMxcsr(paTests[iTest].fMxcsrIn) );
5023	}
5024	}
5025	}
5026	}
5027
5028
5029	/*
5030	* Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
5031	*/
/** Sub-test descriptor type for the table below, declared via
 *  TYPEDEF_SUBTEST_TYPE from the SSE_BINARY_U128_R64_TEST_T record type and
 *  the PFNIEMAIMPLFPSSEF2U128R64 worker pointer type. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/** Workers processed, in table order, by SseBinaryU128R64Generate() and
 *  SseBinaryU128R64Test().  Each worker is called with a 128-bit first operand
 *  (InVal1) and a single 64-bit float second operand (r64Val2). */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
};
5041
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfnWorker with the MXCSR currently held in @a pFxState, stores the
 * MXCSR in/out values and the result in @a pRecord and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value reported by the worker.
 * @param   pStrmOut    The data file stream being written.
 * @param   pfnWorker   The worker to call.
 * @param   pFxState    State supplying the input MXCSR.
 * @param   pRecord     Record with the inputs already filled in; the MXCSR
 *                      and output value fields are overwritten.
 */
static uint32_t SseBinaryU128R64GenerateRecord(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R64 pfnWorker,
                                               X86FXSTATE *pFxState, SSE_BINARY_U128_R64_TEST_T *pRecord)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfnWorker(pFxState, &Res, &pRecord->InVal1, &pRecord->r64Val2);
    pRecord->fMxcsrIn  = pFxState->MXCSR;
    pRecord->fMxcsrOut = Res.MXCSR;
    pRecord->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the workers in g_aSseBinaryU128R64.
 *
 * One file per worker is written, named by formatting @a pszDataFileFmt with
 * the worker name, and filled with raw SSE_BINARY_U128_R64_TEST_T records.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the output file name; it is given
 *                          the worker name as its only argument.
 * @param   cTests          Number of random input sets; raised to at least 192.
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    uint32_t const cMinNormal = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryU128R64); idxFn++)
    {
        /* Use the native worker if the entry has one, the regular one otherwise. */
        PFNIEMAIMPLFPSSEF2U128R64 pfnWorker = g_aSseBinaryU128R64[idxFn].pfnNative;
        if (!pfnWorker)
            pfnWorker = g_aSseBinaryU128R64[idxFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[idxFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormal = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T Record; RT_ZERO(Record);

            /* Random inputs for the first cTests rounds, the specials after that. */
            bool const fRandom = iTest < cTests;
            if (fRandom)
            {
                Record.InVal1.ar64[0] = RandR64Src(iTest);
                Record.InVal1.ar64[1] = RandR64Src(iTest);
                Record.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                Record.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                Record.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                Record.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            /* Near the end of the random range, redo the current index until
               enough all-normal input sets have been produced. */
            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&Record.InVal1.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&Record.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&Record.r64Val2);
            if (fAllNormal)
                cNormal++;
            else if (cNormal < cMinNormal && iTest + cMinNormal >= cTests && fRandom)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsrFlags = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        FxState.MXCSR = (fMxcsrFlags & ~X86_MXCSR_RC_MASK)
                                      | (iRounding << X86_MXCSR_RC_SHIFT)
                                      | (iDaz ? X86_MXCSR_DAZ : 0)
                                      | (iFz  ? X86_MXCSR_FZ  : 0)
                                      | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);

                        /* All exception mask bits cleared. */
                        FxState.MXCSR = FxState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Mask bits cleared, the flags seen so far OR'ed into the input MXCSR. */
                        FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                        uint32_t const fFlags1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* New flags showed up: add them and rerun with the mask bits of the whole set. */
                        if ((fFlags1 & fXcpt) != fFlags1)
                        {
                            fXcpt |= fFlags1;
                            FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                        }

                        /* More than one flag in the set: one run per flag, masking all the others. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R64GenerateRecord(pStrmOut, pfnWorker, &FxState, &Record);
                            }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[idxFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5159
5160
5161	static void SseBinaryU128R64Test(void)
5162	{
5163	X86FXSTATE State;
5164	RT_ZERO(State);
5165	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5166	{
5167	if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5168	continue;
5169
5170	uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5171	SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5172	PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5173	uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5174	if (!cTests) RTTestSkipped(g_hTest, "no tests");
5175	for (uint32_t iVar = 0; iVar < cVars; iVar++)
5176	{
5177	for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5178	{
5179	IEMSSERESULT Res; RT_ZERO(Res);
5180
5181	State.MXCSR = paTests[iTest].fMxcsrIn;
5182	pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5183	if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5184	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5185	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5186	RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5187	"%s -> mxcsr=%#08x %s'%s\n"
5188	"%s expected %#08x %s'%s%s%s (%s)\n",
5189	iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5190	FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5191	FormatR64(&paTests[iTest].r64Val2),
5192	iVar ? " " : "", Res.MXCSR,
5193	FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5194	iVar ? " " : "", paTests[iTest].fMxcsrOut,
5195	FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5196	MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5197	( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5198	\|\| !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5199	? " - val" : "",
5200	FormatMxcsr(paTests[iTest].fMxcsrIn) );
5201	}
5202	}
5203	}
5204	}
5205
5206
5207
5208	int main(int argc, char **argv)
5209	{
5210	int rc = RTR3InitExe(argc, &argv, 0);
5211	if (RT_FAILURE(rc))
5212	return RTMsgInitFailure(rc);
5213
5214	/*
5215	* Determine the host CPU.
5216	* If not using the IEMAllAImpl.asm code, this will be set to Intel.
5217	*/
5218	#if (defined(RT_ARCH_X86) \|\| defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
5219	g_idxCpuEflFlavour = ASMIsAmdCpu() \|\| ASMIsHygonCpu()
5220	? IEMTARGETCPU_EFL_BEHAVIOR_AMD
5221	: IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5222	#else
5223	g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5224	#endif
5225
5226	/*
5227	* Parse arguments.
5228	*/
5229	enum { kModeNotSet, kModeTest, kModeGenerate }
5230	enmMode = kModeNotSet;
5231	bool fInt = true;
5232	bool fFpuLdSt = true;
5233	bool fFpuBinary1 = true;
5234	bool fFpuBinary2 = true;
5235	bool fFpuOther = true;
5236	bool fCpuData = true;
5237	bool fCommonData = true;
5238	bool fSseFpBinary = true;
5239	uint32_t const cDefaultTests = 96;
5240	uint32_t cTests = cDefaultTests;
5241	RTGETOPTDEF const s_aOptions[] =
5242	{
5243	// mode:
5244	{ "--generate", 'g', RTGETOPT_REQ_NOTHING },
5245	{ "--test", 't', RTGETOPT_REQ_NOTHING },
5246	// test selection (both)
5247	{ "--all", 'a', RTGETOPT_REQ_NOTHING },
5248	{ "--none", 'z', RTGETOPT_REQ_NOTHING },
5249	{ "--zap", 'z', RTGETOPT_REQ_NOTHING },
5250	{ "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
5251	{ "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
5252	{ "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
5253	{ "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
5254	{ "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
5255	{ "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
5256	{ "--int", 'i', RTGETOPT_REQ_NOTHING },
5257	{ "--include", 'I', RTGETOPT_REQ_STRING },
5258	{ "--exclude", 'X', RTGETOPT_REQ_STRING },
5259	// generation parameters
5260	{ "--common", 'm', RTGETOPT_REQ_NOTHING },
5261	{ "--cpu", 'c', RTGETOPT_REQ_NOTHING },
5262	{ "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
5263	{ "--verbose", 'v', RTGETOPT_REQ_NOTHING },
5264	{ "--quiet", 'q', RTGETOPT_REQ_NOTHING },
5265	};
5266
5267	RTGETOPTSTATE State;
5268	rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
5269	AssertRCReturn(rc, RTEXITCODE_FAILURE);
5270
5271	RTGETOPTUNION ValueUnion;
5272	while ((rc = RTGetOpt(&State, &ValueUnion)))
5273	{
5274	switch (rc)
5275	{
5276	case 'g':
5277	enmMode = kModeGenerate;
5278	break;
5279	case 't':
5280	enmMode = kModeTest;
5281	break;
5282
5283	case 'a':
5284	fCpuData = true;
5285	fCommonData = true;
5286	fInt = true;
5287	fFpuLdSt = true;
5288	fFpuBinary1 = true;
5289	fFpuBinary2 = true;
5290	fFpuOther = true;
5291	fSseFpBinary = true;
5292	break;
5293	case 'z':
5294	fCpuData = false;
5295	fCommonData = false;
5296	fInt = false;
5297	fFpuLdSt = false;
5298	fFpuBinary1 = false;
5299	fFpuBinary2 = false;
5300	fFpuOther = false;
5301	fSseFpBinary = false;
5302	break;
5303
5304	case 'F':
5305	fFpuLdSt = true;
5306	break;
5307	case 'O':
5308	fFpuOther = true;
5309	break;
5310	case 'B':
5311	fFpuBinary1 = true;
5312	break;
5313	case 'P':
5314	fFpuBinary2 = true;
5315	break;
5316	case 'S':
5317	fSseFpBinary = true;
5318	break;
5319	case 'i':
5320	fInt = true;
5321	break;
5322
5323	case 'I':
5324	if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
5325	return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
5326	RT_ELEMENTS(g_apszIncludeTestPatterns));
5327	g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
5328	break;
5329	case 'X':
5330	if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
5331	return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
5332	RT_ELEMENTS(g_apszExcludeTestPatterns));
5333	g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
5334	break;
5335
5336	case 'm':
5337	fCommonData = true;
5338	break;
5339	case 'c':
5340	fCpuData = true;
5341	break;
5342	case 'n':
5343	cTests = ValueUnion.u32;
5344	break;
5345
5346	case 'q':
5347	g_cVerbosity = 0;
5348	break;
5349	case 'v':
5350	g_cVerbosity++;
5351	break;
5352
5353	case 'h':
5354	RTPrintf("usage: %s <-g\|-t> [options]\n"
5355	"\n"
5356	"Mode:\n"
5357	" -g, --generate\n"
5358	" Generate test data.\n"
5359	" -t, --test\n"
5360	" Execute tests.\n"
5361	"\n"
5362	"Test selection (both modes):\n"
5363	" -a, --all\n"
5364	" Enable all tests and generated test data. (default)\n"
5365	" -z, --zap, --none\n"
5366	" Disable all tests and test data types.\n"
5367	" -i, --int\n"
5368	" Enable non-FPU tests.\n"
5369	" -F, --fpu-ld-st\n"
5370	" Enable FPU load and store tests.\n"
5371	" -B, --fpu-binary-1\n"
5372	" Enable FPU binary 80-bit FP tests.\n"
5373	" -P, --fpu-binary-2\n"
5374	" Enable FPU binary 64- and 32-bit FP tests.\n"
5375	" -O, --fpu-other\n"
5376	" Enable FPU binary 64- and 32-bit FP tests.\n"
5377	" -S, --sse-fp-binary\n"
5378	" Enable SSE binary 64- and 32-bit FP tests.\n"
5379	" -I,--include=<test-patter>\n"
5380	" Enable tests matching the given pattern.\n"
5381	" -X,--exclude=<test-patter>\n"
5382	" Skip tests matching the given pattern (overrides --include).\n"
5383	"\n"
5384	"Generation:\n"
5385	" -m, --common\n"
5386	" Enable generating common test data.\n"
5387	" -c, --only-cpu\n"
5388	" Enable generating CPU specific test data.\n"
5389	" -n, --number-of-test <count>\n"
5390	" Number of tests to generate. Default: %u\n"
5391	"\n"
5392	"Other:\n"
5393	" -v, --verbose\n"
5394	" -q, --quiet\n"
5395	" Noise level. Default: --quiet\n"
5396	, argv[0], cDefaultTests);
5397	return RTEXITCODE_SUCCESS;
5398	default:
5399	return RTGetOptPrintError(rc, &ValueUnion);
5400	}
5401	}
5402
5403	/*
5404	* Generate data?
5405	*/
5406	if (enmMode == kModeGenerate)
5407	{
5408	#ifdef TSTIEMAIMPL_WITH_GENERATOR
5409	char szCpuDesc[256] = {0};
5410	RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
5411	const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
5412	# if defined(RT_OS_WINDOWS) \|\| defined(RT_OS_OS2)
5413	const char * const pszBitBucket = "NUL";
5414	# else
5415	const char * const pszBitBucket = "/dev/null";
5416	# endif
5417
5418	if (cTests == 0)
5419	cTests = cDefaultTests;
5420	g_cZeroDstTests = RT_MIN(cTests / 16, 32);
5421	g_cZeroSrcTests = g_cZeroDstTests * 2;
5422
5423	if (fInt)
5424	{
5425	const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
5426	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5427	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5428	? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
5429	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5430	if (!pStrmData \|\| !pStrmDataCpu)
5431	return RTEXITCODE_FAILURE;
5432
5433	BinU8Generate( pStrmData, pStrmDataCpu, cTests);
5434	BinU16Generate(pStrmData, pStrmDataCpu, cTests);
5435	BinU32Generate(pStrmData, pStrmDataCpu, cTests);
5436	BinU64Generate(pStrmData, pStrmDataCpu, cTests);
5437	ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
5438	UnaryGenerate(pStrmData, cTests);
5439	ShiftGenerate(pStrmDataCpu, cTests);
5440	MulDivGenerate(pStrmDataCpu, cTests);
5441
5442	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5443	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5444	if (rcExit != RTEXITCODE_SUCCESS)
5445	return rcExit;
5446	}
5447
5448	if (fFpuLdSt)
5449	{
5450	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
5451	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5452	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5453	? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
5454	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5455	if (!pStrmData \|\| !pStrmDataCpu)
5456	return RTEXITCODE_FAILURE;
5457
5458	FpuLdConstGenerate(pStrmData, cTests);
5459	FpuLdIntGenerate(pStrmData, cTests);
5460	FpuLdD80Generate(pStrmData, cTests);
5461	FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
5462	FpuStD80Generate(pStrmData, cTests);
5463	uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
5464	FpuLdMemGenerate(pStrmData, cTests2);
5465	FpuStMemGenerate(pStrmData, cTests2);
5466
5467	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5468	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5469	if (rcExit != RTEXITCODE_SUCCESS)
5470	return rcExit;
5471	}
5472
5473	if (fFpuBinary1)
5474	{
5475	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
5476	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5477	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5478	? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
5479	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5480	if (!pStrmData \|\| !pStrmDataCpu)
5481	return RTEXITCODE_FAILURE;
5482
5483	FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5484	FpuBinaryFswR80Generate(pStrmData, cTests);
5485	FpuBinaryEflR80Generate(pStrmData, cTests);
5486
5487	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5488	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5489	if (rcExit != RTEXITCODE_SUCCESS)
5490	return rcExit;
5491	}
5492
5493	if (fFpuBinary2)
5494	{
5495	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
5496	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5497	const char pszDataCpuFile = pszBitBucket; /!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5498	? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
5499	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5500	if (!pStrmData \|\| !pStrmDataCpu)
5501	return RTEXITCODE_FAILURE;
5502
5503	FpuBinaryR64Generate(pStrmData, cTests);
5504	FpuBinaryR32Generate(pStrmData, cTests);
5505	FpuBinaryI32Generate(pStrmData, cTests);
5506	FpuBinaryI16Generate(pStrmData, cTests);
5507	FpuBinaryFswR64Generate(pStrmData, cTests);
5508	FpuBinaryFswR32Generate(pStrmData, cTests);
5509	FpuBinaryFswI32Generate(pStrmData, cTests);
5510	FpuBinaryFswI16Generate(pStrmData, cTests);
5511
5512	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5513	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5514	if (rcExit != RTEXITCODE_SUCCESS)
5515	return rcExit;
5516	}
5517
5518	if (fFpuOther)
5519	{
5520	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
5521	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5522	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5523	? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
5524	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5525	if (!pStrmData \|\| !pStrmDataCpu)
5526	return RTEXITCODE_FAILURE;
5527
5528	FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5529	FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
5530	FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
5531
5532	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5533	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5534	if (rcExit != RTEXITCODE_SUCCESS)
5535	return rcExit;
5536	}
5537
5538	if (fSseFpBinary)
5539	{
5540	const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
5541
5542	RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
5543	if (rcExit == RTEXITCODE_SUCCESS)
5544	rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
5545	if (rcExit == RTEXITCODE_SUCCESS)
5546	rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
5547	if (rcExit == RTEXITCODE_SUCCESS)
5548	rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
5549	if (rcExit != RTEXITCODE_SUCCESS)
5550	return rcExit;
5551	}
5552
5553	return RTEXITCODE_SUCCESS;
5554	#else
5555	return RTMsgErrorExitFailure("Test data generator not compiled in!");
5556	#endif
5557	}
5558
5559	/*
5560	* Do testing. Currently disabled by default as data needs to be checked
5561	* on both Intel and AMD systems first.
5562	*/
5563	rc = RTTestCreate("tstIEMAimpl", &g_hTest);
5564	AssertRCReturn(rc, RTEXITCODE_FAILURE);
5565	if (enmMode == kModeTest)
5566	{
5567	RTTestBanner(g_hTest);
5568
5569	/* Allocate guarded memory for use in the tests. */
5570	#define ALLOC_GUARDED_VAR(a_puVar) do { \
5571	rc = RTTestGuardedAlloc(g_hTest, sizeof(a_puVar), sizeof(a_puVar), false /fHead/, (void **)&a_puVar); \
5572	if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
5573	} while (0)
5574	ALLOC_GUARDED_VAR(g_pu8);
5575	ALLOC_GUARDED_VAR(g_pu16);
5576	ALLOC_GUARDED_VAR(g_pu32);
5577	ALLOC_GUARDED_VAR(g_pu64);
5578	ALLOC_GUARDED_VAR(g_pu128);
5579	ALLOC_GUARDED_VAR(g_pu8Two);
5580	ALLOC_GUARDED_VAR(g_pu16Two);
5581	ALLOC_GUARDED_VAR(g_pu32Two);
5582	ALLOC_GUARDED_VAR(g_pu64Two);
5583	ALLOC_GUARDED_VAR(g_pu128Two);
5584	ALLOC_GUARDED_VAR(g_pfEfl);
5585	if (RTTestErrorCount(g_hTest) == 0)
5586	{
5587	if (fInt)
5588	{
5589	BinU8Test();
5590	BinU16Test();
5591	BinU32Test();
5592	BinU64Test();
5593	XchgTest();
5594	XaddTest();
5595	CmpXchgTest();
5596	CmpXchg8bTest();
5597	CmpXchg16bTest();
5598	ShiftDblTest();
5599	UnaryTest();
5600	ShiftTest();
5601	MulDivTest();
5602	BswapTest();
5603	}
5604
5605	if (fFpuLdSt)
5606	{
5607	FpuLoadConstTest();
5608	FpuLdMemTest();
5609	FpuLdIntTest();
5610	FpuLdD80Test();
5611	FpuStMemTest();
5612	FpuStIntTest();
5613	FpuStD80Test();
5614	}
5615
5616	if (fFpuBinary1)
5617	{
5618	FpuBinaryR80Test();
5619	FpuBinaryFswR80Test();
5620	FpuBinaryEflR80Test();
5621	}
5622
5623	if (fFpuBinary2)
5624	{
5625	FpuBinaryR64Test();
5626	FpuBinaryR32Test();
5627	FpuBinaryI32Test();
5628	FpuBinaryI16Test();
5629	FpuBinaryFswR64Test();
5630	FpuBinaryFswR32Test();
5631	FpuBinaryFswI32Test();
5632	FpuBinaryFswI16Test();
5633	}
5634
5635	if (fFpuOther)
5636	{
5637	FpuUnaryR80Test();
5638	FpuUnaryFswR80Test();
5639	FpuUnaryTwoR80Test();
5640	}
5641
5642	if (fSseFpBinary)
5643	{
5644	SseBinaryR32Test();
5645	SseBinaryR64Test();
5646	SseBinaryU128R32Test();
5647	SseBinaryU128R64Test();
5648	}
5649	}
5650	return RTTestSummaryAndDestroy(g_hTest);
5651	}
5652	return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
5653	}
5654

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 96348

Download in other formats: