VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 105073

Last change on this file since 105073 was 105054, checked in by vboxsync, 10 months ago

ValidationKit/bootsector: bugref:10658 SIMD FP testcase: [v]subpd.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 204.7 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 105054 2024-06-27 10:27:09Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code, 1 for 32-bit code and 2 for everything else.
 *
 * @param   a_bMode     The execution mode to classify.  The macro now really
 *                      uses this argument; it previously ignored it and
 *                      picked up whatever variable named 'bMode' happened to
 *                      be in scope at the point of use. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)

/** Maximum length for the names of all SIMD FP exception flags combined
 * (size of the string literal, i.e. including the terminator). */
#define BS3_FP_XCPT_NAMES_MAXLEN                sizeof(" IE DE ZE OE UE PE ")
57
/*
 * Single-precision (32 bits) floating-point defines.
 *
 * Note: the fraction constants are deliberately bare literals (no
 * parentheses, no suffix) as they are handed on to the RTFLOAT32U_INIT_C
 * initializer macro.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX                 254
/** The min exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MIN                 1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX            0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN            0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                       RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS                  RTFLOAT32U_FRACTION_BITS
/** The max exponent value for a single-precision floating-point integer without
 * losing precision.
 * Parenthesized so the sum keeps its value when the macro is used inside a
 * larger expression (e.g. multiplied or subtracted from). */
#define BS3_FP32_EXP_SAFE_INT_MAX               (BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
/** The min exponent value for a single-precision floating-point integer without
 * losing precision. */
#define BS3_FP32_EXP_SAFE_INT_MIN               1
/** The max fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MAX          0x7fffff
/** The min fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MIN          1

/* Initializer shorthands; a_Sign is the sign bit (0 or 1). */
#define BS3_FP32_NORMAL_MAX(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
#define BS3_FP32_NORMAL_MIN(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
#define BS3_FP32_ZERO(a_Sign)                   RTFLOAT32U_INIT_ZERO(a_Sign)
#define BS3_FP32_ONE(a_Sign)                    RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP32_INF(a_Sign)                    RTFLOAT32U_INIT_INF(a_Sign)
#define BS3_FP32_QNAN(a_Sign)                   RTFLOAT32U_INIT_QNAN(a_Sign)
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)        RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP32_SNAN(a_Sign)                   RTFLOAT32U_INIT_SNAN(a_Sign)
93
94
/*
 * Single-precision floating-point normals.
 *      Fraction - 23 bits, all usable.
 *      Exponent -  8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/** The largest integer value representable without losing precision (all 23
 *  fraction bits plus the implied bit set). */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX)
/** The minimum integer value without losing precision. */
#define BS3_FP32_NORMAL_SAFE_INT_MIN(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_SAFE_INT_MIN)

/*
 * Single-precision floating-point denormals (exponent field is zero).
 */
/** The maximum denormal value. */
#define BS3_FP32_DENORMAL_MAX(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value. */
#define BS3_FP32_DENORMAL_MIN(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MIN, 0)

/*
 * Single-precision random values (incl. potentially invalid values).
 *
 * The exact values do not matter; they only serve to fill operands that an
 * instruction must leave unmodified, and are compared bitwise afterwards.
 */
#define BS3_FP32_RAND_VAL_0(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x7bacda, 0x55)
#define BS3_FP32_RAND_VAL_1(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x7010f0, 0xc0)
#define BS3_FP32_RAND_VAL_2(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x4ffcbe, 0xf1)
#define BS3_FP32_RAND_VAL_3(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x2fd7c8, 0x1f)
#define BS3_FP32_RAND_VAL_4(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x09)
#define BS3_FP32_RAND_VAL_5(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x3d2d1d, 0x99)
#define BS3_FP32_RAND_VAL_6(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x123456, 0x5e)
#define BS3_FP32_RAND_VAL_7(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, 0x05432f, 0xd7)
129
/*
 * Double-precision (64 bits) floating-point defines.
 *
 * Note: the fraction constants are deliberately bare literals (no
 * parentheses, no suffix) as they are handed on to the RTFLOAT64U_INIT_C
 * initializer macro.
 */
/** The max exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX                 2046
/** The min exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN                 1
/** The max fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX            0xfffffffffffff
/** The min fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN            0
/** The exponent bias for the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                       RTFLOAT64U_EXP_BIAS
/** Fraction width (in bits) for the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS                  RTFLOAT64U_FRACTION_BITS
/** The max exponent value for a double-precision floating-point integer without
 * losing precision.
 * Parenthesized so the sum keeps its value when the macro is used inside a
 * larger expression (e.g. multiplied or subtracted from). */
#define BS3_FP64_EXP_SAFE_INT_MAX               (BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
/** The min exponent value for a double-precision floating-point integer without
 * losing precision. */
#define BS3_FP64_EXP_SAFE_INT_MIN               1
/** The max fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MAX          0xfffffffffffff
/** The min fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MIN          1

/* Initializer shorthands; a_Sign is the sign bit (0 or 1). */
#define BS3_FP64_NORMAL_MAX(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
#define BS3_FP64_NORMAL_MIN(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
#define BS3_FP64_ZERO(a_Sign)                   RTFLOAT64U_INIT_ZERO(a_Sign)
#define BS3_FP64_ONE(a_Sign)                    RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP64_INF(a_Sign)                    RTFLOAT64U_INIT_INF(a_Sign)
#define BS3_FP64_QNAN(a_Sign)                   RTFLOAT64U_INIT_QNAN(a_Sign)
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP64_SNAN(a_Sign)                   RTFLOAT64U_INIT_SNAN(a_Sign)
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)
166
/*
 * Double-precision floating-point normals.
 *      Fraction - 52 bits, all usable.
 *      Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): 0xffe needs 12 bits while the exponent is documented above as
   11 bits wide - presumably it ends up as 0x7fe; confirm this is intended. */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
/** The largest integer value representable without losing precision (all 52
 *  fraction bits plus the implied bit set). */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX)
/** The minimum integer value without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MIN(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_SAFE_INT_MIN)

/*
 * Double-precision floating-point denormals (exponent field is zero).
 */
/** The maximum denormal value. */
#define BS3_FP64_DENORMAL_MAX(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value. */
#define BS3_FP64_DENORMAL_MIN(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MIN, 0)
187
188
189/*********************************************************************************************************************************
190* Structures and Typedefs *
191*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The values are used as indexes into g_afTypeSupports, so the order and the
 * T_MAX terminator must be kept in sync with that table.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID = 0,
    T_MMX,
    T_MMX_SSE,              /**< MMX instruction, but requires the SSE CPUID feature to work. */
    T_MMX_SSE2,             /**< MMX instruction, but requires the SSE2 CPUID feature to work. */
    T_MMX_SSSE3,            /**< MMX instruction, but requires the SSSE3 CPUID feature to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_128BITS = T_SSE,      /**< Same value as T_SSE. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_256BITS = T_AVX_256,  /**< Same value as T_AVX_256. */
    T_AVX2_256,
    T_MAX                   /**< Number of types (array size), not a valid type. */
} BS3CPUINSTRX_INSTRTYPE_T;
220
/**
 * Memory or register rm variant.
 *
 * The RM_MEMxx variants override the size of the memory operand, see
 * bs3CpuInstrXMemOpSize() for the resulting sizes.
 */
enum
{
    RM_REG = 0, /**< Register operand. */
    RM_MEM,     /**< Memory operand of the regular operand size. */
    RM_MEM8,    /**< Memory operand is  8 bits (1 byte).  Hack for movss and similar. */
    RM_MEM16,   /**< Memory operand is 16 bits (2 bytes). Hack for movss and similar. */
    RM_MEM32,   /**< Memory operand is 32 bits (4 bytes). Hack for movss and similar. */
    RM_MEM64    /**< Memory operand is 64 bits (8 bytes). Hack for movss and similar. */
};
230
231/**
232 * Execution environment configuration.
233 */
234typedef struct BS3CPUINSTR4_CONFIG_T
235{
236 uint16_t fCr0Mp : 1;
237 uint16_t fCr0Em : 1;
238 uint16_t fCr0Ts : 1;
239 uint16_t fCr4OsFxSR : 1;
240 uint16_t fCr4OsXSave : 1;
241 uint16_t fCr4OsXmmExcpt : 1;
242 uint16_t fXcr0Sse : 1;
243 uint16_t fXcr0Avx : 1;
244 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
245 uint16_t fAlignCheck : 1;
246 uint16_t fMxCsrMM : 1; /**< AMD only */
247 uint8_t bXcptSse;
248 uint8_t bXcptAvx;
249} BS3CPUINSTR4_CONFIG_T;
250/** Pointer to an execution environment configuration. */
251typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
252
253/** State saved by bs3CpuInstr4ConfigReconfigure. */
254typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
255{
256 uint32_t uCr0;
257 uint32_t uCr4;
258 uint32_t uEfl;
259 uint16_t uFcw;
260 uint16_t uFsw;
261 uint32_t uMxCsr;
262} BS3CPUINSTRX_CONFIG_SAVED_T;
263typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
264typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
265
266/**
267 * YMM packed single-precision floating-point register.
268 * @todo move to x86.h?
269 */
270typedef union X86YMMFLOATPSREG
271{
272 /** Packed single-precision floating-point view. */
273 RTFLOAT32U ar32[8];
274 /** 256-bit integer view. */
275 RTUINT256U ymm;
276} X86YMMFLOATPSREG;
277# ifndef VBOX_FOR_DTRACE_LIB
278AssertCompileSize(X86YMMFLOATPSREG, 32);
279AssertCompileSize(X86YMMFLOATPSREG, sizeof(X86YMMREG));
280# endif
281/** Pointer to a YMM packed single-precision floating-point register. */
282typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
283/** Pointer to a const YMM single-precision packed floating-point register. */
284typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
285
286/**
287 * YMM packed double-precision floating-point register.
288 * @todo move to x86.h?
289 */
290typedef union X86YMMFLOATPDREG
291{
292 /** Packed double-precision floating-point view. */
293 RTFLOAT64U ar64[4];
294 /** 256-bit integer view. */
295 RTUINT256U ymm;
296} X86YMMFLOATPDREG;
297# ifndef VBOX_FOR_DTRACE_LIB
298AssertCompileSize(X86YMMFLOATPDREG, 32);
299AssertCompileSize(X86YMMFLOATPDREG, sizeof(X86YMMREG));
300# endif
301/** Pointer to a YMM packed floating-point register. */
302typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
303/** Pointer to a const YMM packed floating-point register. */
304typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
305
306/**
307 * YMM scalar single-precision floating-point register.
308 * @todo move to x86.h?
309 */
310typedef union X86YMMFLOATSSREG
311{
312 /** Scalar single-precision floating-point view. */
313 RTFLOAT32U ar32[8];
314 /** 256-bit integer view. */
315 RTUINT256U ymm;
316} X86YMMFLOATSSREG;
317# ifndef VBOX_FOR_DTRACE_LIB
318AssertCompileSize(X86YMMFLOATSSREG, 32);
319AssertCompileSize(X86YMMFLOATSSREG, sizeof(X86YMMREG));
320# endif
321/** Pointer to a YMM scalar single-precision floating-point register. */
322typedef X86YMMFLOATSSREG BS3_FAR *PX86YMMFLOATSSREG;
323/** Pointer to a const YMM scalar single-precision floating-point register. */
324typedef X86YMMFLOATSSREG const BS3_FAR *PCX86YMMFLOATSSREG;
325
326/**
327 * YMM scalar double-precision floating-point register.
328 * @todo move to x86.h?
329 */
330typedef union X86YMMFLOATSDREG
331{
332 /** Scalar double-precision floating-point view. */
333 RTFLOAT64U ar64[3];
334 /** 256-bit integer view. */
335 RTUINT256U ymm;
336} X86YMMFLOATSDREG;
337# ifndef VBOX_FOR_DTRACE_LIB
338AssertCompileSize(X86YMMFLOATSDREG, 32);
339AssertCompileSize(X86YMMFLOATSDREG, sizeof(X86YMMREG));
340# endif
341/** Pointer to a YMM scalar double-precision floating-point register. */
342typedef X86YMMFLOATSDREG BS3_FAR *PX86YMMFLOATSDREG;
343/** Pointer to a const YMM scalar double-precision floating-point register. */
344typedef X86YMMFLOATSDREG const BS3_FAR *PCX86YMMFLOATSDREG;
345
346/**
347 * YMM scalar quadruple-precision floating-point register.
348 * @todo move to x86.h?
349 */
350typedef union X86YMMFLOATSQREG
351{
352 /** Scalar quadruple-precision floating point view. */
353 RTFLOAT128U ar128[2];
354 /** 256-bit integer view. */
355 RTUINT256U ymm;
356} X86YMMFLOATSQREG;
357# ifndef VBOX_FOR_DTRACE_LIB
358AssertCompileSize(X86YMMFLOATSQREG, 32);
359AssertCompileSize(X86YMMFLOATSQREG, sizeof(X86YMMREG));
360# endif
361/** Pointer to a YMM scalar quadruple-precision floating-point register. */
362typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
363/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
364typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
365
366
367/*********************************************************************************************************************************
368* Global Variables *
369*********************************************************************************************************************************/
370static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
371static bool g_fAmdMisalignedSse = false;
372static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
373static bool g_fMxCsrDazSupported = false;
374
375/** Zero value (indexed by fSign). */
376RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
377RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
378
379/** One value (indexed by fSign). */
380RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
381 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
382RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
383 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
384
385/** Infinity (indexed by fSign). */
386RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
387RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
388
389/** Default QNaNs (indexed by fSign). */
390RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
391RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
392
393/** Size of g_pbBuf - at least three pages. */
394static uint32_t g_cbBuf;
395/** Buffer of g_cbBuf size. */
396static uint8_t BS3_FAR *g_pbBuf;
397/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
398static uint8_t BS3_FAR *g_pbBufAlias;
399/** RW alias for the memory at g_pbBuf. */
400static uint8_t BS3_FAR *g_pbBufAliasAlloc;
401
402/** Exception type \#2 test configurations, 16 & 32 bytes strictly aligned. */
403static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig2[] =
404{
405/*
406 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
407 * +AVX +AVX +AMD/SSE +AMD/SSE
408 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
409 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
410 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
411 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
412 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
413 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
414 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
415 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
416 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
417 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
418 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
419 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
420 /* Memory misalignment and alignment checks: */
421 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
422 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
423 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
424 /* AMD only: */
425 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
426 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
427};
428
429/** Exception type \#3 test configurations (< 16-byte memory argument). */
430static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig3[] =
431{
432/*
433 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
434 * +AVX +AVX +AMD/SSE +AMD/SSE
435 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
436 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
437 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
438 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
439 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
440 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
441 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
442 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
443 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
444 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
445 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
446 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
447 /* Memory misalignment and alignment checks: */
448 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #10 */ /* [Avx]:DB */
449 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_AC, X86_XCPT_AC }, /* #11 */ /* [Avx]:AC */
450 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
451 /* AMD only: */
452 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
453 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
454};
455
456
457/**
458 * Returns the name of an X86 exception given the vector.
459 *
460 * @returns Name of the exception.
461 * @param uVector The exception vector.
462 */
463static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
464{
465 switch (uVector)
466 {
467 case X86_XCPT_DE: return "#DE";
468 case X86_XCPT_DB: return "#DB";
469 case X86_XCPT_NMI: return "#NMI";
470 case X86_XCPT_BP: return "#BP";
471 case X86_XCPT_OF: return "#OF";
472 case X86_XCPT_BR: return "#BR";
473 case X86_XCPT_UD: return "#UD";
474 case X86_XCPT_NM: return "#NM";
475 case X86_XCPT_DF: return "#DF";
476 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
477 case X86_XCPT_TS: return "#TS";
478 case X86_XCPT_NP: return "#NP";
479 case X86_XCPT_SS: return "#SS";
480 case X86_XCPT_GP: return "#GP";
481 case X86_XCPT_PF: return "#PF";
482 case X86_XCPT_MF: return "#MF";
483 case X86_XCPT_AC: return "#AC";
484 case X86_XCPT_MC: return "#MC";
485 case X86_XCPT_XF: return "#XF";
486 case X86_XCPT_VE: return "#VE";
487 case X86_XCPT_CP: return "#CP";
488 case X86_XCPT_VC: return "#VC";
489 case X86_XCPT_SX: return "#SX";
490 }
491 return "UNKNOWN";
492}
493
494
495/**
496 * Gets the names of floating-point exception flags that are set for a given MXCSR.
497 *
498 * @returns Names of floating-point exception flags that are set.
499 * @param pszBuf Where to store the floating-point exception flags.
500 * @param cchBuf The size of the buffer.
501 * @param fMxCsr The MXCSR value.
502 */
503static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
504{
505 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
506 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
507 return Bs3StrPrintf(pszBuf, cchBuf, " None");
508 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
509 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
510 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
511}
512
513
514/**
515 * Reconfigures the execution environment according to @a pConfig.
516 *
517 * Call bs3CpuInstrXConfigRestore to undo the changes.
518 *
519 * @returns true on success, false if the configuration cannot be applied. In
520 * the latter case, no context changes are made.
521 * @param pSavedCfg Where to save state we modify.
522 * @param pCtx The register context to modify.
523 * @param pExtCtx The extended register context to modify.
524 * @param pConfig The configuration to apply.
525 * @param bMode The target mode.
526 */
527static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
528 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
529{
530 /*
531 * Save context bits we may change here
532 */
533 pSavedCfg->uCr0 = pCtx->cr0.u32;
534 pSavedCfg->uCr4 = pCtx->cr4.u32;
535 pSavedCfg->uEfl = pCtx->rflags.u32;
536 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
537 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
538 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
539
540 /*
541 * Can we make these changes?
542 */
543 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
544 return false;
545
546 /*
547 * Modify the test context.
548 */
549 if (pConfig->fCr0Mp)
550 pCtx->cr0.u32 |= X86_CR0_MP;
551 else
552 pCtx->cr0.u32 &= ~X86_CR0_MP;
553 if (pConfig->fCr0Em)
554 pCtx->cr0.u32 |= X86_CR0_EM;
555 else
556 pCtx->cr0.u32 &= ~X86_CR0_EM;
557 if (pConfig->fCr0Ts)
558 pCtx->cr0.u32 |= X86_CR0_TS;
559 else
560 pCtx->cr0.u32 &= ~X86_CR0_TS;
561
562 if (pConfig->fCr4OsFxSR)
563 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
564 else
565 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
566
567 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
568 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
569 else
570 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
571
572 if (pConfig->fCr4OsFxSR)
573 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
574 else
575 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
576
577 if (pConfig->fCr4OsXSave)
578 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
579 else
580 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
581
582 if (pConfig->fXcr0Sse)
583 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
584 else
585 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
586 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
587 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
588 else
589 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
590
591 if (pConfig->fAlignCheck)
592 {
593 pCtx->rflags.u32 |= X86_EFL_AC;
594 pCtx->cr0.u32 |= X86_CR0_AM;
595 }
596 else
597 {
598 pCtx->rflags.u32 &= ~X86_EFL_AC;
599 pCtx->cr0.u32 &= ~X86_CR0_AM;
600 }
601
602 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
603 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
604
605 if (pConfig->fMxCsrMM)
606 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
607 else
608 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
609 return true;
610}
611
612
613/**
614 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
615 */
616static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
617{
618 pCtx->cr0.u32 = pSavedCfg->uCr0;
619 pCtx->cr4.u32 = pSavedCfg->uCr4;
620 pCtx->rflags.u32 = pSavedCfg->uEfl;
621 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
622 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
623 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
624 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
625}
626
627
628/**
629 * Allocates three extended CPU contexts and initializes the first one
630 * with random data.
631 * @returns First extended context, initialized with randomish data. NULL on
632 * failure (complained).
633 * @param ppExtCtx2 Where to return the 2nd context.
634 */
635static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
636{
637 /* Allocate extended context structures. */
638 uint64_t fFlags;
639 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
640 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
641 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
642 if (pExtCtx1)
643 {
644 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
645 /** @todo populate with semi-random stuff. */
646
647 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
648 *ppExtCtx2 = pExtCtx2;
649 return pExtCtx1;
650 }
651 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
652 *ppExtCtx2 = NULL;
653 return NULL;
654}
655
656
657/**
658 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
659 *
660 * @param pExtCtx1 The first extended context.
661 * @param pExtCtx2 The second extended context.
662 */
663static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
664{
665 RT_NOREF_PV(pExtCtx2);
666 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
667}
668
669
670/**
671 * Sets up SSE and AVX bits relevant for FPU instructions.
672 */
673static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
674{
675 /* CR0: */
676 uint32_t cr0 = Bs3RegGetCr0();
677 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
678 cr0 |= X86_CR0_NE;
679 Bs3RegSetCr0(cr0);
680
681 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
682 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
683 pCtx->cr0.u32 |= X86_CR0_NE;
684
685 /* CR4: */
686 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
687 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
688 {
689 uint32_t cr4 = Bs3RegGetCr4();
690 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
691 {
692 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
693 Bs3RegSetCr4(cr4);
694 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
695 }
696 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
697 {
698 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
699 Bs3RegSetCr4(cr4);
700 }
701 pCtx->cr4.u32 = cr4;
702 }
703}
704
705
706/**
707 * Configures the buffer with electric fences in paged modes.
708 *
709 * @returns Adjusted buffer pointer.
710 * @param pbBuf The buffer pointer.
711 * @param pcbBuf Pointer to the buffer size (input & output).
712 * @param bMode The testing target mode.
713 */
714DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
715{
716 if (BS3_MODE_IS_PAGED(bMode))
717 {
718 int rc;
719 uint32_t cbBuf = *pcbBuf;
720 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
721 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
722 pbBuf += X86_PAGE_SIZE;
723 cbBuf -= X86_PAGE_SIZE * 2;
724 *pcbBuf = cbBuf;
725
726 g_pbBufAlias = g_pbBufAliasAlloc;
727 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
728 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
729 if (RT_FAILURE(rc))
730 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
731 }
732 else
733 g_pbBufAlias = pbBuf;
734 return pbBuf;
735}
736
737
738/**
739 * Undoes what bs3CpuInstrXBufSetup did.
740 *
741 * @param pbBuf The buffer pointer.
742 * @param cbBuf The buffer size.
743 * @param bMode The testing target mode.
744 */
745DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
746{
747 if (BS3_MODE_IS_PAGED(bMode))
748 {
749 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
750 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
751 }
752}
753
754
755/**
756 * Gets a buffer of a @a cbMemOp sized operand according to the given
757 * configuration and alignment restrictions.
758 *
759 * @returns Pointer to the buffer.
760 * @param pbBuf The buffer pointer.
761 * @param cbBuf The buffer size.
762 * @param cbMemOp The operand size.
763 * @param cbAlign The operand alignment restriction.
764 * @param pConfig The configuration.
765 * @param fPageFault The \#PF test setting.
766 */
767DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
768 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
769{
770 /* All allocations are at the tail end of the buffer, so that we've got a
771 guard page following the operand. When asked to consistenly trigger
772 a #PF, we slide the buffer into that guard page. */
773 if (fPageFault)
774 cbBuf += X86_PAGE_SIZE;
775
776 if (pConfig->fAligned)
777 {
778 if (!pConfig->fAlignCheck)
779 return &pbBuf[cbBuf - cbMemOp];
780 return &pbBuf[cbBuf - cbMemOp - cbAlign];
781 }
782 return &pbBuf[cbBuf - cbMemOp - 1];
783}
784
785
786/**
787 * Determines the size of memory operands.
788 */
789DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
790{
791 if (enmRm <= RM_MEM)
792 return cbOperand;
793 if (enmRm == RM_MEM8)
794 return sizeof(uint8_t);
795 if (enmRm == RM_MEM16)
796 return sizeof(uint16_t);
797 if (enmRm == RM_MEM32)
798 return sizeof(uint32_t);
799 if (enmRm == RM_MEM64)
800 return sizeof(uint64_t);
801 BS3_ASSERT(0);
802 return cbOperand;
803}
804
805
806/*
807 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
808 * skips a large fraction of the micro-tests. It is sufficiently random
809 * that over a large number of runs, all micro-tests will be hit.
810 *
811 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
812 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
813 * (on an Intel Core i7-10700, fwiw).
814 *
815 * To activate this 'developer's speed-testing mode', turn on
816 * `#define BS3_SKIPIT_DO_SKIP' here.
817 *
818 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
819 * skipped in a row; e.g. the default of 26 means about every 27th
820 * micro-test is run during a particular test run. (This is not 27x
821 * faster due to other activities which are not skipped!) Note this is
822 * only an average; the actual skips are random.
823 *
824 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
825 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
826 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
827 * 3% performance).
828 *
829 * Note! The skipping is not compatible with testing the native recompiler as
830 * it requires the test code to be run a number of times before it kicks
831 * in and does the native recompilation (currently around 16 times).
832 */
833#define BS3_SKIPIT_AVG_SKIP 26
834#define BS3_SKIPIT_REPORT_COUNT 150000
835#undef BS3_SKIPIT_DO_SKIP
836#undef BS3_SKIPIT_DO_ARGS
837
838#ifndef BS3_SKIPIT_DO_SKIP
839# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
840#else
841# include <iprt/asm-amd64-x86.h>
842# include <iprt/asm-math.h>
843
844DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
845{
846 /*
847 * A simple Lehmer linear congruential pseudo-random number
848 * generator using the constants suggested by Park & Miller:
849 *
850 * modulus = 2^31 - 1 (INT32_MAX)
851 * multiplier = 7^5 (16807)
852 *
853 * It produces numbers in the range [1..INT32_MAX-1] and is
854 * more chaotic in the higher bits.
855 *
856 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
857 * though the zero handling is different.
858 */
859 static uint32_t s_uSeedMemory = 0;
860 uint32_t uVal = s_uSeedMemory;
861 if (!uVal)
862 uVal = (uint32_t)ASMReadTSC();
863 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
864 s_uSeedMemory = uVal;
865 return uVal;
866}
867
/** Number of micro-tests seen by bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen;
/** Number of micro-tests bs3CpuInstrX_SkipIt decided to skip. */
static unsigned g_cSkipped;

/**
 * Prints the running totals of seen, tested and skipped micro-tests.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    /* The counters are unsigned, so format them as such. */
    Bs3TestPrintf("Micro-tests %u: tested %u / skipped %u\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}
874
875# ifdef BS3_SKIPIT_DO_ARGS
876# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
877static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
878# else
879# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
880static bool bs3CpuInstrX_SkipIt(void)
881# endif
882{
883 static unsigned s_uTimes = 0;
884 bool fSkip;
885
886 /* Cache calls to the relatively expensive random routine */
887 if (!s_uTimes)
888 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
889 fSkip = --s_uTimes > 0;
890 if (fSkip)
891 ++g_cSkipped;
892
893 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
894 bs3CpuInstrX_ShowTallies();
895 return fSkip;
896}
897
898#endif /* BS3_SKIPIT_DO_SKIP */
899
900/*
901 * Test type #1.
902 * Generic YMM registers.
903 */
904typedef struct BS3CPUINSTR4_TEST1_VALUES_T
905{
906 X86YMMREG uSrc2; /**< Second source operand. */
907 X86YMMREG uSrc1; /**< uDstIn for SSE */
908 X86YMMREG uDstOut; /**< Destination output. */
909 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
910 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
911 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
912 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
913 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
914 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
915} BS3CPUINSTR4_TEST1_VALUES_T;
916
917/*
918 * Test type #1.
919 * Packed single-precision.
920 */
921typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
922{
923 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
924 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
925 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
926 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
927 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
928 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
929 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
930 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
931 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
932} BS3CPUINSTR4_TEST1_VALUES_PS_T;
933AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
934AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
935AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
936AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
937AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
938AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
939AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
940AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
941AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
942AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
943
944/*
945 * Test type #1.
946 * Packed double-precision.
947 */
948typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
949{
950 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
951 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
952 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
953 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
954 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
955 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
956 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
957 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
958 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
959} BS3CPUINSTR4_TEST1_VALUES_PD_T;
960AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
961AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
962AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
963AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
964AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
965AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
966AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
967AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
968AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
969AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
970
971/*
972 * Test type #1.
973 * Scalar single-precision.
974 */
975typedef struct BS3CPUINSTR4_TEST1_VALUES_SS_T
976{
977 X86YMMFLOATSSREG uSrc2; /**< Second source operand. */
978 X86YMMFLOATSSREG uSrc1; /**< uDstIn for SSE */
979 X86YMMFLOATSSREG uDstOut; /**< Destination output. */
980 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
981 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
982 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
983 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
984 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
985 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
986} BS3CPUINSTR4_TEST1_VALUES_SS_T;
987AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
988AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
989AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
990AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
991AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
992AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
993AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
994AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
995AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
996AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
997
998/*
999 * Test type #1.
1000 * Scalar quadruple-precision.
1001 */
1002typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
1003{
1004 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
1005 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
1006 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
1007 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
1008 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
1009 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
1010 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
1011 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
1012 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
1013} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
1014AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
1015AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
1016AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
1017AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
1018AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
1019AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
1020AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
1021AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
1022AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
1023AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
1024
1025typedef struct BS3CPUINSTR4_TEST1_T
1026{
1027 FPFNBS3FAR pfnWorker; /**< Test function worker. */
1028 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
1029 uint8_t enmRm; /**< R/M type. */
1030 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
1031 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
1032 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
1033 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
1034 uint8_t cValues; /**< Number of test values in @c paValues. */
1035 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
1036} BS3CPUINSTR4_TEST1_T;
1037
1038typedef struct BS3CPUINSTR4_TEST1_MODE_T
1039{
1040 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
1041 unsigned cTests;
1042} BS3CPUINSTR4_TEST1_MODE_T;
1043
1044/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
1045#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
1046 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
1047
1048typedef struct BS3CPUINSTR4_TEST1_CTX_T
1049{
1050 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
1051 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
1052 unsigned iVal; /**< Which iteration of the test value is this. */
1053 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
1054 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
1055 PBS3REGCTX pCtx; /**< The general-purpose register context. */
1056 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
1057 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
1058 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
1059 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
1060 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
1061 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
1062 uint8_t cbInstr; /**< Size of the instruction opcode. */
1063 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
1064 bool fSseInstr; /**< Whether this is an SSE instruction. */
1065 bool fAvxInstr; /**< Whether this is an AVX instruction. */
1066 uint16_t idTestStep; /**< The test iteration step. */
1067} BS3CPUINSTR4_TEST1_CTX_T;
1068/** Pointer to a test 1 context. */
1069typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
1070
1071
1072/**
1073 * Worker for bs3CpuInstr4_WorkerTestType1.
1074 */
1075static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
1076 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
1077{
1078 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
1079 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
1080 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
1081 PBS3REGCTX pCtx = pTestCtx->pCtx;
1082 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
1083 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
1084 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
1085 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
1086 uint8_t cbMemOp = pTestCtx->cbMemOp;
1087 uint8_t const cbOperand = pTestCtx->cbOperand;
1088 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
1089 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
1090 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
1091 uint32_t const fExpectedMxCsrFlags = pTestCtx->cbOperand > 16 ? pValues->f256ExpectedMxCsrFlags
1092 : pValues->f128ExpectedMxCsrFlags;
1093 bool const fFpFlagsExpect = RT_BOOL( (fExpectedMxCsrFlags
1094 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
1095 uint32_t uMxCsr;
1096 X86YMMREG MemOpExpect;
1097 uint16_t cErrors;
1098
1099 /*
1100 * Set up the context and some expectations.
1101 */
1102 /* Destination. */
1103 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
1104 if (pTest->iRegDst == UINT8_MAX)
1105 {
1106 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1107 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
1108 if (bXcptExpect == X86_XCPT_DB)
1109 MemOpExpect.ymm = pValues->uDstOut.ymm;
1110 else
1111 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
1112 }
1113
1114 /* Source #1 (/ destination for SSE). */
1115 if (pTest->iRegSrc1 == UINT8_MAX)
1116 {
1117 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1118 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
1119 if (pTest->iRegDst == UINT8_MAX)
1120 BS3_ASSERT(pTestCtx->fSseInstr);
1121 else
1122 MemOpExpect.ymm = pValues->uSrc1.ymm;
1123 }
1124 else if (pTestCtx->fSseInstr)
1125 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
1126 else
1127 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
1128
1129 /* Source #2. */
1130 if (pTest->iRegSrc2 == UINT8_MAX)
1131 {
1132 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1133 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
1134 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
1135 MemOpExpect.ymm = pValues->uSrc2.ymm;
1136 }
1137 else if (pTestCtx->fSseInstr)
1138 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
1139 else
1140 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
1141
1142 /* Memory pointer. */
1143 if (pTest->enmRm >= RM_MEM)
1144 {
1145 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
1146 || pTest->iRegSrc1 == UINT8_MAX
1147 || pTest->iRegSrc2 == UINT8_MAX);
1148 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
1149 }
1150
1151 /* Setup MXCSR for the current test. */
1152 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
1153 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
1154 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
1155 if ( pValues->fDenormalsAreZero == X86_MXCSR_DAZ
1156 && g_fMxCsrDazSupported)
1157 uMxCsr |= X86_MXCSR_DAZ;
1158 if (pValues->fFlushToZero == X86_MXCSR_FZ)
1159 uMxCsr |= X86_MXCSR_FZ;
1160 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1161
1162 /*
1163 * Prepare globals and execute.
1164 */
1165 g_uBs3TrapEipHint = pCtx->rip.u32;
1166 if ( bXcptExpect == X86_XCPT_DB
1167 && !fFpFlagsExpect)
1168 g_uBs3TrapEipHint += cbInstr + 1;
1169 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1170
1171 /*
1172 * Check the result.
1173 *
1174 * If a floating-point exception is expected, the destination is not updated by the instruction.
1175 * In the case of SSE instructions, updating the destination here will work because it is the same
1176 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1177 */
1178 cErrors = Bs3TestSubErrorCount();
1179 if ( bXcptExpect == X86_XCPT_DB
1180 && !fFpFlagsExpect
1181 && pTest->iRegDst != UINT8_MAX)
1182 {
1183 if (pTestCtx->fSseInstr)
1184 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1185 else
1186 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1187 }
1188#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1189 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1190 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1191 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1192 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1193#endif
1194 if (bXcptExpect == X86_XCPT_DB)
1195 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1196 | (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1197 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1198
1199 if (bXcptExpect == X86_XCPT_DB)
1200 {
1201 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1202
1203 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1204 if (fMxCsrXcptFlags != (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1205 {
1206 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1207 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1208 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), fExpectedMxCsrFlags);
1209 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1210 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1211 }
1212
1213 /* Check if the SIMD FP exception (or lack of) is as expected. */
1214 if (fFpFlagsExpect)
1215 {
1216 if (pTrapFrame->bXcpt == bFpXcpt)
1217 { /* likely */ }
1218 else
1219 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1220 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1221 }
1222 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1223 { /* likely */ }
1224 else
1225 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1226 }
1227 /* Check if non-FP exception is as expected. */
1228 else if (pTrapFrame->bXcpt != bXcptExpect)
1229 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1230
1231 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1232 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1233 {
1234 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1235 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1236 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1237 }
1238 if (bXcptExpect == X86_XCPT_PF)
1239 pCtx->cr2.u = (uintptr_t)puMemOp;
1240 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1241 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1242 pTestCtx->pszMode, pTestCtx->idTestStep);
1243 pCtx->cr2.u = 0;
1244
1245 if ( pTest->enmRm >= RM_MEM
1246 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1247 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1248
1249 return cErrors;
1250}
1251
1252
1253/**
1254 * Test type #1 worker.
1255 */
1256static uint8_t bs3CpuInstr4_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1257 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1258{
1259 BS3REGCTX Ctx;
1260 BS3TRAPFRAME TrapFrame;
1261 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1262 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1263 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1264 uint32_t cbBuf = g_cbBuf;
1265 PBS3EXTCTX pExtCtxOut;
1266 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1267 if (pExtCtx)
1268 { /* likely */ }
1269 else
1270 return 0;
1271 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1272 { /* likely */ }
1273 else
1274 {
1275 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1276 return 0;
1277 }
1278
1279 /* Ensure the structures are allocated before we sample the stack pointer. */
1280 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1281 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1282
1283 /*
1284 * Create test context.
1285 */
1286 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1287 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1288 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1289
1290 /*
1291 * Run the tests in all rings since alignment issues may behave
1292 * differently in ring-3 compared to ring-0.
1293 */
1294 for (;;)
1295 {
1296 unsigned fPf = 0;
1297 do
1298 {
1299 unsigned iCfg;
1300 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1301 {
1302 unsigned iTest;
1303 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1304 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1305 continue; /* unsupported config */
1306
1307 /*
1308 * Iterate the tests.
1309 */
1310 for (iTest = 0; iTest < cTests; iTest++)
1311 {
1312 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1313 unsigned const cValues = pTest->cValues;
1314 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1315 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1316 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1317 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1318 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1319 uint8_t const cbAlign = cbMemOp;
1320 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1321 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1322 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1323 : fSseInstr ? paConfigs[iCfg].bXcptSse
1324 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1325 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1326 unsigned cRecompRuns = 0;
1327 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1328 unsigned iVal;
1329
1330 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1331 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1332 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1333 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1334 continue;
1335
1336 /* #AC is only raised in ring-3. */
1337 if (bXcptExpect == X86_XCPT_AC)
1338 {
1339 if (bRing != 3)
1340 bXcptExpect = X86_XCPT_DB;
1341 else if (fAvxInstr)
1342 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1343 }
1344
1345 if (fPf && bXcptExpect == X86_XCPT_DB)
1346 bXcptExpect = X86_XCPT_PF;
1347
1348 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1349
1350 /*
1351 * Iterate the test values and do the actual testing.
1352 */
1353 while (cRecompRuns < cMaxRecompRuns)
1354 {
1355 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1356 {
1357 uint16_t cErrors;
1358 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1359 uint32_t const fExpectedMxCsrFlags = pTest->enmType >= T_128BITS
1360 ? pTest->paValues[iVal].f128ExpectedMxCsrFlags
1361 : pTest->paValues[iVal].f256ExpectedMxCsrFlags;
1362
1363 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1364 continue;
1365
1366 /*
1367 * If the hardware does not support DAZ bit and we are testing DE exceptions,
1368 * then skip testing them. We still want to test values that set the MXCSR.DAZ
1369 * if we are not expecting DE exceptions to make sure DAZ bit in and of itself
1370 * is not influencing other cases.
1371 */
1372 if ( !g_fMxCsrDazSupported
1373 && pTest->paValues[iVal].fDenormalsAreZero == X86_MXCSR_DAZ
1374 && (fExpectedMxCsrFlags & X86_MXCSR_DE))
1375 continue;
1376
1377 /*
1378 * Setup the test instruction context and pass it to the worker.
1379 * A few of these can be figured out by the worker but initializing
1380 * it outside the inner most loop is more optimal.
1381 */
1382 TestCtx.pConfig = &paConfigs[iCfg];
1383 TestCtx.pTest = pTest;
1384 TestCtx.iVal = iVal;
1385 TestCtx.pszMode = pszMode;
1386 TestCtx.pTrapFrame = &TrapFrame;
1387 TestCtx.pCtx = &Ctx;
1388 TestCtx.pExtCtx = pExtCtx;
1389 TestCtx.pExtCtxOut = pExtCtxOut;
1390 TestCtx.puMemOp = (uint8_t *)puMemOp;
1391 TestCtx.puMemOpAlias = puMemOpAlias;
1392 TestCtx.cbMemOp = cbMemOp;
1393 TestCtx.cbOperand = cbOperand;
1394 TestCtx.bXcptExpect = bXcptExpect;
1395 TestCtx.fSseInstr = fSseInstr;
1396 TestCtx.fAvxInstr = fAvxInstr;
1397 TestCtx.idTestStep = idTestStep;
1398 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1399 if (cErrors != Bs3TestSubErrorCount())
1400 {
1401 if (paConfigs[iCfg].fAligned)
1402 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, %s %u-bit)",
1403 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1404 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1405 else
1406 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, %s %u-bit)",
1407 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1408 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1409 TrapFrame.Ctx.rflags.u32, fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1410 Bs3TestPrintf("\n");
1411 }
1412 }
1413 }
1414 }
1415 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1416 }
1417 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1418
1419 /*
1420 * Next ring.
1421 */
1422 bRing++;
1423 if (bRing > 3 || bMode == BS3_MODE_RM)
1424 break;
1425 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1426 }
1427
1428 /*
1429 * Cleanup.
1430 */
1431 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1432 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1433 return 0;
1434}
1435
1436
1437/*
1438 * [V]ADDPS.
 *
 * Runs the ADDPS / VADDPS workers listed in the per-code-size tables below
 * against the shared s_aValues operand table and returns whatever
 * bs3CpuInstr4_WorkerTestType1 reports.
 *
 * bMode selects (via BS3CPUINSTR4_TEST_MODES_INDEX) which of the 16-, 32- or
 * 64-bit worker tables is used, and is also handed on to the worker.
1439 */
1440BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addps(uint8_t bMode)
1441{
    /*
     * Test vectors.  Each entry lists, in order: the src2 operand, the src1
     * operand and the expected result (eight FP32 lanes each), the MXCSR
     * exception mask, the DAZ, FZ and rounding-control settings, and finally
     * two expected-flags values.
     *
     * NOTE(review): the two flags values look like the expectations for the
     * 128-bit and the 256-bit forms respectively (entries whose only
     * exception-raising inputs sit in lanes 4..7 use 0 for the first one) --
     * confirm against the BS3CPUINSTR4_TEST1_VALUES_PS_T declaration.
     */
1442 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1443 {
1444 /*
1445 * Zero.
1446 */
1447 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1448 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1449 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1450 /*mask */ X86_MXCSR_XCPT_MASK,
1451 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1452 /*flags */ 0, 0 },
1453 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1454 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1455 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1456 /*mask */ ~X86_MXCSR_XCPT_MASK,
1457 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1458 /*flags */ 0, 0 },
1459 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1460 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1461 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1462 /*mask */ ~X86_MXCSR_XCPT_MASK,
1463 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1464 /*flags */ 0, 0 },
1465 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1466 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1467 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1468 /*mask */ ~X86_MXCSR_XCPT_MASK,
1469 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1470 /*flags */ 0, 0 },
1471 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1472 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1473 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1474 /*mask */ ~X86_MXCSR_XCPT_MASK,
1475 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1476 /*flags */ 0, 0 },
1477 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1478 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1479 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1480 /*mask */ X86_MXCSR_XCPT_MASK,
1481 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1482 /*flags */ 0, 0 },
1483 /*
1484 * Infinity.
1485 */
1486 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1487 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1488 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1489 /*mask */ ~X86_MXCSR_IM,
1490 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1491 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1492 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1493 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1494 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1495 /*mask */ X86_MXCSR_XCPT_MASK,
1496 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1497 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1498 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1499 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1500 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1501 /*mask */ X86_MXCSR_XCPT_MASK,
1502 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1503 /*flags */ 0, X86_MXCSR_IE },
1504 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1505 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
1506 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(0) } },
1507 /*mask */ ~X86_MXCSR_XCPT_MASK,
1508 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1509 /*flags */ 0, X86_MXCSR_IE },
1510 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_INF(0) } },
1511 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_INF(1) } },
1512 { /* => */ { BS3_FP32_INF(1), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_QNAN(1), BS3_FP32_QNAN(1), BS3_FP32_INF(1) } },
1513 /*mask */ ~X86_MXCSR_XCPT_MASK,
1514 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1515 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1516 /*
1517 * Overflow, Precision.
1518 */
1519 /*11*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1520 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1521 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), } },
1522 /*mask */ ~X86_MXCSR_XCPT_MASK,
1523 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1524 /*flags */ 0, X86_MXCSR_OE },
1525 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1526 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0) } },
1527 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1528 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1529 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1530 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1531 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1532 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1533 { /* => */ { BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1534 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1535 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1536 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1537 { { /*src2 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1538 { /*src1 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1539 { /* => */ { BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_VAL(1, 0, 2) } },
1540 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1541 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1542 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1543 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1544 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1545 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1546 /*mask */ X86_MXCSR_XCPT_MASK,
1547 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1548 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
1549 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1550 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1551 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1552 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1553 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1554 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1555 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1556 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1557 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1558 /*mask */ ~X86_MXCSR_XCPT_MASK,
1559 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1560 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1561 /*
1562 * Normals.
1563 */
1564 /*18*/{ { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/, BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/ } },
1565 { /*src1 */ { BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/, BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/ } },
1566 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/, BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/ } },
1567 /*mask */ X86_MXCSR_XCPT_MASK,
1568 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1569 /*flags */ 0, 0 },
1570 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1571 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1572 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1573 /*mask */ ~X86_MXCSR_XCPT_MASK,
1574 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1575 /*flags */ 0, 0 },
1576 { { /*src2 */ { BS3_FP32_VAL(0, 0x5ca5b8, 0x93)/*1807543*/, BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_VAL(0, 0x534000, 0x86)/*211.25*/, BS3_FP32_ZERO(0) } },
1577 { /*src1 */ { BS3_FP32_VAL(0, 0x1ea980, 0x8f)/* 81235*/, BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ONE(1) /*- 1.00*/, BS3_FP32_ZERO(0) } },
1578 { /* => */ { BS3_FP32_VAL(0, 0x669050, 0x93)/*1888778*/, BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_VAL(0, 0x524000, 0x86)/*210.25*/, BS3_FP32_ZERO(0) } },
1579 /*mask */ X86_MXCSR_XCPT_MASK,
1580 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1581 /*flags */ 0, 0 },
1582 { { /*src2 */ { BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(1), BS3_FP32_ZERO(0) } },
1583 { /*src1 */ { BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(0), BS3_FP32_ONE(0) } },
1584 { /* => */ { BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0) } },
1585 /*mask */ ~X86_MXCSR_XCPT_MASK,
1586 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1587 /*flags */ 0, 0 },
1588 { { /*src2 */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1589 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ONE(1), BS3_FP32_ONE(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1590 { /* => */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1591 /*mask */ X86_MXCSR_XCPT_MASK,
1592 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1593 /*flags */ 0, 0 },
1594 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), } },
1595 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), } },
1596 { /* => */ { BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1597 /*mask */ ~X86_MXCSR_XCPT_MASK,
1598 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1599 /*flags */ 0, 0 },
1600 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1601 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1602 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) , BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) } },
1603 /*mask */ ~X86_MXCSR_XCPT_MASK,
1604 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1605 /*flags */ 0, 0 },
1606 { { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/, BS3_FP32_VAL(0, 0x3ce348, 0x90)/*193421.125*/, BS3_FP32_VAL(0, 0x6423f2, 0x92)/*934463.125*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x16b43a, 0x93)/*1234567.25*/, BS3_FP32_VAL(0, 0x792318, 0x91)/*510232.75*/, BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/ } },
1607 { /*src1 */ { BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/, BS3_FP32_VAL(0, 0x430ebc, 0x91)/*399477.875*/, BS3_FP32_VAL(1, 0x0a19f0, 0x8f)/*-70707.875*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x00c6d3, 0x94)/*2109876.75*/, BS3_FP32_VAL(1, 0x316740, 0x8e)/*-45415.25*/, BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/ } },
1608 { /* => */ { BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/, BS3_FP32_VAL(0, 0x10c030, 0x92)/*592899.000*/, BS3_FP32_VAL(0, 0x52e0b4, 0x92)/*863755.250*/, BS3_FP32_VAL(1, 0, 2), BS3_FP32_VAL(0, 0, 2), BS3_FP32_VAL(0, 0x4c20f0, 0x94)/*3344444.00*/, BS3_FP32_VAL(0, 0x62f630, 0x91)/*464817.50*/, BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/ } },
1609 /*mask */ X86_MXCSR_XCPT_MASK,
1610 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1611 /*flags */ 0, 0 },
    /* NOTE(review): entries 26 thru 28 already feed denormal inputs although
       the "Denormals" heading only follows them, and entry 28 repeats the
       operands and settings of the last entry in this table (entry 31). */
1612 /*26*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1613 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1614 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1615 /*mask */ ~X86_MXCSR_XCPT_MASK,
1616 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1617 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1618 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1619 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1620 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1621 /*mask */ X86_MXCSR_XCPT_MASK,
1622 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1623 /*flags */ 0, 0 },
1624 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0) } },
1625 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0) } },
1626 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), } },
1627 /*mask */ X86_MXCSR_XCPT_MASK,
1628 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1629 /*flags */ 0, 0 },
1630 /*
1631 * Denormals.
1632 */
1633 /*29*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1634 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1635 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1636 /*mask */ ~X86_MXCSR_XCPT_MASK,
1637 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1638 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1639 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1640 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1641 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1642 /*mask */ X86_MXCSR_XCPT_MASK,
1643 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1644 /*flags */ 0, 0 },
1645 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0) } },
1646 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0) } },
1647 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) , BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1648 /*mask */ X86_MXCSR_XCPT_MASK,
1649 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1650 /*flags */ 0, 0 },
1651 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
1652 };
1653
    /*
     * Worker tables, one per code size.  The three integers after the T_xxx
     * value mirror the register numbers in the worker's name (the two-operand
     * SSE forms repeat the first register), with 255 standing in where the
     * worker uses the FSxBX memory operand instead of a register.  Every
     * worker is paired with the complete s_aValues table.
     */
1654 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1655 {
1656 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1657 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1658
1659 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1660 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1661
1662 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1663 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1664 };
1665 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1666 {
1667 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1668 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1669
1670 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1671 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1672
1673 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1674 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1675 };
    /* The 64-bit table has the same six workers as the 16/32-bit ones, plus
       workers that use XMM8/XMM9 and YMM8..YMM10. */
1676 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1677 {
1678 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1679 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1680
1681 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1682 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1683
1684 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1685 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1686
1687 { bs3CpuInstr4_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1688 { bs3CpuInstr4_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1689
1690 { bs3CpuInstr4_vaddps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1691 { bs3CpuInstr4_vaddps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1692 };
1693
    /* Pick the worker table matching bMode and run it, handing the
       g_aXcptConfig2 array and its element count to the common worker. */
1694 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1695 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1696 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1697 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
1698}
1699
1700
1701/*
1702 * [V]ADDPD.
1703 */
1704BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addpd(uint8_t bMode)
1705{
1706 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1707 {
1708 /*
1709 * Zero.
1710 */
1711 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1712 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1713 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1714 /*mask */ X86_MXCSR_XCPT_MASK,
1715 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1716 /*flags */ 0, 0 },
1717 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1718 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1719 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1720 /*mask */ ~X86_MXCSR_XCPT_MASK,
1721 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1722 /*flags */ 0, 0 },
1723 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1724 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1725 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1726 /*mask */ X86_MXCSR_XCPT_MASK,
1727 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_DOWN,
1728 /*flags */ 0, 0 },
1729 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1730 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1731 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1732 /*mask */ ~X86_MXCSR_XCPT_MASK,
1733 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1734 /*flags */ 0, 0 },
1735 { { /*src2 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1736 { /*src1 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1737 { /* => */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1738 /*mask */ X86_MXCSR_XCPT_MASK,
1739 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1740 /*flags */ 0, 0 },
1741 /*
1742 * Infinity.
1743 */
1744 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1745 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1746 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1747 /*mask */ ~X86_MXCSR_IM,
1748 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1749 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1750 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1751 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1752 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1753 /*mask */ ~X86_MXCSR_IM,
1754 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1755 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1756 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1757 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1758 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1759 /*mask */ ~X86_MXCSR_IM,
1760 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1761 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1762 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1763 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1764 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1765 /*mask */ X86_MXCSR_XCPT_MASK,
1766 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1767 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1768 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1769 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1770 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1771 /*mask */ X86_MXCSR_XCPT_MASK,
1772 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1773 /*flags */ 0, X86_MXCSR_IE },
1774 /*
1775 * Overflow, Precision.
1776 */
1777 /*10*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1778 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1779 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1780 /*mask */ ~X86_MXCSR_XCPT_MASK,
1781 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1782 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1783 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1784 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1785 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1786 /*mask */ ~X86_MXCSR_XCPT_MASK,
1787 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1788 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1789 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1790 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1791 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_INF(0), } },
1792 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1793 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1794 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1795 { { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
1796 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0) } },
1797 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1798 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1799 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1800 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1801 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1802 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1803 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1804 /*mask */ X86_MXCSR_XCPT_MASK,
1805 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1806 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1807 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1808 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1809 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX + 1) } },
1810 /*mask */ ~X86_MXCSR_XCPT_MASK,
1811 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1812 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
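1813 /** @todo Why does the below one cause PE?!  NOTE(review): src1[0] uses exponent 0x07d, which is the FP32 encoding of 0.25; in FP64 -0.25 would need exponent 0x3fd. As written the addend is a tiny negative normal, so 1.75 + src1[0] is inexact and rounds down to 1.75 - 1ulp (not 1.50), hence PE. */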
1814 { { /*src2 */ { BS3_FP64_VAL(0, 0xc000000000000, 0x3ff)/* 1.75*/, BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/ } },
1815 { /*src1 */ { BS3_FP64_VAL(1, 0, 0x07d)/*-0.25*/, BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/ } },
1816 { /* => */ { BS3_FP64_VAL(0, 0xbffffffffffff, 0x3ff)/* 1.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/ } },
1817 /*mask */ X86_MXCSR_XCPT_MASK,
1818 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1819 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1820 /*
1821 * Normals.
1822 */
1823 /*17*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1824 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1825 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1826 /*mask */ ~X86_MXCSR_XCPT_MASK,
1827 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1828 /*flags */ 0, 0 },
1829 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1830 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1831 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1832 /*mask */ X86_MXCSR_XCPT_MASK,
1833 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1834 /*flags */ 0, 0 },
1835 { { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
1836 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
1837 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
1838 /*mask */ ~X86_MXCSR_XCPT_MASK,
1839 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1840 /*flags */ 0, 0 },
1841 { { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1842 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1843 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1844 /*mask */ X86_MXCSR_XCPT_MASK,
1845 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1846 /*flags */ 0, 0 },
1847 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1848 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1849 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1850 /*mask */ ~X86_MXCSR_XCPT_MASK,
1851 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1852 /*flags */ 0, 0 },
1853 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1854 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1855 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_VAL(0, 0, 2) } },
1856 /*mask */ ~X86_MXCSR_XCPT_MASK,
1857 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1858 /*flags */ 0, 0 },
1859 { { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1860 { /*src1 */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1861 { /* => */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646.00*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, 0, 2) } },
1862 /*mask */ X86_MXCSR_XCPT_MASK,
1863 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1864 /*flags */ 0, 0 },
1865 /*
1866 * Denormals.
1867 */
1868 /*24*/{ { /*src2 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1869 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1870 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1871 /*mask */ ~X86_MXCSR_XCPT_MASK,
1872 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1873 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1874 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1875 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1876 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1877 /*mask */ X86_MXCSR_XCPT_MASK,
1878 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1879 /*flags */ 0, 0 },
1880 { { /*src2 */ { BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MAX(0) } },
1881 { /*src1 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0) } },
1882 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1883 /*mask */ X86_MXCSR_XCPT_MASK,
1884 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1885 /*flags */ 0, 0 },
1886 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
1887 };
1888
1889 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1890 {
1891 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1892 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1893
1894 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1895 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1896
1897 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1898 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1899 };
1900 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1901 {
1902 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1903 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1904
1905 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1906 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1907
1908 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1909 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1910 };
1911 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1912 {
1913 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1914 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1915
1916 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1917 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1918
1919 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1920 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1921
1922 { bs3CpuInstr4_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1923 { bs3CpuInstr4_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1924
1925 { bs3CpuInstr4_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1926 { bs3CpuInstr4_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1927 };
1928
1929 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1930 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1931 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1932 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
1933}
1934
1935/*
1936 * [V]ADDSS.
1937 */
1938BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addss(uint8_t bMode)
1939{
1940 static BS3CPUINSTR4_TEST1_VALUES_SS_T const s_aValues[] =
1941 {
1942 /*
1943 * Zero.
1944 */
1945 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1946 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1947 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1948 /*mask */ X86_MXCSR_XCPT_MASK,
1949 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1950 /*flags */ 0, 0 },
1951 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1952 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1953 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1954 /*mask */ ~X86_MXCSR_XCPT_MASK,
1955 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1956 /*flags */ 0, 0 },
1957 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1958 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1959 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1960 /*mask */ ~X86_MXCSR_XCPT_MASK,
1961 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1962 /*flags */ 0, 0 },
1963 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
1964 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1965 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1966 /*mask */ ~X86_MXCSR_XCPT_MASK,
1967 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1968 /*flags */ 0, 0 },
1969 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
1970 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1971 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1972 /*mask */ ~X86_MXCSR_XCPT_MASK,
1973 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1974 /*flags */ 0, 0 },
1975 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1976 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1977 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1978 /*mask */ X86_MXCSR_XCPT_MASK,
1979 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1980 /*flags */ 0, 0 },
1981 /*
1982 * Infinity.
1983 */
1984 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1985 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1986 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1987 /*mask */ ~X86_MXCSR_IM,
1988 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1989 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1990 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1991 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1992 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1993 /*mask */ ~X86_MXCSR_IM,
1994 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1995 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1996 { { /*src2 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1997 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1998 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1999 /*mask */ X86_MXCSR_XCPT_MASK,
2000 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2001 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2002 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
2003 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
2004 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
2005 /*mask */ X86_MXCSR_XCPT_MASK,
2006 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2007 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2008 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2009 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2010 { /* => */ { BS3_FP32_QNAN(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2011 /*mask */ ~X86_MXCSR_XCPT_MASK,
2012 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2013 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2014 { { /*src2 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_6(1) } },
2015 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2016 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2017 /*mask */ ~X86_MXCSR_XCPT_MASK,
2018 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2019 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2020 /*
2021 * Overflow, Precision.
2022 */
2023 /*12*/{ { /*src2 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_6(1) } },
2024 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2025 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2026 /*mask */ ~X86_MXCSR_XCPT_MASK,
2027 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2028 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
2029 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2030 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2031 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2032 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2033 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2034 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2035 { { /*src2 */ { BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2036 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2037 { /* => */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2038 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2039 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2040 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2041 { { /*src2 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2042 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2043 { /* => */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2044 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2045 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2046 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2047 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1) } },
2048 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2049 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2050 /*mask */ ~X86_MXCSR_XCPT_MASK,
2051 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2052 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2053 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2054 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_5(1) } },
2055 { /* => */ { BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_5(1) } },
2056 /*mask */ ~X86_MXCSR_XCPT_MASK,
2057 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2058 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2059 /*
2060 * Normals.
2061 */
2062 /*18*/{ { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2063 { /*src1 */ { BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_5(1) } },
2064 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_5(1) } },
2065 /*mask */ X86_MXCSR_XCPT_MASK,
2066 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2067 /*flags */ 0, 0 },
2068 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1) } },
2069 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2070 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2071 /*mask */ X86_MXCSR_XCPT_MASK,
2072 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2073 /*flags */ 0, 0 },
2074 { { /*src2 */ { BS3_FP32_VAL(0, 0x5ca5b8, 0x93)/*1807543*/, BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2075 { /*src1 */ { BS3_FP32_VAL(0, 0x1ea980, 0x8f)/* 81235*/, BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2076 { /* => */ { BS3_FP32_VAL(0, 0x669050, 0x93)/*1888778*/, BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2077 /*mask */ X86_MXCSR_XCPT_MASK,
2078 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2079 /*flags */ 0, 0 },
2080 { { /*src2 */ { BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2081 { /*src1 */ { BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2082 { /* => */ { BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2083 /*mask */ X86_MXCSR_XCPT_MASK,
2084 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2085 /*flags */ 0, 0 },
2086 { { /*src2 */ { BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2087 { /*src1 */ { BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2088 { /* => */ { BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2089 /*mask */ ~X86_MXCSR_XCPT_MASK,
2090 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2091 /*flags */ 0, 0 },
2092 { { /*src2 */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2093 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2094 { /* => */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2095 /*mask */ X86_MXCSR_XCPT_MASK,
2096 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2097 /*flags */ 0, 0 },
2098 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2099 { /*src1 */ { BS3_FP32_ONE(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2100 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2101 /*mask */ X86_MXCSR_XCPT_MASK,
2102 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2103 /*flags */ 0, 0 },
2104 { { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/, BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2105 { /*src1 */ { BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/, BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2106 { /* => */ { BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/, BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2107 /*mask */ X86_MXCSR_XCPT_MASK,
2108 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2109 /*flags */ 0, 0 },
2110 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(1) } },
2111 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_7(1) } },
2112 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_7(1) } },
2113 /*mask */ ~X86_MXCSR_XCPT_MASK,
2114 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2115 /*flags */ 0, 0 },
2116 /*
2117 * Denormals.
2118 */
2119 /*27*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(0) } },
2120 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_0(0) } },
2121 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_0(0) } },
2122 /*mask */ ~X86_MXCSR_XCPT_MASK,
2123 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2124 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
2125 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(1) } },
2126 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_7(1) } },
2127 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_7(1) } },
2128 /*mask */ X86_MXCSR_XCPT_MASK,
2129 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
2130 /*flags */ 0, 0 },
2131 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_7(0) } },
2132 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_6(1) } },
2133 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_6(1) } },
2134 /*mask */ X86_MXCSR_XCPT_MASK,
2135 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
2136 /*flags */ 0, 0 },
2137 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
2138 };
2139
2140 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
2141 {
2142 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2143 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2144
2145 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2146 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2147 };
2148 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
2149 {
2150 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2151 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2152
2153 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2154 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2155 };
2156 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
2157 {
2158 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2159 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2160
2161 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2162 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2163
2164 { bs3CpuInstr4_addss_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2165 { bs3CpuInstr4_addss_XMM8_FSxBX_icebp_c64, X86_XCPT_AC, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2166 };
2167
2168 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
2169 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
2170 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
2171 g_aXcptConfig3, RT_ELEMENTS(g_aXcptConfig3));
2172}
2173
2174
2175/*
2176 * [V]SUBPS.
2177 */
2178BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_subps(uint8_t bMode)
2179{
2180 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
2181 {
2182 /*
2183 * Zero.
2184 */
2185 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2186 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2187 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2188 /*mask */ X86_MXCSR_XCPT_MASK,
2189 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2190 /*flags */ 0, 0 },
2191 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2192 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2193 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2194 /*mask */ ~X86_MXCSR_XCPT_MASK,
2195 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2196 /*flags */ 0, 0 },
2197 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2198 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2199 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2200 /*mask */ ~X86_MXCSR_XCPT_MASK,
2201 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
2202 /*flags */ 0, 0 },
2203 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
2204 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
2205 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2206 /*mask */ ~X86_MXCSR_XCPT_MASK,
2207 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
2208 /*flags */ 0, 0 },
2209 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
2210 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
2211 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2212 /*mask */ ~X86_MXCSR_XCPT_MASK,
2213 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2214 /*flags */ 0, 0 },
2215 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
2216 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
2217 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
2218 /*mask */ X86_MXCSR_XCPT_MASK,
2219 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2220 /*flags */ 0, 0 },
2221 /*
2222 * Infinity.
2223 */
2224 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2225 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
2226 { /* => */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
2227 /*mask */ ~X86_MXCSR_IM,
2228 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2229 /*flags */ 0, 0 },
2230 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2231 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2232 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1) } },
2233 /*mask */ X86_MXCSR_XCPT_MASK,
2234 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2235 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2236 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2237 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2238 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1) } },
2239 /*mask */ X86_MXCSR_XCPT_MASK,
2240 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2241 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2242 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
2243 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
2244 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2245 /*mask */ X86_MXCSR_XCPT_MASK,
2246 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2247 /*flags */ 0, X86_MXCSR_IE },
2248 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
2249 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
2250 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1) } },
2251 /*mask */ ~X86_MXCSR_XCPT_MASK,
2252 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2253 /*flags */ 0, X86_MXCSR_IE },
2254 { { /*src2 */ { BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_INF(0) } },
2255 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_INF(1) } },
2256 { /* => */ { BS3_FP32_INF(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_QNAN(1), BS3_FP32_INF(1) } },
2257 /*mask */ ~X86_MXCSR_XCPT_MASK,
2258 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2259 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2260 /*
2261 * Overflow, Precision.
2262 */
2263 /*12*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0) } },
2264 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0) } },
2265 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2266 /*mask */ ~X86_MXCSR_XCPT_MASK,
2267 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2268 /*flags */ 0, X86_MXCSR_PE },
2269 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
2270 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
2271 { /* => */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2272 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2273 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2274 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2275 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
2276 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1) } },
2277 { /* => */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
2278 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2279 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2280 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2281 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0) } },
2282 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0) } },
2283 { /* => */ { BS3_FP32_INF(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2284 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2285 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2286 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2287 { { /*src2 */ { BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_VAL(1, 0, 2) } },
2288 { /*src1 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MIN(1) } },
2289 { /* => */ { BS3_FP32_NORMAL_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MIN(0) } },
2290 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2291 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2292 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2293 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0) } },
2294 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0) } },
2295 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2296 /*mask */ X86_MXCSR_XCPT_MASK,
2297 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2298 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
2299 { { /*src2 */ { BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0) } },
2300 { /*src1 */ { BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MIN(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MIN(0) } },
2301 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2302 /*mask */ ~(X86_MXCSR_OM | X86_MXCSR_PM),
2303 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2304 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2305 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
2306 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
2307 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2308 /*mask */ ~X86_MXCSR_XCPT_MASK,
2309 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2310 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2311 /*
2312 * Normals.
2313 */
2314 /*20*/{ { /*src2 */ { BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, 0, 0x7e)/*-0.50*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_VAL(0, 0x400000, 0x7e)/* 0.75*/ } },
2315 { /*src1 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/*1.75*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, 0x400000, 0x7e)/*-0.75*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_VAL(0, 0, 0x7e)/* 0.50*/ } },
2316 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/*1.50*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/ } },
2317 /*mask */ ~X86_MXCSR_XCPT_MASK,
2318 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2319 /*flags */ 0, 0 },
2320 { { /*src2 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2321 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2322 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2323 /*mask */ ~X86_MXCSR_XCPT_MASK,
2324 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2325 /*flags */ 0, 0 },
2326 { { /*src2 */ { BS3_FP32_VAL(0, 0x5ca5b8, 0x93)/*1807543*/, BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x5c0000, 0x84)/* 55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/* 1352346.75*/, BS3_FP32_VAL(0, 0x534000, 0x86)/*211.25*/, BS3_FP32_ZERO(0) } },
2327 { /*src1 */ { BS3_FP32_VAL(0, 0x669050, 0x93)/*1888778*/, BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(1, 0x600000, 0x81)/* -7*/, BS3_FP32_VAL(1, 0x7c9000, 0x88)/* -1010.25*/, BS3_FP32_ONE(0) /* 1.00*/, BS3_FP32_ZERO(0) } },
2328 { /* => */ { BS3_FP32_VAL(0, 0x1ea980, 0x8f)/* 81235*/, BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(1, 0x780000, 0x84)/*-62*/, BS3_FP32_VAL(1, 0x253468, 0x93)/*-1353357.00*/, BS3_FP32_VAL(1, 0x524000, 0x86)/*-210.25*/, BS3_FP32_ZERO(0) } },
2329 /*mask */ X86_MXCSR_XCPT_MASK,
2330 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2331 /*flags */ 0, 0 },
2332 { { /*src2 */ { BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(1, 0x3c614e, 0x96)/*-12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x712060, 0x92)/* 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2333 { /*src1 */ { BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/* 12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ONE(0), BS3_FP32_ONE(0) } },
2334 { /* => */ { BS3_FP32_VAL(0, 0x712060, 0x92)/* 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/* 24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(0), BS3_FP32_ONE(0) } },
2335 /*mask */ ~X86_MXCSR_XCPT_MASK,
2336 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2337 /*flags */ 0, 0 },
2338 { { /*src2 */ { BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ONE(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
2339 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ONE(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
2340 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2341 /*mask */ X86_MXCSR_XCPT_MASK,
2342 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2343 /*flags */ 0, 0 },
2344 { { /*src2 */ { BS3_FP32_ONE(0), BS3_FP32_ONE(0), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ONE(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0) } },
2345 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
2346 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_ONE(0) } },
2347 /*mask */ ~X86_MXCSR_XCPT_MASK,
2348 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2349 /*flags */ 0, 0 },
2350 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1) } },
2351 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 2), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1) } },
2352 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0) } },
2353 /*mask */ ~X86_MXCSR_XCPT_MASK,
2354 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2355 /*flags */ 0, 0 },
2356 { { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/, BS3_FP32_VAL(0, 0x3ce348, 0x90)/*193421.125*/, BS3_FP32_VAL(1, 0x0a19f0, 0x8f)/*-70707.875*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x00c6d3, 0x94)/*2109876.75*/, BS3_FP32_VAL(0, 0x316740, 0x8e)/* 45415.25*/, BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/ } },
2357 { /*src1 */ { BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/, BS3_FP32_VAL(0, 0x10c030, 0x92)/*592899.000*/, BS3_FP32_VAL(0, 0x52e0b4, 0x92)/*863755.250*/, BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_VAL(0, 0x4c20f0, 0x94)/*3344444.00*/, BS3_FP32_VAL(0, 0x792318, 0x91)/*510232.75*/, BS3_FP32_VAL(1, 0x769b50, 0x92)/*-1010101.000*/ } },
2358 { /* => */ { BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/, BS3_FP32_VAL(0, 0x430ebc, 0x91)/*399477.875*/, BS3_FP32_VAL(0, 0x6423f2, 0x92)/*934463.125*/, BS3_FP32_VAL(0, 0, 2), BS3_FP32_VAL(1, 0, 2), BS3_FP32_VAL(0, 0x16b43a, 0x93)/*1234567.25*/, BS3_FP32_VAL(0, 0x62f630, 0x91)/*464817.50*/, BS3_FP32_VAL(1, 0x769b5e, 0x92)/*-1010101.875*/ } },
2359 /*mask */ X86_MXCSR_XCPT_MASK,
2360 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2361 /*flags */ 0, 0 },
2362 /*28*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2363 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
2364 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
2365 /*mask */ ~X86_MXCSR_XCPT_MASK,
2366 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2367 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
2368 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2369 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2370 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2371 /*mask */ X86_MXCSR_XCPT_MASK,
2372 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
2373 /*flags */ 0, 0 },
2374 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0) } },
2375 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0) } },
2376 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), } },
2377 /*mask */ X86_MXCSR_XCPT_MASK,
2378 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
2379 /*flags */ 0, 0 },
2380 /*
2381 * Denormals.
2382 */
2383 /*31*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2384 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2385 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2386 /*mask */ ~X86_MXCSR_XCPT_MASK,
2387 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2388 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
2389 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
2390 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2391 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2392 /*mask */ X86_MXCSR_XCPT_MASK,
2393 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
2394 /*flags */ 0, 0 },
2395 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(1), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_DENORMAL_MAX(0) } },
2396 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(1), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(1), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_DENORMAL_MIN(0) } },
2397 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) , BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
2398 /*mask */ X86_MXCSR_XCPT_MASK,
2399 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
2400 /*flags */ 0, 0 },
2401 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
2402 };
2403
2404 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
2405 {
2406 { bs3CpuInstr4_subps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2407 { bs3CpuInstr4_subps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2408
2409 { bs3CpuInstr4_vsubps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2410 { bs3CpuInstr4_vsubps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2411
2412 { bs3CpuInstr4_vsubps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2413 { bs3CpuInstr4_vsubps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2414 };
2415 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
2416 {
2417 { bs3CpuInstr4_subps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2418 { bs3CpuInstr4_subps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2419
2420 { bs3CpuInstr4_vsubps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2421 { bs3CpuInstr4_vsubps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2422
2423 { bs3CpuInstr4_vsubps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2424 { bs3CpuInstr4_vsubps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2425 };
2426 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
2427 {
2428 { bs3CpuInstr4_subps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2429 { bs3CpuInstr4_subps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2430
2431 { bs3CpuInstr4_vsubps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2432 { bs3CpuInstr4_vsubps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2433
2434 { bs3CpuInstr4_vsubps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2435 { bs3CpuInstr4_vsubps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2436
2437 { bs3CpuInstr4_subps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2438 { bs3CpuInstr4_subps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2439
2440 { bs3CpuInstr4_vsubps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2441 { bs3CpuInstr4_vsubps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2442 };
2443
2444 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
2445 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
2446 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
2447 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
2448}
2449
2450
2451/*
2452 * [V]SUBPD.
2453 */
2454BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_subpd(uint8_t bMode)
2455{
2456 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
2457 {
2458 /*
2459 * Zero.
2460 */
2461 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2462 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2463 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2464 /*mask */ X86_MXCSR_XCPT_MASK,
2465 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2466 /*flags */ 0, 0 },
2467 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2468 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2469 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2470 /*mask */ ~X86_MXCSR_XCPT_MASK,
2471 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2472 /*flags */ 0, 0 },
2473 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2474 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2475 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2476 /*mask */ ~X86_MXCSR_XCPT_MASK,
2477 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
2478 /*flags */ 0, 0 },
2479 { { /*src2 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2480 { /*src1 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2481 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2482 /*mask */ ~X86_MXCSR_XCPT_MASK,
2483 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
2484 /*flags */ 0, 0 },
2485 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
2486 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
2487 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2488 /*mask */ ~X86_MXCSR_XCPT_MASK,
2489 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2490 /*flags */ 0, 0 },
2491 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2492 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2493 { /* => */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
2494 /*mask */ X86_MXCSR_XCPT_MASK,
2495 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2496 /*flags */ 0, 0 },
2497 /*
2498 * Infinity.
2499 */
2500 /* 6*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
2501 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
2502 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
2503 /*mask */ ~X86_MXCSR_IM,
2504 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2505 /*flags */ 0, 0 },
2506 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_INF(1), BS3_FP64_INF(1) } },
2507 { /*src1 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_INF(1) } },
2508 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_INF(0), BS3_FP64_QNAN(1) } },
2509 /*mask */ X86_MXCSR_XCPT_MASK,
2510 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2511 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2512 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_INF(1) } },
2513 { /*src1 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_INF(1) } },
2514 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_QNAN(1) } },
2515 /*mask */ X86_MXCSR_XCPT_MASK,
2516 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2517 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2518 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_INF(1) } },
2519 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_INF(1) } },
2520 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_QNAN(1) } },
2521 /*mask */ X86_MXCSR_XCPT_MASK,
2522 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2523 /*flags */ 0, X86_MXCSR_IE },
2524 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
2525 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
2526 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
2527 /*mask */ ~X86_MXCSR_XCPT_MASK,
2528 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2529 /*flags */ 0, X86_MXCSR_IE },
2530 { { /*src2 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_INF(0) } },
2531 { /*src1 */ { BS3_FP64_INF(0), BS3_FP64_INF(0), BS3_FP64_INF(0), BS3_FP64_INF(1) } },
2532 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
2533 /*mask */ ~X86_MXCSR_XCPT_MASK,
2534 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2535 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
2536 /*
2537 * Overflow, Precision.
2538 */
2539 /*12*/{ { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MIN(0) } },
2540 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0) } },
2541 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2542 /*mask */ ~X86_MXCSR_XCPT_MASK,
2543 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2544 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2545 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MIN(0) } },
2546 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0) } },
2547 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2548 /*mask */ ~X86_MXCSR_XCPT_MASK,
2549 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2550 /*flags */ 0, X86_MXCSR_PE },
2551 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1) } },
2552 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1) } },
2553 { /* => */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
2554 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2555 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2556 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2557 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
2558 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
2559 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
2560 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2561 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2562 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2563 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MIN(1) } },
2564 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MIN(1) } },
2565 { /* => */ { BS3_FP64_INF(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(0), BS3_FP64_ZERO(0), } },
2566 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2567 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2568 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2569 { { /*src2 */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_VAL(1, 0, 2) } },
2570 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MIN(1) } },
2571 { /* => */ { BS3_FP64_NORMAL_MIN(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MIN(0) } },
2572 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2573 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2574 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2575 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MIN(0), BS3_FP64_NORMAL_MAX(0) } },
2576 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MIN(0), BS3_FP64_NORMAL_MAX(1) } },
2577 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
2578 /*mask */ X86_MXCSR_XCPT_MASK,
2579 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2580 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
2581 { { /*src2 */ { BS3_FP64_NORMAL_MIN(0), BS3_FP64_NORMAL_MIN(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MIN(0) } },
2582 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
2583 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2584 /*mask */ ~(X86_MXCSR_OM | X86_MXCSR_PM),
2585 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2586 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2587 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
2588 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
2589 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2590 /*mask */ ~X86_MXCSR_XCPT_MASK,
2591 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2592 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2593 /*
2594 * Normals.
2595 */
2596 /*21*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(0) } },
2597 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(0) } },
2598 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), } },
2599 /*mask */ ~X86_MXCSR_XCPT_MASK,
2600 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2601 /*flags */ 0, 0 },
2602 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_VAL(1, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/ } },
2603 { /*src1 */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(1, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_VAL(0, 0, 0x409)/*1024*/ } },
2604 { /* => */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(1, 0xf000000000000, 0x404)/*62*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_VAL(1, 0, 0x408)/* 512*/ } },
2605 /*mask */ X86_MXCSR_XCPT_MASK,
2606 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2607 /*flags */ 0, 0 },
2608 { { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/*1234567890*/, BS3_FP64_VAL(0, 0x9000000000000, 0x405)/* 100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
2609 { /*src1 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/*1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
2610 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
2611 /*mask */ ~X86_MXCSR_XCPT_MASK,
2612 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2613 /*flags */ 0, 0 },
2614 { { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2615 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2616 { /* => */ { BS3_FP64_ONE(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
2617 /*mask */ X86_MXCSR_XCPT_MASK,
2618 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2619 /*flags */ 0, 0 },
2620 { { /*src2 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ONE(1), BS3_FP64_NORMAL_SAFE_INT_MAX(0), } },
2621 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ONE(0), } },
2622 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX) } },
2623 /*mask */ X86_MXCSR_XCPT_MASK,
2624 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2625 /*flags */ 0, 0 },
2626 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
2627 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
2628 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_VAL(0, 0, 2) } },
2629 /*mask */ ~X86_MXCSR_XCPT_MASK,
2630 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2631 /*flags */ 0, 0 },
2632 { { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
2633 { /*src1 */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646.00*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
2634 { /* => */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_VAL(1, 0, 2) } },
2635 /*mask */ X86_MXCSR_XCPT_MASK,
2636 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
2637 /*flags */ 0, 0 },
2638 /** @todo Denormals; Underflow, Precision; Rounding,
2639 * FZ etc. */
2640 };
2641
2642 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
2643 {
2644 { bs3CpuInstr4_subpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2645 { bs3CpuInstr4_subpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2646
2647 { bs3CpuInstr4_vsubpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2648 { bs3CpuInstr4_vsubpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2649
2650 { bs3CpuInstr4_vsubpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2651 { bs3CpuInstr4_vsubpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2652 };
2653 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
2654 {
2655 { bs3CpuInstr4_subpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2656 { bs3CpuInstr4_subpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2657
2658 { bs3CpuInstr4_vsubpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2659 { bs3CpuInstr4_vsubpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2660
2661 { bs3CpuInstr4_vsubpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2662 { bs3CpuInstr4_vsubpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2663 };
2664 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
2665 {
2666 { bs3CpuInstr4_subpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2667 { bs3CpuInstr4_subpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2668
2669 { bs3CpuInstr4_vsubpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2670 { bs3CpuInstr4_vsubpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2671
2672 { bs3CpuInstr4_vsubpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2673 { bs3CpuInstr4_vsubpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2674
2675 { bs3CpuInstr4_subpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2676 { bs3CpuInstr4_subpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2677
2678 { bs3CpuInstr4_vsubpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2679 { bs3CpuInstr4_vsubpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2680 };
2681
2682 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
2683 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
2684 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
2685 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
2686}
2687
2688
2689/**
2690 * The 32-bit protected mode main function.
2691 *
2692 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
2693 * we'll switch between PE32 and RM for each test step we perform). Given that
2694 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
2695 *
2696 * Some extra steps needs to be taken to properly handle extended state in LM64
2697 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
2698 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
2699 */
2700BS3_DECL(void) Main_pe32()
2701{
2702 static const BS3TESTMODEBYONEENTRY g_aTests[] =
2703 {
2704#if 1 /*ndef DEBUG_bird*/
2705# define ALL_TESTS
2706#endif
2707#if defined(ALL_TESTS)
2708 { "[v]addps", bs3CpuInstr4_v_addps, 0 },
2709 { "[v]addpd", bs3CpuInstr4_v_addpd, 0 },
2710 { "[v]addss", bs3CpuInstr4_v_addss, 0 },
2711 { "[v]subps", bs3CpuInstr4_v_subps, 0 },
2712 { "[v]subpd", bs3CpuInstr4_v_subpd, 0 },
2713#endif
2714 };
2715 Bs3TestInit("bs3-cpu-instr-4");
2716
2717 /*
2718 * Initialize globals.
2719 */
2720 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
2721 {
2722 uint32_t fEbx, fEcx, fEdx;
2723 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
2724 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
2725 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
2726 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
2727 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
2728 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
2729 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
2730 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
2731 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
2732 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
2733 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
2734 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
2735 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2736 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2737 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
2738 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2739
2740 if (ASMCpuId_EAX(0) >= 7)
2741 {
2742 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
2743 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
2744 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
2745 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
2746 }
2747
2748 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
2749 {
2750 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
2751 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
2752 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
2753 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
2754 }
2755 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
2756
2757 /*
2758 * Figure out FPU save/restore method and support for DAZ bit.
2759 */
2760 {
2761 /** @todo Add bs3kit API to just get the ext ctx method without needing to
2762 * alloc/free a context. Replicating the logic in the bs3kit here, though
2763 * doable, runs a risk of not updating this when the other logic is
2764 * changed. */
2765 uint64_t fFlags;
2766 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
2767 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
2768 if (pExtCtx)
2769 {
2770 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
2771 g_enmExtCtxMethod = pExtCtx->enmMethod;
2772 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
2773 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
2774 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
2775 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
2776 g_fMxCsrDazSupported = true;
2777 }
2778 else
2779 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
2780 }
2781
2782 /*
2783 * Allocate a buffer for testing.
2784 */
2785 g_cbBuf = X86_PAGE_SIZE * 4;
2786 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
2787 if (g_pbBuf)
2788 {
2789 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
2790 if (g_pbBufAliasAlloc)
2791 {
2792 /*
2793 * Do the tests.
2794 */
2795 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
2796#ifdef BS3_SKIPIT_DO_SKIP
2797 bs3CpuInstrX_ShowTallies();
2798#endif
2799 }
2800 else
2801 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
2802 }
2803 else
2804 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
2805 }
2806
2807 Bs3TestTerm();
2808}
2809
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette