VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104953

Last change on this file since 104953 was 104953, checked in by vboxsync, 10 months ago

ValidationKit/bootsector: bugref:10658 SIMD FP testcase: [v]addss.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 137.2 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104953 2024-06-18 11:01:07Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code modes, 1 for 32-bit code modes and 2 otherwise.
 * The macro operates on its own argument (it used to read a caller variable
 * named 'bMode' instead); the argument may be evaluated twice. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
55/** Maximum length for the names of all SIMD FP exception flags combined.
 * This is the sizeof of a string literal, so the terminating NUL is counted. */
56#define BS3_FP_XCPT_NAMES_MAXLEN sizeof(" IE DE ZE OE UE PE ")
57
/*
 * Single-precision (32 bits) floating-point defines.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX             254
/** The min exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MIN             1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX        0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN        0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                   RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS              RTFLOAT32U_FRACTION_BITS
/** The max exponent value for a single-precision floating-point integer without
 * losing precision.  Parenthesized so the sum survives being embedded in a
 * larger expression. */
#define BS3_FP32_EXP_SAFE_INT_MAX           (BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
/** The min exponent value for a single-precision floating-point integer without
 * losing precision. */
#define BS3_FP32_EXP_SAFE_INT_MIN           1
/** The max fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MAX      0x7fffff
/** The min fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MIN      1
83
/*
 * Single-precision initializer shorthands.  a_Sign is handed through as the
 * first argument of the underlying RTFLOAT32U_INIT_XXX macro.
 */
84#define BS3_FP32_NORMAL_MAX(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
85#define BS3_FP32_NORMAL_MIN(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
86#define BS3_FP32_ZERO(a_Sign) RTFLOAT32U_INIT_ZERO(a_Sign)
87#define BS3_FP32_ONE(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
88#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp) RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
89#define BS3_FP32_INF(a_Sign) RTFLOAT32U_INIT_INF(a_Sign)
90#define BS3_FP32_QNAN(a_Sign) RTFLOAT32U_INIT_QNAN(a_Sign)
91#define BS3_FP32_QNAN_VAL(a_Sign, a_Val) RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
92#define BS3_FP32_SNAN(a_Sign) RTFLOAT32U_INIT_SNAN(a_Sign)
93
94
95/*
96 * Single-precision floating-point normals.
97 * Fraction - 23 bits, all usable.
98 * Exponent - 8 bits, least significant bit MBZ.
99 */
100#define BS3_FP32_NORMAL_VAL_1(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
101#define BS3_FP32_NORMAL_VAL_2(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
102#define BS3_FP32_NORMAL_VAL_3(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
103/* The maximum integer value (all 23 + 1 implied bit of the fraction part set) without losing precision. */
104#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX)
105/* The minimum integer value without losing precision. */
106#define BS3_FP32_NORMAL_SAFE_INT_MIN(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_SAFE_INT_MIN)
107
108/*
109 * Single-precision floating-point denormals (exponent field is 0).
110 */
111/** The maximum denormal value. */
112#define BS3_FP32_DENORMAL_MAX(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MAX, 0)
113/** The minimum denormal value. */
114#define BS3_FP32_DENORMAL_MIN(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MIN, 0)
115
116/*
117 * Single-precision random values (incl. potentially invalid values).
118 * We don't care what the exact values are as these are meant to populate
119 * unmodified operands and be compared bitwise.
 * Each macro takes only the sign; fraction and exponent are fixed constants.
120 */
121#define BS3_FP32_RAND_VAL_0(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x7bacda, 0x55)
122#define BS3_FP32_RAND_VAL_1(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x7010f0, 0xc0)
123#define BS3_FP32_RAND_VAL_2(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x4ffcbe, 0xf1)
124#define BS3_FP32_RAND_VAL_3(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x2fd7c8, 0x1f)
125#define BS3_FP32_RAND_VAL_4(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x09)
126#define BS3_FP32_RAND_VAL_5(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x3d2d1d, 0x99)
127#define BS3_FP32_RAND_VAL_6(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x123456, 0x5e)
128#define BS3_FP32_RAND_VAL_7(a_Sign) RTFLOAT32U_INIT_C(a_Sign, 0x05432f, 0xd7)
129
/*
 * Double-precision (64 bits) floating-point defines.
 */
/** The max exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX             2046
/** The min exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN             1
/** The max fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX        0xfffffffffffff
/** The min fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN        0
/** The exponent bias for the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                   RTFLOAT64U_EXP_BIAS
/** Fraction width (in bits) for the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS              RTFLOAT64U_FRACTION_BITS
/** The max exponent value for a double-precision floating-point integer without
 * losing precision.  Parenthesized so the sum survives being embedded in a
 * larger expression. */
#define BS3_FP64_EXP_SAFE_INT_MAX           (BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
/** The min exponent value for a double-precision floating-point integer without
 * losing precision. */
#define BS3_FP64_EXP_SAFE_INT_MIN           1
/** The max fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MAX      0xfffffffffffff
/** The min fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MIN      1
155
/*
 * Double-precision initializer shorthands.  a_Sign is handed through as the
 * first argument of the underlying RTFLOAT64U_INIT_XXX macro.
 */
156#define BS3_FP64_NORMAL_MAX(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
157#define BS3_FP64_NORMAL_MIN(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
158#define BS3_FP64_ZERO(a_Sign) RTFLOAT64U_INIT_ZERO(a_Sign)
159#define BS3_FP64_ONE(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
160#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp) RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
161#define BS3_FP64_INF(a_Sign) RTFLOAT64U_INIT_INF(a_Sign)
162#define BS3_FP64_QNAN(a_Sign) RTFLOAT64U_INIT_QNAN(a_Sign)
163#define BS3_FP64_QNAN_VAL(a_Sign, a_Val) RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
164#define BS3_FP64_SNAN(a_Sign) RTFLOAT64U_INIT_SNAN(a_Sign)
165#define BS3_FP64_SNAN_VAL(a_Sign, a_Val) RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)
166
167/*
168 * Double-precision floating-point normals.
169 * Fraction - 52 bits, all usable.
170 * Exponent - 11 bits, least significant bit MBZ.
171 */
172#define BS3_FP64_NORMAL_VAL_1(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
173#define BS3_FP64_NORMAL_VAL_2(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): 0xffe needs 12 bits, which exceeds the 11-bit exponent stated
 * above and BS3_FP64_EXP_NORMAL_MAX (2046 = 0x7fe).  Presumably 0x7fe was
 * meant - confirm how RTFLOAT64U_INIT_C treats the excess bit. */
174#define BS3_FP64_NORMAL_VAL_3(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
175/* The maximum integer value (all 52 + 1 implied bit of the fraction part set) without losing precision. */
176#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX)
177/* The minimum integer value without losing precision. */
178#define BS3_FP64_NORMAL_SAFE_INT_MIN(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_SAFE_INT_MIN)
179
180/*
181 * Double-precision floating-point denormals (exponent field is 0).
182 */
183/** The maximum denormal value. */
184#define BS3_FP64_DENORMAL_MAX(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MAX, 0)
185/** The minimum denormal value. */
186#define BS3_FP64_DENORMAL_MIN(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MIN, 0)
187
188
189/*********************************************************************************************************************************
190* Structures and Typedefs *
191*********************************************************************************************************************************/
192/** Instruction set type and operand width.
 * The values index g_afTypeSupports, so the enumerator order matters. */
193typedef enum BS3CPUINSTRX_INSTRTYPE_T
194{
195 T_INVALID,
196 T_MMX,
197 T_MMX_SSE, /**< MMX instruction, but requires the SSE CPUID to work. */
198 T_MMX_SSE2, /**< MMX instruction, but requires the SSE2 CPUID to work. */
199 T_MMX_SSSE3, /**< MMX instruction, but requires the SSSE3 CPUID to work. */
200 T_AXMMX,
201 T_AXMMX_OR_SSE,
202 T_SSE,
203 T_128BITS = T_SSE, /**< Alias of T_SSE; presumably marks the first 128-bit type - confirm with users. */
204 T_SSE2,
205 T_SSE3,
206 T_SSSE3,
207 T_SSE4_1,
208 T_SSE4_2,
209 T_SSE4A,
210 T_PCLMUL,
211 T_SHA,
212 T_AVX_128,
213 T_AVX2_128,
214 T_AVX_PCLMUL,
215 T_AVX_256,
216 T_256BITS = T_AVX_256, /**< Alias of T_AVX_256; presumably marks the first 256-bit type - confirm with users. */
217 T_AVX2_256,
218 T_MAX /**< Number of entries; used as the size of g_afTypeSupports. */
219} BS3CPUINSTRX_INSTRTYPE_T;
220
221/** Memory or register rm variant.
 * The numeric suffix of RM_MEMxx is the operand width in bits:
 * bs3CpuInstrXMemOpSize maps them to sizeof(uint8_t) .. sizeof(uint64_t). */
222enum {
223 RM_REG = 0,
224 RM_MEM,
225 RM_MEM8, /**< Memory operand is 8 bits (1 byte). Hack for movss and similar. */
226 RM_MEM16, /**< Memory operand is 16 bits (2 bytes). Hack for movss and similar. */
227 RM_MEM32, /**< Memory operand is 32 bits (4 bytes). Hack for movss and similar. */
228 RM_MEM64 /**< Memory operand is 64 bits (8 bytes). Hack for movss and similar. */
229};
230
231/**
232 * Execution environment configuration.
 *
 * Applied to a test context by bs3CpuInstr4ConfigReconfigure.  The member
 * order is the column order of the g_aXcptConfigN tables, so it must not be
 * changed without updating those initializers.
233 */
234typedef struct BS3CPUINSTR4_CONFIG_T
235{
236 uint16_t fCr0Mp : 1; /**< Set (1) or clear (0) CR0.MP. */
237 uint16_t fCr0Em : 1; /**< Set (1) or clear (0) CR0.EM. */
238 uint16_t fCr0Ts : 1; /**< Set (1) or clear (0) CR0.TS. */
239 uint16_t fCr4OsFxSR : 1; /**< Set (1) or clear (0) CR4.OSFXSR. */
240 uint16_t fCr4OsXSave : 1; /**< Set (1) or clear (0) CR4.OSXSAVE. */
241 uint16_t fCr4OsXmmExcpt : 1; /**< CR4.OSXMMEEXCPT; only set when g_afTypeSupports[T_SSE] is true. */
242 uint16_t fXcr0Sse : 1; /**< Set (1) or clear (0) XSAVE_C_SSE in the saved XCR0. */
243 uint16_t fXcr0Avx : 1; /**< XSAVE_C_YMM in the saved XCR0; only set when g_afTypeSupports[T_AVX_256] is true. */
244 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
245 uint16_t fAlignCheck : 1; /**< Set (1) or clear (0) both EFLAGS.AC and CR0.AM. */
246 uint16_t fMxCsrMM : 1; /**< AMD only */
247 uint8_t bXcptSse; /**< Presumably the exception expected for SSE instructions (X86_XCPT_XXX) - confirm with the test workers. */
248 uint8_t bXcptAvx; /**< Presumably the exception expected for AVX instructions (X86_XCPT_XXX) - confirm with the test workers. */
249} BS3CPUINSTR4_CONFIG_T;
250/** Pointer to an execution environment configuration. */
251typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
252
253/** State saved by bs3CpuInstr4ConfigReconfigure and written back by
 * bs3CpuInstrXConfigRestore. */
254typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
255{
256 uint32_t uCr0; /**< Copy of pCtx->cr0.u32. */
257 uint32_t uCr4; /**< Copy of pCtx->cr4.u32. */
258 uint32_t uEfl; /**< Copy of pCtx->rflags.u32. */
259 uint16_t uFcw; /**< Value returned by Bs3ExtCtxGetFcw. */
260 uint16_t uFsw; /**< Value returned by Bs3ExtCtxGetFsw. */
261 uint32_t uMxCsr; /**< Value returned by Bs3ExtCtxGetMxCsr. */
262} BS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to a saved configuration state. */
263typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to a const saved configuration state. */
264typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
265
266/**
267 * YMM packed single-precision floating-point register.
 * Both members overlay the same 32 bytes (size checked below).
268 * @todo move to x86.h?
269 */
270typedef union X86YMMFLOATPSREG
271{
272 /** Packed single-precision floating-point view (8 elements). */
273 RTFLOAT32U ar32[8];
274 /** 256-bit integer view. */
275 RTUINT256U ymm;
276} X86YMMFLOATPSREG;
277# ifndef VBOX_FOR_DTRACE_LIB
278AssertCompileSize(X86YMMFLOATPSREG, 32);
279AssertCompileSize(X86YMMFLOATPSREG, sizeof(X86YMMREG));
280# endif
281/** Pointer to a YMM packed single-precision floating-point register. */
282typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
283/** Pointer to a const YMM packed single-precision floating-point register. */
284typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
285
286/**
287 * YMM packed double-precision floating-point register.
 * Both members overlay the same 32 bytes (size checked below).
288 * @todo move to x86.h?
289 */
290typedef union X86YMMFLOATPDREG
291{
292 /** Packed double-precision floating-point view (4 elements). */
293 RTFLOAT64U ar64[4];
294 /** 256-bit integer view. */
295 RTUINT256U ymm;
296} X86YMMFLOATPDREG;
297# ifndef VBOX_FOR_DTRACE_LIB
298AssertCompileSize(X86YMMFLOATPDREG, 32);
299AssertCompileSize(X86YMMFLOATPDREG, sizeof(X86YMMREG));
300# endif
301/** Pointer to a YMM packed double-precision floating-point register. */
302typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
303/** Pointer to a const YMM packed double-precision floating-point register. */
304typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
305
306/**
307 * YMM scalar single-precision floating-point register.
 * Same layout as X86YMMFLOATPSREG (eight 32-bit elements over 32 bytes).
308 * @todo move to x86.h?
309 */
310typedef union X86YMMFLOATSSREG
311{
312 /** Scalar single-precision floating-point view (8 elements). */
313 RTFLOAT32U ar32[8];
314 /** 256-bit integer view. */
315 RTUINT256U ymm;
316} X86YMMFLOATSSREG;
317# ifndef VBOX_FOR_DTRACE_LIB
318AssertCompileSize(X86YMMFLOATSSREG, 32);
319AssertCompileSize(X86YMMFLOATSSREG, sizeof(X86YMMREG));
320# endif
321/** Pointer to a YMM scalar single-precision floating-point register. */
322typedef X86YMMFLOATSSREG BS3_FAR *PX86YMMFLOATSSREG;
323/** Pointer to a const YMM scalar single-precision floating-point register. */
324typedef X86YMMFLOATSSREG const BS3_FAR *PCX86YMMFLOATSSREG;
325
326/**
327 * YMM scalar double-precision floating-point register.
328 * @todo move to x86.h?
329 */
330typedef union X86YMMFLOATSDREG
331{
332 /** Scalar double-precision floating-point view. */
333 RTFLOAT64U ar64[3];
334 /** 256-bit integer view. */
335 RTUINT256U ymm;
336} X86YMMFLOATSDREG;
337# ifndef VBOX_FOR_DTRACE_LIB
338AssertCompileSize(X86YMMFLOATSDREG, 32);
339AssertCompileSize(X86YMMFLOATSDREG, sizeof(X86YMMREG));
340# endif
341/** Pointer to a YMM scalar double-precision floating-point register. */
342typedef X86YMMFLOATSDREG BS3_FAR *PX86YMMFLOATSDREG;
343/** Pointer to a const YMM scalar double-precision floating-point register. */
344typedef X86YMMFLOATSDREG const BS3_FAR *PCX86YMMFLOATSDREG;
345
346/**
347 * YMM scalar quadruple-precision floating-point register.
 * Both members overlay the same 32 bytes (size checked below).
348 * @todo move to x86.h?
349 */
350typedef union X86YMMFLOATSQREG
351{
352 /** Scalar quadruple-precision floating point view (2 elements). */
353 RTFLOAT128U ar128[2];
354 /** 256-bit integer view. */
355 RTUINT256U ymm;
356} X86YMMFLOATSQREG;
357# ifndef VBOX_FOR_DTRACE_LIB
358AssertCompileSize(X86YMMFLOATSQREG, 32);
359AssertCompileSize(X86YMMFLOATSQREG, sizeof(X86YMMREG));
360# endif
/* NOTE(review): unlike the sibling YMM register unions, these two pointer
 * typedefs carry no BS3_FAR qualifier - confirm whether that is intentional. */
361/** Pointer to a YMM scalar quadruple-precision floating-point register. */
362typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
363/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
364typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
365
366
367/*********************************************************************************************************************************
368* Global Variables *
369*********************************************************************************************************************************/
/** Per instruction type support flags, indexed by BS3CPUINSTRX_INSTRTYPE_T.
 * Entries not listed in the initializer are zero, i.e. false. */
370static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
/** Gates configurations with fMxCsrMM set: bs3CpuInstr4ConfigReconfigure
 * refuses them while this is false. */
371static bool g_fAmdMisalignedSse = false;
/** Presumably the BS3EXTCTXMETHOD_XXX in use; not referenced in this part of the file - confirm. */
372static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
/** Presumably whether MXCSR.DAZ is supported; not referenced in this part of the file - confirm. */
373static bool g_fMxCsrDazSupported = false;
374
/* Note: the tables below are not 'static', so they have external linkage.
 * In each table entry [0] is initialized with sign 0 and entry [1] with sign 1. */
375/** Zero value (indexed by fSign). */
376RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
377RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
378
379/** One value (indexed by fSign). */
380RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
381 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
382RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
383 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
384
385/** Infinity (indexed by fSign). */
386RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
387RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
388
389/** Default QNaNs (indexed by fSign). */
390RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
391RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
392
393/** Size of g_pbBuf - at least three pages. */
394static uint32_t g_cbBuf;
395/** Buffer of g_cbBuf size. */
396static uint8_t BS3_FAR *g_pbBuf;
397/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup:
 * g_pbBufAliasAlloc in paged modes, otherwise the buffer pointer itself. */
398static uint8_t BS3_FAR *g_pbBufAlias;
399/** Address that bs3CpuInstrXBufSetup maps as the RW alias for the memory at
 * g_pbBuf in paged modes. */
400static uint8_t BS3_FAR *g_pbBufAliasAlloc;
401
402/** Exception type \#2 test configurations, 16 & 32 bytes strictly aligned.
 *
 * Column order is the BS3CPUINSTR4_CONFIG_T member order:
 *   fCr0Mp, fCr0Em, fCr0Ts, fCr4OsFxSR, fCr4OsXSave, fCr4OsXmmExcpt,
 *   fXcr0Sse, fXcr0Avx, fAligned, fAlignCheck (AC/AM), fMxCsrMM,
 *   bXcptSse, bXcptAvx.
 * NOTE(review): X86_XCPT_DB presumably stands for "no fault expected" here -
 * confirm against the code consuming bXcptSse/bXcptAvx. */
403static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig2[] =
404{
405/*
406 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
407 * +AVX +AVX +AMD/SSE +AMD/SSE
408 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
409 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
410 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
411 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
412 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
413 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
414 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
415 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
416 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
417 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
418 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
419 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
420 /* Memory misalignment and alignment checks: */
421 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
422 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
423 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
424 /* AMD only: */
425 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
426 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
427};
428
429/** Exception type \#3 test configurations (< 16-byte memory argument).
 *
 * Column order is the BS3CPUINSTR4_CONFIG_T member order:
 *   fCr0Mp, fCr0Em, fCr0Ts, fCr4OsFxSR, fCr4OsXSave, fCr4OsXmmExcpt,
 *   fXcr0Sse, fXcr0Avx, fAligned, fAlignCheck (AC/AM), fMxCsrMM,
 *   bXcptSse, bXcptAvx.
 * Rows #0 thru #9 and #12 thru #14 equal those of g_aXcptConfig2; only the
 * misaligned rows #10 and #11 differ.
 * NOTE(review): X86_XCPT_DB presumably stands for "no fault expected" here -
 * confirm against the code consuming bXcptSse/bXcptAvx. */
430static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig3[] =
431{
432/*
433 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
434 * +AVX +AVX +AMD/SSE +AMD/SSE
435 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
436 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
437 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
438 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
439 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
440 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
441 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
442 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
443 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
444 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
445 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
446 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
447 /* Memory misalignment and alignment checks: */
448 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #10 */ /* [Avx]:DB */
449 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_AC, X86_XCPT_AC }, /* #11 */ /* [Avx]:AC */
450 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
451 /* AMD only: */
452 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
453 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
454};
455
456
457/**
458 * Returns the name of an X86 exception given the vector.
459 *
460 * @returns Name of the exception.
461 * @param uVector The exception vector.
462 */
463static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
464{
465 switch (uVector)
466 {
467 case X86_XCPT_DE: return "#DE";
468 case X86_XCPT_DB: return "#DB";
469 case X86_XCPT_NMI: return "#NMI";
470 case X86_XCPT_BP: return "#BP";
471 case X86_XCPT_OF: return "#OF";
472 case X86_XCPT_BR: return "#BR";
473 case X86_XCPT_UD: return "#UD";
474 case X86_XCPT_NM: return "#NM";
475 case X86_XCPT_DF: return "#DF";
476 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
477 case X86_XCPT_TS: return "#TS";
478 case X86_XCPT_NP: return "#NP";
479 case X86_XCPT_SS: return "#SS";
480 case X86_XCPT_GP: return "#GP";
481 case X86_XCPT_PF: return "#PF";
482 case X86_XCPT_MF: return "#MF";
483 case X86_XCPT_AC: return "#AC";
484 case X86_XCPT_MC: return "#MC";
485 case X86_XCPT_XF: return "#XF";
486 case X86_XCPT_VE: return "#VE";
487 case X86_XCPT_CP: return "#CP";
488 case X86_XCPT_VC: return "#VC";
489 case X86_XCPT_SX: return "#SX";
490 }
491 return "UNKNOWN";
492}
493
494
495/**
496 * Gets the names of floating-point exception flags that are set for a given MXCSR.
497 *
498 * @returns Names of floating-point exception flags that are set.
499 * @param pszBuf Where to store the floating-point exception flags.
500 * @param cchBuf The size of the buffer.
501 * @param fMxCsr The MXCSR value.
502 */
503static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
504{
505 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
506 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
507 return Bs3StrPrintf(pszBuf, cchBuf, " None");
508 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
509 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
510 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
511}
512
513
514/**
515 * Reconfigures the execution environment according to @a pConfig.
516 *
517 * Call bs3CpuInstrXConfigRestore to undo the changes.
518 *
519 * @returns true on success, false if the configuration cannot be applied. In
520 * the latter case, no context changes are made.
521 * @param pSavedCfg Where to save state we modify.
522 * @param pCtx The register context to modify.
523 * @param pExtCtx The extended register context to modify.
524 * @param pConfig The configuration to apply.
525 * @param bMode The target mode.
526 */
527static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
528 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
529{
530 /*
531 * Save context bits we may change here
532 */
533 pSavedCfg->uCr0 = pCtx->cr0.u32;
534 pSavedCfg->uCr4 = pCtx->cr4.u32;
535 pSavedCfg->uEfl = pCtx->rflags.u32;
536 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
537 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
538 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
539
540 /*
541 * Can we make these changes?
542 */
543 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
544 return false;
545
546 /*
547 * Modify the test context.
548 */
549 if (pConfig->fCr0Mp)
550 pCtx->cr0.u32 |= X86_CR0_MP;
551 else
552 pCtx->cr0.u32 &= ~X86_CR0_MP;
553 if (pConfig->fCr0Em)
554 pCtx->cr0.u32 |= X86_CR0_EM;
555 else
556 pCtx->cr0.u32 &= ~X86_CR0_EM;
557 if (pConfig->fCr0Ts)
558 pCtx->cr0.u32 |= X86_CR0_TS;
559 else
560 pCtx->cr0.u32 &= ~X86_CR0_TS;
561
562 if (pConfig->fCr4OsFxSR)
563 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
564 else
565 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
566
567 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
568 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
569 else
570 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
571
572 if (pConfig->fCr4OsFxSR)
573 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
574 else
575 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
576
577 if (pConfig->fCr4OsXSave)
578 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
579 else
580 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
581
582 if (pConfig->fXcr0Sse)
583 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
584 else
585 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
586 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
587 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
588 else
589 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
590
591 if (pConfig->fAlignCheck)
592 {
593 pCtx->rflags.u32 |= X86_EFL_AC;
594 pCtx->cr0.u32 |= X86_CR0_AM;
595 }
596 else
597 {
598 pCtx->rflags.u32 &= ~X86_EFL_AC;
599 pCtx->cr0.u32 &= ~X86_CR0_AM;
600 }
601
602 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
603 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
604
605 if (pConfig->fMxCsrMM)
606 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
607 else
608 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
609 return true;
610}
611
612
613/**
614 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
615 */
616static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
617{
618 pCtx->cr0.u32 = pSavedCfg->uCr0;
619 pCtx->cr4.u32 = pSavedCfg->uCr4;
620 pCtx->rflags.u32 = pSavedCfg->uEfl;
621 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
622 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
623 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
624 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
625}
626
627
628/**
629 * Allocates three extended CPU contexts and initializes the first one
630 * with random data.
631 * @returns First extended context, initialized with randomish data. NULL on
632 * failure (complained).
633 * @param ppExtCtx2 Where to return the 2nd context.
634 */
635static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
636{
637 /* Allocate extended context structures. */
638 uint64_t fFlags;
639 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
640 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
641 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
642 if (pExtCtx1)
643 {
644 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
645 /** @todo populate with semi-random stuff. */
646
647 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
648 *ppExtCtx2 = pExtCtx2;
649 return pExtCtx1;
650 }
651 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
652 *ppExtCtx2 = NULL;
653 return NULL;
654}
655
656
657/**
658 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
659 *
660 * @param pExtCtx1 The first extended context.
661 * @param pExtCtx2 The second extended context.
662 */
663static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
664{
665 RT_NOREF_PV(pExtCtx2);
666 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
667}
668
669
670/**
671 * Sets up SSE and AVX bits relevant for FPU instructions.
672 */
673static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
674{
675 /* CR0: */
676 uint32_t cr0 = Bs3RegGetCr0();
677 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
678 cr0 |= X86_CR0_NE;
679 Bs3RegSetCr0(cr0);
680
681 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
682 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
683 pCtx->cr0.u32 |= X86_CR0_NE;
684
685 /* CR4: */
686 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
687 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
688 {
689 uint32_t cr4 = Bs3RegGetCr4();
690 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
691 {
692 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
693 Bs3RegSetCr4(cr4);
694 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
695 }
696 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
697 {
698 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
699 Bs3RegSetCr4(cr4);
700 }
701 pCtx->cr4.u32 = cr4;
702 }
703}
704
705
706/**
707 * Configures the buffer with electric fences in paged modes.
708 *
709 * @returns Adjusted buffer pointer.
710 * @param pbBuf The buffer pointer.
711 * @param pcbBuf Pointer to the buffer size (input & output).
712 * @param bMode The testing target mode.
713 */
714DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
715{
716 if (BS3_MODE_IS_PAGED(bMode))
717 {
718 int rc;
719 uint32_t cbBuf = *pcbBuf;
720 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
721 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
722 pbBuf += X86_PAGE_SIZE;
723 cbBuf -= X86_PAGE_SIZE * 2;
724 *pcbBuf = cbBuf;
725
726 g_pbBufAlias = g_pbBufAliasAlloc;
727 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
728 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
729 if (RT_FAILURE(rc))
730 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
731 }
732 else
733 g_pbBufAlias = pbBuf;
734 return pbBuf;
735}
736
737
738/**
739 * Undoes what bs3CpuInstrXBufSetup did.
740 *
741 * @param pbBuf The buffer pointer.
742 * @param cbBuf The buffer size.
743 * @param bMode The testing target mode.
744 */
745DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
746{
747 if (BS3_MODE_IS_PAGED(bMode))
748 {
749 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
750 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
751 }
752}
753
754
755/**
756 * Gets a buffer of a @a cbMemOp sized operand according to the given
757 * configuration and alignment restrictions.
758 *
759 * @returns Pointer to the buffer.
760 * @param pbBuf The buffer pointer.
761 * @param cbBuf The buffer size.
762 * @param cbMemOp The operand size.
763 * @param cbAlign The operand alignment restriction.
764 * @param pConfig The configuration.
765 * @param fPageFault The \#PF test setting.
766 */
767DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
768 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
769{
770 /* All allocations are at the tail end of the buffer, so that we've got a
771 guard page following the operand. When asked to consistenly trigger
772 a #PF, we slide the buffer into that guard page. */
773 if (fPageFault)
774 cbBuf += X86_PAGE_SIZE;
775
776 if (pConfig->fAligned)
777 {
778 if (!pConfig->fAlignCheck)
779 return &pbBuf[cbBuf - cbMemOp];
780 return &pbBuf[cbBuf - cbMemOp - cbAlign];
781 }
782 return &pbBuf[cbBuf - cbMemOp - 1];
783}
784
785
786/**
787 * Determines the size of memory operands.
788 */
789DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
790{
791 if (enmRm <= RM_MEM)
792 return cbOperand;
793 if (enmRm == RM_MEM8)
794 return sizeof(uint8_t);
795 if (enmRm == RM_MEM16)
796 return sizeof(uint16_t);
797 if (enmRm == RM_MEM32)
798 return sizeof(uint32_t);
799 if (enmRm == RM_MEM64)
800 return sizeof(uint64_t);
801 BS3_ASSERT(0);
802 return cbOperand;
803}
804
805
806/*
807 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
808 * skips a large fraction of the micro-tests. It is sufficiently random
809 * that over a large number of runs, all micro-tests will be hit.
810 *
811 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
812 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
813 * (on an Intel Core i7-10700, fwiw).
814 *
815 * To activate this 'developer's speed-testing mode', turn on
816 * `#define BS3_SKIPIT_DO_SKIP' here.
817 *
818 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
819 * skipped in a row; e.g. the default of 26 means about every 27th
820 * micro-test is run during a particular test run. (This is not 27x
821 * faster due to other activities which are not skipped!) Note this is
822 * only an average; the actual skips are random.
823 *
824 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
825 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
826 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
827 * 3% performance).
828 *
829 * Note! The skipping is not compatible with testing the native recompiler as
830 * it requires the test code to be run a number of times before it kicks
831 * in and does the native recompilation (currently around 16 times).
832 */
833#define BS3_SKIPIT_AVG_SKIP 26
834#define BS3_SKIPIT_REPORT_COUNT 150000
835#undef BS3_SKIPIT_DO_SKIP
836#undef BS3_SKIPIT_DO_ARGS
837
838#ifndef BS3_SKIPIT_DO_SKIP
839# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
840#else
841# include <iprt/asm-amd64-x86.h>
842# include <iprt/asm-math.h>
843
844DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
845{
846 /*
847 * A simple Lehmer linear congruential pseudo-random number
848 * generator using the constants suggested by Park & Miller:
849 *
850 * modulus = 2^31 - 1 (INT32_MAX)
851 * multiplier = 7^5 (16807)
852 *
853 * It produces numbers in the range [1..INT32_MAX-1] and is
854 * more chaotic in the higher bits.
855 *
856 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
857 * though the zero handling is different.
858 */
859 static uint32_t s_uSeedMemory = 0;
860 uint32_t uVal = s_uSeedMemory;
861 if (!uVal)
862 uVal = (uint32_t)ASMReadTSC();
863 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
864 s_uSeedMemory = uVal;
865 return uVal;
866}
867
/** Number of micro-tests seen resp. skipped so far; updated by bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen, g_cSkipped;

/**
 * Prints how many micro-tests have been seen, how many of those were actually
 * run (seen minus skipped) and how many were skipped.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    /* The tallies are unsigned, so they are formatted with %u rather than %d. */
    Bs3TestPrintf("Micro-tests %u: tested %u / skipped %u\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}
874
875# ifdef BS3_SKIPIT_DO_ARGS
876# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
877static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
878# else
879# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
880static bool bs3CpuInstrX_SkipIt(void)
881# endif
882{
883 static unsigned s_uTimes = 0;
884 bool fSkip;
885
886 /* Cache calls to the relatively expensive random routine */
887 if (!s_uTimes)
888 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
889 fSkip = --s_uTimes > 0;
890 if (fSkip)
891 ++g_cSkipped;
892
893 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
894 bs3CpuInstrX_ShowTallies();
895 return fSkip;
896}
897
898#endif /* BS3_SKIPIT_DO_SKIP */
899
900/*
901 * Test type #1.
902 * Generic YMM registers.
903 */
904typedef struct BS3CPUINSTR4_TEST1_VALUES_T
905{
906 X86YMMREG uSrc2; /**< Second source operand. */
907 X86YMMREG uSrc1; /**< uDstIn for SSE */
908 X86YMMREG uDstOut; /**< Destination output. */
909 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
910 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
911 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
912 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
913 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
914 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
915} BS3CPUINSTR4_TEST1_VALUES_T;
916
917/*
918 * Test type #1.
919 * Packed single-precision.
920 */
921typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
922{
923 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
924 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
925 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
926 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
927 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
928 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
929 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
930 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
931 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
932} BS3CPUINSTR4_TEST1_VALUES_PS_T;
933AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
934AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
935AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
936AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
937AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
938AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
939AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
940AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
941AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
942AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
943
944/*
945 * Test type #1.
946 * Packed double-precision.
947 */
948typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
949{
950 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
951 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
952 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
953 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
954 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
955 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
956 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
957 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
958 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
959} BS3CPUINSTR4_TEST1_VALUES_PD_T;
960AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
961AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
962AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
963AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
964AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
965AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
966AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
967AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
968AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
969AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
970
971/*
972 * Test type #1.
973 * Scalar single-precision.
974 */
975typedef struct BS3CPUINSTR4_TEST1_VALUES_SS_T
976{
977 X86YMMFLOATSSREG uSrc2; /**< Second source operand. */
978 X86YMMFLOATSSREG uSrc1; /**< uDstIn for SSE */
979 X86YMMFLOATSSREG uDstOut; /**< Destination output. */
980 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
981 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
982 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
983 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
984 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
985 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
986} BS3CPUINSTR4_TEST1_VALUES_SS_T;
987AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
988AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
989AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
990AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
991AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
992AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
993AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
994AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
995AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
996AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
997
998/*
999 * Test type #1.
1000 * Scalar quadruple-precision.
1001 */
1002typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
1003{
1004 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
1005 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
1006 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
1007 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
1008 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
1009 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
1010 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
1011 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
1012 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
1013} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
1014AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
1015AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
1016AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
1017AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
1018AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
1019AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
1020AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
1021AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
1022AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
1023AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
1024
1025typedef struct BS3CPUINSTR4_TEST1_T
1026{
1027 FPFNBS3FAR pfnWorker; /**< Test function worker. */
1028 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
1029 uint8_t enmRm; /**< R/M type. */
1030 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
1031 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
1032 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
1033 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
1034 uint8_t cValues; /**< Number of test values in @c paValues. */
1035 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
1036} BS3CPUINSTR4_TEST1_T;
1037
1038typedef struct BS3CPUINSTR4_TEST1_MODE_T
1039{
1040 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
1041 unsigned cTests;
1042} BS3CPUINSTR4_TEST1_MODE_T;
1043
1044/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
1045#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
1046 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
1047
1048typedef struct BS3CPUINSTR4_TEST1_CTX_T
1049{
1050 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
1051 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
1052 unsigned iVal; /**< Which iteration of the test value is this. */
1053 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
1054 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
1055 PBS3REGCTX pCtx; /**< The general-purpose register context. */
1056 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
1057 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
1058 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
1059 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
1060 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
1061 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
1062 uint8_t cbInstr; /**< Size of the instruction opcode. */
1063 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
1064 bool fSseInstr; /**< Whether this is an SSE instruction. */
1065 bool fAvxInstr; /**< Whether this is an AVX instruction. */
1066 uint16_t idTestStep; /**< The test iteration step. */
1067} BS3CPUINSTR4_TEST1_CTX_T;
1068/** Pointer to a test 1 context. */
1069typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
1070
1071
1072/**
1073 * Worker for bs3CpuInstr4_WorkerTestType1.
1074 */
1075static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
1076 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
1077{
1078 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
1079 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
1080 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
1081 PBS3REGCTX pCtx = pTestCtx->pCtx;
1082 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
1083 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
1084 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
1085 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
1086 uint8_t cbMemOp = pTestCtx->cbMemOp;
1087 uint8_t const cbOperand = pTestCtx->cbOperand;
1088 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
1089 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
1090 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
1091 uint32_t const fExpectedMxCsrFlags = pTestCtx->cbOperand > 16 ? pValues->f256ExpectedMxCsrFlags
1092 : pValues->f128ExpectedMxCsrFlags;
1093 bool const fFpFlagsExpect = RT_BOOL( (fExpectedMxCsrFlags
1094 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
1095 uint32_t uMxCsr;
1096 X86YMMREG MemOpExpect;
1097 uint16_t cErrors;
1098
1099 /*
1100 * Set up the context and some expectations.
1101 */
1102 /* Destination. */
1103 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
1104 if (pTest->iRegDst == UINT8_MAX)
1105 {
1106 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1107 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
1108 if (bXcptExpect == X86_XCPT_DB)
1109 MemOpExpect.ymm = pValues->uDstOut.ymm;
1110 else
1111 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
1112 }
1113
1114 /* Source #1 (/ destination for SSE). */
1115 if (pTest->iRegSrc1 == UINT8_MAX)
1116 {
1117 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1118 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
1119 if (pTest->iRegDst == UINT8_MAX)
1120 BS3_ASSERT(pTestCtx->fSseInstr);
1121 else
1122 MemOpExpect.ymm = pValues->uSrc1.ymm;
1123 }
1124 else if (pTestCtx->fSseInstr)
1125 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
1126 else
1127 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
1128
1129 /* Source #2. */
1130 if (pTest->iRegSrc2 == UINT8_MAX)
1131 {
1132 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1133 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
1134 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
1135 MemOpExpect.ymm = pValues->uSrc2.ymm;
1136 }
1137 else if (pTestCtx->fSseInstr)
1138 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
1139 else
1140 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
1141
1142 /* Memory pointer. */
1143 if (pTest->enmRm >= RM_MEM)
1144 {
1145 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
1146 || pTest->iRegSrc1 == UINT8_MAX
1147 || pTest->iRegSrc2 == UINT8_MAX);
1148 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
1149 }
1150
1151 /* Setup MXCSR for the current test. */
1152 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
1153 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
1154 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
1155 if ( pValues->fDenormalsAreZero == X86_MXCSR_DAZ
1156 && g_fMxCsrDazSupported)
1157 uMxCsr |= X86_MXCSR_DAZ;
1158 if (pValues->fFlushToZero == X86_MXCSR_FZ)
1159 uMxCsr |= X86_MXCSR_FZ;
1160 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1161
1162 /*
1163 * Prepare globals and execute.
1164 */
1165 g_uBs3TrapEipHint = pCtx->rip.u32;
1166 if ( bXcptExpect == X86_XCPT_DB
1167 && !fFpFlagsExpect)
1168 g_uBs3TrapEipHint += cbInstr + 1;
1169 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1170
1171 /*
1172 * Check the result.
1173 *
1174 * If a floating-point exception is expected, the destination is not updated by the instruction.
1175 * In the case of SSE instructions, updating the destination here will work because it is the same
1176 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1177 */
1178 cErrors = Bs3TestSubErrorCount();
1179 if ( bXcptExpect == X86_XCPT_DB
1180 && !fFpFlagsExpect
1181 && pTest->iRegDst != UINT8_MAX)
1182 {
1183 if (pTestCtx->fSseInstr)
1184 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1185 else
1186 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1187 }
1188#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1189 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1190 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1191 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1192 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1193#endif
1194 if (bXcptExpect == X86_XCPT_DB)
1195 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1196 | (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1197 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1198
1199 if (bXcptExpect == X86_XCPT_DB)
1200 {
1201 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1202
1203 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1204 if (fMxCsrXcptFlags != (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1205 {
1206 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1207 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1208 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), fExpectedMxCsrFlags);
1209 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1210 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1211 }
1212
1213 /* Check if the SIMD FP exception (or lack of) is as expected. */
1214 if (fFpFlagsExpect)
1215 {
1216 if (pTrapFrame->bXcpt == bFpXcpt)
1217 { /* likely */ }
1218 else
1219 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1220 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1221 }
1222 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1223 { /* likely */ }
1224 else
1225 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1226 }
1227 /* Check if non-FP exception is as expected. */
1228 else if (pTrapFrame->bXcpt != bXcptExpect)
1229 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1230
1231 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1232 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1233 {
1234 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1235 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1236 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1237 }
1238 if (bXcptExpect == X86_XCPT_PF)
1239 pCtx->cr2.u = (uintptr_t)puMemOp;
1240 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1241 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1242 pTestCtx->pszMode, pTestCtx->idTestStep);
1243 pCtx->cr2.u = 0;
1244
1245 if ( pTest->enmRm >= RM_MEM
1246 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1247 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1248
1249 return cErrors;
1250}
1251
1252
1253/**
1254 * Test type #1 worker.
1255 */
1256static uint8_t bs3CpuInstr4_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1257 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1258{
1259 BS3REGCTX Ctx;
1260 BS3TRAPFRAME TrapFrame;
1261 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1262 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1263 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1264 uint32_t cbBuf = g_cbBuf;
1265 PBS3EXTCTX pExtCtxOut;
1266 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1267 if (pExtCtx)
1268 { /* likely */ }
1269 else
1270 return 0;
1271 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1272 { /* likely */ }
1273 else
1274 {
1275 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1276 return 0;
1277 }
1278
1279 /* Ensure the structures are allocated before we sample the stack pointer. */
1280 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1281 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1282
1283 /*
1284 * Create test context.
1285 */
1286 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1287 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1288 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1289
1290 /*
1291 * Run the tests in all rings since alignment issues may behave
1292 * differently in ring-3 compared to ring-0.
1293 */
1294 for (;;)
1295 {
1296 unsigned fPf = 0;
1297 do
1298 {
1299 unsigned iCfg;
1300 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1301 {
1302 unsigned iTest;
1303 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1304 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1305 continue; /* unsupported config */
1306
1307 /*
1308 * Iterate the tests.
1309 */
1310 for (iTest = 0; iTest < cTests; iTest++)
1311 {
1312 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1313 unsigned const cValues = pTest->cValues;
1314 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1315 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1316 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1317 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1318 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1319 uint8_t const cbAlign = cbMemOp;
1320 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1321 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1322 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1323 : fSseInstr ? paConfigs[iCfg].bXcptSse
1324 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1325 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1326 unsigned cRecompRuns = 0;
1327 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1328 unsigned iVal;
1329
1330 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1331 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1332 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1333 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1334 continue;
1335
1336 /* #AC is only raised in ring-3. */
1337 if (bXcptExpect == X86_XCPT_AC)
1338 {
1339 if (bRing != 3)
1340 bXcptExpect = X86_XCPT_DB;
1341 else if (fAvxInstr)
1342 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1343 }
1344
1345 if (fPf && bXcptExpect == X86_XCPT_DB)
1346 bXcptExpect = X86_XCPT_PF;
1347
1348 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1349
1350 /*
1351 * Iterate the test values and do the actual testing.
1352 */
1353 while (cRecompRuns < cMaxRecompRuns)
1354 {
1355 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1356 {
1357 uint16_t cErrors;
1358 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1359 uint32_t const fExpectedMxCsrFlags = pTest->enmType >= T_128BITS
1360 ? pTest->paValues[iVal].f128ExpectedMxCsrFlags
1361 : pTest->paValues[iVal].f256ExpectedMxCsrFlags;
1362
1363 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1364 continue;
1365
1366 /*
1367 * If the hardware does not support DAZ bit and we are testing DE exceptions,
1368 * then skip testing them. We still want to test values that set the MXCSR.DAZ
1369 * if we are not expecting DE exceptions to make sure DAZ bit in and of itself
1370 * is not influencing other cases.
1371 */
1372 if ( !g_fMxCsrDazSupported
1373 && pTest->paValues[iVal].fDenormalsAreZero == X86_MXCSR_DAZ
1374 && (fExpectedMxCsrFlags & X86_MXCSR_DE))
1375 continue;
1376
1377 /*
1378 * Setup the test instruction context and pass it to the worker.
1379 * A few of these can be figured out by the worker but initializing
1380 * it outside the inner most loop is more optimal.
1381 */
1382 TestCtx.pConfig = &paConfigs[iCfg];
1383 TestCtx.pTest = pTest;
1384 TestCtx.iVal = iVal;
1385 TestCtx.pszMode = pszMode;
1386 TestCtx.pTrapFrame = &TrapFrame;
1387 TestCtx.pCtx = &Ctx;
1388 TestCtx.pExtCtx = pExtCtx;
1389 TestCtx.pExtCtxOut = pExtCtxOut;
1390 TestCtx.puMemOp = (uint8_t *)puMemOp;
1391 TestCtx.puMemOpAlias = puMemOpAlias;
1392 TestCtx.cbMemOp = cbMemOp;
1393 TestCtx.cbOperand = cbOperand;
1394 TestCtx.bXcptExpect = bXcptExpect;
1395 TestCtx.fSseInstr = fSseInstr;
1396 TestCtx.fAvxInstr = fAvxInstr;
1397 TestCtx.idTestStep = idTestStep;
1398 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1399 if (cErrors != Bs3TestSubErrorCount())
1400 {
1401 if (paConfigs[iCfg].fAligned)
1402 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, %s %u-bit)",
1403 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1404 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1405 else
1406 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, %s %u-bit)",
1407 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1408 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1409 TrapFrame.Ctx.rflags.u32, fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1410 Bs3TestPrintf("\n");
1411 }
1412 }
1413 }
1414 }
1415 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1416 }
1417 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1418
1419 /*
1420 * Next ring.
1421 */
1422 bRing++;
1423 if (bRing > 3 || bMode == BS3_MODE_RM)
1424 break;
1425 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1426 }
1427
1428 /*
1429 * Cleanup.
1430 */
1431 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1432 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1433 return 0;
1434}
1435
1436
1437/*
1438 * [V]ADDPS.
 *
 * Runs the ADDPS (SSE) and VADDPS (AVX, 128-bit and 256-bit) code snippets
 * listed in the tables below against one shared table of packed FP32 test
 * values.
 *
 * bMode is used to pick the 16-, 32- or 64-bit snippet table (via
 * BS3CPUINSTR4_TEST_MODES_INDEX) and is passed on to
 * bs3CpuInstr4_WorkerTestType1 together with g_aXcptConfig2; the worker's
 * return value is returned unchanged.
1439 */
1440BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addps(uint8_t bMode)
1441{
     /*
      * Shared value table.  Each entry holds, in order: the src2 vector, the
      * src1 vector and the expected result vector ("=>"), each with eight FP32
      * elements, followed by the values labelled mask, daz, fz and rc (all
      * built from X86_MXCSR_* constants) and two 'flags' values.
      *
      * NOTE(review): judging by the entries where only elements 4..7 can raise
      * a flag (first 'flags' value 0, second non-zero), the first 'flags' value
      * looks like the expectation for the 128-bit forms and the second for the
      * 256-bit forms - confirm against BS3CPUINSTR4_TEST1_VALUES_PS_T.
      */
1442 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1443 {
1444 /*
1445 * Zero.
1446 */
1447 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1448 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1449 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1450 /*mask */ X86_MXCSR_XCPT_MASK,
1451 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1452 /*flags */ 0, 0 },
1453 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1454 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1455 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1456 /*mask */ ~X86_MXCSR_XCPT_MASK,
1457 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1458 /*flags */ 0, 0 },
1459 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1460 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1461 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1462 /*mask */ ~X86_MXCSR_XCPT_MASK,
1463 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1464 /*flags */ 0, 0 },
1465 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1466 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1467 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1468 /*mask */ ~X86_MXCSR_XCPT_MASK,
1469 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1470 /*flags */ 0, 0 },
1471 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1472 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1473 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1474 /*mask */ ~X86_MXCSR_XCPT_MASK,
1475 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1476 /*flags */ 0, 0 },
1477 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1478 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1479 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1480 /*mask */ X86_MXCSR_XCPT_MASK,
1481 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1482 /*flags */ 0, 0 },
1483 /*
1484 * Infinity.
1485 */
1486 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1487 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1488 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1489 /*mask */ ~X86_MXCSR_IM,
1490 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1491 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1492 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1493 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1494 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1495 /*mask */ X86_MXCSR_XCPT_MASK,
1496 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1497 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1498 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1499 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1500 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1501 /*mask */ X86_MXCSR_XCPT_MASK,
1502 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1503 /*flags */ 0, X86_MXCSR_IE },
1504 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1505 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
1506 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(0) } },
1507 /*mask */ ~X86_MXCSR_XCPT_MASK,
1508 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1509 /*flags */ 0, X86_MXCSR_IE },
1510 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_INF(0) } },
1511 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_INF(1) } },
1512 { /* => */ { BS3_FP32_INF(1), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_QNAN(1), BS3_FP32_QNAN(1), BS3_FP32_INF(1) } },
1513 /*mask */ ~X86_MXCSR_XCPT_MASK,
1514 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1515 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1516 /*
1517 * Overflow, Precision.
1518 */
1519 /*11*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1520 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1521 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), } },
1522 /*mask */ ~X86_MXCSR_XCPT_MASK,
1523 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1524 /*flags */ 0, X86_MXCSR_OE },
1525 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1526 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0) } },
1527 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1528 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1529 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1530 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1531 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1532 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1533 { /* => */ { BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1534 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1535 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1536 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1537 { { /*src2 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1538 { /*src1 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1539 { /* => */ { BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_VAL(1, 0, 2) } },
1540 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1541 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1542 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1543 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1544 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1545 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1546 /*mask */ X86_MXCSR_XCPT_MASK,
1547 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1548 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
1549 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1550 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1551 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1552 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1553 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1554 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1555 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1556 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1557 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1558 /*mask */ ~X86_MXCSR_XCPT_MASK,
1559 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1560 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1561 /*
1562 * Normals.
1563 */
1564 /*18*/{ { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/, BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/ } },
1565 { /*src1 */ { BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/, BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/ } },
1566 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/, BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/ } },
1567 /*mask */ X86_MXCSR_XCPT_MASK,
1568 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1569 /*flags */ 0, 0 },
1570 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1571 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1572 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1573 /*mask */ ~X86_MXCSR_XCPT_MASK,
1574 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1575 /*flags */ 0, 0 },
1576 { { /*src2 */ { BS3_FP32_VAL(0, 0x5ca5b8, 0x93)/*1807543*/, BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_VAL(0, 0x534000, 0x86)/*211.25*/, BS3_FP32_ZERO(0) } },
1577 { /*src1 */ { BS3_FP32_VAL(0, 0x1ea980, 0x8f)/* 81235*/, BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ONE(1) /*- 1.00*/, BS3_FP32_ZERO(0) } },
1578 { /* => */ { BS3_FP32_VAL(0, 0x669050, 0x93)/*1888778*/, BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_VAL(0, 0x524000, 0x86)/*210.25*/, BS3_FP32_ZERO(0) } },
1579 /*mask */ X86_MXCSR_XCPT_MASK,
1580 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1581 /*flags */ 0, 0 },
1582 { { /*src2 */ { BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(1), BS3_FP32_ZERO(0) } },
1583 { /*src1 */ { BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(0), BS3_FP32_ONE(0) } },
1584 { /* => */ { BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0) } },
1585 /*mask */ ~X86_MXCSR_XCPT_MASK,
1586 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1587 /*flags */ 0, 0 },
1588 { { /*src2 */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1589 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ONE(1), BS3_FP32_ONE(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1590 { /* => */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1591 /*mask */ X86_MXCSR_XCPT_MASK,
1592 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1593 /*flags */ 0, 0 },
1594 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), } },
1595 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), } },
1596 { /* => */ { BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1597 /*mask */ ~X86_MXCSR_XCPT_MASK,
1598 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1599 /*flags */ 0, 0 },
1600 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1601 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1602 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) , BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) } },
1603 /*mask */ ~X86_MXCSR_XCPT_MASK,
1604 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1605 /*flags */ 0, 0 },
1606 { { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/, BS3_FP32_VAL(0, 0x3ce348, 0x90)/*193421.125*/, BS3_FP32_VAL(0, 0x6423f2, 0x92)/*934463.125*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x16b43a, 0x93)/*1234567.25*/, BS3_FP32_VAL(0, 0x792318, 0x91)/*510232.75*/, BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/ } },
1607 { /*src1 */ { BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/, BS3_FP32_VAL(0, 0x430ebc, 0x91)/*399477.875*/, BS3_FP32_VAL(1, 0x0a19f0, 0x8f)/*-70707.875*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x00c6d3, 0x94)/*2109876.75*/, BS3_FP32_VAL(1, 0x316740, 0x8e)/*-45415.25*/, BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/ } },
1608 { /* => */ { BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/, BS3_FP32_VAL(0, 0x10c030, 0x92)/*592899.000*/, BS3_FP32_VAL(0, 0x52e0b4, 0x92)/*863755.250*/, BS3_FP32_VAL(1, 0, 2), BS3_FP32_VAL(0, 0, 2), BS3_FP32_VAL(0, 0x4c20f0, 0x94)/*3344444.00*/, BS3_FP32_VAL(0, 0x62f630, 0x91)/*464817.50*/, BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/ } },
1609 /*mask */ X86_MXCSR_XCPT_MASK,
1610 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1611 /*flags */ 0, 0 },
     /*
      * Denormals (inputs built from BS3_FP32_DENORMAL_MIN/MAX).
      */
1612 /*26*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1613 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1614 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1615 /*mask */ ~X86_MXCSR_XCPT_MASK,
1616 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1617 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1618 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1619 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1620 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1621 /*mask */ X86_MXCSR_XCPT_MASK,
1622 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1623 /*flags */ 0, 0 },
1624 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0) } },
1625 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0) } },
1626 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), } },
1627 /*mask */ X86_MXCSR_XCPT_MASK,
1628 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1629 /*flags */ 0, 0 },
1630 /** @todo Denormals; Underflow, Precision; Rounding, FZ etc. */
1631 };
1632
     /*
      * Snippet tables, one per code bitness (c16, c32, c64).  Every row pairs a
      * code snippet with the whole of s_aValues (cast to the generic
      * BS3CPUINSTR4_TEST1_VALUES_T pointer type).  The three numbers after the
      * T_xxx type follow the register numbers in the snippet name, destination
      * first (the two-operand SSE form repeats the destination as the first
      * source), with 255 where the name has the FSxBX memory operand.
      * NOTE(review): the meaning of the leading 255 in each row is not visible
      * from here - see BS3CPUINSTR4_TEST1_T.
      */
1633 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1634 {
1635 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1636 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1637
1638 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1639 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1640
1641 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1642 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1643 };
1644 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1645 {
1646 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1647 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1648
1649 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1650 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1651
1652 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1653 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1654 };
1655 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1656 {
1657 { bs3CpuInstr4_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1658 { bs3CpuInstr4_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1659
1660 { bs3CpuInstr4_vaddps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1661 { bs3CpuInstr4_vaddps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1662
1663 { bs3CpuInstr4_vaddps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1664 { bs3CpuInstr4_vaddps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1665
     /* Only in the 64-bit table: snippets using register numbers 8 and up. */
1666 { bs3CpuInstr4_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1667 { bs3CpuInstr4_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1668
1669 { bs3CpuInstr4_vaddps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1670 { bs3CpuInstr4_vaddps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1671 };
1672
     /* Pick the snippet table for bMode (index computed by BS3CPUINSTR4_TEST_MODES_INDEX)
        and hand it, with the g_aXcptConfig2 configurations, to the type-1 worker. */
1673 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1674 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1675 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1676 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
1677}
1678
1679
1680/*
1681 * [V]ADDPD.
1682 */
1683BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addpd(uint8_t bMode)
1684{
1685 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1686 {
1687 /*
1688 * Zero.
1689 */
1690 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1691 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1692 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1693 /*mask */ X86_MXCSR_XCPT_MASK,
1694 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1695 /*flags */ 0, 0 },
1696 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1697 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1698 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1699 /*mask */ ~X86_MXCSR_XCPT_MASK,
1700 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1701 /*flags */ 0, 0 },
1702 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1703 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1704 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1705 /*mask */ X86_MXCSR_XCPT_MASK,
1706 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_DOWN,
1707 /*flags */ 0, 0 },
1708 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1709 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1710 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1711 /*mask */ ~X86_MXCSR_XCPT_MASK,
1712 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1713 /*flags */ 0, 0 },
1714 { { /*src2 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1715 { /*src1 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1716 { /* => */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1717 /*mask */ X86_MXCSR_XCPT_MASK,
1718 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1719 /*flags */ 0, 0 },
1720 /*
1721 * Infinity.
1722 */
1723 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1724 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1725 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1726 /*mask */ ~X86_MXCSR_IM,
1727 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1728 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1729 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1730 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1731 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1732 /*mask */ ~X86_MXCSR_IM,
1733 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1734 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1735 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1736 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1737 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1738 /*mask */ ~X86_MXCSR_IM,
1739 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1740 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1741 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1742 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1743 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1744 /*mask */ X86_MXCSR_XCPT_MASK,
1745 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1746 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1747 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1748 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1749 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1750 /*mask */ X86_MXCSR_XCPT_MASK,
1751 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1752 /*flags */ 0, X86_MXCSR_IE },
1753 /*
1754 * Overflow, Precision.
1755 */
1756 /*10*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1757 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1758 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1759 /*mask */ ~X86_MXCSR_XCPT_MASK,
1760 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1761 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1762 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1763 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1764 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1765 /*mask */ ~X86_MXCSR_XCPT_MASK,
1766 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1767 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1768 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1769 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1770 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_INF(0), } },
1771 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1772 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1773 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1774 { { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
1775 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0) } },
1776 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1777 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1778 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1779 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1780 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1781 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1782 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1783 /*mask */ X86_MXCSR_XCPT_MASK,
1784 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1785 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1786 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1787 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1788 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX + 1) } },
1789 /*mask */ ~X86_MXCSR_XCPT_MASK,
1790 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1791 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
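1792 /** @todo Why does the below one cause PE?!  Review note: src1[0] below is written with exponent 0x07d, not 0x3fd as the 0.25 in src2[3] is, so it looks like a tiny negative number rather than -0.25; rounding down then gives 1.75 less one ULP, which is inexact. */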
1793 { { /*src2 */ { BS3_FP64_VAL(0, 0xc000000000000, 0x3ff)/* 1.75*/, BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/ } },
1794 { /*src1 */ { BS3_FP64_VAL(1, 0, 0x07d)/*-0.25*/, BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/ } },
1795 { /* => */ { BS3_FP64_VAL(0, 0xbffffffffffff, 0x3ff)/* 1.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/ } },
1796 /*mask */ X86_MXCSR_XCPT_MASK,
1797 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1798 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1799 /*
1800 * Normals.
1801 */
1802 /*17*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1803 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1804 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1805 /*mask */ ~X86_MXCSR_XCPT_MASK,
1806 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1807 /*flags */ 0, 0 },
1808 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1809 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1810 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1811 /*mask */ X86_MXCSR_XCPT_MASK,
1812 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1813 /*flags */ 0, 0 },
1814 { { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
1815 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
1816 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
1817 /*mask */ ~X86_MXCSR_XCPT_MASK,
1818 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1819 /*flags */ 0, 0 },
1820 { { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1821 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1822 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1823 /*mask */ X86_MXCSR_XCPT_MASK,
1824 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1825 /*flags */ 0, 0 },
1826 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1827 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1828 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1829 /*mask */ ~X86_MXCSR_XCPT_MASK,
1830 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1831 /*flags */ 0, 0 },
1832 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1833 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1834 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_VAL(0, 0, 2) } },
1835 /*mask */ ~X86_MXCSR_XCPT_MASK,
1836 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1837 /*flags */ 0, 0 },
1838 { { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1839 { /*src1 */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1840 { /* => */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646.00*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, 0, 2) } },
1841 /*mask */ X86_MXCSR_XCPT_MASK,
1842 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1843 /*flags */ 0, 0 },
1844 /*
1845 * Denormals.
1846 */
1847 /*24*/{ { /*src2 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1848 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1849 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1850 /*mask */ ~X86_MXCSR_XCPT_MASK,
1851 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1852 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1853 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1854 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1855 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1856 /*mask */ X86_MXCSR_XCPT_MASK,
1857 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1858 /*flags */ 0, 0 },
1859 { { /*src2 */ { BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MAX(0) } },
1860 { /*src1 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0) } },
1861 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1862 /*mask */ X86_MXCSR_XCPT_MASK,
1863 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1864 /*flags */ 0, 0 },
1865 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
1866 };
1867
1868 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1869 {
1870 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1871 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1872
1873 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1874 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1875
1876 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1877 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1878 };
1879 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1880 {
1881 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1882 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1883
1884 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1885 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1886
1887 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1888 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1889 };
1890 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1891 {
1892 { bs3CpuInstr4_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1893 { bs3CpuInstr4_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1894
1895 { bs3CpuInstr4_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1896 { bs3CpuInstr4_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1897
1898 { bs3CpuInstr4_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1899 { bs3CpuInstr4_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1900
1901 { bs3CpuInstr4_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1902 { bs3CpuInstr4_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1903
1904 { bs3CpuInstr4_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1905 { bs3CpuInstr4_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1906 };
1907
1908 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1909 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1910 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1911 g_aXcptConfig2, RT_ELEMENTS(g_aXcptConfig2));
1912}
1913
1914/*
1915 * [V]ADDSS.
1916 */
1917BS3_DECL_FAR(uint8_t) bs3CpuInstr4_v_addss(uint8_t bMode)
1918{
1919 static BS3CPUINSTR4_TEST1_VALUES_SS_T const s_aValues[] =
1920 {
1921 /*
1922 * Zero.
1923 */
1924 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1925 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1926 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1927 /*mask */ X86_MXCSR_XCPT_MASK,
1928 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1929 /*flags */ 0, 0 },
1930 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1931 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1932 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1933 /*mask */ ~X86_MXCSR_XCPT_MASK,
1934 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1935 /*flags */ 0, 0 },
1936 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1937 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1938 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1939 /*mask */ ~X86_MXCSR_XCPT_MASK,
1940 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1941 /*flags */ 0, 0 },
1942 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
1943 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1944 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1945 /*mask */ ~X86_MXCSR_XCPT_MASK,
1946 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1947 /*flags */ 0, 0 },
1948 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
1949 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1950 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1951 /*mask */ ~X86_MXCSR_XCPT_MASK,
1952 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1953 /*flags */ 0, 0 },
1954 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1955 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1956 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1957 /*mask */ X86_MXCSR_XCPT_MASK,
1958 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1959 /*flags */ 0, 0 },
1960 /*
1961 * Infinity.
1962 */
1963 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1964 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1965 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1966 /*mask */ ~X86_MXCSR_IM,
1967 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1968 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1969 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_7(0), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1970 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1971 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1972 /*mask */ ~X86_MXCSR_IM,
1973 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1974 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1975 { { /*src2 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
1976 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1977 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
1978 /*mask */ X86_MXCSR_XCPT_MASK,
1979 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1980 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1981 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(1) } },
1982 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1983 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_1(1) } },
1984 /*mask */ X86_MXCSR_XCPT_MASK,
1985 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1986 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1987 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
1988 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
1989 { /* => */ { BS3_FP32_QNAN(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
1990 /*mask */ ~X86_MXCSR_XCPT_MASK,
1991 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1992 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1993 { { /*src2 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_6(1) } },
1994 { /*src1 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
1995 { /* => */ { BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
1996 /*mask */ ~X86_MXCSR_XCPT_MASK,
1997 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1998 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1999 /*
2000 * Overflow, Precision.
2001 */
2002 /*12*/{ { /*src2 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_6(1) } },
2003 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2004 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_RAND_VAL_1(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_5(1) } },
2005 /*mask */ ~X86_MXCSR_XCPT_MASK,
2006 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
2007 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
2008 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2009 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2010 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2011 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2012 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
2013 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2014 { { /*src2 */ { BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_0(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_3(1) } },
2015 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2016 { /* => */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_2(0), BS3_FP32_RAND_VAL_6(1), BS3_FP32_RAND_VAL_5(0), BS3_FP32_RAND_VAL_4(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_3(1), BS3_FP32_RAND_VAL_2(0) } },
2017 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2018 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
2019 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
2020 { { /*src2 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_7(1), BS3_FP32_RAND_VAL_6(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_1(1) } },
2021 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2022 { /* => */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_RAND_VAL_0(1), BS3_FP32_RAND_VAL_1(0), BS3_FP32_RAND_VAL_2(1), BS3_FP32_RAND_VAL_3(0), BS3_FP32_RAND_VAL_4(0), BS3_FP32_RAND_VAL_5(1), BS3_FP32_RAND_VAL_6(0) } },
2023 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
2024 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
2025 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
2026 /** @todo rest of OE, PE. */
2027 };
2028
2029 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
2030 {
2031 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2032 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2033
2034 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2035 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2036 };
2037 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
2038 {
2039 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2040 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2041
2042 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2043 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2044 };
2045 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
2046 {
2047 { bs3CpuInstr4_addss_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2048 { bs3CpuInstr4_addss_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2049
2050 { bs3CpuInstr4_vaddss_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2051 { bs3CpuInstr4_vaddss_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_AC, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2052
2053 { bs3CpuInstr4_addss_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2054 { bs3CpuInstr4_addss_XMM8_FSxBX_icebp_c64, X86_XCPT_AC, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
2055 };
2056
2057 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
2058 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
2059 return bs3CpuInstr4_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
2060 g_aXcptConfig3, RT_ELEMENTS(g_aXcptConfig3));
2061}
2062
2063
2064/**
2065 * The 32-bit protected mode main function.
2066 *
2067 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
2068 * we'll switch between PE32 and RM for each test step we perform). Given that
2069 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
2070 *
2071 * Some extra steps needs to be taken to properly handle extended state in LM64
2072 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
2073 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
2074 */
2075BS3_DECL(void) Main_pe32()
2076{
2077 static const BS3TESTMODEBYONEENTRY g_aTests[] =
2078 {
2079#if 1 /*ndef DEBUG_bird*/
2080# define ALL_TESTS
2081#endif
2082#if defined(ALL_TESTS)
2083 { "[v]addps", bs3CpuInstr4_v_addps, 0 },
2084 { "[v]addpd", bs3CpuInstr4_v_addpd, 0 },
2085 { "[v]addss", bs3CpuInstr4_v_addss, 0 },
2086#endif
2087 };
2088 Bs3TestInit("bs3-cpu-instr-4");
2089
2090 /*
2091 * Initialize globals.
2092 */
2093 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
2094 {
2095 uint32_t fEbx, fEcx, fEdx;
2096 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
2097 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
2098 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
2099 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
2100 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
2101 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
2102 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
2103 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
2104 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
2105 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
2106 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
2107 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
2108 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2109 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2110 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
2111 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
2112
2113 if (ASMCpuId_EAX(0) >= 7)
2114 {
2115 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
2116 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
2117 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
2118 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
2119 }
2120
2121 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
2122 {
2123 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
2124 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
2125 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
2126 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
2127 }
2128 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
2129
2130 /*
2131 * Figure out FPU save/restore method and support for DAZ bit.
2132 */
2133 {
2134 /** @todo Add bs3kit API to just get the ext ctx method without needing to
2135 * alloc/free a context. Replicating the logic in the bs3kit here, though
2136 * doable, runs a risk of not updating this when the other logic is
2137 * changed. */
2138 uint64_t fFlags;
2139 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
2140 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
2141 if (pExtCtx)
2142 {
2143 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
2144 g_enmExtCtxMethod = pExtCtx->enmMethod;
2145 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
2146 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
2147 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
2148 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
2149 g_fMxCsrDazSupported = true;
2150 }
2151 else
2152 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
2153 }
2154
2155 /*
2156 * Allocate a buffer for testing.
2157 */
2158 g_cbBuf = X86_PAGE_SIZE * 4;
2159 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
2160 if (g_pbBuf)
2161 {
2162 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
2163 if (g_pbBufAliasAlloc)
2164 {
2165 /*
2166 * Do the tests.
2167 */
2168 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
2169#ifdef BS3_SKIPIT_DO_SKIP
2170 bs3CpuInstrX_ShowTallies();
2171#endif
2172 }
2173 else
2174 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
2175 }
2176 else
2177 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
2178 }
2179
2180 Bs3TestTerm();
2181}
2182
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette