VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104873

Last change on this file since 104873 was 104873, checked in by vboxsync, 8 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: [v]addps denormals.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 114.6 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104873 2024-06-10 09:27:11Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code, 1 for 32-bit code and 2 for anything else.
 *
 * @param   a_bMode     The execution mode.  It appears twice in the expansion,
 *                      so avoid arguments with side effects.
 */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
/** Buffer size (terminator included) big enough for the names of all SIMD
 *  floating-point exception flags combined. */
#define BS3_FP_XCPT_NAMES_MAXLEN    (sizeof(" IE DE ZE OE UE PE "))
57
/*
 * Single-precision (32 bits) floating-point defines.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX                 254
/** The min exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MIN                 1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX            0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN            0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                       RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS                  RTFLOAT32U_FRACTION_BITS
/** The max exponent value for a single-precision floating-point integer without
 * losing precision.  Parenthesized so the sum stays intact inside larger
 * expressions. */
#define BS3_FP32_EXP_SAFE_INT_MAX               (BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
/** The min exponent value for a single-precision floating-point integer without
 * losing precision. */
#define BS3_FP32_EXP_SAFE_INT_MIN               1
/** The max fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MAX          0x7fffff
/** The min fraction value for a single-precision floating-point denormal. */
#define BS3_FP32_FRACTION_DENORMAL_MIN          1

/* Initializer shorthands; each one forwards to the matching RTFLOAT32U_INIT_XXX macro. */
#define BS3_FP32_NORMAL_MAX(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
#define BS3_FP32_NORMAL_MIN(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
#define BS3_FP32_ZERO(a_Sign)                   RTFLOAT32U_INIT_ZERO(a_Sign)
#define BS3_FP32_ONE(a_Sign)                    RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP32_INF(a_Sign)                    RTFLOAT32U_INIT_INF(a_Sign)
#define BS3_FP32_QNAN(a_Sign)                   RTFLOAT32U_INIT_QNAN(a_Sign)
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)        RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP32_SNAN(a_Sign)                   RTFLOAT32U_INIT_SNAN(a_Sign)

/*
 * Single-precision floating normals.
 * Fraction - 23 bits, all usable.
 * Exponent - 8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/* The maximum integer value (all 23 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX)
/* The minimum integer value without losing precision. */
#define BS3_FP32_NORMAL_SAFE_INT_MIN(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_SAFE_INT_MIN)

/*
 * Single-precision floating-point denormals.
 */
/** The maximum denormal value. */
#define BS3_FP32_DENORMAL_MAX(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value. */
#define BS3_FP32_DENORMAL_MIN(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_DENORMAL_MIN, 0)
114
/*
 * Double-precision (64 bits) floating-point defines.
 */
/** The max exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX                 2046
/** The min exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN                 1
/** The max fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX            0xfffffffffffff
/** The min fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN            0
/** The exponent bias for the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                       RTFLOAT64U_EXP_BIAS
/** Fraction width (in bits) for the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS                  RTFLOAT64U_FRACTION_BITS
/** The max exponent value for a double-precision floating-point integer without
 * losing precision.  Parenthesized so the sum stays intact inside larger
 * expressions. */
#define BS3_FP64_EXP_SAFE_INT_MAX               (BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
/** The min exponent value for a double-precision floating-point integer without
 * losing precision. */
#define BS3_FP64_EXP_SAFE_INT_MIN               1
/** The max fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MAX          0xfffffffffffff
/** The min fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MIN          1

/* Initializer shorthands; each one forwards to the matching RTFLOAT64U_INIT_XXX macro. */
#define BS3_FP64_NORMAL_MAX(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
#define BS3_FP64_NORMAL_MIN(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
#define BS3_FP64_ZERO(a_Sign)                   RTFLOAT64U_INIT_ZERO(a_Sign)
#define BS3_FP64_ONE(a_Sign)                    RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP64_INF(a_Sign)                    RTFLOAT64U_INIT_INF(a_Sign)
#define BS3_FP64_QNAN(a_Sign)                   RTFLOAT64U_INIT_QNAN(a_Sign)
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP64_SNAN(a_Sign)                   RTFLOAT64U_INIT_SNAN(a_Sign)
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)

/*
 * Double-precision floating-point normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): exponent 0xffe is above BS3_FP64_EXP_NORMAL_MAX (2046 = 0x7fe) and does
   not fit the 11 bits stated above - confirm the intended value before relying on it. */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
/* The maximum integer value (all 52 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX)
/* The minimum integer value without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MIN(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_SAFE_INT_MIN)

/*
 * Double-precision floating-point denormals.
 */
/** The maximum denormal value. */
#define BS3_FP64_DENORMAL_MAX(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value. */
#define BS3_FP64_DENORMAL_MIN(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MIN, 0)
172
173
174/*********************************************************************************************************************************
175* Structures and Typedefs *
176*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * Values are consecutive from T_INVALID (0) up to T_MAX, which therefore
 * serves as the element count for tables indexed by this type.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID,
    T_MMX,
    T_MMX_SSE,              /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,             /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,            /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_128BITS = T_SSE,      /**< Alias of T_SSE. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_256BITS = T_AVX_256,  /**< Alias of T_AVX_256. */
    T_AVX2_256,
    T_MAX                   /**< End of valid values; not a type itself. */
} BS3CPUINSTRX_INSTRTYPE_T;
205
/**
 * Memory or register rm variant.
 *
 * The numeric suffix of the RM_MEMxx values is the operand width in bits;
 * bs3CpuInstrXMemOpSize() maps them to 1, 2, 4 and 8 bytes respectively.
 */
enum {
    RM_REG = 0,
    RM_MEM,
    RM_MEM8,    /**< Memory operand is 8 bits (1 byte).   Hack for movss and similar. */
    RM_MEM16,   /**< Memory operand is 16 bits (2 bytes). Hack for movss and similar. */
    RM_MEM32,   /**< Memory operand is 32 bits (4 bytes). Hack for movss and similar. */
    RM_MEM64    /**< Memory operand is 64 bits (8 bytes). Hack for movss and similar. */
};
215
216/**
217 * Execution environment configuration.
218 */
219typedef struct BS3CPUINSTR4_CONFIG_T
220{
221 uint16_t fCr0Mp : 1;
222 uint16_t fCr0Em : 1;
223 uint16_t fCr0Ts : 1;
224 uint16_t fCr4OsFxSR : 1;
225 uint16_t fCr4OsXSave : 1;
226 uint16_t fCr4OsXmmExcpt : 1;
227 uint16_t fXcr0Sse : 1;
228 uint16_t fXcr0Avx : 1;
229 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
230 uint16_t fAlignCheck : 1;
231 uint16_t fMxCsrMM : 1; /**< AMD only */
232 uint8_t bXcptSse;
233 uint8_t bXcptAvx;
234} BS3CPUINSTR4_CONFIG_T;
235/** Pointer to an execution environment configuration. */
236typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
237
238/** State saved by bs3CpuInstr4ConfigReconfigure. */
239typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
240{
241 uint32_t uCr0;
242 uint32_t uCr4;
243 uint32_t uEfl;
244 uint16_t uFcw;
245 uint16_t uFsw;
246 uint32_t uMxCsr;
247} BS3CPUINSTRX_CONFIG_SAVED_T;
248typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
249typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
250
251/**
252 * YMM packed single-precision floating-point register.
253 * @todo move to x86.h?
254 */
255typedef union X86YMMFLOATPSREG
256{
257 /** Packed single-precision floating-point view. */
258 RTFLOAT32U ar32[8];
259 /** 256-bit integer view. */
260 RTUINT256U ymm;
261} X86YMMFLOATPSREG;
262# ifndef VBOX_FOR_DTRACE_LIB
263AssertCompileSize(X86YMMFLOATPSREG, 32);
264AssertCompileSize(X86YMMFLOATPSREG, sizeof(X86YMMREG));
265# endif
266/** Pointer to a YMM packed single-precision floating-point register. */
267typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
268/** Pointer to a const YMM single-precision packed floating-point register. */
269typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
270
271/**
272 * YMM packed double-precision floating-point register.
273 * @todo move to x86.h?
274 */
275typedef union X86YMMFLOATPDREG
276{
277 /** Packed double-precision floating-point view. */
278 RTFLOAT64U ar64[4];
279 /** 256-bit integer view. */
280 RTUINT256U ymm;
281} X86YMMFLOATPDREG;
282# ifndef VBOX_FOR_DTRACE_LIB
283AssertCompileSize(X86YMMFLOATPDREG, 32);
284AssertCompileSize(X86YMMFLOATPDREG, sizeof(X86YMMREG));
285# endif
286/** Pointer to a YMM packed floating-point register. */
287typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
288/** Pointer to a const YMM packed floating-point register. */
289typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
290
291/**
292 * YMM scalar quadruple-precision floating-point register.
293 * @todo move to x86.h?
294 */
295typedef union X86YMMFLOATSQREG
296{
297 /** Scalar quadruple-precision floating point view. */
298 RTFLOAT128U ar128[2];
299 /** 256-bit integer view. */
300 RTUINT256U ymm;
301} X86YMMFLOATSQREG;
302# ifndef VBOX_FOR_DTRACE_LIB
303AssertCompileSize(X86YMMFLOATSQREG, 32);
304AssertCompileSize(X86YMMFLOATSQREG, sizeof(X86YMMREG));
305# endif
306/** Pointer to a YMM scalar quadruple-precision floating-point register. */
307typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
308/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
309typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
310
311
312/*********************************************************************************************************************************
313* Global Variables *
314*********************************************************************************************************************************/
315static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
316static bool g_fAmdMisalignedSse = false;
317static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
318static bool g_fMxCsrDazSupported = false;
319
320/** Zero value (indexed by fSign). */
321RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
322RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
323
324/** One value (indexed by fSign). */
325RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
326 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
327RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
328 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
329
330/** Infinity (indexed by fSign). */
331RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
332RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
333
334/** Default QNaNs (indexed by fSign). */
335RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
336RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
337
338/** Size of g_pbBuf - at least three pages. */
339static uint32_t g_cbBuf;
340/** Buffer of g_cbBuf size. */
341static uint8_t BS3_FAR *g_pbBuf;
342/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
343static uint8_t BS3_FAR *g_pbBufAlias;
344/** RW alias for the memory at g_pbBuf. */
345static uint8_t BS3_FAR *g_pbBufAliasAlloc;
346
347/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
348static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
349{
350/*
351 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
352 * +AVX +AVX +AMD/SSE +AMD/SSE
353 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
354 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
355 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
356 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
357 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
358 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
359 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
360 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
361 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
362 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
363 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
364 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
365 /* Memory misalignment and alignment checks: */
366 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
367 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
368 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
369 /* AMD only: */
370 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
371 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
372};
373
374
375
376/**
377 * Returns the name of an X86 exception given the vector.
378 *
379 * @returns Name of the exception.
380 * @param uVector The exception vector.
381 */
382static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
383{
384 switch (uVector)
385 {
386 case X86_XCPT_DE: return "#DE";
387 case X86_XCPT_DB: return "#DB";
388 case X86_XCPT_NMI: return "#NMI";
389 case X86_XCPT_BP: return "#BP";
390 case X86_XCPT_OF: return "#OF";
391 case X86_XCPT_BR: return "#BR";
392 case X86_XCPT_UD: return "#UD";
393 case X86_XCPT_NM: return "#NM";
394 case X86_XCPT_DF: return "#DF";
395 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
396 case X86_XCPT_TS: return "#TS";
397 case X86_XCPT_NP: return "#NP";
398 case X86_XCPT_SS: return "#SS";
399 case X86_XCPT_GP: return "#GP";
400 case X86_XCPT_PF: return "#PF";
401 case X86_XCPT_MF: return "#MF";
402 case X86_XCPT_AC: return "#AC";
403 case X86_XCPT_MC: return "#MC";
404 case X86_XCPT_XF: return "#XF";
405 case X86_XCPT_VE: return "#VE";
406 case X86_XCPT_CP: return "#CP";
407 case X86_XCPT_VC: return "#VC";
408 case X86_XCPT_SX: return "#SX";
409 }
410 return "UNKNOWN";
411}
412
413
414/**
415 * Gets the names of floating-point exception flags that are set for a given MXCSR.
416 *
417 * @returns Names of floating-point exception flags that are set.
418 * @param pszBuf Where to store the floating-point exception flags.
419 * @param cchBuf The size of the buffer.
420 * @param fMxCsr The MXCSR value.
421 */
422static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
423{
424 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
425 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
426 return Bs3StrPrintf(pszBuf, cchBuf, " None");
427 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
428 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
429 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
430}
431
432
433/**
434 * Reconfigures the execution environment according to @a pConfig.
435 *
436 * Call bs3CpuInstrXConfigRestore to undo the changes.
437 *
438 * @returns true on success, false if the configuration cannot be applied. In
439 * the latter case, no context changes are made.
440 * @param pSavedCfg Where to save state we modify.
441 * @param pCtx The register context to modify.
442 * @param pExtCtx The extended register context to modify.
443 * @param pConfig The configuration to apply.
444 * @param bMode The target mode.
445 */
446static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
447 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
448{
449 /*
450 * Save context bits we may change here
451 */
452 pSavedCfg->uCr0 = pCtx->cr0.u32;
453 pSavedCfg->uCr4 = pCtx->cr4.u32;
454 pSavedCfg->uEfl = pCtx->rflags.u32;
455 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
456 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
457 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
458
459 /*
460 * Can we make these changes?
461 */
462 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
463 return false;
464
465 /*
466 * Modify the test context.
467 */
468 if (pConfig->fCr0Mp)
469 pCtx->cr0.u32 |= X86_CR0_MP;
470 else
471 pCtx->cr0.u32 &= ~X86_CR0_MP;
472 if (pConfig->fCr0Em)
473 pCtx->cr0.u32 |= X86_CR0_EM;
474 else
475 pCtx->cr0.u32 &= ~X86_CR0_EM;
476 if (pConfig->fCr0Ts)
477 pCtx->cr0.u32 |= X86_CR0_TS;
478 else
479 pCtx->cr0.u32 &= ~X86_CR0_TS;
480
481 if (pConfig->fCr4OsFxSR)
482 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
483 else
484 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
485
486 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
487 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
488 else
489 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
490
491 if (pConfig->fCr4OsFxSR)
492 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
493 else
494 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
495
496 if (pConfig->fCr4OsXSave)
497 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
498 else
499 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
500
501 if (pConfig->fXcr0Sse)
502 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
503 else
504 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
505 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
506 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
507 else
508 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
509
510 if (pConfig->fAlignCheck)
511 {
512 pCtx->rflags.u32 |= X86_EFL_AC;
513 pCtx->cr0.u32 |= X86_CR0_AM;
514 }
515 else
516 {
517 pCtx->rflags.u32 &= ~X86_EFL_AC;
518 pCtx->cr0.u32 &= ~X86_CR0_AM;
519 }
520
521 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
522 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
523
524 if (pConfig->fMxCsrMM)
525 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
526 else
527 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
528 return true;
529}
530
531
532/**
533 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
534 */
535static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
536{
537 pCtx->cr0.u32 = pSavedCfg->uCr0;
538 pCtx->cr4.u32 = pSavedCfg->uCr4;
539 pCtx->rflags.u32 = pSavedCfg->uEfl;
540 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
541 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
542 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
543 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
544}
545
546
547/**
548 * Allocates three extended CPU contexts and initializes the first one
549 * with random data.
550 * @returns First extended context, initialized with randomish data. NULL on
551 * failure (complained).
552 * @param ppExtCtx2 Where to return the 2nd context.
553 */
554static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
555{
556 /* Allocate extended context structures. */
557 uint64_t fFlags;
558 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
559 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
560 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
561 if (pExtCtx1)
562 {
563 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
564 /** @todo populate with semi-random stuff. */
565
566 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
567 *ppExtCtx2 = pExtCtx2;
568 return pExtCtx1;
569 }
570 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
571 *ppExtCtx2 = NULL;
572 return NULL;
573}
574
575
576/**
577 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
578 *
579 * @param pExtCtx1 The first extended context.
580 * @param pExtCtx2 The second extended context.
581 */
582static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
583{
584 RT_NOREF_PV(pExtCtx2);
585 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
586}
587
588
589/**
590 * Sets up SSE and AVX bits relevant for FPU instructions.
591 */
592static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
593{
594 /* CR0: */
595 uint32_t cr0 = Bs3RegGetCr0();
596 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
597 cr0 |= X86_CR0_NE;
598 Bs3RegSetCr0(cr0);
599
600 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
601 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
602 pCtx->cr0.u32 |= X86_CR0_NE;
603
604 /* CR4: */
605 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
606 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
607 {
608 uint32_t cr4 = Bs3RegGetCr4();
609 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
610 {
611 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
612 Bs3RegSetCr4(cr4);
613 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
614 }
615 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
616 {
617 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
618 Bs3RegSetCr4(cr4);
619 }
620 pCtx->cr4.u32 = cr4;
621 }
622}
623
624
625/**
626 * Configures the buffer with electric fences in paged modes.
627 *
628 * @returns Adjusted buffer pointer.
629 * @param pbBuf The buffer pointer.
630 * @param pcbBuf Pointer to the buffer size (input & output).
631 * @param bMode The testing target mode.
632 */
633DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
634{
635 if (BS3_MODE_IS_PAGED(bMode))
636 {
637 int rc;
638 uint32_t cbBuf = *pcbBuf;
639 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
640 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
641 pbBuf += X86_PAGE_SIZE;
642 cbBuf -= X86_PAGE_SIZE * 2;
643 *pcbBuf = cbBuf;
644
645 g_pbBufAlias = g_pbBufAliasAlloc;
646 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
647 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
648 if (RT_FAILURE(rc))
649 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
650 }
651 else
652 g_pbBufAlias = pbBuf;
653 return pbBuf;
654}
655
656
657/**
658 * Undoes what bs3CpuInstrXBufSetup did.
659 *
660 * @param pbBuf The buffer pointer.
661 * @param cbBuf The buffer size.
662 * @param bMode The testing target mode.
663 */
664DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
665{
666 if (BS3_MODE_IS_PAGED(bMode))
667 {
668 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
669 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
670 }
671}
672
673
674/**
675 * Gets a buffer of a @a cbMemOp sized operand according to the given
676 * configuration and alignment restrictions.
677 *
678 * @returns Pointer to the buffer.
679 * @param pbBuf The buffer pointer.
680 * @param cbBuf The buffer size.
681 * @param cbMemOp The operand size.
682 * @param cbAlign The operand alignment restriction.
683 * @param pConfig The configuration.
684 * @param fPageFault The \#PF test setting.
685 */
686DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
687 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
688{
689 /* All allocations are at the tail end of the buffer, so that we've got a
690 guard page following the operand. When asked to consistenly trigger
691 a #PF, we slide the buffer into that guard page. */
692 if (fPageFault)
693 cbBuf += X86_PAGE_SIZE;
694
695 if (pConfig->fAligned)
696 {
697 if (!pConfig->fAlignCheck)
698 return &pbBuf[cbBuf - cbMemOp];
699 return &pbBuf[cbBuf - cbMemOp - cbAlign];
700 }
701 return &pbBuf[cbBuf - cbMemOp - 1];
702}
703
704
705/**
706 * Determines the size of memory operands.
707 */
708DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
709{
710 if (enmRm <= RM_MEM)
711 return cbOperand;
712 if (enmRm == RM_MEM8)
713 return sizeof(uint8_t);
714 if (enmRm == RM_MEM16)
715 return sizeof(uint16_t);
716 if (enmRm == RM_MEM32)
717 return sizeof(uint32_t);
718 if (enmRm == RM_MEM64)
719 return sizeof(uint64_t);
720 BS3_ASSERT(0);
721 return cbOperand;
722}
723
724
/*
 * Code to make testing the tests faster.  `bs3CpuInstrX_SkipIt()' randomly
 * skips a large fraction of the micro-tests.  It is sufficiently random
 * that over a large number of runs, all micro-tests will be hit.
 *
 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
 * (on an Intel Core i7-10700, fwiw).
 *
 * To activate this 'developer's speed-testing mode', turn on
 * `#define BS3_SKIPIT_DO_SKIP' here.
 *
 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
 * skipped in a row; e.g. the default of 26 means about every 27th
 * micro-test is run during a particular test run.  (This is not 27x
 * faster due to other activities which are not skipped!)  Note this is
 * only an average; the actual skips are random.
 *
 * BS3_SKIPIT_REPORT_COUNT is the number of micro-tests seen between two
 * tally reports.
 *
 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS', which costs about
 * 3% performance).
 *
 * Note! The skipping is not compatible with testing the native recompiler as
 *       it requires the test code to be run a number of times before it kicks
 *       in and does the native recompilation (currently around 16 times).
 */
#define BS3_SKIPIT_AVG_SKIP     26
#define BS3_SKIPIT_REPORT_COUNT 150000
#undef  BS3_SKIPIT_DO_SKIP
#undef  BS3_SKIPIT_DO_ARGS
757#ifndef BS3_SKIPIT_DO_SKIP
758# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
759#else
760# include <iprt/asm-amd64-x86.h>
761# include <iprt/asm-math.h>
762
763DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
764{
765 /*
766 * A simple Lehmer linear congruential pseudo-random number
767 * generator using the constants suggested by Park & Miller:
768 *
769 * modulus = 2^31 - 1 (INT32_MAX)
770 * multiplier = 7^5 (16807)
771 *
772 * It produces numbers in the range [1..INT32_MAX-1] and is
773 * more chaotic in the higher bits.
774 *
775 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
776 * though the zero handling is different.
777 */
778 static uint32_t s_uSeedMemory = 0;
779 uint32_t uVal = s_uSeedMemory;
780 if (!uVal)
781 uVal = (uint32_t)ASMReadTSC();
782 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
783 s_uSeedMemory = uVal;
784 return uVal;
785}
786
/** Number of micro-tests seen, and how many of those were skipped. */
static unsigned g_cSeen, g_cSkipped;

/**
 * Prints the micro-test tallies: seen, actually tested and skipped.
 *
 * The counters are unsigned, so they are formatted with %u rather than %d.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    Bs3TestPrintf("Micro-tests %u: tested %u / skipped %u\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}
793
794# ifdef BS3_SKIPIT_DO_ARGS
795# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
796static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
797# else
798# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
799static bool bs3CpuInstrX_SkipIt(void)
800# endif
801{
802 static unsigned s_uTimes = 0;
803 bool fSkip;
804
805 /* Cache calls to the relatively expensive random routine */
806 if (!s_uTimes)
807 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
808 fSkip = --s_uTimes > 0;
809 if (fSkip)
810 ++g_cSkipped;
811
812 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
813 bs3CpuInstrX_ShowTallies();
814 return fSkip;
815}
816
817#endif /* BS3_SKIPIT_DO_SKIP */
818
819/*
820 * Test type #1.
821 * Generic YMM registers.
822 */
823typedef struct BS3CPUINSTR4_TEST1_VALUES_T
824{
825 X86YMMREG uSrc2; /**< Second source operand. */
826 X86YMMREG uSrc1; /**< uDstIn for SSE */
827 X86YMMREG uDstOut; /**< Destination output. */
828 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
829 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
830 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
831 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
832 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
833 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
834} BS3CPUINSTR4_TEST1_VALUES_T;
835
836/*
837 * Test type #1.
838 * Packed single-precision.
839 */
840typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
841{
842 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
843 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
844 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
845 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
846 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
847 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
848 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
849 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
850 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
851} BS3CPUINSTR4_TEST1_VALUES_PS_T;
852AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
853AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
854AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
855AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
856AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
857AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
858AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
859AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
860AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
861AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
862
863/*
864 * Test type #1.
865 * Packed double-precision.
866 */
867typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
868{
869 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
870 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
871 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
872 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
873 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
874 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
875 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
876 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
877 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
878} BS3CPUINSTR4_TEST1_VALUES_PD_T;
879AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
880AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
881AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
882AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
883AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
884AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
885AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
886AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
887AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
888AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
889
890/*
891 * Test type #1.
892 * Scalar quadruple-precision.
893 */
894typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
895{
896 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
897 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
898 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
899 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
900 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
901 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
902 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
903 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
904 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
905} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
906AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
907AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
908AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
909AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
910AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
911AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
912AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
913AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
914AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
915AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
916
917typedef struct BS3CPUINSTR4_TEST1_T
918{
919 FPFNBS3FAR pfnWorker; /**< Test function worker. */
920 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
921 uint8_t enmRm; /**< R/M type. */
922 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
923 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
924 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
925 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
926 uint8_t cValues; /**< Number of test values in @c paValues. */
927 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
928} BS3CPUINSTR4_TEST1_T;
929
930typedef struct BS3CPUINSTR4_TEST1_MODE_T
931{
932 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
933 unsigned cTests;
934} BS3CPUINSTR4_TEST1_MODE_T;
935
936/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
937#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
938 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
939
940typedef struct BS3CPUINSTR4_TEST1_CTX_T
941{
942 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
943 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
944 unsigned iVal; /**< Which iteration of the test value is this. */
945 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
946 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
947 PBS3REGCTX pCtx; /**< The general-purpose register context. */
948 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
949 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
950 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
951 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
952 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
953 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
954 uint8_t cbInstr; /**< Size of the instruction opcode. */
955 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
956 bool fSseInstr; /**< Whether this is an SSE instruction. */
957 bool fAvxInstr; /**< Whether this is an AVX instruction. */
958 uint16_t idTestStep; /**< The test iteration step. */
959} BS3CPUINSTR4_TEST1_CTX_T;
960/** Pointer to a test 1 context. */
961typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
962
963
964/**
965 * Worker for bs3CpuInstrX_WorkerTestType1.
966 */
967static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
968 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
969{
970 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
971 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
972 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
973 PBS3REGCTX pCtx = pTestCtx->pCtx;
974 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
975 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
976 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
977 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
978 uint8_t cbMemOp = pTestCtx->cbMemOp;
979 uint8_t const cbOperand = pTestCtx->cbOperand;
980 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
981 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
982 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
983 uint32_t const fExpectedMxCsrFlags = pTestCtx->cbOperand > 16 ? pValues->f256ExpectedMxCsrFlags
984 : pValues->f128ExpectedMxCsrFlags;
985 bool const fFpFlagsExpect = RT_BOOL( (fExpectedMxCsrFlags
986 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
987 uint32_t uMxCsr;
988 X86YMMREG MemOpExpect;
989 uint16_t cErrors;
990
991 /*
992 * Set up the context and some expectations.
993 */
994 /* Destination. */
995 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
996 if (pTest->iRegDst == UINT8_MAX)
997 {
998 BS3_ASSERT(pTest->enmRm >= RM_MEM);
999 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
1000 if (bXcptExpect == X86_XCPT_DB)
1001 MemOpExpect.ymm = pValues->uDstOut.ymm;
1002 else
1003 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
1004 }
1005
1006 /* Source #1 (/ destination for SSE). */
1007 if (pTest->iRegSrc1 == UINT8_MAX)
1008 {
1009 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1010 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
1011 if (pTest->iRegDst == UINT8_MAX)
1012 BS3_ASSERT(pTestCtx->fSseInstr);
1013 else
1014 MemOpExpect.ymm = pValues->uSrc1.ymm;
1015 }
1016 else if (pTestCtx->fSseInstr)
1017 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
1018 else
1019 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
1020
1021 /* Source #2. */
1022 if (pTest->iRegSrc2 == UINT8_MAX)
1023 {
1024 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1025 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
1026 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
1027 MemOpExpect.ymm = pValues->uSrc2.ymm;
1028 }
1029 else if (pTestCtx->fSseInstr)
1030 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
1031 else
1032 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
1033
1034 /* Memory pointer. */
1035 if (pTest->enmRm >= RM_MEM)
1036 {
1037 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
1038 || pTest->iRegSrc1 == UINT8_MAX
1039 || pTest->iRegSrc2 == UINT8_MAX);
1040 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
1041 }
1042
1043 /* Setup MXCSR for the current test. */
1044 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
1045 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
1046 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
1047 if ( pValues->fDenormalsAreZero == X86_MXCSR_DAZ
1048 && g_fMxCsrDazSupported)
1049 uMxCsr |= X86_MXCSR_DAZ;
1050 if (pValues->fFlushToZero == X86_MXCSR_FZ)
1051 uMxCsr |= X86_MXCSR_FZ;
1052 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1053
1054 /*
1055 * Prepare globals and execute.
1056 */
1057 g_uBs3TrapEipHint = pCtx->rip.u32;
1058 if ( bXcptExpect == X86_XCPT_DB
1059 && !fFpFlagsExpect)
1060 g_uBs3TrapEipHint += cbInstr + 1;
1061 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1062
1063 /*
1064 * Check the result.
1065 *
1066 * If a floating-point exception is expected, the destination is not updated by the instruction.
1067 * In the case of SSE instructions, updating the destination here will work because it is the same
1068 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1069 */
1070 cErrors = Bs3TestSubErrorCount();
1071 if ( bXcptExpect == X86_XCPT_DB
1072 && !fFpFlagsExpect
1073 && pTest->iRegDst != UINT8_MAX)
1074 {
1075 if (pTestCtx->fSseInstr)
1076 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1077 else
1078 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1079 }
1080#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1081 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1082 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1083 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1084 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1085#endif
1086 if (bXcptExpect == X86_XCPT_DB)
1087 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1088 | (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1089 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1090
1091 if (bXcptExpect == X86_XCPT_DB)
1092 {
1093 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1094
1095 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1096 if (fMxCsrXcptFlags != (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1097 {
1098 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1099 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1100 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), fExpectedMxCsrFlags);
1101 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1102 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1103 }
1104
1105 /* Check if the SIMD FP exception (or lack of) is as expected. */
1106 if (fFpFlagsExpect)
1107 {
1108 if (pTrapFrame->bXcpt == bFpXcpt)
1109 { /* likely */ }
1110 else
1111 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1112 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1113 }
1114 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1115 { /* likely */ }
1116 else
1117 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1118 }
1119 /* Check if non-FP exception is as expected. */
1120 else if (pTrapFrame->bXcpt != bXcptExpect)
1121 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1122
1123 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1124 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1125 {
1126 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1127 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1128 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1129 }
1130 if (bXcptExpect == X86_XCPT_PF)
1131 pCtx->cr2.u = (uintptr_t)puMemOp;
1132 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1133 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1134 pTestCtx->pszMode, pTestCtx->idTestStep);
1135 pCtx->cr2.u = 0;
1136
1137 if ( pTest->enmRm >= RM_MEM
1138 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1139 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1140
1141 return cErrors;
1142}
1143
1144
1145/**
1146 * Test type #1 worker.
1147 */
1148static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1149 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1150{
1151 BS3REGCTX Ctx;
1152 BS3TRAPFRAME TrapFrame;
1153 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1154 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1155 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1156 uint32_t cbBuf = g_cbBuf;
1157 PBS3EXTCTX pExtCtxOut;
1158 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1159 if (pExtCtx)
1160 { /* likely */ }
1161 else
1162 return 0;
1163 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1164 { /* likely */ }
1165 else
1166 {
1167 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1168 return 0;
1169 }
1170
1171 /* Ensure the structures are allocated before we sample the stack pointer. */
1172 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1173 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1174
1175 /*
1176 * Create test context.
1177 */
1178 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1179 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1180 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1181
1182 /*
1183 * Run the tests in all rings since alignment issues may behave
1184 * differently in ring-3 compared to ring-0.
1185 */
1186 for (;;)
1187 {
1188 unsigned fPf = 0;
1189 do
1190 {
1191 unsigned iCfg;
1192 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1193 {
1194 unsigned iTest;
1195 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1196 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1197 continue; /* unsupported config */
1198
1199 /*
1200 * Iterate the tests.
1201 */
1202 for (iTest = 0; iTest < cTests; iTest++)
1203 {
1204 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1205 unsigned const cValues = pTest->cValues;
1206 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1207 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1208 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1209 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1210 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1211 uint8_t const cbAlign = cbMemOp;
1212 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1213 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1214 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1215 : fSseInstr ? paConfigs[iCfg].bXcptSse
1216 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1217 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1218 unsigned cRecompRuns = 0;
1219 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1220 unsigned iVal;
1221
1222 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1223 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1224 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1225 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1226 continue;
1227
1228 /* #AC is only raised in ring-3. */
1229 if (bXcptExpect == X86_XCPT_AC)
1230 {
1231 if (bRing != 3)
1232 bXcptExpect = X86_XCPT_DB;
1233 else if (fAvxInstr)
1234 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1235 }
1236
1237 if (fPf && bXcptExpect == X86_XCPT_DB)
1238 bXcptExpect = X86_XCPT_PF;
1239
1240 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1241
1242 /*
1243 * Iterate the test values and do the actual testing.
1244 */
1245 while (cRecompRuns < cMaxRecompRuns)
1246 {
1247 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1248 {
1249 uint16_t cErrors;
1250 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1251 uint32_t const fExpectedMxCsrFlags = pTest->enmType >= T_128BITS
1252 ? pTest->paValues[iVal].f128ExpectedMxCsrFlags
1253 : pTest->paValues[iVal].f256ExpectedMxCsrFlags;
1254
1255 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1256 continue;
1257
1258 /*
1259 * If the hardware does not support DAZ bit and we are testing DE exceptions,
1260 * then skip testing them. We still want to test values that set the MXCSR.DAZ
1261 * if we are not expecting DE exceptions to make sure DAZ bit in and of itself
1262 * is not influencing other cases.
1263 */
1264 if ( !g_fMxCsrDazSupported
1265 && pTest->paValues[iVal].fDenormalsAreZero == X86_MXCSR_DAZ
1266 && (fExpectedMxCsrFlags & X86_MXCSR_DE))
1267 continue;
1268
1269 /*
1270 * Setup the test instruction context and pass it to the worker.
1271 * A few of these can be figured out by the worker but initializing
1272 * it outside the inner most loop is more optimal.
1273 */
1274 TestCtx.pConfig = &paConfigs[iCfg];
1275 TestCtx.pTest = pTest;
1276 TestCtx.iVal = iVal;
1277 TestCtx.pszMode = pszMode;
1278 TestCtx.pTrapFrame = &TrapFrame;
1279 TestCtx.pCtx = &Ctx;
1280 TestCtx.pExtCtx = pExtCtx;
1281 TestCtx.pExtCtxOut = pExtCtxOut;
1282 TestCtx.puMemOp = (uint8_t *)puMemOp;
1283 TestCtx.puMemOpAlias = puMemOpAlias;
1284 TestCtx.cbMemOp = cbMemOp;
1285 TestCtx.cbOperand = cbOperand;
1286 TestCtx.bXcptExpect = bXcptExpect;
1287 TestCtx.fSseInstr = fSseInstr;
1288 TestCtx.fAvxInstr = fAvxInstr;
1289 TestCtx.idTestStep = idTestStep;
1290 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1291 if (cErrors != Bs3TestSubErrorCount())
1292 {
1293 if (paConfigs[iCfg].fAligned)
1294 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, %s %u-bit)",
1295 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1296 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1297 else
1298 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32, %s %u-bit)",
1299 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1300 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1301 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0, fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1302 Bs3TestPrintf("\n");
1303 }
1304 }
1305 }
1306 }
1307 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1308 }
1309 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1310
1311 /*
1312 * Next ring.
1313 */
1314 bRing++;
1315 if (bRing > 3 || bMode == BS3_MODE_RM)
1316 break;
1317 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1318 }
1319
1320 /*
1321 * Cleanup.
1322 */
1323 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1324 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1325 return 0;
1326}
1327
1328
1329/*
1330 * [V]ADDPS.
1331 */
1332BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addps(uint8_t bMode)
1333{
1334 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1335 {
1336 /*
1337 * Zero.
1338 */
1339 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1340 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1341 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1342 /*mask */ X86_MXCSR_XCPT_MASK,
1343 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1344 /*flags */ 0, 0 },
1345 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1346 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1347 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1348 /*mask */ ~X86_MXCSR_XCPT_MASK,
1349 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1350 /*flags */ 0, 0 },
1351 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1352 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1353 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1354 /*mask */ ~X86_MXCSR_XCPT_MASK,
1355 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1356 /*flags */ 0, 0 },
1357 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1358 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1359 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1360 /*mask */ ~X86_MXCSR_XCPT_MASK,
1361 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1362 /*flags */ 0, 0 },
1363 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1364 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1365 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1366 /*mask */ ~X86_MXCSR_XCPT_MASK,
1367 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1368 /*flags */ 0, 0 },
1369 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1370 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1371 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1372 /*mask */ X86_MXCSR_XCPT_MASK,
1373 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1374 /*flags */ 0, 0 },
1375 /*
1376 * Infinity.
1377 */
1378 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1379 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1380 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1381 /*mask */ ~X86_MXCSR_IM,
1382 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1383 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1384 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1385 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1386 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1387 /*mask */ X86_MXCSR_XCPT_MASK,
1388 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1389 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1390 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1391 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1392 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1393 /*mask */ X86_MXCSR_XCPT_MASK,
1394 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1395 /*flags */ 0, X86_MXCSR_IE },
1396 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1397 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
1398 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(0) } },
1399 /*mask */ ~X86_MXCSR_XCPT_MASK,
1400 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1401 /*flags */ 0, X86_MXCSR_IE },
1402 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_INF(0) } },
1403 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_INF(1) } },
1404 { /* => */ { BS3_FP32_INF(1), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_QNAN(1), BS3_FP32_QNAN(1), BS3_FP32_INF(1) } },
1405 /*mask */ ~X86_MXCSR_XCPT_MASK,
1406 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1407 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1408 /*
1409 * Overflow, Precision.
1410 */
1411 /*11*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1412 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1413 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), } },
1414 /*mask */ ~X86_MXCSR_XCPT_MASK,
1415 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1416 /*flags */ 0, X86_MXCSR_OE },
1417 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1418 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0) } },
1419 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1420 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1421 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1422 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1423 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1424 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1425 { /* => */ { BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1426 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1427 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1428 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1429 { { /*src2 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1430 { /*src1 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1431 { /* => */ { BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_VAL(1, 0, 2) } },
1432 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1433 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1434 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1435 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1436 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1437 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1438 /*mask */ X86_MXCSR_XCPT_MASK,
1439 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1440 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
1441 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1442 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1443 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1444 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1445 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1446 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1447 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1448 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1449 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1450 /*mask */ ~X86_MXCSR_XCPT_MASK,
1451 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1452 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1453 /*
1454 * Normals.
1455 */
1456 /*18*/{ { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/, BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/ } },
1457 { /*src1 */ { BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/, BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/ } },
1458 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/, BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/ } },
1459 /*mask */ X86_MXCSR_XCPT_MASK,
1460 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1461 /*flags */ 0, 0 },
1462 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_VAL_1(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1463 { /*src1 */ { BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_VAL_1(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1464 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1465 /*mask */ ~X86_MXCSR_XCPT_MASK,
1466 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1467 /*flags */ 0, 0 },
1468 { { /*src2 */ { BS3_FP32_VAL(0, 0x5ca5b8, 0x93)/*1807543*/, BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x5c0000, 0x84)/*55*/, BS3_FP32_VAL(0, 0x2514d6, 0x93)/*1352346.75*/, BS3_FP32_VAL(0, 0x534000, 0x86)/*211.25*/, BS3_FP32_ZERO(0) } },
1469 { /*src1 */ { BS3_FP32_VAL(0, 0x1ea980, 0x8f)/* 81235*/, BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x600000, 0x81)/* 7*/, BS3_FP32_VAL(0, 0x7c9000, 0x88)/* 1010.25*/, BS3_FP32_ONE(1) /*- 1.00*/, BS3_FP32_ZERO(0) } },
1470 { /* => */ { BS3_FP32_VAL(0, 0x669050, 0x93)/*1888778*/, BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x780000, 0x84)/*62*/, BS3_FP32_VAL(0, 0x253468, 0x93)/*1353357.00*/, BS3_FP32_VAL(0, 0x524000, 0x86)/*210.25*/, BS3_FP32_ZERO(0) } },
1471 /*mask */ X86_MXCSR_XCPT_MASK,
1472 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1473 /*flags */ 0, 0 },
1474 { { /*src2 */ { BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x74429f, 0x97)/*32015678*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(1), BS3_FP32_ZERO(0) } },
1475 { /*src1 */ { BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(1, 0x712060, 0x92)/*- 987654*/, BS3_FP32_VAL(0, 0x3c614e, 0x96)/*12345678*/, BS3_FP32_ONE(0), BS3_FP32_ONE(0) } },
1476 { /* => */ { BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0), BS3_FP32_VAL(0, 0x6cb99c, 0x97)/*31028024*/, BS3_FP32_VAL(0, 0x3c614e, 0x97)/*24691356*/, BS3_FP32_ZERO(0), BS3_FP32_ONE(0) } },
1477 /*mask */ ~X86_MXCSR_XCPT_MASK,
1478 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1479 /*flags */ 0, 0 },
1480 { { /*src2 */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1481 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ONE(1), BS3_FP32_ONE(0), BS3_FP32_ONE(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1482 { /* => */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX - 1, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1483 /*mask */ X86_MXCSR_XCPT_MASK,
1484 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1485 /*flags */ 0, 0 },
1486 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_ONE(1), } },
1487 { /*src1 */ { BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), } },
1488 { /* => */ { BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_ZERO(1), BS3_FP32_ONE(1), BS3_FP32_VAL(0, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(1, 0, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1489 /*mask */ ~X86_MXCSR_XCPT_MASK,
1490 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1491 /*flags */ 0, 0 },
1492 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1493 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(0) } },
1494 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) , BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0, 2) } },
1495 /*mask */ ~X86_MXCSR_XCPT_MASK,
1496 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1497 /*flags */ 0, 0 },
1498 { { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/, BS3_FP32_VAL(0, 0x3ce348, 0x90)/*193421.125*/, BS3_FP32_VAL(0, 0x6423f2, 0x92)/*934463.125*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x16b43a, 0x93)/*1234567.25*/, BS3_FP32_VAL(0, 0x792318, 0x91)/*510232.75*/, BS3_FP32_VAL(0, 0x600000, 0x7e)/* 0.875*/ } },
1499 { /*src1 */ { BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/, BS3_FP32_VAL(0, 0x430ebc, 0x91)/*399477.875*/, BS3_FP32_VAL(1, 0x0a19f0, 0x8f)/*-70707.875*/, BS3_FP32_NORMAL_SAFE_INT_MIN(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_VAL(0, 0x00c6d3, 0x94)/*2109876.75*/, BS3_FP32_VAL(1, 0x316740, 0x8e)/*-45415.25*/, BS3_FP32_VAL(0, 0x769b50, 0x92)/*1010101.000*/ } },
1500 { /* => */ { BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/, BS3_FP32_VAL(0, 0x10c030, 0x92)/*592899.000*/, BS3_FP32_VAL(0, 0x52e0b4, 0x92)/*863755.250*/, BS3_FP32_VAL(1, 0, 2), BS3_FP32_VAL(0, 0, 2), BS3_FP32_VAL(0, 0x4c20f0, 0x94)/*3344444.00*/, BS3_FP32_VAL(0, 0x62f630, 0x91)/*464817.50*/, BS3_FP32_VAL(0, 0x769b5e, 0x92)/*1010101.875*/ } },
1501 /*mask */ X86_MXCSR_XCPT_MASK,
1502 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1503 /*flags */ 0, 0 },
1504 /*26*/{ { /*src2 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1505 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1506 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(1) } },
1507 /*mask */ ~X86_MXCSR_XCPT_MASK,
1508 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1509 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1510 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1511 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1512 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1513 /*mask */ X86_MXCSR_XCPT_MASK,
1514 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1515 /*flags */ 0, 0 },
1516 { { /*src2 */ { BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MAX(0) } },
1517 { /*src1 */ { BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0), BS3_FP32_DENORMAL_MAX(0), BS3_FP32_DENORMAL_MIN(0) } },
1518 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), } },
1519 /*mask */ X86_MXCSR_XCPT_MASK,
1520 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1521 /*flags */ 0, 0 },
1522 /** @todo Denormals; Underflow, Precision; Rounding, FZ etc. */
1523 };
1524
1525 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1526 {
1527 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1528 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1529
1530 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1531 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1532
1533 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1534 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1535 };
1536 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1537 {
1538 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1539 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1540
1541 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1542 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1543
1544 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1545 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1546 };
1547 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1548 {
1549 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1550 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1551
1552 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1553 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1554
1555 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1556 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1557
1558 { bs3CpuInstrX_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1559 { bs3CpuInstrX_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1560
1561 { bs3CpuInstrX_vaddps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1562 { bs3CpuInstrX_vaddps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1563 };
1564
1565 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1566 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1567 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1568 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1569}
1570
1571
1572/*
1573 * [V]ADDPD.
1574 */
1575BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1576{
1577 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1578 {
1579 /*
1580 * Zero.
1581 */
1582 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1583 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1584 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1585 /*mask */ X86_MXCSR_XCPT_MASK,
1586 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1587 /*flags */ 0, 0 },
1588 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1589 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1590 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1591 /*mask */ ~X86_MXCSR_XCPT_MASK,
1592 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1593 /*flags */ 0, 0 },
1594 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1595 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1596 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1597 /*mask */ X86_MXCSR_XCPT_MASK,
1598 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_DOWN,
1599 /*flags */ 0, 0 },
1600 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1601 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1602 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1603 /*mask */ ~X86_MXCSR_XCPT_MASK,
1604 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1605 /*flags */ 0, 0 },
1606 { { /*src2 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1607 { /*src1 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1608 { /* => */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1609 /*mask */ X86_MXCSR_XCPT_MASK,
1610 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1611 /*flags */ 0, 0 },
1612 /*
1613 * Infinity.
1614 */
1615 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1616 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1617 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1618 /*mask */ ~X86_MXCSR_IM,
1619 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1620 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1621 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1622 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1623 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1624 /*mask */ ~X86_MXCSR_IM,
1625 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1626 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1627 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1628 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1629 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1630 /*mask */ ~X86_MXCSR_IM,
1631 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1632 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1633 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1634 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1635 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1636 /*mask */ X86_MXCSR_XCPT_MASK,
1637 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1638 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1639 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1640 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1641 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1642 /*mask */ X86_MXCSR_XCPT_MASK,
1643 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1644 /*flags */ 0, X86_MXCSR_IE },
1645 /*
1646 * Overflow, Precision.
1647 */
1648 /*10*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1649 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1650 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1651 /*mask */ ~X86_MXCSR_XCPT_MASK,
1652 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1653 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1654 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1655 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1656 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1657 /*mask */ ~X86_MXCSR_XCPT_MASK,
1658 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1659 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1660 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1661 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1662 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_INF(0), } },
1663 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1664 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1665 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1666 { { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
1667 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0) } },
1668 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1669 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1670 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1671 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1672 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1673 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1674 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1675 /*mask */ X86_MXCSR_XCPT_MASK,
1676 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1677 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1678 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1679 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1680 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX + 1) } },
1681 /*mask */ ~X86_MXCSR_XCPT_MASK,
1682 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1683 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
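1684 /** @todo Why does the below one cause PE?!  NOTE(review): most likely because src1[0] is encoded with exponent 0x07d (the FP32-style biased exponent; FP64 0.25 is written with 0x3fd elsewhere in this table), so it is roughly -2^-898 rather than -0.25; the sum is then inexact and, with RC_DOWN, rounds to 1.75 - 1ulp (fraction 0xbffffffffffff) instead of 1.50 - confirm and correct the test value. */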
1685 { { /*src2 */ { BS3_FP64_VAL(0, 0xc000000000000, 0x3ff)/* 1.75*/, BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/ } },
1686 { /*src1 */ { BS3_FP64_VAL(1, 0, 0x07d)/*-0.25*/, BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/ } },
1687 { /* => */ { BS3_FP64_VAL(0, 0xbffffffffffff, 0x3ff)/* 1.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/ } },
1688 /*mask */ X86_MXCSR_XCPT_MASK,
1689 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1690 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1691 /*
1692 * Normals.
1693 */
1694 /*17*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1695 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1696 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1697 /*mask */ ~X86_MXCSR_XCPT_MASK,
1698 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1699 /*flags */ 0, 0 },
1700 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1701 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1702 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1703 /*mask */ X86_MXCSR_XCPT_MASK,
1704 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1705 /*flags */ 0, 0 },
1706 { { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
1707 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
1708 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
1709 /*mask */ ~X86_MXCSR_XCPT_MASK,
1710 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1711 /*flags */ 0, 0 },
1712 { { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1713 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1714 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1715 /*mask */ X86_MXCSR_XCPT_MASK,
1716 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1717 /*flags */ 0, 0 },
1718 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1719 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1720 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1721 /*mask */ ~X86_MXCSR_XCPT_MASK,
1722 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1723 /*flags */ 0, 0 },
1724 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1725 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1726 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_VAL(0, 0, 2) } },
1727 /*mask */ ~X86_MXCSR_XCPT_MASK,
1728 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1729 /*flags */ 0, 0 },
1730 { { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1731 { /*src1 */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1732 { /* => */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646.00*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, 0, 2) } },
1733 /*mask */ X86_MXCSR_XCPT_MASK,
1734 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1735 /*flags */ 0, 0 },
1736 /*
1737 * Denormals.
1738 */
1739 /*24*/{ { /*src2 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1740 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1741 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1742 /*mask */ ~X86_MXCSR_XCPT_MASK,
1743 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1744 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1745 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1746 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1747 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1748 /*mask */ X86_MXCSR_XCPT_MASK,
1749 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1750 /*flags */ 0, 0 },
1751 { { /*src2 */ { BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MAX(0) } },
1752 { /*src1 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0) } },
1753 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1754 /*mask */ X86_MXCSR_XCPT_MASK,
1755 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1756 /*flags */ 0, 0 },
1757 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
1758 };
1759
1760 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1761 {
1762 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1763 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1764
1765 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1766 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1767
1768 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1769 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1770 };
1771 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1772 {
1773 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1774 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1775
1776 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1777 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1778
1779 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1780 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1781 };
1782 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1783 {
1784 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1785 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1786
1787 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1788 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1789
1790 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1791 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1792
1793 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1794 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1795
1796 { bs3CpuInstrX_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1797 { bs3CpuInstrX_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1798 };
1799
1800 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1801 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1802 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1803 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1804}
1805
1806
1807/**
1808 * The 32-bit protected mode main function.
1809 *
1810 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1811 * we'll switch between PE32 and RM for each test step we perform). Given that
1812 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1813 *
1814 * Some extra steps needs to be taken to properly handle extended state in LM64
1815 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1816 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1817 */
1818BS3_DECL(void) Main_pe32()
1819{
1820 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1821 {
1822#if 1 /*ndef DEBUG_bird*/
1823# define ALL_TESTS
1824#endif
1825#if defined(ALL_TESTS)
1826 { "[v]addps", bs3CpuInstrX_v_addps, 0 },
1827 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1828#endif
1829 };
1830 Bs3TestInit("bs3-cpu-instr-4");
1831
1832 /*
1833 * Initialize globals.
1834 */
1835 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1836 {
1837 uint32_t fEbx, fEcx, fEdx;
1838 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1839 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1840 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1841 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1842 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1843 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1844 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1845 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1846 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1847 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1848 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1849 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1850 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1851 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1852 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1853 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1854
1855 if (ASMCpuId_EAX(0) >= 7)
1856 {
1857 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1858 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1859 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1860 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1861 }
1862
1863 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1864 {
1865 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1866 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1867 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1868 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1869 }
1870 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1871
1872 /*
1873 * Figure out FPU save/restore method and support for DAZ bit.
1874 */
1875 {
1876 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1877 * alloc/free a context. Replicating the logic in the bs3kit here, though
1878 * doable, runs a risk of not updating this when the other logic is
1879 * changed. */
1880 uint64_t fFlags;
1881 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1882 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1883 if (pExtCtx)
1884 {
1885 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1886 g_enmExtCtxMethod = pExtCtx->enmMethod;
1887 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1888 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1889 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1890 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1891 g_fMxCsrDazSupported = true;
1892 }
1893 else
1894 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1895 }
1896
1897 /*
1898 * Allocate a buffer for testing.
1899 */
1900 g_cbBuf = X86_PAGE_SIZE * 4;
1901 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1902 if (g_pbBuf)
1903 {
1904 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1905 if (g_pbBufAliasAlloc)
1906 {
1907 /*
1908 * Do the tests.
1909 */
1910 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1911#ifdef BS3_SKIPIT_DO_SKIP
1912 bs3CpuInstrX_ShowTallies();
1913#endif
1914 }
1915 else
1916 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1917 }
1918 else
1919 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1920 }
1921
1922 Bs3TestTerm();
1923}
1924
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette