VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104866

Last change on this file since 104866 was 104866, checked in by vboxsync, 8 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: [v]addps and other nits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 104.0 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104866 2024-06-07 09:27:39Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code, 1 for 32-bit code and 2 otherwise.  The macro
 * argument is what gets evaluated (it may be evaluated twice); it does not
 * depend on a variable called 'bMode' being in scope at the point of use. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)

/** Maximum length for the names of all SIMD FP exception flags combined
 * (includes the string terminator). */
#define BS3_FP_XCPT_NAMES_MAXLEN            sizeof(" IE DE ZE OE UE PE ")
57
/*
 * Single-precision (32 bits) floating-point defines.
 *
 * NOTE(review): the fraction constants are deliberately left as bare literals;
 * they are handed to RTFLOAT32U_INIT_C, which may append an integer suffix to
 * its fraction argument - confirm before wrapping them in parentheses.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX             254
/** The min exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MIN             1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX        0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN        0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                   RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS              RTFLOAT32U_FRACTION_BITS
/** The max exponent value for a single-precision floating-point integer without
 * losing precision.  Parenthesized so the sum stays intact when the macro is
 * used inside a larger expression. */
#define BS3_FP32_EXP_SAFE_INT_MAX           (BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
/** The min exponent value for a single-precision floating-point integer without
 * losing precision. */
#define BS3_FP32_EXP_SAFE_INT_MIN           1

/* Initializer shorthands; a_Sign is the sign bit (0 or 1). */
#define BS3_FP32_NORMAL_MAX(a_Sign)         RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
#define BS3_FP32_NORMAL_MIN(a_Sign)         RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
#define BS3_FP32_ZERO(a_Sign)               RTFLOAT32U_INIT_ZERO(a_Sign)
#define BS3_FP32_ONE(a_Sign)                RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp) RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP32_INF(a_Sign)                RTFLOAT32U_INIT_INF(a_Sign)
#define BS3_FP32_QNAN(a_Sign)               RTFLOAT32U_INIT_QNAN(a_Sign)
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)    RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP32_SNAN(a_Sign)               RTFLOAT32U_INIT_SNAN(a_Sign)

/*
 * Single-precision floating normals.
 * Fraction - 23 bits, all usable.
 * Exponent - 8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/* The maximum integer value (all 23 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX)
/* The minimum integer value without losing precision. */
#define BS3_FP32_NORMAL_SAFE_INT_MIN(a_Sign) RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_SAFE_INT_MIN)
102
/*
 * Double-precision (64 bits) floating-point defines.
 *
 * NOTE(review): the fraction constants are deliberately left as bare literals;
 * they are handed to RTFLOAT64U_INIT_C, which may append an integer suffix to
 * its fraction argument - confirm before wrapping them in parentheses.
 */
/** The max exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX             2046
/** The min exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN             1
/** The max fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX        0xfffffffffffff
/** The min fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN        0
/** The exponent bias for the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                   RTFLOAT64U_EXP_BIAS
/** Fraction width (in bits) for the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS              RTFLOAT64U_FRACTION_BITS
/** The max exponent value for a double-precision floating-point integer without
 * losing precision.  Parenthesized so the sum stays intact when the macro is
 * used inside a larger expression. */
#define BS3_FP64_EXP_SAFE_INT_MAX           (BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
/** The min exponent value for a double-precision floating-point integer without
 * losing precision. */
#define BS3_FP64_EXP_SAFE_INT_MIN           1
/** The max fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MAX      0xfffffffffffff
/** The min fraction value for a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MIN      1

/* Initializer shorthands; a_Sign is the sign bit (0 or 1). */
#define BS3_FP64_NORMAL_MAX(a_Sign)         RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
#define BS3_FP64_NORMAL_MIN(a_Sign)         RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
#define BS3_FP64_ZERO(a_Sign)               RTFLOAT64U_INIT_ZERO(a_Sign)
#define BS3_FP64_ONE(a_Sign)                RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp) RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP64_INF(a_Sign)                RTFLOAT64U_INIT_INF(a_Sign)
#define BS3_FP64_QNAN(a_Sign)               RTFLOAT64U_INIT_QNAN(a_Sign)
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)    RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP64_SNAN(a_Sign)               RTFLOAT64U_INIT_SNAN(a_Sign)
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)    RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)

/*
 * Double-precision floating normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): 0xffe needs 12 bits, which does not fit the 11-bit exponent
   described above (normal max is BS3_FP64_EXP_NORMAL_MAX = 0x7fe) - confirm
   whether 0x7fe was intended. */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
/* The maximum integer value (all 52 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX)
/* The minimum integer value without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MIN(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_SAFE_INT_MIN)
/** The maximum denormal value. */
#define BS3_FP64_DENORMAL_MAX(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value. */
#define BS3_FP64_DENORMAL_MIN(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MIN, 0)
156
157
158/*********************************************************************************************************************************
159* Structures and Typedefs *
160*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * Values are consecutive starting at zero; T_128BITS and T_256BITS are
 * aliases that do not consume a value of their own.  T_MAX is the number of
 * distinct values and is used to size g_afTypeSupports.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID = 0,
    T_MMX,
    T_MMX_SSE,      /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,     /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,    /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_128BITS = T_SSE,      /**< Alias of T_SSE. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_256BITS = T_AVX_256,  /**< Alias of T_AVX_256. */
    T_AVX2_256,
    T_MAX                   /**< Number of types; not a valid type itself. */
} BS3CPUINSTRX_INSTRTYPE_T;
189
/**
 * Memory or register rm variant.
 *
 * The sized RM_MEMxx variants name the memory operand width in bits;
 * bs3CpuInstrXMemOpSize maps them to sizeof(uint8_t) .. sizeof(uint64_t).
 */
enum {
    RM_REG = 0, /**< Register operand. */
    RM_MEM,     /**< Memory operand of the full operand size. */
    RM_MEM8,    /**< Memory operand is 8 bits (1 byte).  Hack for movss and similar. */
    RM_MEM16,   /**< Memory operand is 16 bits (2 bytes).  Hack for movss and similar. */
    RM_MEM32,   /**< Memory operand is 32 bits (4 bytes).  Hack for movss and similar. */
    RM_MEM64    /**< Memory operand is 64 bits (8 bytes).  Hack for movss and similar. */
};
199
200/**
201 * Execution environment configuration.
202 */
203typedef struct BS3CPUINSTR4_CONFIG_T
204{
205 uint16_t fCr0Mp : 1;
206 uint16_t fCr0Em : 1;
207 uint16_t fCr0Ts : 1;
208 uint16_t fCr4OsFxSR : 1;
209 uint16_t fCr4OsXSave : 1;
210 uint16_t fCr4OsXmmExcpt : 1;
211 uint16_t fXcr0Sse : 1;
212 uint16_t fXcr0Avx : 1;
213 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
214 uint16_t fAlignCheck : 1;
215 uint16_t fMxCsrMM : 1; /**< AMD only */
216 uint8_t bXcptSse;
217 uint8_t bXcptAvx;
218} BS3CPUINSTR4_CONFIG_T;
219/** Pointer to an execution environment configuration. */
220typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
221
222/** State saved by bs3CpuInstr4ConfigReconfigure. */
223typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
224{
225 uint32_t uCr0;
226 uint32_t uCr4;
227 uint32_t uEfl;
228 uint16_t uFcw;
229 uint16_t uFsw;
230 uint32_t uMxCsr;
231} BS3CPUINSTRX_CONFIG_SAVED_T;
232typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
233typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
234
235/**
236 * YMM packed single-precision floating-point register.
237 * @todo move to x86.h?
238 */
239typedef union X86YMMFLOATPSREG
240{
241 /** Packed single-precision floating-point view. */
242 RTFLOAT32U ar32[8];
243 /** 256-bit integer view. */
244 RTUINT256U ymm;
245} X86YMMFLOATPSREG;
246# ifndef VBOX_FOR_DTRACE_LIB
247AssertCompileSize(X86YMMFLOATPSREG, 32);
248AssertCompileSize(X86YMMFLOATPSREG, sizeof(X86YMMREG));
249# endif
250/** Pointer to a YMM packed single-precision floating-point register. */
251typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
252/** Pointer to a const YMM single-precision packed floating-point register. */
253typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
254
255/**
256 * YMM packed double-precision floating-point register.
257 * @todo move to x86.h?
258 */
259typedef union X86YMMFLOATPDREG
260{
261 /** Packed double-precision floating-point view. */
262 RTFLOAT64U ar64[4];
263 /** 256-bit integer view. */
264 RTUINT256U ymm;
265} X86YMMFLOATPDREG;
266# ifndef VBOX_FOR_DTRACE_LIB
267AssertCompileSize(X86YMMFLOATPDREG, 32);
268AssertCompileSize(X86YMMFLOATPDREG, sizeof(X86YMMREG));
269# endif
270/** Pointer to a YMM packed floating-point register. */
271typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
272/** Pointer to a const YMM packed floating-point register. */
273typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
274
275/**
276 * YMM scalar quadruple-precision floating-point register.
277 * @todo move to x86.h?
278 */
279typedef union X86YMMFLOATSQREG
280{
281 /** Scalar quadruple-precision floating point view. */
282 RTFLOAT128U ar128[2];
283 /** 256-bit integer view. */
284 RTUINT256U ymm;
285} X86YMMFLOATSQREG;
286# ifndef VBOX_FOR_DTRACE_LIB
287AssertCompileSize(X86YMMFLOATSQREG, 32);
288AssertCompileSize(X86YMMFLOATSQREG, sizeof(X86YMMREG));
289# endif
290/** Pointer to a YMM scalar quadruple-precision floating-point register. */
291typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
292/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
293typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
294
295
296/*********************************************************************************************************************************
297* Global Variables *
298*********************************************************************************************************************************/
299static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
300static bool g_fAmdMisalignedSse = false;
301static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
302static bool g_fMxCsrDazSupported = false;
303
304/** Zero value (indexed by fSign). */
305RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
306RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
307
308/** One value (indexed by fSign). */
309RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
310 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
311RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
312 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
313
314/** Infinity (indexed by fSign). */
315RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
316RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
317
318/** Default QNaNs (indexed by fSign). */
319RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
320RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
321
322/** Size of g_pbBuf - at least three pages. */
323static uint32_t g_cbBuf;
324/** Buffer of g_cbBuf size. */
325static uint8_t BS3_FAR *g_pbBuf;
326/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
327static uint8_t BS3_FAR *g_pbBufAlias;
328/** RW alias for the memory at g_pbBuf. */
329static uint8_t BS3_FAR *g_pbBufAliasAlloc;
330
331/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
332static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
333{
334/*
335 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
336 * +AVX +AVX +AMD/SSE +AMD/SSE
337 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
338 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
339 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
340 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
341 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
342 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
343 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
344 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
345 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
346 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
347 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
348 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
349 /* Memory misalignment and alignment checks: */
350 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
351 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
352 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
353 /* AMD only: */
354 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
355 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
356};
357
358
359
360/**
361 * Returns the name of an X86 exception given the vector.
362 *
363 * @returns Name of the exception.
364 * @param uVector The exception vector.
365 */
366static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
367{
368 switch (uVector)
369 {
370 case X86_XCPT_DE: return "#DE";
371 case X86_XCPT_DB: return "#DB";
372 case X86_XCPT_NMI: return "#NMI";
373 case X86_XCPT_BP: return "#BP";
374 case X86_XCPT_OF: return "#OF";
375 case X86_XCPT_BR: return "#BR";
376 case X86_XCPT_UD: return "#UD";
377 case X86_XCPT_NM: return "#NM";
378 case X86_XCPT_DF: return "#DF";
379 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
380 case X86_XCPT_TS: return "#TS";
381 case X86_XCPT_NP: return "#NP";
382 case X86_XCPT_SS: return "#SS";
383 case X86_XCPT_GP: return "#GP";
384 case X86_XCPT_PF: return "#PF";
385 case X86_XCPT_MF: return "#MF";
386 case X86_XCPT_AC: return "#AC";
387 case X86_XCPT_MC: return "#MC";
388 case X86_XCPT_XF: return "#XF";
389 case X86_XCPT_VE: return "#VE";
390 case X86_XCPT_CP: return "#CP";
391 case X86_XCPT_VC: return "#VC";
392 case X86_XCPT_SX: return "#SX";
393 }
394 return "UNKNOWN";
395}
396
397
398/**
399 * Gets the names of floating-point exception flags that are set for a given MXCSR.
400 *
401 * @returns Names of floating-point exception flags that are set.
402 * @param pszBuf Where to store the floating-point exception flags.
403 * @param cchBuf The size of the buffer.
404 * @param fMxCsr The MXCSR value.
405 */
406static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
407{
408 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
409 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
410 return Bs3StrPrintf(pszBuf, cchBuf, " None");
411 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
412 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
413 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
414}
415
416
417/**
418 * Reconfigures the execution environment according to @a pConfig.
419 *
420 * Call bs3CpuInstrXConfigRestore to undo the changes.
421 *
422 * @returns true on success, false if the configuration cannot be applied. In
423 * the latter case, no context changes are made.
424 * @param pSavedCfg Where to save state we modify.
425 * @param pCtx The register context to modify.
426 * @param pExtCtx The extended register context to modify.
427 * @param pConfig The configuration to apply.
428 * @param bMode The target mode.
429 */
430static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
431 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
432{
433 /*
434 * Save context bits we may change here
435 */
436 pSavedCfg->uCr0 = pCtx->cr0.u32;
437 pSavedCfg->uCr4 = pCtx->cr4.u32;
438 pSavedCfg->uEfl = pCtx->rflags.u32;
439 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
440 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
441 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
442
443 /*
444 * Can we make these changes?
445 */
446 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
447 return false;
448
449 /*
450 * Modify the test context.
451 */
452 if (pConfig->fCr0Mp)
453 pCtx->cr0.u32 |= X86_CR0_MP;
454 else
455 pCtx->cr0.u32 &= ~X86_CR0_MP;
456 if (pConfig->fCr0Em)
457 pCtx->cr0.u32 |= X86_CR0_EM;
458 else
459 pCtx->cr0.u32 &= ~X86_CR0_EM;
460 if (pConfig->fCr0Ts)
461 pCtx->cr0.u32 |= X86_CR0_TS;
462 else
463 pCtx->cr0.u32 &= ~X86_CR0_TS;
464
465 if (pConfig->fCr4OsFxSR)
466 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
467 else
468 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
469
470 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
471 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
472 else
473 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
474
475 if (pConfig->fCr4OsFxSR)
476 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
477 else
478 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
479
480 if (pConfig->fCr4OsXSave)
481 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
482 else
483 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
484
485 if (pConfig->fXcr0Sse)
486 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
487 else
488 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
489 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
490 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
491 else
492 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
493
494 if (pConfig->fAlignCheck)
495 {
496 pCtx->rflags.u32 |= X86_EFL_AC;
497 pCtx->cr0.u32 |= X86_CR0_AM;
498 }
499 else
500 {
501 pCtx->rflags.u32 &= ~X86_EFL_AC;
502 pCtx->cr0.u32 &= ~X86_CR0_AM;
503 }
504
505 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
506 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
507
508 if (pConfig->fMxCsrMM)
509 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
510 else
511 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
512 return true;
513}
514
515
516/**
517 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
518 */
519static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
520{
521 pCtx->cr0.u32 = pSavedCfg->uCr0;
522 pCtx->cr4.u32 = pSavedCfg->uCr4;
523 pCtx->rflags.u32 = pSavedCfg->uEfl;
524 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
525 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
526 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
527 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
528}
529
530
531/**
532 * Allocates three extended CPU contexts and initializes the first one
533 * with random data.
534 * @returns First extended context, initialized with randomish data. NULL on
535 * failure (complained).
536 * @param ppExtCtx2 Where to return the 2nd context.
537 */
538static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
539{
540 /* Allocate extended context structures. */
541 uint64_t fFlags;
542 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
543 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
544 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
545 if (pExtCtx1)
546 {
547 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
548 /** @todo populate with semi-random stuff. */
549
550 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
551 *ppExtCtx2 = pExtCtx2;
552 return pExtCtx1;
553 }
554 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
555 *ppExtCtx2 = NULL;
556 return NULL;
557}
558
559
560/**
561 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
562 *
563 * @param pExtCtx1 The first extended context.
564 * @param pExtCtx2 The second extended context.
565 */
566static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
567{
568 RT_NOREF_PV(pExtCtx2);
569 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
570}
571
572
573/**
574 * Sets up SSE and AVX bits relevant for FPU instructions.
575 */
576static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
577{
578 /* CR0: */
579 uint32_t cr0 = Bs3RegGetCr0();
580 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
581 cr0 |= X86_CR0_NE;
582 Bs3RegSetCr0(cr0);
583
584 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
585 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
586 pCtx->cr0.u32 |= X86_CR0_NE;
587
588 /* CR4: */
589 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
590 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
591 {
592 uint32_t cr4 = Bs3RegGetCr4();
593 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
594 {
595 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
596 Bs3RegSetCr4(cr4);
597 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
598 }
599 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
600 {
601 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
602 Bs3RegSetCr4(cr4);
603 }
604 pCtx->cr4.u32 = cr4;
605 }
606}
607
608
609/**
610 * Configures the buffer with electric fences in paged modes.
611 *
612 * @returns Adjusted buffer pointer.
613 * @param pbBuf The buffer pointer.
614 * @param pcbBuf Pointer to the buffer size (input & output).
615 * @param bMode The testing target mode.
616 */
617DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
618{
619 if (BS3_MODE_IS_PAGED(bMode))
620 {
621 int rc;
622 uint32_t cbBuf = *pcbBuf;
623 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
624 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
625 pbBuf += X86_PAGE_SIZE;
626 cbBuf -= X86_PAGE_SIZE * 2;
627 *pcbBuf = cbBuf;
628
629 g_pbBufAlias = g_pbBufAliasAlloc;
630 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
631 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
632 if (RT_FAILURE(rc))
633 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
634 }
635 else
636 g_pbBufAlias = pbBuf;
637 return pbBuf;
638}
639
640
641/**
642 * Undoes what bs3CpuInstrXBufSetup did.
643 *
644 * @param pbBuf The buffer pointer.
645 * @param cbBuf The buffer size.
646 * @param bMode The testing target mode.
647 */
648DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
649{
650 if (BS3_MODE_IS_PAGED(bMode))
651 {
652 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
653 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
654 }
655}
656
657
658/**
659 * Gets a buffer of a @a cbMemOp sized operand according to the given
660 * configuration and alignment restrictions.
661 *
662 * @returns Pointer to the buffer.
663 * @param pbBuf The buffer pointer.
664 * @param cbBuf The buffer size.
665 * @param cbMemOp The operand size.
666 * @param cbAlign The operand alignment restriction.
667 * @param pConfig The configuration.
668 * @param fPageFault The \#PF test setting.
669 */
670DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
671 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
672{
673 /* All allocations are at the tail end of the buffer, so that we've got a
674 guard page following the operand. When asked to consistenly trigger
675 a #PF, we slide the buffer into that guard page. */
676 if (fPageFault)
677 cbBuf += X86_PAGE_SIZE;
678
679 if (pConfig->fAligned)
680 {
681 if (!pConfig->fAlignCheck)
682 return &pbBuf[cbBuf - cbMemOp];
683 return &pbBuf[cbBuf - cbMemOp - cbAlign];
684 }
685 return &pbBuf[cbBuf - cbMemOp - 1];
686}
687
688
689/**
690 * Determines the size of memory operands.
691 */
692DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
693{
694 if (enmRm <= RM_MEM)
695 return cbOperand;
696 if (enmRm == RM_MEM8)
697 return sizeof(uint8_t);
698 if (enmRm == RM_MEM16)
699 return sizeof(uint16_t);
700 if (enmRm == RM_MEM32)
701 return sizeof(uint32_t);
702 if (enmRm == RM_MEM64)
703 return sizeof(uint64_t);
704 BS3_ASSERT(0);
705 return cbOperand;
706}
707
708
709/*
710 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
711 * skips a large fraction of the micro-tests. It is sufficiently random
712 * that over a large number of runs, all micro-tests will be hit.
713 *
714 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
715 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
716 * (on an Intel Core i7-10700, fwiw).
717 *
718 * To activate this 'developer's speed-testing mode', turn on
719 * `#define BS3_SKIPIT_DO_SKIP' here.
720 *
721 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
722 * skipped in a row; e.g. the default of 26 means about every 27th
723 * micro-test is run during a particular test run. (This is not 27x
724 * faster due to other activities which are not skipped!) Note this is
725 * only an average; the actual skips are random.
726 *
727 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
728 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
729 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
730 * 3% performance).
731 *
732 * Note! The skipping is not compatible with testing the native recompiler as
733 * it requires the test code to be run a number of times before it kicks
734 * in and does the native recompilation (currently around 16 times).
735 */
736#define BS3_SKIPIT_AVG_SKIP 26
737#define BS3_SKIPIT_REPORT_COUNT 150000
738#undef BS3_SKIPIT_DO_SKIP
739#undef BS3_SKIPIT_DO_ARGS
740
741#ifndef BS3_SKIPIT_DO_SKIP
742# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
743#else
744# include <iprt/asm-amd64-x86.h>
745# include <iprt/asm-math.h>
746
747DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
748{
749 /*
750 * A simple Lehmer linear congruential pseudo-random number
751 * generator using the constants suggested by Park & Miller:
752 *
753 * modulus = 2^31 - 1 (INT32_MAX)
754 * multiplier = 7^5 (16807)
755 *
756 * It produces numbers in the range [1..INT32_MAX-1] and is
757 * more chaotic in the higher bits.
758 *
759 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
760 * though the zero handling is different.
761 */
762 static uint32_t s_uSeedMemory = 0;
763 uint32_t uVal = s_uSeedMemory;
764 if (!uVal)
765 uVal = (uint32_t)ASMReadTSC();
766 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
767 s_uSeedMemory = uVal;
768 return uVal;
769}
770
/** Number of micro-tests seen / skipped so far by bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen, g_cSkipped;

/**
 * Prints how many micro-tests were seen, actually run and skipped so far.
 *
 * The counters are unsigned, so they are formatted with an unsigned
 * conversion.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    Bs3TestPrintf("Micro-tests %u: tested %u / skipped %u\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}
777
778# ifdef BS3_SKIPIT_DO_ARGS
779# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
780static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
781# else
782# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
783static bool bs3CpuInstrX_SkipIt(void)
784# endif
785{
786 static unsigned s_uTimes = 0;
787 bool fSkip;
788
789 /* Cache calls to the relatively expensive random routine */
790 if (!s_uTimes)
791 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
792 fSkip = --s_uTimes > 0;
793 if (fSkip)
794 ++g_cSkipped;
795
796 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
797 bs3CpuInstrX_ShowTallies();
798 return fSkip;
799}
800
801#endif /* BS3_SKIPIT_DO_SKIP */
802
803/*
804 * Test type #1.
805 * Generic YMM registers.
806 */
807typedef struct BS3CPUINSTR4_TEST1_VALUES_T
808{
809 X86YMMREG uSrc2; /**< Second source operand. */
810 X86YMMREG uSrc1; /**< uDstIn for SSE */
811 X86YMMREG uDstOut; /**< Destination output. */
812 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
813 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
814 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
815 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
816 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
817 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
818} BS3CPUINSTR4_TEST1_VALUES_T;
819
820/*
821 * Test type #1.
822 * Packed single-precision.
823 */
824typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
825{
826 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
827 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
828 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
829 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
830 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
831 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
832 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
833 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
834 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
835} BS3CPUINSTR4_TEST1_VALUES_PS_T;
836AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
837AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
838AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
839AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
840AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
841AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
842AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
843AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
844AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
845AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
846
847/*
848 * Test type #1.
849 * Packed double-precision.
850 */
851typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
852{
853 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
854 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
855 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
856 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
857 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
858 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
859 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
860 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
861 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
862} BS3CPUINSTR4_TEST1_VALUES_PD_T;
863AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
864AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
865AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
866AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
867AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
868AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
869AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
870AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
871AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
872AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
873
874/*
875 * Test type #1.
876 * Scalar quadruple-precision.
877 */
878typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
879{
880 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
881 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
882 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
883 uint32_t fMxCsrMask; /**< MXCSR exception mask. */
884 uint32_t fDenormalsAreZero; /**< DAZ (Denormals-Are-Zero) exception mask. */
885 uint32_t fFlushToZero; /**< Flush-To-Zero (FZ) exception mask. */
886 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
887 uint32_t f128ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 128-bit instruction. */
888 uint32_t f256ExpectedMxCsrFlags; /**< Expected MXCSR exception flags for 256-bit instructions. */
889} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
890AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
891AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
892AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
893AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
894AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
895AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
896AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
897AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
898AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f128ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f128ExpectedMxCsrFlags);
899AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, f256ExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, f256ExpectedMxCsrFlags);
900
901typedef struct BS3CPUINSTR4_TEST1_T
902{
903 FPFNBS3FAR pfnWorker; /**< Test function worker. */
904 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
905 uint8_t enmRm; /**< R/M type. */
906 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
907 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
908 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
909 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
910 uint8_t cValues; /**< Number of test values in @c paValues. */
911 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
912} BS3CPUINSTR4_TEST1_T;
913
914typedef struct BS3CPUINSTR4_TEST1_MODE_T
915{
916 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
917 unsigned cTests;
918} BS3CPUINSTR4_TEST1_MODE_T;
919
920/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
921#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
922 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
923
924typedef struct BS3CPUINSTR4_TEST1_CTX_T
925{
926 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
927 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
928 unsigned iVal; /**< Which iteration of the test value is this. */
929 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
930 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
931 PBS3REGCTX pCtx; /**< The general-purpose register context. */
932 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
933 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
934 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
935 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
936 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
937 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
938 uint8_t cbInstr; /**< Size of the instruction opcode. */
939 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
940 bool fSseInstr; /**< Whether this is an SSE instruction. */
941 bool fAvxInstr; /**< Whether this is an AVX instruction. */
942 uint16_t idTestStep; /**< The test iteration step. */
943} BS3CPUINSTR4_TEST1_CTX_T;
944/** Pointer to a test 1 context. */
945typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
946
947
948/**
949 * Worker for bs3CpuInstrX_WorkerTestType1.
950 */
951static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
952 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
953{
954 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
955 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
956 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
957 PBS3REGCTX pCtx = pTestCtx->pCtx;
958 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
959 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
960 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
961 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
962 uint8_t cbMemOp = pTestCtx->cbMemOp;
963 uint8_t const cbOperand = pTestCtx->cbOperand;
964 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
965 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
966 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
967 uint32_t const fExpectedMxCsrFlags = pTestCtx->cbOperand > 16 ? pValues->f256ExpectedMxCsrFlags
968 : pValues->f128ExpectedMxCsrFlags;
969 bool const fFpFlagsExpect = RT_BOOL( (fExpectedMxCsrFlags
970 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
971 uint32_t uMxCsr;
972 X86YMMREG MemOpExpect;
973 uint16_t cErrors;
974
975 /*
976 * Set up the context and some expectations.
977 */
978 /* Destination. */
979 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
980 if (pTest->iRegDst == UINT8_MAX)
981 {
982 BS3_ASSERT(pTest->enmRm >= RM_MEM);
983 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
984 if (bXcptExpect == X86_XCPT_DB)
985 MemOpExpect.ymm = pValues->uDstOut.ymm;
986 else
987 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
988 }
989
990 /* Source #1 (/ destination for SSE). */
991 if (pTest->iRegSrc1 == UINT8_MAX)
992 {
993 BS3_ASSERT(pTest->enmRm >= RM_MEM);
994 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
995 if (pTest->iRegDst == UINT8_MAX)
996 BS3_ASSERT(pTestCtx->fSseInstr);
997 else
998 MemOpExpect.ymm = pValues->uSrc1.ymm;
999 }
1000 else if (pTestCtx->fSseInstr)
1001 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
1002 else
1003 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
1004
1005 /* Source #2. */
1006 if (pTest->iRegSrc2 == UINT8_MAX)
1007 {
1008 BS3_ASSERT(pTest->enmRm >= RM_MEM);
1009 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
1010 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
1011 MemOpExpect.ymm = pValues->uSrc2.ymm;
1012 }
1013 else if (pTestCtx->fSseInstr)
1014 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
1015 else
1016 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
1017
1018 /* Memory pointer. */
1019 if (pTest->enmRm >= RM_MEM)
1020 {
1021 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
1022 || pTest->iRegSrc1 == UINT8_MAX
1023 || pTest->iRegSrc2 == UINT8_MAX);
1024 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
1025 }
1026
1027 /* Setup MXCSR for the current test. */
1028 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
1029 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
1030 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
1031 if ( pValues->fDenormalsAreZero == X86_MXCSR_DAZ
1032 && g_fMxCsrDazSupported)
1033 uMxCsr |= X86_MXCSR_DAZ;
1034 if (pValues->fFlushToZero == X86_MXCSR_FZ)
1035 uMxCsr |= X86_MXCSR_FZ;
1036 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1037
1038 /*
1039 * Prepare globals and execute.
1040 */
1041 g_uBs3TrapEipHint = pCtx->rip.u32;
1042 if ( bXcptExpect == X86_XCPT_DB
1043 && !fFpFlagsExpect)
1044 g_uBs3TrapEipHint += cbInstr + 1;
1045 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1046
1047 /*
1048 * Check the result.
1049 *
1050 * If a floating-point exception is expected, the destination is not updated by the instruction.
1051 * In the case of SSE instructions, updating the destination here will work because it is the same
1052 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1053 */
1054 cErrors = Bs3TestSubErrorCount();
1055 if ( bXcptExpect == X86_XCPT_DB
1056 && !fFpFlagsExpect
1057 && pTest->iRegDst != UINT8_MAX)
1058 {
1059 if (pTestCtx->fSseInstr)
1060 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1061 else
1062 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1063 }
1064#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1065 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1066 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1067 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1068 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1069#endif
1070 if (bXcptExpect == X86_XCPT_DB)
1071 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1072 | (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1073 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1074
1075 if (bXcptExpect == X86_XCPT_DB)
1076 {
1077 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1078
1079 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1080 if (fMxCsrXcptFlags != (fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1081 {
1082 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1083 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1084 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), fExpectedMxCsrFlags);
1085 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1086 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1087 }
1088
1089 /* Check if the SIMD FP exception (or lack of) is as expected. */
1090 if (fFpFlagsExpect)
1091 {
1092 if (pTrapFrame->bXcpt == bFpXcpt)
1093 { /* likely */ }
1094 else
1095 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1096 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1097 }
1098 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1099 { /* likely */ }
1100 else
1101 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1102 }
1103 /* Check if non-FP exception is as expected. */
1104 else if (pTrapFrame->bXcpt != bXcptExpect)
1105 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1106
1107 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1108 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1109 {
1110 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1111 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1112 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1113 }
1114 if (bXcptExpect == X86_XCPT_PF)
1115 pCtx->cr2.u = (uintptr_t)puMemOp;
1116 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1117 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1118 pTestCtx->pszMode, pTestCtx->idTestStep);
1119 pCtx->cr2.u = 0;
1120
1121 if ( pTest->enmRm >= RM_MEM
1122 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1123 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1124
1125 return cErrors;
1126}
1127
1128
1129/**
1130 * Test type #1 worker.
1131 */
1132static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1133 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1134{
1135 BS3REGCTX Ctx;
1136 BS3TRAPFRAME TrapFrame;
1137 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1138 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1139 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1140 uint32_t cbBuf = g_cbBuf;
1141 PBS3EXTCTX pExtCtxOut;
1142 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1143 if (pExtCtx)
1144 { /* likely */ }
1145 else
1146 return 0;
1147 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1148 { /* likely */ }
1149 else
1150 {
1151 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1152 return 0;
1153 }
1154
1155 /* Ensure the structures are allocated before we sample the stack pointer. */
1156 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1157 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1158
1159 /*
1160 * Create test context.
1161 */
1162 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1163 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1164 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1165
1166 /*
1167 * Run the tests in all rings since alignment issues may behave
1168 * differently in ring-3 compared to ring-0.
1169 */
1170 for (;;)
1171 {
1172 unsigned fPf = 0;
1173 do
1174 {
1175 unsigned iCfg;
1176 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1177 {
1178 unsigned iTest;
1179 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1180 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1181 continue; /* unsupported config */
1182
1183 /*
1184 * Iterate the tests.
1185 */
1186 for (iTest = 0; iTest < cTests; iTest++)
1187 {
1188 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1189 unsigned const cValues = pTest->cValues;
1190 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1191 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1192 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1193 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1194 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1195 uint8_t const cbAlign = cbMemOp;
1196 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1197 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1198 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1199 : fSseInstr ? paConfigs[iCfg].bXcptSse
1200 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1201 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1202 unsigned cRecompRuns = 0;
1203 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1204 unsigned iVal;
1205
1206 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1207 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1208 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1209 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1210 continue;
1211
1212 /* #AC is only raised in ring-3. */
1213 if (bXcptExpect == X86_XCPT_AC)
1214 {
1215 if (bRing != 3)
1216 bXcptExpect = X86_XCPT_DB;
1217 else if (fAvxInstr)
1218 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1219 }
1220
1221 if (fPf && bXcptExpect == X86_XCPT_DB)
1222 bXcptExpect = X86_XCPT_PF;
1223
1224 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1225
1226 /*
1227 * Iterate the test values and do the actual testing.
1228 */
1229 while (cRecompRuns < cMaxRecompRuns)
1230 {
1231 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1232 {
1233 uint16_t cErrors;
1234 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1235 uint32_t const fExpectedMxCsrFlags = pTest->enmType >= T_128BITS
1236 ? pTest->paValues[iVal].f128ExpectedMxCsrFlags
1237 : pTest->paValues[iVal].f256ExpectedMxCsrFlags;
1238
1239 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1240 continue;
1241
1242 /*
1243 * If the hardware does not support DAZ bit and we are testing DE exceptions,
1244 * then skip testing them. We still want to test values that set the MXCSR.DAZ
1245 * if we are not expecting DE exceptions to make sure DAZ bit in and of itself
1246 * is not influencing other cases.
1247 */
1248 if ( !g_fMxCsrDazSupported
1249 && pTest->paValues[iVal].fDenormalsAreZero == X86_MXCSR_DAZ
1250 && (fExpectedMxCsrFlags & X86_MXCSR_DE))
1251 continue;
1252
1253 /*
1254 * Setup the test instruction context and pass it to the worker.
1255 * A few of these can be figured out by the worker but initializing
1256 * it outside the inner most loop is more optimal.
1257 */
1258 TestCtx.pConfig = &paConfigs[iCfg];
1259 TestCtx.pTest = pTest;
1260 TestCtx.iVal = iVal;
1261 TestCtx.pszMode = pszMode;
1262 TestCtx.pTrapFrame = &TrapFrame;
1263 TestCtx.pCtx = &Ctx;
1264 TestCtx.pExtCtx = pExtCtx;
1265 TestCtx.pExtCtxOut = pExtCtxOut;
1266 TestCtx.puMemOp = (uint8_t *)puMemOp;
1267 TestCtx.puMemOpAlias = puMemOpAlias;
1268 TestCtx.cbMemOp = cbMemOp;
1269 TestCtx.cbOperand = cbOperand;
1270 TestCtx.bXcptExpect = bXcptExpect;
1271 TestCtx.fSseInstr = fSseInstr;
1272 TestCtx.fAvxInstr = fAvxInstr;
1273 TestCtx.idTestStep = idTestStep;
1274 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1275 if (cErrors != Bs3TestSubErrorCount())
1276 {
1277 if (paConfigs[iCfg].fAligned)
1278 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, %s %u-bit)",
1279 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1280 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1281 else
1282 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32, %s %u-bit)",
1283 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1284 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1285 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0, fSseInstr ? "SSE" : "AVX", cbOperand * 8);
1286 Bs3TestPrintf("\n");
1287 }
1288 }
1289 }
1290 }
1291 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1292 }
1293 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1294
1295 /*
1296 * Next ring.
1297 */
1298 bRing++;
1299 if (bRing > 3 || bMode == BS3_MODE_RM)
1300 break;
1301 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1302 }
1303
1304 /*
1305 * Cleanup.
1306 */
1307 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1308 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1309 return 0;
1310}
1311
1312
1313/*
1314 * [V]ADDPS.
1315 */
1316BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addps(uint8_t bMode)
1317{
1318 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1319 {
1320 /*
1321 * Zero.
1322 */
1323 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1324 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1325 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1326 /*mask */ X86_MXCSR_XCPT_MASK,
1327 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1328 /*flags */ 0, 0 },
1329 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1330 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1331 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1332 /*mask */ ~X86_MXCSR_XCPT_MASK,
1333 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1334 /*flags */ 0, 0 },
1335 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1336 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1337 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1338 /*mask */ ~X86_MXCSR_XCPT_MASK,
1339 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1340 /*flags */ 0, 0 },
1341 { { /*src2 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1342 { /*src1 */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1343 { /* => */ { BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1344 /*mask */ ~X86_MXCSR_XCPT_MASK,
1345 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1346 /*flags */ 0, 0 },
1347 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1348 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1349 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1) } },
1350 /*mask */ ~X86_MXCSR_XCPT_MASK,
1351 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1352 /*flags */ 0, 0 },
1353 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1354 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1355 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1) } },
1356 /*mask */ X86_MXCSR_XCPT_MASK,
1357 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1358 /*flags */ 0, 0 },
1359 /*
1360 * Infinity.
1361 */
1362 /* 6*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1363 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1364 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1365 /*mask */ ~X86_MXCSR_IM,
1366 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1367 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1368 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1369 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1370 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1371 /*mask */ X86_MXCSR_XCPT_MASK,
1372 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1373 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1374 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1375 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1376 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0) } },
1377 /*mask */ X86_MXCSR_XCPT_MASK,
1378 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1379 /*flags */ 0, X86_MXCSR_IE },
1380 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1381 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1) } },
1382 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(0) } },
1383 /*mask */ ~X86_MXCSR_XCPT_MASK,
1384 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1385 /*flags */ 0, X86_MXCSR_IE },
1386 { { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(1), BS3_FP32_INF(1), BS3_FP32_INF(1), BS3_FP32_INF(0) } },
1387 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_INF(0), BS3_FP32_ZERO(1), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_INF(1) } },
1388 { /* => */ { BS3_FP32_INF(1), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(1), BS3_FP32_QNAN(1), BS3_FP32_QNAN(1), BS3_FP32_INF(1) } },
1389 /*mask */ ~X86_MXCSR_XCPT_MASK,
1390 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1391 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1392 /*
1393 * Overflow, Precision.
1394 */
1395 /*11*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1396 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1397 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), } },
1398 /*mask */ ~X86_MXCSR_XCPT_MASK,
1399 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1400 /*flags */ 0, X86_MXCSR_OE },
1401 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1402 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0) } },
1403 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1404 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1405 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1406 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1407 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1408 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0) } },
1409 { /* => */ { BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_INF(0), BS3_FP32_VAL(1, 0, 2), BS3_FP32_ZERO(0), BS3_FP32_INF(0) } },
1410 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1411 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1412 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1413 { { /*src2 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1414 { /*src1 */ { BS3_FP32_NORMAL_MIN(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MIN(1) } },
1415 { /* => */ { BS3_FP32_VAL(1, 0, 2), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_VAL(1, 0, 2) } },
1416 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1417 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1418 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1419 { { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1420 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1421 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1422 /*mask */ X86_MXCSR_XCPT_MASK,
1423 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1424 /*flags */ 0, X86_MXCSR_OE | X86_MXCSR_PE },
1425 { { /*src2 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1426 { /*src1 */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1427 { /* => */ { BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(1), BS3_FP32_NORMAL_MAX(0) } },
1428 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1429 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1430 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1431 { { /*src2 */ { BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MIN(0), BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1432 { /*src1 */ { BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1), BS3_FP32_NORMAL_SAFE_INT_MAX(0), BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_NORMAL_SAFE_INT_MAX(1) } },
1433 { /* => */ { BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1), BS3_FP32_VAL(0, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(1, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_SAFE_INT_MAX + 1) } },
1434 /*mask */ ~X86_MXCSR_XCPT_MASK,
1435 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1436 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1437 /*
1438 * Normals.
1439 */
1440 /*18*/{ { /*src2 */ { BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/, BS3_FP32_VAL(0, 0x600000, 0x7f)/* 1.75*/, BS3_FP32_NORMAL_MAX(0), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7d)/*0.25*/ } },
1441 { /*src1 */ { BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/, BS3_FP32_VAL(1, 0, 0x7d)/*-0.25*/, BS3_FP32_NORMAL_MAX(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0, 0x7e)/*0.50*/ } },
1442 { /* => */ { BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/, BS3_FP32_VAL(0, 0x400000, 0x7f)/* 1.50*/, BS3_FP32_ZERO(1), BS3_FP32_ZERO(0), BS3_FP32_VAL(0, 0x400000, 0x7e)/*0.75*/ } },
1443 /*mask */ X86_MXCSR_XCPT_MASK,
1444 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1445 /*flags */ 0, 0 },
1446 };
1447
1448 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1449 {
1450 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1451 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1452
1453 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1454 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1455
1456 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1457 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1458 };
1459 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1460 {
1461 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1462 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1463
1464 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1465 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1466
1467 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1468 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1469 };
1470 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1471 {
1472 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1473 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1474
1475 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1476 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1477
1478 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1479 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1480
1481 { bs3CpuInstrX_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1482 { bs3CpuInstrX_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1483
1484 { bs3CpuInstrX_vaddps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1485 { bs3CpuInstrX_vaddps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1486 };
1487
1488 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1489 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1490 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1491 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1492}
1493
1494
1495/*
1496 * [V]ADDPD.
1497 */
1498BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1499{
1500 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1501 {
1502 /*
1503 * Zero.
1504 */
1505 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1506 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1507 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1508 /*mask */ X86_MXCSR_XCPT_MASK,
1509 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1510 /*flags */ 0, 0 },
1511 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1512 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1513 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1514 /*mask */ ~X86_MXCSR_XCPT_MASK,
1515 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1516 /*flags */ 0, 0 },
1517 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1518 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1519 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1520 /*mask */ X86_MXCSR_XCPT_MASK,
1521 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_DOWN,
1522 /*flags */ 0, 0 },
1523 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1524 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1525 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(1) } },
1526 /*mask */ ~X86_MXCSR_XCPT_MASK,
1527 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1528 /*flags */ 0, 0 },
1529 { { /*src2 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1530 { /*src1 */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1531 { /* => */ { BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(1), BS3_FP64_ZERO(0) } },
1532 /*mask */ X86_MXCSR_XCPT_MASK,
1533 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1534 /*flags */ 0, 0 },
1535 /*
1536 * Infinity.
1537 */
1538 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1539 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1540 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1541 /*mask */ ~X86_MXCSR_IM,
1542 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1543 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1544 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1545 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1546 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1547 /*mask */ ~X86_MXCSR_IM,
1548 /*daz,fz,rc*/ 0, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1549 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1550 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1551 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1552 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1553 /*mask */ ~X86_MXCSR_IM,
1554 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1555 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1556 { { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1557 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1558 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1559 /*mask */ X86_MXCSR_XCPT_MASK,
1560 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_ZERO,
1561 /*flags */ X86_MXCSR_IE, X86_MXCSR_IE },
1562 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1563 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1564 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1565 /*mask */ X86_MXCSR_XCPT_MASK,
1566 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1567 /*flags */ 0, X86_MXCSR_IE },
1568 /*
1569 * Overflow, Precision.
1570 */
1571 /*10*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1572 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1573 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1574 /*mask */ ~X86_MXCSR_XCPT_MASK,
1575 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1576 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1577 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1578 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1579 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1580 /*mask */ ~X86_MXCSR_XCPT_MASK,
1581 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1582 /*flags */ X86_MXCSR_OE, X86_MXCSR_OE },
1583 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1584 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1585 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_INF(0), } },
1586 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1587 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_NEAREST,
1588 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1589 { { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
1590 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0) } },
1591 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1592 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1593 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1594 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1595 { { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1596 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1597 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1598 /*mask */ X86_MXCSR_XCPT_MASK,
1599 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1600 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE, X86_MXCSR_OE | X86_MXCSR_PE },
1601 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1602 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1603 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_SAFE_INT_MAX + 1) } },
1604 /*mask */ ~X86_MXCSR_XCPT_MASK,
1605 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1606 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1607 /** @todo Why does the below on cause PE?! */
1608 { { /*src2 */ { BS3_FP64_VAL(0, 0xc000000000000, 0x3ff)/* 1.75*/, BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/ } },
1609 { /*src1 */ { BS3_FP64_VAL(1, 0, 0x07d)/*-0.25*/, BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/ } },
1610 { /* => */ { BS3_FP64_VAL(0, 0xbffffffffffff, 0x3ff)/* 1.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/ } },
1611 /*mask */ X86_MXCSR_XCPT_MASK,
1612 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1613 /*flags */ X86_MXCSR_PE, X86_MXCSR_PE },
1614 /*
1615 * Normals.
1616 */
1617 /*17*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1618 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1619 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1620 /*mask */ ~X86_MXCSR_XCPT_MASK,
1621 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1622 /*flags */ 0, 0 },
1623 { { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1624 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1625 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1626 /*mask */ X86_MXCSR_XCPT_MASK,
1627 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1628 /*flags */ 0, 0 },
1629 { { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
1630 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
1631 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
1632 /*mask */ ~X86_MXCSR_XCPT_MASK,
1633 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1634 /*flags */ 0, 0 },
1635 { { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1636 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1637 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_SAFE_INT_MAX), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1638 /*mask */ X86_MXCSR_XCPT_MASK,
1639 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1640 /*flags */ 0, 0 },
1641 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1642 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1643 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_SAFE_INT_MAX + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1644 /*mask */ ~X86_MXCSR_XCPT_MASK,
1645 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1646 /*flags */ 0, 0 },
1647 { { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1648 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1649 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_VAL(0, 0, 2) } },
1650 /*mask */ ~X86_MXCSR_XCPT_MASK,
1651 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_ZERO,
1652 /*flags */ 0, 0 },
1653 { { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1654 { /*src1 */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1655 { /* => */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646.00*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, 0, 2) } },
1656 /*mask */ X86_MXCSR_XCPT_MASK,
1657 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_DOWN,
1658 /*flags */ 0, 0 },
1659 /*
1660 * Denormals.
1661 */
1662 /*24*/{ { /*src2 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1663 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1664 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1665 /*mask */ ~X86_MXCSR_XCPT_MASK,
1666 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1667 /*flags */ X86_MXCSR_DE, X86_MXCSR_DE },
1668 { { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1669 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1670 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1671 /*mask */ X86_MXCSR_XCPT_MASK,
1672 /*daz,fz,rc*/ X86_MXCSR_DAZ, 0, X86_MXCSR_RC_NEAREST,
1673 /*flags */ 0, 0 },
1674 { { /*src2 */ { BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MAX(0) } },
1675 { /*src1 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0) } },
1676 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1677 /*mask */ X86_MXCSR_XCPT_MASK,
1678 /*daz,fz,rc*/ X86_MXCSR_DAZ, X86_MXCSR_FZ, X86_MXCSR_RC_UP,
1679 /*flags */ 0, 0 },
1680 /** @todo More denormals; Underflow, Precision; Rounding, FZ etc. */
1681 };
1682
1683 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1684 {
1685 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1686 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1687
1688 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1689 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1690
1691 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1692 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1693 };
1694 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1695 {
1696 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1697 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1698
1699 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1700 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1701
1702 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1703 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1704 };
1705 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1706 {
1707 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1708 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1709
1710 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1711 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1712
1713 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1714 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1715
1716 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1717 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1718
1719 { bs3CpuInstrX_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1720 { bs3CpuInstrX_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1721 };
1722
1723 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1724 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1725 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1726 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1727}
1728
1729
1730/**
1731 * The 32-bit protected mode main function.
1732 *
1733 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1734 * we'll switch between PE32 and RM for each test step we perform). Given that
1735 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1736 *
1737 * Some extra steps needs to be taken to properly handle extended state in LM64
1738 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1739 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1740 */
1741BS3_DECL(void) Main_pe32()
1742{
1743 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1744 {
1745#if 1 /*ndef DEBUG_bird*/
1746# define ALL_TESTS
1747#endif
1748#if defined(ALL_TESTS)
1749 { "[v]addps", bs3CpuInstrX_v_addps, 0 },
1750 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1751#endif
1752 };
1753 Bs3TestInit("bs3-cpu-instr-4");
1754
1755 /*
1756 * Initialize globals.
1757 */
1758 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1759 {
1760 uint32_t fEbx, fEcx, fEdx;
1761 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1762 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1763 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1764 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1765 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1766 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1767 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1768 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1769 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1770 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1771 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1772 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1773 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1774 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1775 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1776 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1777
1778 if (ASMCpuId_EAX(0) >= 7)
1779 {
1780 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1781 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1782 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1783 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1784 }
1785
1786 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1787 {
1788 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1789 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1790 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1791 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1792 }
1793 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1794
1795 /*
1796 * Figure out FPU save/restore method and support for DAZ bit.
1797 */
1798 {
1799 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1800 * alloc/free a context. Replicating the logic in the bs3kit here, though
1801 * doable, runs a risk of not updating this when the other logic is
1802 * changed. */
1803 uint64_t fFlags;
1804 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1805 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1806 if (pExtCtx)
1807 {
1808 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1809 g_enmExtCtxMethod = pExtCtx->enmMethod;
1810 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1811 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1812 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1813 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1814 g_fMxCsrDazSupported = true;
1815 }
1816 else
1817 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1818 }
1819
1820 /*
1821 * Allocate a buffer for testing.
1822 */
1823 g_cbBuf = X86_PAGE_SIZE * 4;
1824 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1825 if (g_pbBuf)
1826 {
1827 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1828 if (g_pbBufAliasAlloc)
1829 {
1830 /*
1831 * Do the tests.
1832 */
1833 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1834#ifdef BS3_SKIPIT_DO_SKIP
1835 bs3CpuInstrX_ShowTallies();
1836#endif
1837 }
1838 else
1839 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1840 }
1841 else
1842 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1843 }
1844
1845 Bs3TestTerm();
1846}
1847
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette