VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104852

Last change on this file since 104852 was 104852, checked in by vboxsync, 8 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: Denormals.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 88.2 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104852 2024-06-05 11:58:20Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Evaluates to 0 for 16-bit code, 1 for 32-bit code and 2 otherwise.
 *
 * @param   a_bMode     The execution mode.  May be evaluated more than once,
 *                      so the argument should not have side effects.
 * @note    The macro used to ignore @a a_bMode and silently pick up a variable
 *          named bMode from the caller's scope instead. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
/** Buffer size sufficient for the names of all SIMD FP exception flags
 * combined (the string terminator is included in the count). */
#define BS3_FP_XCPT_NAMES_MAXLEN                (sizeof(" IE DE ZE OE UE PE "))
57
/*
 * Single-precision (32 bits) floating-point defines.
 */
/** The max (biased) exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX                 254
/** The min (biased) exponent value for a single-precision floating-point normal.
 * @note A biased exponent of zero encodes zeros and denormals, so the smallest
 *       exponent of a normal is 1, matching BS3_FP64_EXP_NORMAL_MIN. */
#define BS3_FP32_EXP_NORMAL_MIN                 1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX            0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN            0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                       RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS                  RTFLOAT32U_FRACTION_BITS

/** Largest single-precision normal (max fraction, max normal exponent). */
#define BS3_FP32_NORMAL_MAX(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
/** Smallest single-precision normal (min fraction, min normal exponent). */
#define BS3_FP32_NORMAL_MIN(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
/** Single-precision zero. */
#define BS3_FP32_ZERO(a_Sign)                   RTFLOAT32U_INIT_ZERO(a_Sign)
/** Single-precision one (zero fraction, exponent equal to the bias). */
#define BS3_FP32_ONE(a_Sign)                    RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
/** Single-precision value from explicit sign, fraction and exponent. */
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
/** Single-precision infinity. */
#define BS3_FP32_INF(a_Sign)                    RTFLOAT32U_INIT_INF(a_Sign)
/** Single-precision default QNaN. */
#define BS3_FP32_QNAN(a_Sign)                   RTFLOAT32U_INIT_QNAN(a_Sign)
/** Single-precision QNaN with an explicit value. */
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)        RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
/** Single-precision default SNaN. */
#define BS3_FP32_SNAN(a_Sign)                   RTFLOAT32U_INIT_SNAN(a_Sign)
83
/*
 * Single-precision floating normals.
 * Fraction - 23 bits, all usable.
 * Exponent - 8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/** The maximum integer value (all 23 + 1 implied bit of the fraction part set)
 * without losing precision.
 * @note Must use the 32-bit initializer; the 64-bit one produces an
 *       initializer for the wrong floating-point type. */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
94
/*
 * Double-precision (64 bits) floating-point defines.
 */
/** Largest (biased) exponent of a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX                 2046
/** Smallest (biased) exponent of a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN                 1
/** Largest fraction of a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX            0xfffffffffffff
/** Smallest fraction of a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN            0
/** Exponent bias of the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                       RTFLOAT64U_EXP_BIAS
/** Width (in bits) of the fraction in the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS                  RTFLOAT64U_FRACTION_BITS
/** Largest fraction of a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MAX          0xfffffffffffff
/** Smallest fraction of a double-precision floating-point denormal. */
#define BS3_FP64_FRACTION_DENORMAL_MIN          1

/** Largest double-precision normal (max fraction, max normal exponent). */
#define BS3_FP64_NORMAL_MAX(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
/** Smallest double-precision normal (min fraction, min normal exponent). */
#define BS3_FP64_NORMAL_MIN(a_Sign)             RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
/** Double-precision zero. */
#define BS3_FP64_ZERO(a_Sign)                   RTFLOAT64U_INIT_ZERO(a_Sign)
/** Double-precision one (zero fraction, exponent equal to the bias). */
#define BS3_FP64_ONE(a_Sign)                    RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
/** Double-precision value from explicit sign, fraction and exponent. */
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
/** Double-precision infinity. */
#define BS3_FP64_INF(a_Sign)                    RTFLOAT64U_INIT_INF(a_Sign)
/** Double-precision default QNaN. */
#define BS3_FP64_QNAN(a_Sign)                   RTFLOAT64U_INIT_QNAN(a_Sign)
/** Double-precision QNaN with an explicit value. */
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
/** Double-precision default SNaN. */
#define BS3_FP64_SNAN(a_Sign)                   RTFLOAT64U_INIT_SNAN(a_Sign)
/** Double-precision SNaN with an explicit value. */
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)
125
/*
 * Double-precision floating normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): 0xffe needs 12 bits and is larger than BS3_FP64_EXP_NORMAL_MAX
   (2046 / 0x7fe) although the exponent field is 11 bits wide; presumably a
   smaller value was intended - confirm before changing it. */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
/** The maximum integer value (all 52 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
/** The minimum integer value without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MIN(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, 0, 1)
/** The maximum denormal value (largest denormal fraction, zero exponent). */
#define BS3_FP64_DENORMAL_MAX(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MAX, 0)
/** The minimum denormal value (smallest denormal fraction, zero exponent). */
#define BS3_FP64_DENORMAL_MIN(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_DENORMAL_MIN, 0)
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The enumerators are numbered consecutively from zero; T_MAX is the number of
 * distinct types and is used for sizing arrays indexed by this type.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID = 0,
    T_MMX,
    T_MMX_SSE,      /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,     /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,    /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_AVX2_256,
    T_MAX,

    /* Aliases marking the first type of each operand width group. */
    T_128BITS = T_SSE,
    T_256BITS = T_AVX_256
} BS3CPUINSTRX_INSTRTYPE_T;
175
/**
 * Memory or register rm variant.
 *
 * The sized memory variants override the operand size used for memory
 * accesses; bs3CpuInstrXMemOpSize translates them into uint8_t, uint16_t,
 * uint32_t and uint64_t sized operands respectively, i.e. the number in the
 * name is a bit count.
 */
enum {
    RM_REG = 0,
    RM_MEM,
    RM_MEM8,  /**< Memory operand is 8 bits (1 byte).  Hack for movss and similar. */
    RM_MEM16, /**< Memory operand is 16 bits (2 bytes). Hack for movss and similar. */
    RM_MEM32, /**< Memory operand is 32 bits (4 bytes). Hack for movss and similar. */
    RM_MEM64  /**< Memory operand is 64 bits (8 bytes). Hack for movss and similar. */
};
185
186/**
187 * Execution environment configuration.
188 */
189typedef struct BS3CPUINSTR4_CONFIG_T
190{
191 uint16_t fCr0Mp : 1;
192 uint16_t fCr0Em : 1;
193 uint16_t fCr0Ts : 1;
194 uint16_t fCr4OsFxSR : 1;
195 uint16_t fCr4OsXSave : 1;
196 uint16_t fCr4OsXmmExcpt : 1;
197 uint16_t fXcr0Sse : 1;
198 uint16_t fXcr0Avx : 1;
199 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
200 uint16_t fAlignCheck : 1;
201 uint16_t fMxCsrMM : 1; /**< AMD only */
202 uint8_t bXcptSse;
203 uint8_t bXcptAvx;
204} BS3CPUINSTR4_CONFIG_T;
205/** Pointer to an execution environment configuration. */
206typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
207
208/** State saved by bs3CpuInstr4ConfigReconfigure. */
209typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
210{
211 uint32_t uCr0;
212 uint32_t uCr4;
213 uint32_t uEfl;
214 uint16_t uFcw;
215 uint16_t uFsw;
216 uint32_t uMxCsr;
217} BS3CPUINSTRX_CONFIG_SAVED_T;
218typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
219typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
220
221/**
222 * YMM packed double-precision floating-point register.
223 * @todo move to x86.h?
224 */
225typedef union X86YMMFLOATPDREG
226{
227 /** Packed double-precision floating-point view. */
228 RTFLOAT64U ar64[4];
229 /** 256-bit integer view. */
230 RTUINT256U ymm;
231} X86YMMFLOATPDREG;
232# ifndef VBOX_FOR_DTRACE_LIB
233AssertCompileSize(X86YMMFLOATPDREG, 32);
234# endif
235/** Pointer to a YMM packed floating-point register. */
236typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
237/** Pointer to a const YMM packed floating-point register. */
238typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
239
240/**
241 * YMM packed single-precision floating-point register.
242 * @todo move to x86.h?
243 */
244typedef union X86YMMFLOATPSREG
245{
246 /** Packed single-precision floating-point view. */
247 RTFLOAT32U ar32[8];
248 /** 256-bit integer view. */
249 RTUINT256U ymm;
250} X86YMMFLOATPSREG;
251# ifndef VBOX_FOR_DTRACE_LIB
252AssertCompileSize(X86YMMFLOATPSREG, 32);
253# endif
254/** Pointer to a YMM packed single-precision floating-point register. */
255typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
256/** Pointer to a const YMM single-precision packed floating-point register. */
257typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
258
259/**
260 * YMM scalar quadruple-precision floating-point register.
261 * @todo move to x86.h?
262 */
263typedef union X86YMMFLOATSQREG
264{
265 /** Scalar quadruple-precision floating point view. */
266 RTFLOAT128U ar128[2];
267 /** 256-bit integer view. */
268 RTUINT256U ymm;
269} X86YMMFLOATSQREG;
270# ifndef VBOX_FOR_DTRACE_LIB
271AssertCompileSize(X86YMMFLOATSQREG, 32);
272# endif
273/** Pointer to a YMM scalar quadruple-precision floating-point register. */
274typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
275/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
276typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
277
278
279/*********************************************************************************************************************************
280* Global Variables *
281*********************************************************************************************************************************/
282static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
283static bool g_fAmdMisalignedSse = false;
284static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
285static bool g_fMxCsrDazSupported = false;
286
287/** Zero value (indexed by fSign). */
288RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
289RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
290
291/** One value (indexed by fSign). */
292RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
293 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
294RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
295 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
296
297/** Infinity (indexed by fSign). */
298RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
299RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
300
301/** Default QNaNs (indexed by fSign). */
302RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
303RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
304
305/** Size of g_pbBuf - at least three pages. */
306static uint32_t g_cbBuf;
307/** Buffer of g_cbBuf size. */
308static uint8_t BS3_FAR *g_pbBuf;
309/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
310static uint8_t BS3_FAR *g_pbBufAlias;
311/** RW alias for the memory at g_pbBuf. */
312static uint8_t BS3_FAR *g_pbBufAliasAlloc;
313
314/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
315static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
316{
317/*
318 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
319 * +AVX +AVX +AMD/SSE +AMD/SSE
320 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
321 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
322 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
323 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
324 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
325 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
326 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
327 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
328 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
329 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
330 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
331 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
332 /* Memory misalignment and alignment checks: */
333 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
334 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
335 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
336 /* AMD only: */
337 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
338 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
339};
340
341
342
343/**
344 * Returns the name of an X86 exception given the vector.
345 *
346 * @returns Name of the exception.
347 * @param uVector The exception vector.
348 */
349static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
350{
351 switch (uVector)
352 {
353 case X86_XCPT_DE: return "#DE";
354 case X86_XCPT_DB: return "#DB";
355 case X86_XCPT_NMI: return "#NMI";
356 case X86_XCPT_BP: return "#BP";
357 case X86_XCPT_OF: return "#OF";
358 case X86_XCPT_BR: return "#BR";
359 case X86_XCPT_UD: return "#UD";
360 case X86_XCPT_NM: return "#NM";
361 case X86_XCPT_DF: return "#DF";
362 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
363 case X86_XCPT_TS: return "#TS";
364 case X86_XCPT_NP: return "#NP";
365 case X86_XCPT_SS: return "#SS";
366 case X86_XCPT_GP: return "#GP";
367 case X86_XCPT_PF: return "#PF";
368 case X86_XCPT_MF: return "#MF";
369 case X86_XCPT_AC: return "#AC";
370 case X86_XCPT_MC: return "#MC";
371 case X86_XCPT_XF: return "#XF";
372 case X86_XCPT_VE: return "#VE";
373 case X86_XCPT_CP: return "#CP";
374 case X86_XCPT_VC: return "#VC";
375 case X86_XCPT_SX: return "#SX";
376 }
377 return "UNKNOWN";
378}
379
380
381/**
382 * Gets the names of floating-point exception flags that are set for a given MXCSR.
383 *
384 * @returns Names of floating-point exception flags that are set.
385 * @param pszBuf Where to store the floating-point exception flags.
386 * @param cchBuf The size of the buffer.
387 * @param fMxCsr The MXCSR value.
388 */
389static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
390{
391 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
392 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
393 return Bs3StrPrintf(pszBuf, cchBuf, " None");
394 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
395 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
396 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
397}
398
399
400/**
401 * Reconfigures the execution environment according to @a pConfig.
402 *
403 * Call bs3CpuInstrXConfigRestore to undo the changes.
404 *
405 * @returns true on success, false if the configuration cannot be applied. In
406 * the latter case, no context changes are made.
407 * @param pSavedCfg Where to save state we modify.
408 * @param pCtx The register context to modify.
409 * @param pExtCtx The extended register context to modify.
410 * @param pConfig The configuration to apply.
411 * @param bMode The target mode.
412 */
413static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
414 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
415{
416 /*
417 * Save context bits we may change here
418 */
419 pSavedCfg->uCr0 = pCtx->cr0.u32;
420 pSavedCfg->uCr4 = pCtx->cr4.u32;
421 pSavedCfg->uEfl = pCtx->rflags.u32;
422 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
423 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
424 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
425
426 /*
427 * Can we make these changes?
428 */
429 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
430 return false;
431
432 /*
433 * Modify the test context.
434 */
435 if (pConfig->fCr0Mp)
436 pCtx->cr0.u32 |= X86_CR0_MP;
437 else
438 pCtx->cr0.u32 &= ~X86_CR0_MP;
439 if (pConfig->fCr0Em)
440 pCtx->cr0.u32 |= X86_CR0_EM;
441 else
442 pCtx->cr0.u32 &= ~X86_CR0_EM;
443 if (pConfig->fCr0Ts)
444 pCtx->cr0.u32 |= X86_CR0_TS;
445 else
446 pCtx->cr0.u32 &= ~X86_CR0_TS;
447
448 if (pConfig->fCr4OsFxSR)
449 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
450 else
451 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
452
453 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
454 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
455 else
456 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
457
458 if (pConfig->fCr4OsFxSR)
459 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
460 else
461 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
462
463 if (pConfig->fCr4OsXSave)
464 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
465 else
466 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
467
468 if (pConfig->fXcr0Sse)
469 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
470 else
471 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
472 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
473 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
474 else
475 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
476
477 if (pConfig->fAlignCheck)
478 {
479 pCtx->rflags.u32 |= X86_EFL_AC;
480 pCtx->cr0.u32 |= X86_CR0_AM;
481 }
482 else
483 {
484 pCtx->rflags.u32 &= ~X86_EFL_AC;
485 pCtx->cr0.u32 &= ~X86_CR0_AM;
486 }
487
488 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
489 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
490
491 if (pConfig->fMxCsrMM)
492 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
493 else
494 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
495 return true;
496}
497
498
499/**
500 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
501 */
502static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
503{
504 pCtx->cr0.u32 = pSavedCfg->uCr0;
505 pCtx->cr4.u32 = pSavedCfg->uCr4;
506 pCtx->rflags.u32 = pSavedCfg->uEfl;
507 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
508 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
509 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
510 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
511}
512
513
514/**
515 * Allocates three extended CPU contexts and initializes the first one
516 * with random data.
517 * @returns First extended context, initialized with randomish data. NULL on
518 * failure (complained).
519 * @param ppExtCtx2 Where to return the 2nd context.
520 */
521static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
522{
523 /* Allocate extended context structures. */
524 uint64_t fFlags;
525 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
526 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
527 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
528 if (pExtCtx1)
529 {
530 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
531 /** @todo populate with semi-random stuff. */
532
533 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
534 *ppExtCtx2 = pExtCtx2;
535 return pExtCtx1;
536 }
537 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
538 *ppExtCtx2 = NULL;
539 return NULL;
540}
541
542
543/**
544 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
545 *
546 * @param pExtCtx1 The first extended context.
547 * @param pExtCtx2 The second extended context.
548 */
549static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
550{
551 RT_NOREF_PV(pExtCtx2);
552 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
553}
554
555
556/**
557 * Sets up SSE and AVX bits relevant for FPU instructions.
558 */
559static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
560{
561 /* CR0: */
562 uint32_t cr0 = Bs3RegGetCr0();
563 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
564 cr0 |= X86_CR0_NE;
565 Bs3RegSetCr0(cr0);
566
567 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
568 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
569 pCtx->cr0.u32 |= X86_CR0_NE;
570
571 /* CR4: */
572 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
573 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
574 {
575 uint32_t cr4 = Bs3RegGetCr4();
576 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
577 {
578 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
579 Bs3RegSetCr4(cr4);
580 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
581 }
582 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
583 {
584 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
585 Bs3RegSetCr4(cr4);
586 }
587 pCtx->cr4.u32 = cr4;
588 }
589}
590
591
592/**
593 * Configures the buffer with electric fences in paged modes.
594 *
595 * @returns Adjusted buffer pointer.
596 * @param pbBuf The buffer pointer.
597 * @param pcbBuf Pointer to the buffer size (input & output).
598 * @param bMode The testing target mode.
599 */
600DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
601{
602 if (BS3_MODE_IS_PAGED(bMode))
603 {
604 int rc;
605 uint32_t cbBuf = *pcbBuf;
606 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
607 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
608 pbBuf += X86_PAGE_SIZE;
609 cbBuf -= X86_PAGE_SIZE * 2;
610 *pcbBuf = cbBuf;
611
612 g_pbBufAlias = g_pbBufAliasAlloc;
613 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
614 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
615 if (RT_FAILURE(rc))
616 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
617 }
618 else
619 g_pbBufAlias = pbBuf;
620 return pbBuf;
621}
622
623
624/**
625 * Undoes what bs3CpuInstrXBufSetup did.
626 *
627 * @param pbBuf The buffer pointer.
628 * @param cbBuf The buffer size.
629 * @param bMode The testing target mode.
630 */
631DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
632{
633 if (BS3_MODE_IS_PAGED(bMode))
634 {
635 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
636 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
637 }
638}
639
640
641/**
642 * Gets a buffer of a @a cbMemOp sized operand according to the given
643 * configuration and alignment restrictions.
644 *
645 * @returns Pointer to the buffer.
646 * @param pbBuf The buffer pointer.
647 * @param cbBuf The buffer size.
648 * @param cbMemOp The operand size.
649 * @param cbAlign The operand alignment restriction.
650 * @param pConfig The configuration.
651 * @param fPageFault The \#PF test setting.
652 */
653DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
654 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
655{
656 /* All allocations are at the tail end of the buffer, so that we've got a
657 guard page following the operand. When asked to consistenly trigger
658 a #PF, we slide the buffer into that guard page. */
659 if (fPageFault)
660 cbBuf += X86_PAGE_SIZE;
661
662 if (pConfig->fAligned)
663 {
664 if (!pConfig->fAlignCheck)
665 return &pbBuf[cbBuf - cbMemOp];
666 return &pbBuf[cbBuf - cbMemOp - cbAlign];
667 }
668 return &pbBuf[cbBuf - cbMemOp - 1];
669}
670
671
672/**
673 * Determines the size of memory operands.
674 */
675DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
676{
677 if (enmRm <= RM_MEM)
678 return cbOperand;
679 if (enmRm == RM_MEM8)
680 return sizeof(uint8_t);
681 if (enmRm == RM_MEM16)
682 return sizeof(uint16_t);
683 if (enmRm == RM_MEM32)
684 return sizeof(uint32_t);
685 if (enmRm == RM_MEM64)
686 return sizeof(uint64_t);
687 BS3_ASSERT(0);
688 return cbOperand;
689}
690
691
692/*
693 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
694 * skips a large fraction of the micro-tests. It is sufficiently random
695 * that over a large number of runs, all micro-tests will be hit.
696 *
697 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
698 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
699 * (on an Intel Core i7-10700, fwiw).
700 *
701 * To activate this 'developer's speed-testing mode', turn on
702 * `#define BS3_SKIPIT_DO_SKIP' here.
703 *
704 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
705 * skipped in a row; e.g. the default of 26 means about every 27th
706 * micro-test is run during a particular test run. (This is not 27x
707 * faster due to other activities which are not skipped!) Note this is
708 * only an average; the actual skips are random.
709 *
710 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
711 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
712 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
713 * 3% performance).
714 *
715 * Note! The skipping is not compatible with testing the native recompiler as
716 * it requires the test code to be run a number of times before it kicks
717 * in and does the native recompilation (currently around 16 times).
718 */
/** Approximate number of micro-tests skipped in a row (average). */
#define BS3_SKIPIT_AVG_SKIP         26
/** Number of micro-tests seen between two tally reports. */
#define BS3_SKIPIT_REPORT_COUNT     150000
/* Off by default: replace with a #define to activate the skipping. */
#undef  BS3_SKIPIT_DO_SKIP
/* Off by default: replace with a #define to pass the test coordinates on. */
#undef  BS3_SKIPIT_DO_ARGS
723
724#ifndef BS3_SKIPIT_DO_SKIP
/* Skipping disabled: never skip and do not evaluate any of the arguments. */
# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant)     (false)
726#else
727# include <iprt/asm-amd64-x86.h>
728# include <iprt/asm-math.h>
729
730DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
731{
732 /*
733 * A simple Lehmer linear congruential pseudo-random number
734 * generator using the constants suggested by Park & Miller:
735 *
736 * modulus = 2^31 - 1 (INT32_MAX)
737 * multiplier = 7^5 (16807)
738 *
739 * It produces numbers in the range [1..INT32_MAX-1] and is
740 * more chaotic in the higher bits.
741 *
742 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
743 * though the zero handling is different.
744 */
745 static uint32_t s_uSeedMemory = 0;
746 uint32_t uVal = s_uSeedMemory;
747 if (!uVal)
748 uVal = (uint32_t)ASMReadTSC();
749 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
750 s_uSeedMemory = uVal;
751 return uVal;
752}
753
/** Number of micro-tests seen so far by bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen;
/** Number of those micro-tests which were skipped. */
static unsigned g_cSkipped;

/**
 * Prints how many micro-tests have been seen, tested and skipped so far.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    unsigned const cTested = g_cSeen - g_cSkipped;
    Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, cTested, g_cSkipped);
}
760
761# ifdef BS3_SKIPIT_DO_ARGS
762# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
763static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
764# else
765# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
766static bool bs3CpuInstrX_SkipIt(void)
767# endif
768{
769 static unsigned s_uTimes = 0;
770 bool fSkip;
771
772 /* Cache calls to the relatively expensive random routine */
773 if (!s_uTimes)
774 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
775 fSkip = --s_uTimes > 0;
776 if (fSkip)
777 ++g_cSkipped;
778
779 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
780 bs3CpuInstrX_ShowTallies();
781 return fSkip;
782}
783
784#endif /* BS3_SKIPIT_DO_SKIP */
785
786/*
787 * Test type #1.
788 * Generic YMM registers.
789 */
790typedef struct BS3CPUINSTR4_TEST1_VALUES_T
791{
792 X86YMMREG uSrc2; /**< Second source operand. */
793 X86YMMREG uSrc1; /**< uDstIn for SSE */
794 X86YMMREG uDstOut; /**< Destination output. */
795 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
796 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
797 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
798 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
799 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
800} BS3CPUINSTR4_TEST1_VALUES_T;
801
802/*
803 * Test type #1.
804 * Packed single-precision.
805 */
806typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
807{
808 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
809 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
810 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
811 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
812 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
813 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
814 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
815 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
816} BS3CPUINSTR4_TEST1_VALUES_PS_T;
817AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
818AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
819AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
820AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
821AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
822AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
823AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
824AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
825AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
826
827/*
828 * Test type #1.
829 * Packed double-precision.
830 */
831typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
832{
833 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
834 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
835 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
836 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
837 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
838 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
839 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
840 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
841} BS3CPUINSTR4_TEST1_VALUES_PD_T;
842AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
843AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
844AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
845AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
846AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
847AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
848AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
849AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
850AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
851
852/*
853 * Test type #1.
854 * Scalar quadruple-precision.
855 */
856typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
857{
858 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
859 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
860 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
861 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
862 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
863 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
864 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
865 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
866} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
867AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
868AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
869AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
870AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
871AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
872AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
873AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
874AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
875AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
876
877typedef struct BS3CPUINSTR4_TEST1_T
878{
879 FPFNBS3FAR pfnWorker; /**< Test function worker. */
880 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
881 uint8_t enmRm; /**< R/M type. */
882 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
883 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
884 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
885 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
886 uint8_t cValues; /**< Number of test values in @c paValues. */
887 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
888} BS3CPUINSTR4_TEST1_T;
889
890typedef struct BS3CPUINSTR4_TEST1_MODE_T
891{
892 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
893 unsigned cTests;
894} BS3CPUINSTR4_TEST1_MODE_T;
895
/**
 * Initializer for a three entry BS3CPUINSTR4_TEST1_MODE_T array.
 *
 * Each entry pairs a test table with its element count.  The arguments must
 * be real arrays (not pointers) since RT_ELEMENTS is applied to them.
 */
#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
    { \
        { a_aTests16, RT_ELEMENTS(a_aTests16) }, \
        { a_aTests32, RT_ELEMENTS(a_aTests32) }, \
        { a_aTests64, RT_ELEMENTS(a_aTests64) } \
    }
899
900typedef struct BS3CPUINSTR4_TEST1_CTX_T
901{
902 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
903 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
904 unsigned iVal; /**< Which iteration of the test value is this. */
905 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
906 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
907 PBS3REGCTX pCtx; /**< The general-purpose register context. */
908 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
909 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
910 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
911 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
912 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
913 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
914 uint8_t cbInstr; /**< Size of the instruction opcode. */
915 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
916 bool fSseInstr; /**< Whether this is an SSE instruction. */
917 bool fAvxInstr; /**< Whether this is an AVX instruction. */
918 uint16_t idTestStep; /**< The test iteration step. */
919} BS3CPUINSTR4_TEST1_CTX_T;
920/** Pointer to a test 1 context. */
921typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
922
923
924/**
925 * Worker for bs3CpuInstrX_WorkerTestType1.
926 */
927static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
928 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
929{
930 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
931 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
932 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
933 PBS3REGCTX pCtx = pTestCtx->pCtx;
934 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
935 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
936 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
937 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
938 uint8_t cbMemOp = pTestCtx->cbMemOp;
939 uint8_t const cbOperand = pTestCtx->cbOperand;
940 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
941 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
942 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
943 bool const fFpFlagsExpect = RT_BOOL( (pValues->fExpectedMxCsrFlags
944 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
945 uint32_t uMxCsr;
946 X86YMMREG MemOpExpect;
947 uint16_t cErrors;
948
949 /*
950 * Set up the context and some expectations.
951 */
952 /* Destination. */
953 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
954 if (pTest->iRegDst == UINT8_MAX)
955 {
956 BS3_ASSERT(pTest->enmRm >= RM_MEM);
957 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
958 if (bXcptExpect == X86_XCPT_DB)
959 MemOpExpect.ymm = pValues->uDstOut.ymm;
960 else
961 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
962 }
963
964 /* Source #1 (/ destination for SSE). */
965 if (pTest->iRegSrc1 == UINT8_MAX)
966 {
967 BS3_ASSERT(pTest->enmRm >= RM_MEM);
968 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
969 if (pTest->iRegDst == UINT8_MAX)
970 BS3_ASSERT(pTestCtx->fSseInstr);
971 else
972 MemOpExpect.ymm = pValues->uSrc1.ymm;
973 }
974 else if (pTestCtx->fSseInstr)
975 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
976 else
977 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
978
979 /* Source #2. */
980 if (pTest->iRegSrc2 == UINT8_MAX)
981 {
982 BS3_ASSERT(pTest->enmRm >= RM_MEM);
983 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
984 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
985 MemOpExpect.ymm = pValues->uSrc2.ymm;
986 }
987 else if (pTestCtx->fSseInstr)
988 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
989 else
990 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
991
992 /* Memory pointer. */
993 if (pTest->enmRm >= RM_MEM)
994 {
995 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
996 || pTest->iRegSrc1 == UINT8_MAX
997 || pTest->iRegSrc2 == UINT8_MAX);
998 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
999 }
1000
1001 /* Setup MXCSR for the current test. */
1002 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
1003 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
1004 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
1005 if ( pValues->fDenormalsAreZero
1006 && g_fMxCsrDazSupported)
1007 uMxCsr |= X86_MXCSR_DAZ;
1008 if (pValues->fFlushToZero)
1009 uMxCsr |= X86_MXCSR_FZ;
1010 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1011
1012 /*
1013 * Prepare globals and execute.
1014 */
1015 g_uBs3TrapEipHint = pCtx->rip.u32;
1016 if ( bXcptExpect == X86_XCPT_DB
1017 && !fFpFlagsExpect)
1018 g_uBs3TrapEipHint += cbInstr + 1;
1019 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1020
1021 /*
1022 * Check the result.
1023 *
1024 * If a floating-point exception is expected, the destination is not updated by the instruction.
1025 * In the case of SSE instructions, updating the destination here will work because it is the same
1026 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1027 */
1028 cErrors = Bs3TestSubErrorCount();
1029 if ( bXcptExpect == X86_XCPT_DB
1030 && !fFpFlagsExpect
1031 && pTest->iRegDst != UINT8_MAX)
1032 {
1033 if (pTestCtx->fSseInstr)
1034 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1035 else
1036 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1037 }
1038#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1039 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1040 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1041 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1042 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1043#endif
1044 if (bXcptExpect == X86_XCPT_DB)
1045 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1046 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1047 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1048
1049 if (bXcptExpect == X86_XCPT_DB)
1050 {
1051 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1052
1053 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1054 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1055 {
1056 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1057 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1058 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
1059 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1060 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1061 }
1062
1063 /* Check if the SIMD FP exception (or lack of) is as expected. */
1064 if (fFpFlagsExpect)
1065 {
1066 if (pTrapFrame->bXcpt == bFpXcpt)
1067 { /* likely */ }
1068 else
1069 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1070 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1071 }
1072 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1073 { /* likely */ }
1074 else
1075 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1076 }
1077 /* Check if non-FP exception is as expected. */
1078 else if (pTrapFrame->bXcpt != bXcptExpect)
1079 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1080
1081 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1082 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1083 {
1084 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1085 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1086 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1087 }
1088 if (bXcptExpect == X86_XCPT_PF)
1089 pCtx->cr2.u = (uintptr_t)puMemOp;
1090 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1091 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1092 pTestCtx->pszMode, pTestCtx->idTestStep);
1093 pCtx->cr2.u = 0;
1094
1095 if ( pTest->enmRm >= RM_MEM
1096 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1097 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1098
1099 return cErrors;
1100}
1101
1102
1103/**
1104 * Test type #1 worker.
1105 */
1106static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1107 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1108{
1109 BS3REGCTX Ctx;
1110 BS3TRAPFRAME TrapFrame;
1111 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1112 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1113 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1114 uint32_t cbBuf = g_cbBuf;
1115 PBS3EXTCTX pExtCtxOut;
1116 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1117 if (pExtCtx)
1118 { /* likely */ }
1119 else
1120 return 0;
1121 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1122 { /* likely */ }
1123 else
1124 {
1125 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1126 return 0;
1127 }
1128
1129 /* Ensure the structures are allocated before we sample the stack pointer. */
1130 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1131 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1132
1133 /*
1134 * Create test context.
1135 */
1136 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1137 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1138 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1139
1140 /*
1141 * Run the tests in all rings since alignment issues may behave
1142 * differently in ring-3 compared to ring-0.
1143 */
1144 for (;;)
1145 {
1146 unsigned fPf = 0;
1147 do
1148 {
1149 unsigned iCfg;
1150 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1151 {
1152 unsigned iTest;
1153 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1154 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1155 continue; /* unsupported config */
1156
1157 /*
1158 * Iterate the tests.
1159 */
1160 for (iTest = 0; iTest < cTests; iTest++)
1161 {
1162 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1163 unsigned const cValues = pTest->cValues;
1164 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1165 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1166 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1167 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1168 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1169 uint8_t const cbAlign = cbMemOp;
1170 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1171 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1172 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1173 : fSseInstr ? paConfigs[iCfg].bXcptSse
1174 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1175 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1176 unsigned cRecompRuns = 0;
1177 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1178 unsigned iVal;
1179
1180 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1181 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1182 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1183 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1184 continue;
1185
1186 /* #AC is only raised in ring-3. */
1187 if (bXcptExpect == X86_XCPT_AC)
1188 {
1189 if (bRing != 3)
1190 bXcptExpect = X86_XCPT_DB;
1191 else if (fAvxInstr)
1192 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1193 }
1194
1195 if (fPf && bXcptExpect == X86_XCPT_DB)
1196 bXcptExpect = X86_XCPT_PF;
1197
1198 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1199
1200 /*
1201 * Iterate the test values and do the actual testing.
1202 */
1203 while (cRecompRuns < cMaxRecompRuns)
1204 {
1205 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1206 {
1207 uint16_t cErrors;
1208 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1209 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1210 continue;
1211
1212 /*
1213 * If the hardware does not support DAZ bit and we are testing DE exceptions,
1214 * then skip testing them. We still want to test values that set the MXCSR.DAZ
1215 * if we are not expecting DE exceptions to make sure DAZ bit in and of itself
1216 * is not influencing other cases.
1217 */
1218 if ( !g_fMxCsrDazSupported
1219 && pTest->paValues[iVal].fDenormalsAreZero
1220 && (pTest->paValues[iVal].fExpectedMxCsrFlags & X86_MXCSR_DE))
1221 continue;
1222
1223 /*
1224 * Setup the test instruction context and pass it to the worker.
1225 * A few of these can be figured out by the worker but initializing
1226 * it outside the inner most loop is more optimal.
1227 */
1228 TestCtx.pConfig = &paConfigs[iCfg];
1229 TestCtx.pTest = pTest;
1230 TestCtx.iVal = iVal;
1231 TestCtx.pszMode = pszMode;
1232 TestCtx.pTrapFrame = &TrapFrame;
1233 TestCtx.pCtx = &Ctx;
1234 TestCtx.pExtCtx = pExtCtx;
1235 TestCtx.pExtCtxOut = pExtCtxOut;
1236 TestCtx.puMemOp = (uint8_t *)puMemOp;
1237 TestCtx.puMemOpAlias = puMemOpAlias;
1238 TestCtx.cbMemOp = cbMemOp;
1239 TestCtx.cbOperand = cbOperand;
1240 TestCtx.bXcptExpect = bXcptExpect;
1241 TestCtx.fSseInstr = fSseInstr;
1242 TestCtx.fAvxInstr = fAvxInstr;
1243 TestCtx.idTestStep = idTestStep;
1244 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1245 if (cErrors != Bs3TestSubErrorCount())
1246 {
1247 if (paConfigs[iCfg].fAligned)
1248 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1249 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1250 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1251 else
1252 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1253 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1254 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1255 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1256 Bs3TestPrintf("\n");
1257 }
1258 }
1259 }
1260 }
1261 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1262 }
1263 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1264
1265 /*
1266 * Next ring.
1267 */
1268 bRing++;
1269 if (bRing > 3 || bMode == BS3_MODE_RM)
1270 break;
1271 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1272 }
1273
1274 /*
1275 * Cleanup.
1276 */
1277 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1278 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1279 return 0;
1280}
1281
1282
1283/*
1284 * [V]ADDPS.
1285 */
1286BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addps(uint8_t bMode)
1287{
1288 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1289 {
1290 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1291 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1292 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1293 /*mask */ X86_MXCSR_XCPT_MASK,
1294 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1295 /*flags */ 0 },
1296 /* 1*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1297 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1298 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1299 /*mask */ ~X86_MXCSR_XCPT_MASK,
1300 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1301 /*flags */ 0 },
1302 /* 2*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1303 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1304 { /* => */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1305 /*mask */ ~X86_MXCSR_IM,
1306 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1307 /*flags */ X86_MXCSR_IE },
1308 /* 3*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1309 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1310 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1311 /*mask */ X86_MXCSR_XCPT_MASK,
1312 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1313 /*flags */ X86_MXCSR_IE },
1314 };
1315
1316 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1317 {
1318 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1319 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1320
1321 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c16, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1322 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1323
1324 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c16, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1325 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c16, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1326 };
1327 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1328 {
1329 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1330 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1331
1332 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c32, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1333 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1334
1335 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c32, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1336 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c32, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1337 };
1338 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1339 {
1340 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1341 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1342
1343 { bs3CpuInstrX_vaddps_XMM1_XMM2_XMM3_icebp_c64, 255, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1344 { bs3CpuInstrX_vaddps_XMM1_XMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1345
1346 { bs3CpuInstrX_vaddps_YMM1_YMM2_YMM3_icebp_c64, 255, RM_REG, T_AVX_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1347 { bs3CpuInstrX_vaddps_YMM1_YMM2_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1348
1349 { bs3CpuInstrX_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1350 { bs3CpuInstrX_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1351
1352 { bs3CpuInstrX_vaddps_YMM8_YMM9_YMM10_icebp_c64, 255, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1353 { bs3CpuInstrX_vaddps_YMM8_YMM9_FSxBX_icebp_c64, 255, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1354 };
1355
1356 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1357 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1358 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1359 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1360}
1361
1362
1363/*
1364 * [V]ADDPD.
1365 */
1366BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1367{
1368 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1369 {
1370 /*
1371 * Zero.
1372 */
1373 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1374 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1375 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1376 /*mask */ X86_MXCSR_XCPT_MASK,
1377 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1378 /*flags */ 0 },
1379 /* 1*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1380 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1381 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1382 /*mask */ ~X86_MXCSR_XCPT_MASK,
1383 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_NEAREST,
1384 /*flags */ 0 },
1385 /* 2*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1386 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1387 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1388 /*mask */ X86_MXCSR_XCPT_MASK,
1389 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_DOWN,
1390 /*flags */ 0 },
1391 /* 3*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1392 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1393 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1394 /*mask */ ~X86_MXCSR_XCPT_MASK,
1395 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1396 /*flags */ 0 },
1397 /* 4*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1398 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1399 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1400 /*mask */ X86_MXCSR_XCPT_MASK,
1401 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1402 /*flags */ 0 },
1403 /*
1404 * Infinity.
1405 */
1406 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1407 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1408 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1409 /*mask */ ~X86_MXCSR_IM,
1410 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1411 /*flags */ X86_MXCSR_IE },
1412 /* 6*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1413 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1414 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1415 /*mask */ ~X86_MXCSR_IM,
1416 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_DOWN,
1417 /*flags */ X86_MXCSR_IE },
1418 /* 7*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1419 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1420 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1421 /*mask */ ~X86_MXCSR_IM,
1422 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1423 /*flags */ X86_MXCSR_IE },
1424 /* 8*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_INF(1) } },
1425 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_INF(0) } },
1426 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_QNAN(1) } },
1427 /*mask */ X86_MXCSR_XCPT_MASK,
1428 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_ZERO,
1429 /*flags */ X86_MXCSR_IE },
1430 /*
1431 * Overflow, Precision.
1432 */
1433 /* 9*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1434 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1) } },
1435 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1436 /*mask */ ~X86_MXCSR_XCPT_MASK,
1437 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1438 /*flags */ X86_MXCSR_OE },
1439 /*10*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1440 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1441 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1442 /*mask */ ~X86_MXCSR_XCPT_MASK,
1443 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1444 /*flags */ X86_MXCSR_OE },
1445 /*11*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1446 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(0) } },
1447 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_INF(0), } },
1448 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1449 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_NEAREST,
1450 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1451 /*12*/{ { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0) } },
1452 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0) } },
1453 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1454 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1455 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1456 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1457 /*13*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1458 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1459 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_MAX(0) } },
1460 /*mask */ X86_MXCSR_XCPT_MASK,
1461 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1462 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1463 /*14*/{ { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1464 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1) } },
1465 { /* => */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1) } },
1466 /*mask */ ~X86_MXCSR_XCPT_MASK,
1467 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1468 /*flags */ X86_MXCSR_PE },
1469 /*15*/{ { /*src2 */ { BS3_FP64_VAL(0, 0xc000000000000, 0x3ff)/* 1.75*/, BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fd)/*0.25*/ } },
1470 { /*src1 */ { BS3_FP64_VAL(1, 0, 0x07d)/*-0.25*/, BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0, 0x3fe)/*0.50*/ } },
1471 { /* => */ { BS3_FP64_VAL(0, 0xbffffffffffff, 0x3ff)/* 1.50*/, BS3_FP64_ZERO(1), BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0x8000000000000, 0x3fe)/*0.75*/ } },
1472 /*mask */ X86_MXCSR_XCPT_MASK,
1473 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_DOWN,
1474 /*flags */ X86_MXCSR_PE },
1475 /*
1476 * Normals.
1477 */
1478 /*16*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1479 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1480 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1481 /*mask */ ~X86_MXCSR_XCPT_MASK,
1482 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1483 /*flags */ 0 },
1484 /*17*/{ { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1485 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1486 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1487 /*mask */ X86_MXCSR_XCPT_MASK,
1488 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1489 /*flags */ 0 },
1490 /*18*/{ { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf0033a34f337, 0x432)/*4072598000007579.5*/ } },
1491 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6eca42000000, 0x419)/* 123450000.5*/ } },
1492 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xcf00348ec5858, 0x432)/*4072598123457580.0*/ } },
1493 /*mask */ ~X86_MXCSR_XCPT_MASK,
1494 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1495 /*flags */ 0 },
1496 /*19*/{ { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1497 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1498 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1499 /*mask */ X86_MXCSR_XCPT_MASK,
1500 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1501 /*flags */ 0 },
1502 /*20*/{ { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1503 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1504 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1505 /*mask */ ~X86_MXCSR_XCPT_MASK,
1506 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1507 /*flags */ 0 },
1508 /*21*/{ { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1509 { /*src1 */ { BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(0) } },
1510 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1), BS3_FP64_NORMAL_SAFE_INT_MIN(0), BS3_FP64_VAL(0, 0, 2) } },
1511 /*mask */ ~X86_MXCSR_XCPT_MASK,
1512 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1513 /*flags */ 0 },
1514 /*22*/{ { /*src2 */ { BS3_FP64_VAL(0, 0xc122186c3cfd0, 0x42d)/*123456789876543.25*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1515 { /*src1 */ { BS3_FP64_VAL(0, 0xb88e0395d49b0, 0x42d)/*121098765432102.75*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_NORMAL_SAFE_INT_MIN(1) } },
1516 { /* => */ { BS3_FP64_VAL(0, 0xbcd80e0108cc0, 0x42e)/*244555555308646 */, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_VAL(1, 0, 2) } },
1517 /*mask */ X86_MXCSR_XCPT_MASK,
1518 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_DOWN,
1519 /*flags */ 0 },
1520 /*
1521 * Denormals.
1522 */
1523 /*23*/{ { /*src2 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1524 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1525 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1526 /*mask */ ~X86_MXCSR_XCPT_MASK,
1527 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1528 /*flags */ X86_MXCSR_DE },
1529 /*24*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1530 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1531 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1532 /*mask */ X86_MXCSR_XCPT_MASK,
1533 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_NEAREST,
1534 /*flags */ 0 },
1535 /*25*/{ { /*src2 */ { BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MAX(0) } },
1536 { /*src1 */ { BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_DENORMAL_MAX(0), BS3_FP64_DENORMAL_MIN(0) } },
1537 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_DENORMAL_MIN(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1538 /*mask */ X86_MXCSR_XCPT_MASK,
1539 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1540 /*flags */ 0 },
1541 };
1542
1543 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1544 {
1545 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1546 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1547
1548 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1549 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1550
1551 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1552 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1553 };
1554 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1555 {
1556 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1557 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1558
1559 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1560 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1561
1562 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1563 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1564 };
1565 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1566 {
1567 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1568 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1569
1570 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1571 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1572
1573 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1574 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1575
1576 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1577 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1578
1579 { bs3CpuInstrX_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1580 { bs3CpuInstrX_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1581 };
1582
1583 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1584 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1585 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1586 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1587}
1588
1589
1590/**
1591 * The 32-bit protected mode main function.
1592 *
1593 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1594 * we'll switch between PE32 and RM for each test step we perform). Given that
1595 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1596 *
1597 * Some extra steps needs to be taken to properly handle extended state in LM64
1598 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1599 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1600 */
1601BS3_DECL(void) Main_pe32()
1602{
1603 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1604 {
1605#if 1 /*ndef DEBUG_bird*/
1606# define ALL_TESTS
1607#endif
1608#if defined(ALL_TESTS)
1609 { "[v]addps", bs3CpuInstrX_v_addps, 0 },
1610 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1611#endif
1612 };
1613 Bs3TestInit("bs3-cpu-instr-4");
1614
1615 /*
1616 * Initialize globals.
1617 */
1618 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1619 {
1620 uint32_t fEbx, fEcx, fEdx;
1621 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1622 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1623 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1624 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1625 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1626 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1627 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1628 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1629 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1630 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1631 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1632 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1633 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1634 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1635 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1636 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1637
1638 if (ASMCpuId_EAX(0) >= 7)
1639 {
1640 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1641 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1642 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1643 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1644 }
1645
1646 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1647 {
1648 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1649 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1650 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1651 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1652 }
1653 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1654
1655 /*
1656 * Figure out FPU save/restore method and support for DAZ bit.
1657 */
1658 {
1659 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1660 * alloc/free a context. Replicating the logic in the bs3kit here, though
1661 * doable, runs a risk of not updating this when the other logic is
1662 * changed. */
1663 uint64_t fFlags;
1664 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1665 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1666 if (pExtCtx)
1667 {
1668 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1669 g_enmExtCtxMethod = pExtCtx->enmMethod;
1670 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1671 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1672 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1673 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1674 g_fMxCsrDazSupported = true;
1675 }
1676 else
1677 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1678 }
1679
1680 /*
1681 * Allocate a buffer for testing.
1682 */
1683 g_cbBuf = X86_PAGE_SIZE * 4;
1684 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1685 if (g_pbBuf)
1686 {
1687 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1688 if (g_pbBufAliasAlloc)
1689 {
1690 /*
1691 * Do the tests.
1692 */
1693 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1694#ifdef BS3_SKIPIT_DO_SKIP
1695 bs3CpuInstrX_ShowTallies();
1696#endif
1697 }
1698 else
1699 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1700 }
1701 else
1702 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1703 }
1704
1705 Bs3TestTerm();
1706}
1707
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette