VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104815

Last change on this file since 104815 was 104810, checked in by vboxsync, 9 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: Re-enabled the problematic cases that were previously not working.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 80.2 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104810 2024-05-29 08:24:15Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/**
 * Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Evaluates to 0 for 16-bit code, 1 for 32-bit code and 2 otherwise.
 *
 * @param   a_bMode     The execution mode (BS3_MODE_XXX).  The expansion now
 *                      uses this argument rather than whatever variable named
 *                      'bMode' happens to be in scope at the point of use.
 */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
/** Buffer size (string terminator included) that is large enough for the
 *  names of all SIMD FP exception flags combined. */
#define BS3_FP_XCPT_NAMES_MAXLEN            (sizeof(" IE DE ZE OE UE PE "))
57
/*
 * Single-precision (32 bits) floating-point defines.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX             254
/** The min exponent value for a single-precision floating-point normal.
 * A biased exponent of zero encodes zeros and denormals, so the smallest
 * exponent of a normal value is 1 (same as BS3_FP64_EXP_NORMAL_MIN). */
#define BS3_FP32_EXP_NORMAL_MIN             1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX        0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN        0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                   RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS              RTFLOAT32U_FRACTION_BITS
73
/** @name Single-precision (RTFLOAT32U) initializers.
 * Thin wrappers around the IPRT RTFLOAT32U_INIT_XXX macros; a_Sign is passed
 * through as the sign argument.
 * @{ */
/** Max fraction combined with the max normal exponent. */
#define BS3_FP32_NORMAL_MAX(a_Sign) \
    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
/** Min fraction combined with the min normal exponent. */
#define BS3_FP32_NORMAL_MIN(a_Sign) \
    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
/** Zero. */
#define BS3_FP32_ZERO(a_Sign)                   RTFLOAT32U_INIT_ZERO(a_Sign)
/** One: zero fraction, exponent equal to the bias. */
#define BS3_FP32_ONE(a_Sign)                    RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
/** Arbitrary value from sign, fraction and exponent. */
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
/** Infinity. */
#define BS3_FP32_INF(a_Sign)                    RTFLOAT32U_INIT_INF(a_Sign)
/** Default quiet NaN. */
#define BS3_FP32_QNAN(a_Sign)                   RTFLOAT32U_INIT_QNAN(a_Sign)
/** Quiet NaN with an explicit value. */
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)        RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
/** Default signalling NaN. */
#define BS3_FP32_SNAN(a_Sign)                   RTFLOAT32U_INIT_SNAN(a_Sign)
/** @} */
83
/*
 * Single-precision floating normals.
 * Fraction - 23 bits, all usable.
 * Exponent - 8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)       RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/** The maximum integer value (all 23 + 1 implied bit of the fraction part set)
 * without losing precision.  This is a single-precision value and is therefore
 * built with the 32-bit initializer, like every other BS3_FP32_XXX macro. */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign) \
    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
94
/*
 * Double-precision (64 bits) floating-point defines.
 */
/** Largest exponent value of a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX             2046
/** Smallest exponent value of a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN             1
/** Largest fraction value of a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX        0xfffffffffffff
/** Smallest fraction value of a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN        0
/** Exponent bias of the double-precision floating-point format. */
#define BS3_FP64_EXP_BIAS                   RTFLOAT64U_EXP_BIAS
/** Fraction width (in bits) of the double-precision floating-point format. */
#define BS3_FP64_FRACTION_BITS              RTFLOAT64U_FRACTION_BITS

/** @name Double-precision (RTFLOAT64U) initializers.
 * Thin wrappers around the IPRT RTFLOAT64U_INIT_XXX macros; a_Sign is passed
 * through as the sign argument.
 * @{ */
#define BS3_FP64_NORMAL_MAX(a_Sign) \
    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
#define BS3_FP64_NORMAL_MIN(a_Sign) \
    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
#define BS3_FP64_ZERO(a_Sign)                   RTFLOAT64U_INIT_ZERO(a_Sign)
#define BS3_FP64_ONE(a_Sign)                    RTFLOAT64U_INIT_C(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP64_INF(a_Sign)                    RTFLOAT64U_INIT_INF(a_Sign)
#define BS3_FP64_QNAN(a_Sign)                   RTFLOAT64U_INIT_QNAN(a_Sign)
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP64_SNAN(a_Sign)                   RTFLOAT64U_INIT_SNAN(a_Sign)
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)
/** @} */
121
/*
 * Double-precision floating normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* NOTE(review): 0xffe needs 12 bits while the exponent is documented above as
   11 bits wide - confirm the intended value (0x7fe?). */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)       RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
/** Largest integer value representable without losing precision (all 52 + 1
 * implied bit of the fraction part set). */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign) \
    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
132
133
134/*********************************************************************************************************************************
135* Structures and Typedefs *
136*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * T_128BITS and T_256BITS are aliases and do not consume a value of their
 * own; T_MAX is the number of distinct values.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID,
    T_MMX,
    T_MMX_SSE,              /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,             /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,            /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_128BITS = T_SSE,      /**< Alias of T_SSE. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_256BITS = T_AVX_256,  /**< Alias of T_AVX_256. */
    T_AVX2_256,
    T_MAX
} BS3CPUINSTRX_INSTRTYPE_T;
165
/**
 * Memory or register rm variant.
 *
 * The numeric suffix of RM_MEMxx is a width in bits: bs3CpuInstrXMemOpSize
 * maps RM_MEM8/16/32/64 to sizeof(uint8_t/uint16_t/uint32_t/uint64_t).
 */
enum {
    RM_REG = 0,
    RM_MEM,
    RM_MEM8,        /**< Memory operand is 8 bits (1 byte).  Hack for movss and similar. */
    RM_MEM16,       /**< Memory operand is 16 bits (2 bytes).  Hack for movss and similar. */
    RM_MEM32,       /**< Memory operand is 32 bits (4 bytes).  Hack for movss and similar. */
    RM_MEM64        /**< Memory operand is 64 bits (8 bytes).  Hack for movss and similar. */
};
175
176/**
177 * Execution environment configuration.
178 */
179typedef struct BS3CPUINSTR4_CONFIG_T
180{
181 uint16_t fCr0Mp : 1;
182 uint16_t fCr0Em : 1;
183 uint16_t fCr0Ts : 1;
184 uint16_t fCr4OsFxSR : 1;
185 uint16_t fCr4OsXSave : 1;
186 uint16_t fCr4OsXmmExcpt : 1;
187 uint16_t fXcr0Sse : 1;
188 uint16_t fXcr0Avx : 1;
189 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
190 uint16_t fAlignCheck : 1;
191 uint16_t fMxCsrMM : 1; /**< AMD only */
192 uint8_t bXcptSse;
193 uint8_t bXcptAvx;
194} BS3CPUINSTR4_CONFIG_T;
195/** Pointer to an execution environment configuration. */
196typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
197
198/** State saved by bs3CpuInstr4ConfigReconfigure. */
199typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
200{
201 uint32_t uCr0;
202 uint32_t uCr4;
203 uint32_t uEfl;
204 uint16_t uFcw;
205 uint16_t uFsw;
206 uint32_t uMxCsr;
207} BS3CPUINSTRX_CONFIG_SAVED_T;
208typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
209typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
210
211/**
212 * YMM packed double-precision floating-point register.
213 * @todo move to x86.h?
214 */
215typedef union X86YMMFLOATPDREG
216{
217 /** Packed double-precision floating-point view. */
218 RTFLOAT64U ar64[4];
219 /** 256-bit integer view. */
220 RTUINT256U ymm;
221} X86YMMFLOATPDREG;
222# ifndef VBOX_FOR_DTRACE_LIB
223AssertCompileSize(X86YMMFLOATPDREG, 32);
224# endif
225/** Pointer to a YMM packed floating-point register. */
226typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
227/** Pointer to a const YMM packed floating-point register. */
228typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
229
230/**
231 * YMM packed single-precision floating-point register.
232 * @todo move to x86.h?
233 */
234typedef union X86YMMFLOATPSREG
235{
236 /** Packed single-precision floating-point view. */
237 RTFLOAT32U ar32[8];
238 /** 256-bit integer view. */
239 RTUINT256U ymm;
240} X86YMMFLOATPSREG;
241# ifndef VBOX_FOR_DTRACE_LIB
242AssertCompileSize(X86YMMFLOATPSREG, 32);
243# endif
244/** Pointer to a YMM packed single-precision floating-point register. */
245typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
246/** Pointer to a const YMM single-precision packed floating-point register. */
247typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
248
249/**
250 * YMM scalar quadruple-precision floating-point register.
251 * @todo move to x86.h?
252 */
253typedef union X86YMMFLOATSQREG
254{
255 /** Scalar quadruple-precision floating point view. */
256 RTFLOAT128U ar128[2];
257 /** 256-bit integer view. */
258 RTUINT256U ymm;
259} X86YMMFLOATSQREG;
260# ifndef VBOX_FOR_DTRACE_LIB
261AssertCompileSize(X86YMMFLOATSQREG, 32);
262# endif
263/** Pointer to a YMM scalar quadruple-precision floating-point register. */
264typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
265/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
266typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
267
268
269/*********************************************************************************************************************************
270* Global Variables *
271*********************************************************************************************************************************/
272static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
273static bool g_fAmdMisalignedSse = false;
274static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
275static bool g_fMxCsrDazSupported = false;
276
277/** Zero value (indexed by fSign). */
278RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
279RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
280
281/** One value (indexed by fSign). */
282RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
283 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
284RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
285 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
286
287/** Infinity (indexed by fSign). */
288RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
289RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
290
291/** Default QNaNs (indexed by fSign). */
292RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
293RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
294
295/** Size of g_pbBuf - at least three pages. */
296static uint32_t g_cbBuf;
297/** Buffer of g_cbBuf size. */
298static uint8_t BS3_FAR *g_pbBuf;
299/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
300static uint8_t BS3_FAR *g_pbBufAlias;
301/** RW alias for the memory at g_pbBuf. */
302static uint8_t BS3_FAR *g_pbBufAliasAlloc;
303
304/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned.
 *
 * Each row initializes one BS3CPUINSTR4_CONFIG_T in member order: the eleven
 * one-bit settings (fCr0Mp through fMxCsrMM) followed by bXcptSse and
 * bXcptAvx.  Rows #13 and #14 set fMxCsrMM, which
 * bs3CpuInstr4ConfigReconfigure refuses to apply unless g_fAmdMisalignedSse
 * is set. */
305static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
306{
307/*
308 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
309 * +AVX +AVX +AMD/SSE +AMD/SSE
310 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
311 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
312 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
313 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
314 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
315 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
316 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
317 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
318 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
319 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
320 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
321 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
322 /* Memory misalignment and alignment checks: */
323 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 */
324 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 */
325 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
326 /* AMD only: */
327 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
328 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
329};
330
331
332
333/**
334 * Returns the name of an X86 exception given the vector.
335 *
336 * @returns Name of the exception.
337 * @param uVector The exception vector.
338 */
339static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
340{
341 switch (uVector)
342 {
343 case X86_XCPT_DE: return "#DE";
344 case X86_XCPT_DB: return "#DB";
345 case X86_XCPT_NMI: return "#NMI";
346 case X86_XCPT_BP: return "#BP";
347 case X86_XCPT_OF: return "#OF";
348 case X86_XCPT_BR: return "#BR";
349 case X86_XCPT_UD: return "#UD";
350 case X86_XCPT_NM: return "#NM";
351 case X86_XCPT_DF: return "#DF";
352 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
353 case X86_XCPT_TS: return "#TS";
354 case X86_XCPT_NP: return "#NP";
355 case X86_XCPT_SS: return "#SS";
356 case X86_XCPT_GP: return "#GP";
357 case X86_XCPT_PF: return "#PF";
358 case X86_XCPT_MF: return "#MF";
359 case X86_XCPT_AC: return "#AC";
360 case X86_XCPT_MC: return "#MC";
361 case X86_XCPT_XF: return "#XF";
362 case X86_XCPT_VE: return "#VE";
363 case X86_XCPT_CP: return "#CP";
364 case X86_XCPT_VC: return "#VC";
365 case X86_XCPT_SX: return "#SX";
366 }
367 return "UNKNOWN";
368}
369
370
371/**
372 * Gets the names of floating-point exception flags that are set for a given MXCSR.
373 *
374 * @returns Names of floating-point exception flags that are set.
375 * @param pszBuf Where to store the floating-point exception flags.
376 * @param cchBuf The size of the buffer.
377 * @param fMxCsr The MXCSR value.
378 */
379static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
380{
381 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
382 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
383 return Bs3StrPrintf(pszBuf, cchBuf, " None");
384 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
385 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
386 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
387}
388
389
390/**
391 * Reconfigures the execution environment according to @a pConfig.
392 *
393 * Call bs3CpuInstrXConfigRestore to undo the changes.
394 *
395 * @returns true on success, false if the configuration cannot be applied. In
396 * the latter case, no context changes are made.
397 * @param pSavedCfg Where to save state we modify.
398 * @param pCtx The register context to modify.
399 * @param pExtCtx The extended register context to modify.
400 * @param pConfig The configuration to apply.
401 * @param bMode The target mode.
402 */
403static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
404 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
405{
406 /*
407 * Save context bits we may change here
408 */
409 pSavedCfg->uCr0 = pCtx->cr0.u32;
410 pSavedCfg->uCr4 = pCtx->cr4.u32;
411 pSavedCfg->uEfl = pCtx->rflags.u32;
412 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
413 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
414 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
415
416 /*
417 * Can we make these changes?
418 */
419 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
420 return false;
421
422 /*
423 * Modify the test context.
424 */
425 if (pConfig->fCr0Mp)
426 pCtx->cr0.u32 |= X86_CR0_MP;
427 else
428 pCtx->cr0.u32 &= ~X86_CR0_MP;
429 if (pConfig->fCr0Em)
430 pCtx->cr0.u32 |= X86_CR0_EM;
431 else
432 pCtx->cr0.u32 &= ~X86_CR0_EM;
433 if (pConfig->fCr0Ts)
434 pCtx->cr0.u32 |= X86_CR0_TS;
435 else
436 pCtx->cr0.u32 &= ~X86_CR0_TS;
437
438 if (pConfig->fCr4OsFxSR)
439 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
440 else
441 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
442
443 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
444 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
445 else
446 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
447
448 if (pConfig->fCr4OsFxSR)
449 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
450 else
451 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
452
453 if (pConfig->fCr4OsXSave)
454 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
455 else
456 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
457
458 if (pConfig->fXcr0Sse)
459 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
460 else
461 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
462 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
463 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
464 else
465 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
466
467 if (pConfig->fAlignCheck)
468 {
469 pCtx->rflags.u32 |= X86_EFL_AC;
470 pCtx->cr0.u32 |= X86_CR0_AM;
471 }
472 else
473 {
474 pCtx->rflags.u32 &= ~X86_EFL_AC;
475 pCtx->cr0.u32 &= ~X86_CR0_AM;
476 }
477
478 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
479 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
480
481 if (pConfig->fMxCsrMM)
482 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
483 else
484 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
485 return true;
486}
487
488
489/**
490 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
491 */
492static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
493{
494 pCtx->cr0.u32 = pSavedCfg->uCr0;
495 pCtx->cr4.u32 = pSavedCfg->uCr4;
496 pCtx->rflags.u32 = pSavedCfg->uEfl;
497 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
498 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
499 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
500 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
501}
502
503
504/**
505 * Allocates three extended CPU contexts and initializes the first one
506 * with random data.
507 * @returns First extended context, initialized with randomish data. NULL on
508 * failure (complained).
509 * @param ppExtCtx2 Where to return the 2nd context.
510 */
511static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
512{
513 /* Allocate extended context structures. */
514 uint64_t fFlags;
515 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
516 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2);
517 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
518 if (pExtCtx1)
519 {
520 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
521 /** @todo populate with semi-random stuff. */
522
523 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
524 *ppExtCtx2 = pExtCtx2;
525 return pExtCtx1;
526 }
527 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
528 *ppExtCtx2 = NULL;
529 return NULL;
530}
531
532
533/**
534 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
535 *
536 * @param pExtCtx1 The first extended context.
537 * @param pExtCtx2 The second extended context.
538 */
539static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
540{
541 RT_NOREF_PV(pExtCtx2);
542 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
543}
544
545
546/**
547 * Sets up SSE and AVX bits relevant for FPU instructions.
548 */
549static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
550{
551 /* CR0: */
552 uint32_t cr0 = Bs3RegGetCr0();
553 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
554 cr0 |= X86_CR0_NE;
555 Bs3RegSetCr0(cr0);
556
557 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
558 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
559 pCtx->cr0.u32 |= X86_CR0_NE;
560
561 /* CR4: */
562 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
563 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
564 {
565 uint32_t cr4 = Bs3RegGetCr4();
566 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
567 {
568 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
569 Bs3RegSetCr4(cr4);
570 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
571 }
572 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
573 {
574 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
575 Bs3RegSetCr4(cr4);
576 }
577 pCtx->cr4.u32 = cr4;
578 }
579}
580
581
582/**
583 * Configures the buffer with electric fences in paged modes.
584 *
585 * @returns Adjusted buffer pointer.
586 * @param pbBuf The buffer pointer.
587 * @param pcbBuf Pointer to the buffer size (input & output).
588 * @param bMode The testing target mode.
589 */
590DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
591{
592 if (BS3_MODE_IS_PAGED(bMode))
593 {
594 int rc;
595 uint32_t cbBuf = *pcbBuf;
596 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
597 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
598 pbBuf += X86_PAGE_SIZE;
599 cbBuf -= X86_PAGE_SIZE * 2;
600 *pcbBuf = cbBuf;
601
602 g_pbBufAlias = g_pbBufAliasAlloc;
603 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
604 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
605 if (RT_FAILURE(rc))
606 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
607 }
608 else
609 g_pbBufAlias = pbBuf;
610 return pbBuf;
611}
612
613
614/**
615 * Undoes what bs3CpuInstrXBufSetup did.
616 *
617 * @param pbBuf The buffer pointer.
618 * @param cbBuf The buffer size.
619 * @param bMode The testing target mode.
620 */
621DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
622{
623 if (BS3_MODE_IS_PAGED(bMode))
624 {
625 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
626 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
627 }
628}
629
630
631/**
632 * Gets a buffer of a @a cbMemOp sized operand according to the given
633 * configuration and alignment restrictions.
634 *
635 * @returns Pointer to the buffer.
636 * @param pbBuf The buffer pointer.
637 * @param cbBuf The buffer size.
638 * @param cbMemOp The operand size.
639 * @param cbAlign The operand alignment restriction.
640 * @param pConfig The configuration.
641 * @param fPageFault The \#PF test setting.
642 */
643DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
644 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
645{
646 /* All allocations are at the tail end of the buffer, so that we've got a
647 guard page following the operand. When asked to consistenly trigger
648 a #PF, we slide the buffer into that guard page. */
649 if (fPageFault)
650 cbBuf += X86_PAGE_SIZE;
651
652 if (pConfig->fAligned)
653 {
654 if (!pConfig->fAlignCheck)
655 return &pbBuf[cbBuf - cbMemOp];
656 return &pbBuf[cbBuf - cbMemOp - cbAlign];
657 }
658 return &pbBuf[cbBuf - cbMemOp - 1];
659}
660
661
662/**
663 * Determines the size of memory operands.
664 */
665DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
666{
667 if (enmRm <= RM_MEM)
668 return cbOperand;
669 if (enmRm == RM_MEM8)
670 return sizeof(uint8_t);
671 if (enmRm == RM_MEM16)
672 return sizeof(uint16_t);
673 if (enmRm == RM_MEM32)
674 return sizeof(uint32_t);
675 if (enmRm == RM_MEM64)
676 return sizeof(uint64_t);
677 BS3_ASSERT(0);
678 return cbOperand;
679}
680
681
682/*
683 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
684 * skips a large fraction of the micro-tests. It is sufficiently random
685 * that over a large number of runs, all micro-tests will be hit.
686 *
687 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
688 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
689 * (on an Intel Core i7-10700, fwiw).
690 *
691 * To activate this 'developer's speed-testing mode', turn on
692 * `#define BS3_SKIPIT_DO_SKIP' here.
693 *
694 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
695 * skipped in a row; e.g. the default of 26 means about every 27th
696 * micro-test is run during a particular test run. (This is not 27x
697 * faster due to other activities which are not skipped!) Note this is
698 * only an average; the actual skips are random.
699 *
700 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
701 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
702 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
703 * 3% performance).
704 *
705 * Note! The skipping is not compatible with testing the native recompiler as
706 * it requires the test code to be run a number of times before it kicks
707 * in and does the native recompilation (currently around 16 times).
708 */
/** Approximate number of micro-tests skipped in a row when skipping is on. */
#define BS3_SKIPIT_AVG_SKIP     26
/** The tallies are printed each time this many micro-tests have been seen. */
#define BS3_SKIPIT_REPORT_COUNT 150000
#undef  BS3_SKIPIT_DO_SKIP
#undef  BS3_SKIPIT_DO_ARGS

#ifndef BS3_SKIPIT_DO_SKIP
/* Skipping disabled: never skip anything. */
# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
#else
# include <iprt/asm-amd64-x86.h>
# include <iprt/asm-math.h>

/**
 * Simple pseudo-random number generator for the skipping code.
 *
 * @returns The next number of the sequence.
 */
DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
{
    /*
     * A simple Lehmer linear congruential pseudo-random number
     * generator using the constants suggested by Park & Miller:
     *
     *      modulus    = 2^31 - 1 (INT32_MAX)
     *      multiplier = 7^5 (16807)
     *
     * It produces numbers in the range [1..INT32_MAX-1] and is
     * more chaotic in the higher bits.
     *
     * Note! Runtime/common/rand/randparkmiller.cpp also uses this algorithm,
     *       though the zero handling is different.
     */
    static uint32_t s_uSeedMemory = 0;
    uint32_t        uNext         = s_uSeedMemory;
    if (uNext == 0)
        uNext = (uint32_t)ASMReadTSC();     /* seed from the TSC while the state is zero */
    uNext = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uNext, 16807), INT32_MAX);
    s_uSeedMemory = uNext;
    return uNext;
}

/** Number of micro-tests seen and number of those skipped. */
static unsigned g_cSeen, g_cSkipped;

/**
 * Prints the seen / tested / skipped micro-test counts.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}

/**
 * Decides whether the current micro-test should be skipped.
 *
 * @returns true if the micro-test should be skipped, false if it should run.
 */
# ifdef BS3_SKIPIT_DO_ARGS
#  define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
# else
#  define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
static bool bs3CpuInstrX_SkipIt(void)
# endif
{
    static unsigned s_uTimes = 0;
    bool            fSkip;

    /* Cache calls to the relatively expensive random routine: draw a new
       countdown only when the previous one has run out. */
    if (s_uTimes == 0)
        s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;

    /* Skip until the countdown reaches zero; that micro-test is run. */
    s_uTimes -= 1;
    fSkip = s_uTimes != 0;
    if (fSkip)
        g_cSkipped += 1;

    g_cSeen += 1;
    if (g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
        bs3CpuInstrX_ShowTallies();
    return fSkip;
}

#endif /* BS3_SKIPIT_DO_SKIP */
775
776/*
777 * Test type #1.
778 * Generic YMM registers.
779 */
780typedef struct BS3CPUINSTR4_TEST1_VALUES_T
781{
782 X86YMMREG uSrc2; /**< Second source operand. */
783 X86YMMREG uSrc1; /**< uDstIn for SSE */
784 X86YMMREG uDstOut; /**< Destination output. */
785 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
786 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
787 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
788 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
789 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
790} BS3CPUINSTR4_TEST1_VALUES_T;
791
792/*
793 * Test type #1.
794 * Packed single-precision.
795 */
796typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
797{
798 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
799 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
800 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
801 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
802 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
803 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
804 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
805 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
806} BS3CPUINSTR4_TEST1_VALUES_PS_T;
807AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
808AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
809AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
810AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
811AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
812AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
813AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
814AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
815AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
816
817/*
818 * Test type #1.
819 * Packed double-precision.
820 */
821typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
822{
823 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
824 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
825 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
826 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
827 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
828 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
829 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
830 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
831} BS3CPUINSTR4_TEST1_VALUES_PD_T;
832AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
833AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
834AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
835AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
836AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
837AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
838AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
839AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
840AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
841
842/*
843 * Test type #1.
844 * Scalar quadruple-precision.
845 */
846typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
847{
848 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
849 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
850 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
851 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
852 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
853 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
854 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
855 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
856} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
857AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
858AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
859AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
860AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
861AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
862AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
863AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
864AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
865AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
866
867typedef struct BS3CPUINSTR4_TEST1_T
868{
869 FPFNBS3FAR pfnWorker; /**< Test function worker. */
870 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
871 uint8_t enmRm; /**< R/M type. */
872 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
873 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
874 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
875 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
876 uint8_t cValues; /**< Number of test values in @c paValues. */
877 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
878} BS3CPUINSTR4_TEST1_T;
879
880typedef struct BS3CPUINSTR4_TEST1_MODE_T
881{
882 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
883 unsigned cTests;
884} BS3CPUINSTR4_TEST1_MODE_T;
885
/**
 * Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries).
 *
 * Each argument must be an actual array (not a pointer), since its element
 * count is taken with RT_ELEMENTS.
 *
 * @param   a_aTests16  The test array for the first entry.
 * @param   a_aTests32  The test array for the second entry.
 * @param   a_aTests64  The test array for the third entry.
 */
#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
    { \
        { a_aTests16, RT_ELEMENTS(a_aTests16) }, \
        { a_aTests32, RT_ELEMENTS(a_aTests32) }, \
        { a_aTests64, RT_ELEMENTS(a_aTests64) } \
    }
889
890typedef struct BS3CPUINSTR4_TEST1_CTX_T
891{
892 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
893 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
894 unsigned iVal; /**< Which iteration of the test value is this. */
895 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
896 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
897 PBS3REGCTX pCtx; /**< The general-purpose register context. */
898 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
899 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
900 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
901 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
902 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
903 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
904 uint8_t cbInstr; /**< Size of the instruction opcode. */
905 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
906 bool fSseInstr; /**< Whether this is an SSE instruction. */
907 bool fAvxInstr; /**< Whether this is an AVX instruction. */
908 uint16_t idTestStep; /**< The test iteration step. */
909} BS3CPUINSTR4_TEST1_CTX_T;
910/** Pointer to a test 1 context. */
911typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
912
913
914/**
915 * Worker for bs3CpuInstrX_WorkerTestType1.
916 */
917static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
918 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
919{
920 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
921 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
922 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
923 PBS3REGCTX pCtx = pTestCtx->pCtx;
924 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
925 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
926 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
927 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
928 uint8_t cbMemOp = pTestCtx->cbMemOp;
929 uint8_t const cbOperand = pTestCtx->cbOperand;
930 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
931 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
932 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
933 bool const fFpFlagsExpect = RT_BOOL( (pValues->fExpectedMxCsrFlags
934 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
935 uint32_t uMxCsr;
936 X86YMMREG MemOpExpect;
937 uint16_t cErrors;
938
939 /*
940 * Set up the context and some expectations.
941 */
942 /* Destination. */
943 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
944 if (pTest->iRegDst == UINT8_MAX)
945 {
946 BS3_ASSERT(pTest->enmRm >= RM_MEM);
947 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
948 if (bXcptExpect == X86_XCPT_DB)
949 MemOpExpect.ymm = pValues->uDstOut.ymm;
950 else
951 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
952 }
953
954 /* Source #1 (/ destination for SSE). */
955 if (pTest->iRegSrc1 == UINT8_MAX)
956 {
957 BS3_ASSERT(pTest->enmRm >= RM_MEM);
958 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
959 if (pTest->iRegDst == UINT8_MAX)
960 BS3_ASSERT(pTestCtx->fSseInstr);
961 else
962 MemOpExpect.ymm = pValues->uSrc1.ymm;
963 }
964 else if (pTestCtx->fSseInstr)
965 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
966 else
967 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
968
969 /* Source #2. */
970 if (pTest->iRegSrc2 == UINT8_MAX)
971 {
972 BS3_ASSERT(pTest->enmRm >= RM_MEM);
973 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
974 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
975 MemOpExpect.ymm = pValues->uSrc2.ymm;
976 }
977 else if (pTestCtx->fSseInstr)
978 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
979 else
980 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
981
982 /* Memory pointer. */
983 if (pTest->enmRm >= RM_MEM)
984 {
985 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
986 || pTest->iRegSrc1 == UINT8_MAX
987 || pTest->iRegSrc2 == UINT8_MAX);
988 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
989 }
990
991 /* Setup MXCSR for the current test. */
992 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
993 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
994 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
995 if ( pValues->fDenormalsAreZero
996 && g_fMxCsrDazSupported)
997 uMxCsr |= X86_MXCSR_DAZ;
998 if (pValues->fFlushToZero)
999 uMxCsr |= X86_MXCSR_FZ;
1000 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1001
1002 /*
1003 * Prepare globals and execute.
1004 */
1005 g_uBs3TrapEipHint = pCtx->rip.u32;
1006 if ( bXcptExpect == X86_XCPT_DB
1007 && !fFpFlagsExpect)
1008 g_uBs3TrapEipHint += cbInstr + 1;
1009 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1010
1011 /*
1012 * Check the result.
1013 *
1014 * If a floating-point exception is expected, the destination is not updated by the instruction.
1015 * In the case of SSE instructions, updating the destination here will work because it is the same
1016 * as the source, but for AVX++ it won't because the destination is different and would contain 0s.
1017 */
1018 cErrors = Bs3TestSubErrorCount();
1019 if ( bXcptExpect == X86_XCPT_DB
1020 && !fFpFlagsExpect
1021 && pTest->iRegDst != UINT8_MAX)
1022 {
1023 if (pTestCtx->fSseInstr)
1024 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1025 else
1026 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1027 }
1028#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1029 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1030 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1031 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1032 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1033#endif
1034 if (bXcptExpect == X86_XCPT_DB)
1035 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1036 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1037 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1038
1039 if (bXcptExpect == X86_XCPT_DB)
1040 {
1041 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1042
1043 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1044 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1045 {
1046 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1047 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1048 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
1049 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1050 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1051 }
1052
1053 /* Check if the SIMD FP exception (or lack of) is as expected. */
1054 if (fFpFlagsExpect)
1055 {
1056 if (pTrapFrame->bXcpt == bFpXcpt)
1057 { /* likely */ }
1058 else
1059 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1060 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1061 }
1062 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1063 { /* likely */ }
1064 else
1065 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1066 }
1067 /* Check if non-FP exception is as expected. */
1068 else if (pTrapFrame->bXcpt != bXcptExpect)
1069 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1070
1071 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1072 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1073 {
1074 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1075 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1076 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1077 }
1078 if (bXcptExpect == X86_XCPT_PF)
1079 pCtx->cr2.u = (uintptr_t)puMemOp;
1080 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1081 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1082 pTestCtx->pszMode, pTestCtx->idTestStep);
1083 pCtx->cr2.u = 0;
1084
1085 if ( pTest->enmRm >= RM_MEM
1086 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1087 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1088
1089 return cErrors;
1090}
1091
1092
1093/**
1094 * Test type #1 worker.
1095 */
1096static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1097 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1098{
1099 BS3REGCTX Ctx;
1100 BS3TRAPFRAME TrapFrame;
1101 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1102 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1103 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1104 uint32_t cbBuf = g_cbBuf;
1105 PBS3EXTCTX pExtCtxOut;
1106 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1107 if (pExtCtx)
1108 { /* likely */ }
1109 else
1110 return 0;
1111 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1112 { /* likely */ }
1113 else
1114 {
1115 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1116 return 0;
1117 }
1118
1119 /* Ensure the structures are allocated before we sample the stack pointer. */
1120 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1121 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1122
1123 /*
1124 * Create test context.
1125 */
1126 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1127 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1128 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1129
1130 /*
1131 * Run the tests in all rings since alignment issues may behave
1132 * differently in ring-3 compared to ring-0.
1133 */
1134 for (;;)
1135 {
1136 unsigned fPf = 0;
1137 do
1138 {
1139 unsigned iCfg;
1140 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1141 {
1142 unsigned iTest;
1143 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1144 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1145 continue; /* unsupported config */
1146
1147 /*
1148 * Iterate the tests.
1149 */
1150 for (iTest = 0; iTest < cTests; iTest++)
1151 {
1152 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1153 unsigned const cValues = pTest->cValues;
1154 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1155 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1156 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1157 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1158 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1159 uint8_t const cbAlign = cbMemOp;
1160 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1161 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1162 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1163 : fSseInstr ? paConfigs[iCfg].bXcptSse
1164 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1165 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1166 unsigned cRecompRuns = 0;
1167 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1168 unsigned iVal;
1169
1170 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1171 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1172 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1173 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1174 continue;
1175
1176 /* #AC is only raised in ring-3. */
1177 if (bXcptExpect == X86_XCPT_AC)
1178 {
1179 if (bRing != 3)
1180 bXcptExpect = X86_XCPT_DB;
1181 else if (fAvxInstr)
1182 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1183 }
1184
1185 if (fPf && bXcptExpect == X86_XCPT_DB)
1186 bXcptExpect = X86_XCPT_PF;
1187
1188 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1189
1190 /*
1191 * Iterate the test values and do the actual testing.
1192 */
1193 while (cRecompRuns < cMaxRecompRuns)
1194 {
1195 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1196 {
1197 uint16_t cErrors;
1198 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1199 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1200 continue;
1201
1202 /*
1203 * Setup the test instruction context and pass it to the worker.
1204 * A few of these can be figured out by the worker but initializing
1205 * it outside the inner most loop is more optimal.
1206 */
1207 TestCtx.pConfig = &paConfigs[iCfg];
1208 TestCtx.pTest = pTest;
1209 TestCtx.iVal = iVal;
1210 TestCtx.pszMode = pszMode;
1211 TestCtx.pTrapFrame = &TrapFrame;
1212 TestCtx.pCtx = &Ctx;
1213 TestCtx.pExtCtx = pExtCtx;
1214 TestCtx.pExtCtxOut = pExtCtxOut;
1215 TestCtx.puMemOp = (uint8_t *)puMemOp;
1216 TestCtx.puMemOpAlias = puMemOpAlias;
1217 TestCtx.cbMemOp = cbMemOp;
1218 TestCtx.cbOperand = cbOperand;
1219 TestCtx.bXcptExpect = bXcptExpect;
1220 TestCtx.fSseInstr = fSseInstr;
1221 TestCtx.fAvxInstr = fAvxInstr;
1222 TestCtx.idTestStep = idTestStep;
1223 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1224 if (cErrors != Bs3TestSubErrorCount())
1225 {
1226 if (paConfigs[iCfg].fAligned)
1227 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1228 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1229 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1230 else
1231 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1232 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1233 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1234 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1235 Bs3TestPrintf("\n");
1236 }
1237 }
1238 }
1239 }
1240 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1241 }
1242 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1243
1244 /*
1245 * Next ring.
1246 */
1247 bRing++;
1248 if (bRing > 3 || bMode == BS3_MODE_RM)
1249 break;
1250 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1251 }
1252
1253 /*
1254 * Cleanup.
1255 */
1256 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1257 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1258 return 0;
1259}
1260
1261
1262/*
1263 * [V]ADDPS.
1264 */
1265BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addps(uint8_t bMode)
1266{
1267 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1268 {
1269 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1270 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1271 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1272 /*mask */ X86_MXCSR_XCPT_MASK,
1273 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1274 /*flags */ 0 },
1275 /* 1*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1276 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1277 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1278 /*mask */ ~X86_MXCSR_XCPT_MASK,
1279 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1280 /*flags */ 0 },
1281 /* 2*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1282 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1283 { /* => */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1284 /*mask */ ~X86_MXCSR_IM,
1285 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1286 /*flags */ X86_MXCSR_IE },
1287 /* 3*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1288 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1289 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1290 /*mask */ X86_MXCSR_XCPT_MASK,
1291 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1292 /*flags */ X86_MXCSR_IE },
1293 };
1294
1295 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1296 {
1297 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1298 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1299 };
1300 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1301 {
1302 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1303 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1304 };
1305 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1306 {
1307 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1308 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1309 { bs3CpuInstrX_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1310 { bs3CpuInstrX_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1311 };
1312
1313 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1314 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1315 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1316 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1317}
1318
1319
1320/*
1321 * [V]ADDPD.
1322 */
1323BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1324{
1325 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1326 {
1327 /*
1328 * Zero.
1329 */
1330 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1331 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1332 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1333 /*mask */ X86_MXCSR_XCPT_MASK,
1334 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1335 /*flags */ 0 },
1336 /* 1*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1337 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1338 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1339 /*mask */ ~X86_MXCSR_XCPT_MASK,
1340 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_NEAREST,
1341 /*flags */ 0 },
1342 /* 2*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1343 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1344 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1345 /*mask */ X86_MXCSR_XCPT_MASK,
1346 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_DOWN,
1347 /*flags */ 0 },
1348 /* 3*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1349 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1350 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1351 /*mask */ ~X86_MXCSR_XCPT_MASK,
1352 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1353 /*flags */ 0 },
1354 /* 4*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1355 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1356 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1357 /*mask */ X86_MXCSR_XCPT_MASK,
1358 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1359 /*flags */ 0 },
1360 /*
1361 * Infinity.
1362 */
1363 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1364 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1365 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1366 /*mask */ ~X86_MXCSR_IM,
1367 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1368 /*flags */ X86_MXCSR_IE },
1369 /* 6*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1370 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1371 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1372 /*mask */ ~X86_MXCSR_IM,
1373 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_DOWN,
1374 /*flags */ X86_MXCSR_IE },
1375 /* 7*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1376 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1377 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1378 /*mask */ ~X86_MXCSR_IM,
1379 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1380 /*flags */ X86_MXCSR_IE },
1381 /* 8*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1382 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1383 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1384 /*mask */ X86_MXCSR_XCPT_MASK,
1385 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_ZERO,
1386 /*flags */ X86_MXCSR_IE },
1387 /*
1388 * Overflow.
1389 */
1390 /* 9*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1391 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1392 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1393 /*mask */ ~X86_MXCSR_XCPT_MASK,
1394 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1395 /*flags */ X86_MXCSR_OE },
1396 /*10*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1397 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1398 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1399 /*mask */ ~X86_MXCSR_XCPT_MASK,
1400 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1401 /*flags */ X86_MXCSR_OE },
1402 /*11*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1403 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1404 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1405 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1406 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_NEAREST,
1407 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1408 /*12*/{ { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1409 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1410 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1411 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1412 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1413 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1414 /*13*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1415 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1416 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1417 /*mask */ X86_MXCSR_XCPT_MASK,
1418 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1419 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1420 /*
1421 * Normals.
1422 */
1423 /*14*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1424 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1425 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1426 /*mask */ ~X86_MXCSR_XCPT_MASK,
1427 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1428 /*flags */ 0 },
1429 /*15*/{ { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1430 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1431 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1432 /*mask */ X86_MXCSR_XCPT_MASK,
1433 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1434 /*flags */ 0 },
1435 /*16*/{ { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1436 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1437 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1438 /*mask */ ~X86_MXCSR_XCPT_MASK,
1439 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1440 /*flags */ 0 },
1441 /*17*/{ { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1442 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1443 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1444 /*mask */ X86_MXCSR_XCPT_MASK,
1445 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1446 /*flags */ 0 },
1447 /*18*/{ { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1448 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1449 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1450 /*mask */ ~X86_MXCSR_XCPT_MASK,
1451 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1452 /*flags */ 0 },
1453 };
1454
1455 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1456 {
1457 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1458 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1459
1460 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1461 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1462
1463 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1464 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1465 };
1466 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1467 {
1468 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1469 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1470
1471 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1472 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1473
1474 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1475 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1476 };
1477 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1478 {
1479 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1480 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1481
1482 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1483 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1484
1485 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1486 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1487
1488 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1489 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1490
1491 { bs3CpuInstrX_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1492// { bs3CpuInstrX_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1493
1494 };
1495
1496 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1497 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1498 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1499 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1500}
1501
1502
1503/**
1504 * The 32-bit protected mode main function.
1505 *
1506 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1507 * we'll switch between PE32 and RM for each test step we perform). Given that
1508 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1509 *
1510 * Some extra steps needs to be taken to properly handle extended state in LM64
1511 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1512 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1513 */
1514BS3_DECL(void) Main_pe32()
1515{
1516 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1517 {
1518#if 1 /*ndef DEBUG_bird*/
1519# define ALL_TESTS
1520#endif
1521#if defined(ALL_TESTS)
1522 { "[v]addps", bs3CpuInstrX_v_addps, 0 },
1523 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1524#endif
1525 };
1526 Bs3TestInit("bs3-cpu-instr-4");
1527
1528 /*
1529 * Initialize globals.
1530 */
1531 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1532 {
1533 uint32_t fEbx, fEcx, fEdx;
1534 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1535 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1536 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1537 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1538 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1539 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1540 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1541 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1542 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1543 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1544 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1545 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1546 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1547 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1548 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1549 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1550
1551 if (ASMCpuId_EAX(0) >= 7)
1552 {
1553 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1554 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1555 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1556 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1557 }
1558
1559 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1560 {
1561 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1562 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1563 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1564 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1565 }
1566 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1567
1568 /*
1569 * Figure out FPU save/restore method and support for DAZ bit.
1570 */
1571 {
1572 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1573 * alloc/free a context. Replicating the logic in the bs3kit here, though
1574 * doable, runs a risk of not updating this when the other logic is
1575 * changed. */
1576 uint64_t fFlags;
1577 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1578 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1579 if (pExtCtx)
1580 {
1581 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1582 g_enmExtCtxMethod = pExtCtx->enmMethod;
1583 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1584 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1585 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1586 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1587 g_fMxCsrDazSupported = true;
1588 }
1589 else
1590 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1591 }
1592
1593 /*
1594 * Allocate a buffer for testing.
1595 */
1596 g_cbBuf = X86_PAGE_SIZE * 4;
1597 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1598 if (g_pbBuf)
1599 {
1600 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1601 if (g_pbBufAliasAlloc)
1602 {
1603 /*
1604 * Do the tests.
1605 */
1606 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1607#ifdef BS3_SKIPIT_DO_SKIP
1608 bs3CpuInstrX_ShowTallies();
1609#endif
1610 }
1611 else
1612 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1613 }
1614 else
1615 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1616 }
1617
1618 Bs3TestTerm();
1619}
1620
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette