VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104756

Last change on this file since 104756 was 104756, checked in by vboxsync, 9 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: [V]ADDPD and nits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 63.0 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104756 2024-05-22 11:53:58Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code modes, 1 for 32-bit code modes and 2 for
 * everything else.
 *
 * @param   a_bMode     The execution mode (BS3_MODE_XXX).  May be evaluated
 *                      more than once, so avoid side effects. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
/** Maximum length for the names of all SIMD FP exception flags combined
 * (size of the sample string below, terminator included). */
#define BS3_FP_XCPT_NAMES_MAXLEN sizeof(" IE DE ZE OE UE PE ")
/** The max exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MAX 2046
/** The min exponent value for a double-precision floating-point normal. */
#define BS3_FP64_EXP_NORMAL_MIN 1
/** The max fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MAX 0xfffffffffffff
/** The min fraction value for a double-precision floating-point normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN 0

/** Double-precision normal built from the max fraction and max normal exponent. */
#define BS3_FP64_NORMAL_MAX(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
/** Double-precision normal built from the min fraction and min normal exponent. */
#define BS3_FP64_NORMAL_MIN(a_Sign) RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
/** Double-precision zero with the given sign (wraps RTFLOAT64U_INIT_ZERO). */
#define BS3_FP64_ZERO(a_Sign) RTFLOAT64U_INIT_ZERO(a_Sign)
/** Double-precision value from explicit sign, fraction and exponent (wraps RTFLOAT64U_INIT_C). */
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp) RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
/** Double-precision infinity with the given sign (wraps RTFLOAT64U_INIT_INF). */
#define BS3_FP64_INF(a_Sign) RTFLOAT64U_INIT_INF(a_Sign)
/** Double-precision QNaN with the given sign (wraps RTFLOAT64U_INIT_QNAN). */
#define BS3_FP64_QNAN(a_Sign) RTFLOAT64U_INIT_QNAN(a_Sign)
/** Double-precision SNaN with the given sign (wraps RTFLOAT64U_INIT_SNAN). */
#define BS3_FP64_SNAN(a_Sign) RTFLOAT64U_INIT_SNAN(a_Sign)

/*
 * 64-bit floating normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 *
 * NOTE(review): the exponent of BS3_FP64_NORMAL_VAL_3 (0xffe) does not fit in
 *               the 11 bits described above - confirm the intended value.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
#define BS3_FP64_NORMAL_VAL_3(a_Sign) RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0xffe)
82
83
84/*********************************************************************************************************************************
85* Structures and Typedefs *
86*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The values are used as indexes into g_afTypeSupports, so T_MAX must stay
 * last.  T_128BITS and T_256BITS are aliases marking where the 128-bit and
 * 256-bit types start.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID,
    T_MMX,
    T_MMX_SSE,      /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,     /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,    /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    T_128BITS = T_SSE,      /**< Alias for T_SSE. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    T_256BITS = T_AVX_256,  /**< Alias for T_AVX_256. */
    T_AVX2_256,
    T_MAX                   /**< Number of distinct types; array size. */
} BS3CPUINSTRX_INSTRTYPE_T;
115
/** Memory or register rm variant.
 * Values above RM_MEM select a fixed memory operand size, see
 * bs3CpuInstrXMemOpSize. */
enum {
    RM_REG = 0, /**< Register operand. */
    RM_MEM,     /**< Memory operand of the full operand size. */
    RM_MEM8,    /**< Memory operand is 8 bits. Hack for movss and similar. */
    RM_MEM16,   /**< Memory operand is 16 bits. Hack for movss and similar. */
    RM_MEM32,   /**< Memory operand is 32 bits. Hack for movss and similar. */
    RM_MEM64    /**< Memory operand is 64 bits. Hack for movss and similar. */
};
125
/**
 * Execution environment configuration.
 *
 * Applied to a register context by bs3CpuInstr4ConfigReconfigure.
 */
typedef struct BS3CPUINSTR4_CONFIG_T
{
    uint16_t fCr0Mp : 1;            /**< Set CR0.MP. */
    uint16_t fCr0Em : 1;            /**< Set CR0.EM. */
    uint16_t fCr0Ts : 1;            /**< Set CR0.TS. */
    uint16_t fCr4OsFxSR : 1;        /**< Set CR4.OSFXSR. */
    uint16_t fCr4OsXSave : 1;       /**< Set CR4.OSXSAVE. */
    uint16_t fCr4OsXmmExcpt : 1;    /**< Set CR4.OSXMMEEXCPT (only applied when SSE is supported). */
    uint16_t fXcr0Sse : 1;          /**< Set the SSE component bit in XCR0. */
    uint16_t fXcr0Avx : 1;          /**< Set the YMM component bit in XCR0 (only applied when AVX-256 is supported). */
    uint16_t fAligned : 1;          /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
    uint16_t fAlignCheck : 1;       /**< Set both EFLAGS.AC and CR0.AM. */
    uint16_t fMxCsrMM : 1;          /**< AMD only */
    uint8_t bXcptSse;               /**< Presumably the exception expected for SSE instructions - confirm against users. */
    uint8_t bXcptAvx;               /**< Presumably the exception expected for AVX instructions - confirm against users. */
} BS3CPUINSTR4_CONFIG_T;
/** Pointer to an execution environment configuration. */
typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
147
/** State saved by bs3CpuInstr4ConfigReconfigure and put back by
 * bs3CpuInstrXConfigRestore. */
typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
{
    uint32_t uCr0;      /**< Saved CR0 of the register context. */
    uint32_t uCr4;      /**< Saved CR4 of the register context. */
    uint32_t uEfl;      /**< Saved EFLAGS of the register context. */
    uint16_t uFcw;      /**< Saved FCW of the extended context. */
    uint16_t uFsw;      /**< Saved FSW of the extended context. */
    uint32_t uMxCsr;    /**< Saved MXCSR of the extended context. */
} BS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to saved configuration state. */
typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to const saved configuration state. */
typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
160
/**
 * YMM packed double-precision floating-point register.
 *
 * Both views cover the same 32 bytes (checked by the compile-time assertion
 * below).
 * @todo move to x86.h?
 */
typedef union X86YMMFLOATPDREG
{
    /** Packed double-precision floating-point view. */
    RTFLOAT64U ar64[4];
    /** 256-bit integer view. */
    RTUINT256U ymm;
} X86YMMFLOATPDREG;
# ifndef VBOX_FOR_DTRACE_LIB
AssertCompileSize(X86YMMFLOATPDREG, 32);
# endif
/** Pointer to a YMM packed double-precision floating-point register. */
typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
/** Pointer to a const YMM packed double-precision floating-point register. */
typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
179
/**
 * YMM packed single-precision floating-point register.
 *
 * Both views cover the same 32 bytes (checked by the compile-time assertion
 * below).
 * @todo move to x86.h?
 */
typedef union X86YMMFLOATPSREG
{
    /** Packed single-precision floating-point view. */
    RTFLOAT32U ar32[8];
    /** 256-bit integer view. */
    RTUINT256U ymm;
} X86YMMFLOATPSREG;
# ifndef VBOX_FOR_DTRACE_LIB
AssertCompileSize(X86YMMFLOATPSREG, 32);
# endif
/** Pointer to a YMM packed single-precision floating-point register. */
typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
/** Pointer to a const YMM packed single-precision floating-point register. */
typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
198
199/**
200 * YMM scalar quadruple-precision floating-point register.
201 * @todo move to x86.h?
202 */
203typedef union X86YMMFLOATSQREG
204{
205 /** Scalar quadruple-precision floating point view. */
206 RTFLOAT128U ar128[2];
207 /** 256-bit integer view. */
208 RTUINT256U ymm;
209} X86YMMFLOATSQREG;
210# ifndef VBOX_FOR_DTRACE_LIB
211AssertCompileSize(X86YMMFLOATSQREG, 32);
212# endif
213/** Pointer to a YMM scalar quadruple-precision floating-point register. */
214typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
215/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
216typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
217
218
219/*********************************************************************************************************************************
220* Global Variables *
221*********************************************************************************************************************************/
/** Per instruction type support indicators, indexed by T_XXX
 * (BS3CPUINSTRX_INSTRTYPE_T).  Only the first ten entries are spelled out;
 * the rest are implicitly zero (false) initialized. */
static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
/** When false, configurations asking for MXCSR.MM are rejected by
 * bs3CpuInstr4ConfigReconfigure. */
static bool g_fAmdMisalignedSse = false;
/** Extended context method (BS3EXTCTXMETHOD_XXX); starts out invalid. */
static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
/** Presumably whether the MXCSR.DAZ bit is supported - confirm where it is set. */
static bool g_fMxCsrDazSupported = false;
226
/** Single-precision zero value (indexed by fSign). */
RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
/** Double-precision zero value (indexed by fSign). */
RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };

/** Single-precision one value (indexed by fSign): zero fraction, biased exponent. */
RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
                                 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
/** Double-precision one value (indexed by fSign): zero fraction, biased exponent. */
RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
                                 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };

/** Single-precision infinity (indexed by fSign). */
RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
/** Double-precision infinity (indexed by fSign). */
RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };

/** Single-precision default QNaNs (indexed by fSign). */
RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
/** Double-precision default QNaNs (indexed by fSign). */
RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
244
/** Size of g_pbBuf - at least three pages. */
static uint32_t g_cbBuf;
/** Buffer of g_cbBuf size. */
static uint8_t BS3_FAR *g_pbBuf;
/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup:
 * g_pbBufAliasAlloc in paged modes, otherwise the buffer pointer itself. */
static uint8_t BS3_FAR *g_pbBufAlias;
/** Address the RW alias of g_pbBuf is mapped at in paged modes (see
 * bs3CpuInstrXBufSetup). */
static uint8_t BS3_FAR *g_pbBufAliasAlloc;
253
/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned.
 *
 * Each row is a BS3CPUINSTR4_CONFIG_T; the column header inside the table
 * names the fields in initializer order.
 * NOTE(review): X86_XCPT_DB presumably stands for "instruction completed
 * without faulting" - confirm against the test workers. */
static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
{
/*
 *   X87 SSE SSE SSE     AVX      SSE         AVX   AVX  SSE        AVX       AMD/SSE   <-- applies to
 *                                            +AVX       +AVX       +AMD/SSE  +AMD/SSE
 *   CR0 CR0 CR0 CR4     CR4      CR4         XCR0  XCR0                      MXCSR
 *   MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT  SSE,  AVX, fAligned,  AC/AM,    MM,   bXcptSse,    bXcptAvx */
    { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
    { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
    { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
    { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
    { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
    { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
    { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
    { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
    { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
    { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
    /* Memory misalignment and alignment checks: */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #10 */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #11 */
    { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
    /* AMD only: */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
};
280
281
282/**
283 * Returns the name of an X86 exception given the vector.
284 *
285 * @returns Name of the exception.
286 * @param uVector The exception vector.
287 */
288static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
289{
290 switch (uVector)
291 {
292 case X86_XCPT_DE: return "#DE";
293 case X86_XCPT_DB: return "#DB";
294 case X86_XCPT_NMI: return "#NMI";
295 case X86_XCPT_BP: return "#BP";
296 case X86_XCPT_OF: return "#OF";
297 case X86_XCPT_BR: return "#BR";
298 case X86_XCPT_UD: return "#UD";
299 case X86_XCPT_NM: return "#NM";
300 case X86_XCPT_DF: return "#DF";
301 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
302 case X86_XCPT_TS: return "#TS";
303 case X86_XCPT_NP: return "#NP";
304 case X86_XCPT_SS: return "#SS";
305 case X86_XCPT_GP: return "#GP";
306 case X86_XCPT_PF: return "#PF";
307 case X86_XCPT_MF: return "#MF";
308 case X86_XCPT_AC: return "#AC";
309 case X86_XCPT_MC: return "#MC";
310 case X86_XCPT_XF: return "#XF";
311 case X86_XCPT_VE: return "#VE";
312 case X86_XCPT_CP: return "#CP";
313 case X86_XCPT_VC: return "#VC";
314 case X86_XCPT_SX: return "#SX";
315 }
316 return "UNKNOWN";
317}
318
319
320/**
321 * Gets the names of floating-point exception flags that are set for a given MXCSR.
322 *
323 * @returns Names of floating-point exception flags that are set.
324 * @param pszBuf Where to store the floating-point exception flags.
325 * @param cchBuf The size of the buffer.
326 * @param fMxCsr The MXCSR value.
327 */
328static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
329{
330 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
331 return Bs3StrPrintf(pszBuf, cchBuf, " None");
332 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
333 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
334 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
335}
336
337
338/**
339 * Reconfigures the execution environment according to @a pConfig.
340 *
341 * Call bs3CpuInstrXConfigRestore to undo the changes.
342 *
343 * @returns true on success, false if the configuration cannot be applied. In
344 * the latter case, no context changes are made.
345 * @param pSavedCfg Where to save state we modify.
346 * @param pCtx The register context to modify.
347 * @param pExtCtx The extended register context to modify.
348 * @param pConfig The configuration to apply.
349 * @param bMode The target mode.
350 */
351static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
352 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
353{
354 /*
355 * Save context bits we may change here
356 */
357 pSavedCfg->uCr0 = pCtx->cr0.u32;
358 pSavedCfg->uCr4 = pCtx->cr4.u32;
359 pSavedCfg->uEfl = pCtx->rflags.u32;
360 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
361 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
362 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
363
364 /*
365 * Can we make these changes?
366 */
367 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
368 return false;
369
370 /*
371 * Modify the test context.
372 */
373 if (pConfig->fCr0Mp)
374 pCtx->cr0.u32 |= X86_CR0_MP;
375 else
376 pCtx->cr0.u32 &= ~X86_CR0_MP;
377 if (pConfig->fCr0Em)
378 pCtx->cr0.u32 |= X86_CR0_EM;
379 else
380 pCtx->cr0.u32 &= ~X86_CR0_EM;
381 if (pConfig->fCr0Ts)
382 pCtx->cr0.u32 |= X86_CR0_TS;
383 else
384 pCtx->cr0.u32 &= ~X86_CR0_TS;
385
386 if (pConfig->fCr4OsFxSR)
387 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
388 else
389 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
390
391 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
392 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
393 else
394 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
395
396 if (pConfig->fCr4OsFxSR)
397 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
398 else
399 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
400
401 if (pConfig->fCr4OsXSave)
402 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
403 else
404 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
405
406 if (pConfig->fXcr0Sse)
407 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
408 else
409 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
410 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
411 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
412 else
413 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
414
415 if (pConfig->fAlignCheck)
416 {
417 pCtx->rflags.u32 |= X86_EFL_AC;
418 pCtx->cr0.u32 |= X86_CR0_AM;
419 }
420 else
421 {
422 pCtx->rflags.u32 &= ~X86_EFL_AC;
423 pCtx->cr0.u32 &= ~X86_CR0_AM;
424 }
425
426 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
427 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
428
429 if (pConfig->fMxCsrMM)
430 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
431 else
432 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
433 return true;
434}
435
436
437/**
438 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
439 */
440static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
441{
442 pCtx->cr0.u32 = pSavedCfg->uCr0;
443 pCtx->cr4.u32 = pSavedCfg->uCr4;
444 pCtx->rflags.u32 = pSavedCfg->uEfl;
445 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
446 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
447 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
448 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
449}
450
451
452/**
453 * Allocates three extended CPU contexts and initializes the first one
454 * with random data.
455 * @returns First extended context, initialized with randomish data. NULL on
456 * failure (complained).
457 * @param ppExtCtx2 Where to return the 2nd context.
458 */
459static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
460{
461 /* Allocate extended context structures. */
462 uint64_t fFlags;
463 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
464 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
465 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
466 if (pExtCtx1)
467 {
468 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
469 /** @todo populate with semi-random stuff. */
470
471 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
472 *ppExtCtx2 = pExtCtx2;
473 return pExtCtx1;
474 }
475 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
476 *ppExtCtx2 = NULL;
477 return NULL;
478}
479
480
481/**
482 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
483 *
484 * @param pExtCtx1 The first extended context.
485 * @param pExtCtx2 The second extended context.
486 */
487static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
488{
489 RT_NOREF_PV(pExtCtx2);
490 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
491}
492
493
494/**
495 * Sets up SSE and AVX bits relevant for FPU instructions.
496 */
497static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
498{
499 /* CR0: */
500 uint32_t cr0 = Bs3RegGetCr0();
501 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
502 cr0 |= X86_CR0_NE;
503 Bs3RegSetCr0(cr0);
504
505 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
506 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
507 pCtx->cr0.u32 |= X86_CR0_NE;
508
509 /* CR4: */
510 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
511 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
512 {
513 uint32_t cr4 = Bs3RegGetCr4();
514 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
515 {
516 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
517 Bs3RegSetCr4(cr4);
518 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
519 }
520 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
521 {
522 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
523 Bs3RegSetCr4(cr4);
524 }
525 pCtx->cr4.u32 = cr4;
526 }
527}
528
529
530/**
531 * Configures the buffer with electric fences in paged modes.
532 *
533 * @returns Adjusted buffer pointer.
534 * @param pbBuf The buffer pointer.
535 * @param pcbBuf Pointer to the buffer size (input & output).
536 * @param bMode The testing target mode.
537 */
538DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
539{
540 if (BS3_MODE_IS_PAGED(bMode))
541 {
542 int rc;
543 uint32_t cbBuf = *pcbBuf;
544 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
545 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
546 pbBuf += X86_PAGE_SIZE;
547 cbBuf -= X86_PAGE_SIZE * 2;
548 *pcbBuf = cbBuf;
549
550 g_pbBufAlias = g_pbBufAliasAlloc;
551 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
552 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
553 if (RT_FAILURE(rc))
554 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
555 }
556 else
557 g_pbBufAlias = pbBuf;
558 return pbBuf;
559}
560
561
562/**
563 * Undoes what bs3CpuInstrXBufSetup did.
564 *
565 * @param pbBuf The buffer pointer.
566 * @param cbBuf The buffer size.
567 * @param bMode The testing target mode.
568 */
569DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
570{
571 if (BS3_MODE_IS_PAGED(bMode))
572 {
573 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
574 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
575 }
576}
577
578
579/**
580 * Gets a buffer of a @a cbMemOp sized operand according to the given
581 * configuration and alignment restrictions.
582 *
583 * @returns Pointer to the buffer.
584 * @param pbBuf The buffer pointer.
585 * @param cbBuf The buffer size.
586 * @param cbMemOp The operand size.
587 * @param cbAlign The operand alignment restriction.
588 * @param pConfig The configuration.
589 * @param fPageFault The \#PF test setting.
590 */
591DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
592 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
593{
594 /* All allocations are at the tail end of the buffer, so that we've got a
595 guard page following the operand. When asked to consistenly trigger
596 a #PF, we slide the buffer into that guard page. */
597 if (fPageFault)
598 cbBuf += X86_PAGE_SIZE;
599
600 if (pConfig->fAligned)
601 {
602 if (!pConfig->fAlignCheck)
603 return &pbBuf[cbBuf - cbMemOp];
604 return &pbBuf[cbBuf - cbMemOp - cbAlign];
605 }
606 return &pbBuf[cbBuf - cbMemOp - 1];
607}
608
609
610/**
611 * Determins the size of memory operands.
612 */
613DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
614{
615 if (enmRm <= RM_MEM)
616 return cbOperand;
617 if (enmRm == RM_MEM8)
618 return sizeof(uint8_t);
619 if (enmRm == RM_MEM16)
620 return sizeof(uint16_t);
621 if (enmRm == RM_MEM32)
622 return sizeof(uint32_t);
623 if (enmRm == RM_MEM64)
624 return sizeof(uint64_t);
625 BS3_ASSERT(0);
626 return cbOperand;
627}
628
629
630/*
631 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
632 * skips a large fraction of the micro-tests. It is sufficiently random
633 * that over a large number of runs, all micro-tests will be hit.
634 *
635 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
636 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
637 * (on an Intel Core i7-10700, fwiw).
638 *
639 * To activate this 'developer's speed-testing mode', turn on
640 * `#define BS3_SKIPIT_DO_SKIP' here.
641 *
642 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
643 * skipped in a row; e.g. the default of 26 means about every 27th
644 * micro-test is run during a particular test run. (This is not 27x
645 * faster due to other activities which are not skipped!) Note this is
646 * only an average; the actual skips are random.
647 *
648 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
649 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
650 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
651 * 3% performance).
652 *
653 * Note! The skipping is not compatible with testing the native recompiler as
654 * it requires the test code to be run a number of times before it kicks
655 * in and does the native recompilation (currently around 16 times).
656 */
/** Average number of micro-tests skipped in a row (see the description above). */
#define BS3_SKIPIT_AVG_SKIP 26
/** A tally line is printed every time this many micro-tests have been seen. */
#define BS3_SKIPIT_REPORT_COUNT 150000
/* Skipping is off by default; change to a #define to enable it. */
#undef BS3_SKIPIT_DO_SKIP
/* Passing the test coordinates to bs3CpuInstrX_SkipIt() is off by default. */
#undef BS3_SKIPIT_DO_ARGS
661
662#ifndef BS3_SKIPIT_DO_SKIP
663# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
664#else
665# include <iprt/asm-amd64-x86.h>
666# include <iprt/asm-math.h>
667
668DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
669{
670 /*
671 * A simple Lehmer linear congruential pseudo-random number
672 * generator using the constants suggested by Park & Miller:
673 *
674 * modulus = 2^31 - 1 (INT32_MAX)
675 * multiplier = 7^5 (16807)
676 *
677 * It produces numbers in the range [1..INT32_MAX-1] and is
678 * more chaotic in the higher bits.
679 *
680 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
681 * though the zero handling is different.
682 */
683 static uint32_t s_uSeedMemory = 0;
684 uint32_t uVal = s_uSeedMemory;
685 if (!uVal)
686 uVal = (uint32_t)ASMReadTSC();
687 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
688 s_uSeedMemory = uVal;
689 return uVal;
690}
691
/** Number of micro-tests seen and number of those skipped, maintained by
 * bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen, g_cSkipped;

/**
 * Prints the micro-test tallies: total seen, actually tested and skipped.
 *
 * The counters are unsigned, so they are printed with unsigned conversions.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    Bs3TestPrintf("Micro-tests %u: tested %u / skipped %u\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}
698
699# ifdef BS3_SKIPIT_DO_ARGS
700# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
701static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
702# else
703# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
704static bool bs3CpuInstrX_SkipIt(void)
705# endif
706{
707 static unsigned s_uTimes = 0;
708 bool fSkip;
709
710 /* Cache calls to the relatively expensive random routine */
711 if (!s_uTimes)
712 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
713 fSkip = --s_uTimes > 0;
714 if (fSkip)
715 ++g_cSkipped;
716
717 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
718 bs3CpuInstrX_ShowTallies();
719 return fSkip;
720}
721
722#endif /* BS3_SKIPIT_DO_SKIP */
723
/**
 * Test type \#1.
 * Generic YMM registers.
 *
 * The PS, PD and SQ variants of this structure are asserted to have the same
 * size and member layout as this one.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_T
{
    X86YMMREG uSrc2;                /**< Second source operand. */
    X86YMMREG uSrc1;                /**< uDstIn for SSE */
    X86YMMREG uDstOut;              /**< Destination output. */
    uint32_t fMxCsrMask;            /**< MXCSR exception mask to use. */
    bool fDenormalsAreZero;         /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool fFlushToZero;              /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t fRoundingCtlMask;      /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t fExpectedMxCsrFlags;   /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_T;
739
/**
 * Test type \#1.
 * Packed single-precision.
 *
 * Must stay layout compatible with BS3CPUINSTR4_TEST1_VALUES_T; the
 * compile-time assertions below check the size and every member.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
{
    X86YMMFLOATPSREG uSrc2;         /**< Second source operand. */
    X86YMMFLOATPSREG uSrc1;         /**< uDstIn for SSE */
    X86YMMFLOATPSREG uDstOut;       /**< Destination output. */
    uint32_t fMxCsrMask;            /**< MXCSR exception mask to use. */
    bool fDenormalsAreZero;         /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool fFlushToZero;              /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t fRoundingCtlMask;      /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t fExpectedMxCsrFlags;   /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_PS_T;
AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
764
/**
 * Test type \#1.
 * Packed double-precision.
 *
 * Must stay layout compatible with BS3CPUINSTR4_TEST1_VALUES_T; the
 * compile-time assertions below check the size and every member.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
{
    X86YMMFLOATPDREG uSrc2;         /**< Second source operand. */
    X86YMMFLOATPDREG uSrc1;         /**< uDstIn for SSE */
    X86YMMFLOATPDREG uDstOut;       /**< Destination output. */
    uint32_t fMxCsrMask;            /**< MXCSR exception mask to use. */
    bool fDenormalsAreZero;         /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool fFlushToZero;              /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t fRoundingCtlMask;      /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t fExpectedMxCsrFlags;   /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_PD_T;
AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
789
/**
 * Test type \#1.
 * Scalar quadruple-precision.
 *
 * Must stay layout compatible with BS3CPUINSTR4_TEST1_VALUES_T; the
 * compile-time assertions below check the size and every member.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
{
    X86YMMFLOATSQREG uSrc2;         /**< Second source operand. */
    X86YMMFLOATSQREG uSrc1;         /**< uDstIn for SSE */
    X86YMMFLOATSQREG uDstOut;       /**< Destination output. */
    uint32_t fMxCsrMask;            /**< MXCSR exception mask to use. */
    bool fDenormalsAreZero;         /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool fFlushToZero;              /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t fRoundingCtlMask;      /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t fExpectedMxCsrFlags;   /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
814
815typedef struct BS3CPUINSTR4_TEST1_T
816{
817 FPFNBS3FAR pfnWorker; /**< Test function worker. */
818 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
819 uint8_t enmRm; /**< R/M type. */
820 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
821 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
822 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
823 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
824 uint8_t cValues; /**< Number of test values in @c paValues. */
825 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
826} BS3CPUINSTR4_TEST1_T;
827
828typedef struct BS3CPUINSTR4_TEST1_MODE_T
829{
830 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
831 unsigned cTests;
832} BS3CPUINSTR4_TEST1_MODE_T;
833
/**
 * Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries).
 *
 * Each argument must be a real array (not a pointer), since the entry count
 * is taken with RT_ELEMENTS.  The entries are emitted in argument order.
 */
#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
    { \
        { a_aTests16, RT_ELEMENTS(a_aTests16) }, \
        { a_aTests32, RT_ELEMENTS(a_aTests32) }, \
        { a_aTests64, RT_ELEMENTS(a_aTests64) } \
    }
837
838typedef struct BS3CPUINSTR4_TEST1_CTX_T
839{
840 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig;
841 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest;
842 unsigned iVal;
843 const char BS3_FAR *pszMode;
844 PBS3TRAPFRAME pTrapFrame;
845 PBS3REGCTX pCtx;
846 PBS3EXTCTX pExtCtx;
847 PBS3EXTCTX pExtCtxOut;
848 uint8_t BS3_FAR *puMemOp;
849 uint8_t BS3_FAR *puMemOpAlias;
850 uint8_t cbMemOp;
851 uint8_t cbOperand;
852 uint8_t cbInstr;
853 uint8_t bXcptExpect;
854 bool fSseInstr;
855 bool fAvxInstr;
856 uint16_t idTestStep;
857} BS3CPUINSTR4_TEST1_CTX_T;
858/** Pointer to a test 1 context. */
859typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
860
861
862/**
863 * Worker for bs3CpuInstrX_WorkerTestType1.
864 */
865static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
866 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
867{
868 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
869 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
870 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
871 PBS3REGCTX pCtx = pTestCtx->pCtx;
872 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
873 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
874 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
875 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
876 uint8_t cbMemOp = pTestCtx->cbMemOp;
877 uint8_t const cbOperand = pTestCtx->cbOperand;
878 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
879 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
880 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
881 bool const fFpFlagsExpect = RT_BOOL( (pValues->fExpectedMxCsrFlags
882 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
883 uint32_t uMxCsr;
884 X86YMMREG MemOpExpect;
885 uint16_t cErrors;
886
887 /*
888 * Set up the context and some expectations.
889 */
890 /* Destination. */
891 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
892 if (pTest->iRegDst == UINT8_MAX)
893 {
894 BS3_ASSERT(pTest->enmRm >= RM_MEM);
895 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
896 if (bXcptExpect == X86_XCPT_DB)
897 MemOpExpect.ymm = pValues->uDstOut.ymm;
898 else
899 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
900 }
901
902 /* Source #1 (/ destination for SSE). */
903 if (pTest->iRegSrc1 == UINT8_MAX)
904 {
905 BS3_ASSERT(pTest->enmRm >= RM_MEM);
906 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
907 if (pTest->iRegDst == UINT8_MAX)
908 BS3_ASSERT(pTestCtx->fSseInstr);
909 else
910 MemOpExpect.ymm = pValues->uSrc1.ymm;
911 }
912 else if (pTestCtx->fSseInstr)
913 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
914 else
915 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
916
917 /* Source #2. */
918 if (pTest->iRegSrc2 == UINT8_MAX)
919 {
920 BS3_ASSERT(pTest->enmRm >= RM_MEM);
921 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
922 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
923 MemOpExpect.ymm = pValues->uSrc2.ymm;
924 }
925 else if (pTestCtx->fSseInstr)
926 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
927 else
928 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
929
930 /* Memory pointer. */
931 if (pTest->enmRm >= RM_MEM)
932 {
933 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
934 || pTest->iRegSrc1 == UINT8_MAX
935 || pTest->iRegSrc2 == UINT8_MAX);
936 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
937 }
938
939 /* Setup MXCSR for the current test. */
940 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
941 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
942 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
943 if ( pValues->fDenormalsAreZero
944 && g_fMxCsrDazSupported)
945 uMxCsr |= X86_MXCSR_DAZ;
946 if (pValues->fFlushToZero)
947 uMxCsr |= X86_MXCSR_FZ;
948 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
949
950 /*
951 * Prepare globals and execute.
952 */
953 g_uBs3TrapEipHint = pCtx->rip.u32;
954 if ( bXcptExpect == X86_XCPT_DB
955 && !fFpFlagsExpect)
956 g_uBs3TrapEipHint += cbInstr + 1;
957 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
958
959 /*
960 * Check the result.
961 */
962 cErrors = Bs3TestSubErrorCount();
963 if ( bXcptExpect == X86_XCPT_DB
964 && pTest->iRegDst != UINT8_MAX)
965 {
966 if (pTestCtx->fSseInstr)
967 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
968 else
969 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
970 }
971#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
972 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
973 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
974 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
975 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
976#endif
977 if (bXcptExpect == X86_XCPT_DB)
978 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
979 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
980 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
981
982 if (bXcptExpect == X86_XCPT_DB)
983 {
984 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
985
986 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
987 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
988 {
989 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
990 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
991 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
992 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
993 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
994 }
995
996 /* Check if the SIMD FP exception (or lack of) is as expected. */
997 if (fFpFlagsExpect)
998 {
999 if (pTrapFrame->bXcpt == bFpXcpt)
1000 { /* likely */ }
1001 else
1002 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1003 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1004 }
1005 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1006 { /* likely */ }
1007 else
1008 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1009 }
1010 /* Check if non-FP exception is as expected. */
1011 else if (pTrapFrame->bXcpt != bXcptExpect)
1012 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1013
1014 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1015 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1016 {
1017 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1018 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1019 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1020 }
1021 if (bXcptExpect == X86_XCPT_PF)
1022 pCtx->cr2.u = (uintptr_t)puMemOp;
1023 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1024 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1025 pTestCtx->pszMode, pTestCtx->idTestStep);
1026 pCtx->cr2.u = 0;
1027
1028 if ( pTest->enmRm >= RM_MEM
1029 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1030 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1031
1032 return cErrors;
1033}
1034
1035
1036/**
1037 * Test type #1 worker.
1038 */
1039static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1040 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1041{
1042 BS3REGCTX Ctx;
1043 BS3TRAPFRAME TrapFrame;
1044 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1045 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1046 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1047 uint32_t cbBuf = g_cbBuf;
1048 PBS3EXTCTX pExtCtxOut;
1049 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1050 if (pExtCtx)
1051 { /* likely */ }
1052 else
1053 return 0;
1054 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1055 { /* likely */ }
1056 else
1057 {
1058 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1059 return 0;
1060 }
1061
1062 /* Ensure the structures are allocated before we sample the stack pointer. */
1063 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1064 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1065
1066 /*
1067 * Create test context.
1068 */
1069 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1070 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1071 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1072
1073 /*
1074 * Run the tests in all rings since alignment issues may behave
1075 * differently in ring-3 compared to ring-0.
1076 */
1077 for (;;)
1078 {
1079 unsigned fPf = 0;
1080 do
1081 {
1082 unsigned iCfg;
1083 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1084 {
1085 unsigned iTest;
1086 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1087 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1088 continue; /* unsupported config */
1089
1090 /*
1091 * Iterate the tests.
1092 */
1093 for (iTest = 0; iTest < cTests; iTest++)
1094 {
1095 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1096 unsigned const cValues = pTest->cValues;
1097 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1098 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1099 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1100 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1101 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1102 uint8_t const cbAlign = cbMemOp;
1103 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1104 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1105 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1106 : fSseInstr ? paConfigs[iCfg].bXcptSse
1107 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1108 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1109 unsigned cRecompRuns = 0;
1110 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1111 unsigned iVal;
1112
1113 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1114 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1115 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1116 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1117 continue;
1118
1119 /* #AC is only raised in ring-3. */
1120 if (bXcptExpect == X86_XCPT_AC)
1121 {
1122 if (bRing != 3)
1123 bXcptExpect = X86_XCPT_DB;
1124 else if (fAvxInstr)
1125 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1126 }
1127
1128 if (fPf && bXcptExpect == X86_XCPT_DB)
1129 bXcptExpect = X86_XCPT_PF;
1130
1131 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1132
1133 /*
1134 * Iterate the test values and do the actual testing.
1135 */
1136 while (cRecompRuns < cMaxRecompRuns)
1137 {
1138 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1139 {
1140 uint16_t cErrors;
1141 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1142 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1143 continue;
1144
1145 /*
1146 * Setup the test instruction context and pass it to the worker.
1147 * A few of these can be figured out by the worker but initializing
1148 * it outside the inner most loop is more optimal.
1149 */
1150 TestCtx.pConfig = &paConfigs[iCfg];
1151 TestCtx.pTest = pTest;
1152 TestCtx.iVal = iVal;
1153 TestCtx.pszMode = pszMode;
1154 TestCtx.pTrapFrame = &TrapFrame;
1155 TestCtx.pCtx = &Ctx;
1156 TestCtx.pExtCtx = pExtCtx;
1157 TestCtx.pExtCtxOut = pExtCtxOut;
1158 TestCtx.puMemOp = (uint8_t *)puMemOp;
1159 TestCtx.puMemOpAlias = puMemOpAlias;
1160 TestCtx.cbMemOp = cbMemOp;
1161 TestCtx.cbOperand = cbOperand;
1162 TestCtx.bXcptExpect = bXcptExpect;
1163 TestCtx.fSseInstr = fSseInstr;
1164 TestCtx.fAvxInstr = fAvxInstr;
1165 TestCtx.idTestStep = idTestStep;
1166 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1167 if (cErrors != Bs3TestSubErrorCount())
1168 {
1169 if (paConfigs[iCfg].fAligned)
1170 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1171 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1172 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1173 else
1174 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1175 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1176 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1177 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1178 Bs3TestPrintf("\n");
1179 }
1180 }
1181 }
1182 }
1183 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1184 }
1185 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1186
1187 /*
1188 * Next ring.
1189 */
1190 bRing++;
1191 if (bRing > 3 || bMode == BS3_MODE_RM)
1192 break;
1193 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1194 }
1195
1196 /*
1197 * Cleanup.
1198 */
1199 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1200 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1201 return 0;
1202}
1203
1204
1205/*
1206 * [V]ADDPD.
1207 */
1208BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1209{
1210 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1211 {
1212 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1213 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1214 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1215 /*mask */ X86_MXCSR_XCPT_MASK,
1216 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1217 /*flags */ 0 },
1218 /* 1*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1219 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1220 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1221 /*mask */ ~X86_MXCSR_XCPT_MASK,
1222 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1223 /*flags */ 0 },
1224 /* 2*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1225 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1226 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1227 /*mask */ ~X86_MXCSR_IM,
1228 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1229 /*flags */ X86_MXCSR_IE },
1230 /* 3*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1231 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1232 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1233 /*mask */ X86_MXCSR_IM,
1234 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1235 /*flags */ X86_MXCSR_IE },
1236 /* 4*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1237 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1238 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1239 /*mask */ ~X86_MXCSR_XCPT_MASK,
1240 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1241 /*flags */ 0 },
1242 /* 5*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1243 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_2(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1244 { /* => */ { BS3_FP64_INF(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1245 /*mask */ ~X86_MXCSR_OE,
1246 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1247 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1248 /* 6*/{ { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1249 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1250 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1251 /*mask */ X86_MXCSR_XCPT_MASK,
1252 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1253 /*flags */ 0 },
1254 /* 7*/{ { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1255 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1256 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1257 /*mask */ ~X86_MXCSR_XCPT_MASK,
1258 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1259 /*flags */ 0 },
1260 };
1261
1262 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1263 {
1264 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1265 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1266 };
1267 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1268 {
1269 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1270 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1271 };
1272 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1273 {
1274 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1275 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1276 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1277 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1278 };
1279
1280 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1281 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1282 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1283 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1284}
1285
1286
1287/**
1288 * The 32-bit protected mode main function.
1289 *
1290 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1291 * we'll switch between PE32 and RM for each test step we perform). Given that
1292 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1293 *
1294 * Some extra steps needs to be taken to properly handle extended state in LM64
1295 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1296 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1297 */
1298BS3_DECL(void) Main_pe32()
1299{
1300 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1301 {
1302#if 1 /*ndef DEBUG_bird*/
1303# define ALL_TESTS
1304#endif
1305#if defined(ALL_TESTS)
1306 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1307#endif
1308 };
1309 Bs3TestInit("bs3-cpu-instr-4");
1310
1311 /*
1312 * Initialize globals.
1313 */
1314 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1315 {
1316 uint32_t fEbx, fEcx, fEdx;
1317 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1318 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1319 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1320 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1321 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1322 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1323 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1324 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1325 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1326 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1327 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1328 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1329 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1330 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1331 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1332 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1333
1334 if (ASMCpuId_EAX(0) >= 7)
1335 {
1336 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1337 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1338 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1339 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1340 }
1341
1342 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1343 {
1344 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1345 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1346 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1347 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1348 }
1349 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1350
1351 /*
1352 * Figure out FPU save/restore method and support for DAZ bit.
1353 */
1354 {
1355 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1356 * alloc/free a context. Replicating the logic in the bs3kit here, though
1357 * doable, runs a risk of not updating this when the other logic is
1358 * changed. */
1359 uint64_t fFlags;
1360 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1361 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1362 if (pExtCtx)
1363 {
1364 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1365 g_enmExtCtxMethod = pExtCtx->enmMethod;
1366 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1367 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1368 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1369 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1370 g_fMxCsrDazSupported = true;
1371 }
1372 else
1373 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1374 }
1375
1376 /*
1377 * Allocate a buffer for testing.
1378 */
1379 g_cbBuf = X86_PAGE_SIZE * 4;
1380 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1381 if (g_pbBuf)
1382 {
1383 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1384 if (g_pbBufAliasAlloc)
1385 {
1386 /*
1387 * Do the tests.
1388 */
1389 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1390#ifdef BS3_SKIPIT_DO_SKIP
1391 bs3CpuInstrX_ShowTallies();
1392#endif
1393 }
1394 else
1395 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1396 }
1397 else
1398 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1399 }
1400
1401 Bs3TestTerm();
1402}
1403
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette