
source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@104724

Last change on this file: r104724, checked in by vboxsync on 2024-05-20

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: Preparation for using the same test worker and inner worker code for other floating-point type variants.

1/* $Id: bs3-cpu-instr-4.c32 104724 2024-05-20 08:06:05Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
51/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
52 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc. */
53#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
55/** Maximum length for the names of all SIMD FP exception flags combined. */
56#define BS3_FP_XCPT_NAMES_MAXLEN sizeof(" IE DE ZE OE UE PE ")
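
/*
 * Usage sketch (illustrative only; it mirrors the pattern used by the test
 * drivers further down, e.g. bs3CpuInstrX_v_addpd):
 *
 *      static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3]
 *          = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
 *      unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
 *      ...use s_aTests[iTest].paTests and s_aTests[iTest].cTests
 *         (index 0 = 16-bit code, 1 = 32-bit code, 2 = 64-bit code)...
 */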
57
58
59/*********************************************************************************************************************************
60* Structures and Typedefs *
61*********************************************************************************************************************************/
62/** Instruction set type and operand width. */
63typedef enum BS3CPUINSTRX_INSTRTYPE_T
64{
65 T_INVALID,
66 T_MMX,
 67 T_MMX_SSE, /**< MMX instruction, but requires the SSE CPUID to work. */
 68 T_MMX_SSE2, /**< MMX instruction, but requires the SSE2 CPUID to work. */
 69 T_MMX_SSSE3, /**< MMX instruction, but requires the SSSE3 CPUID to work. */
70 T_AXMMX,
71 T_AXMMX_OR_SSE,
72 T_SSE,
73 T_128BITS = T_SSE,
74 T_SSE2,
75 T_SSE3,
76 T_SSSE3,
77 T_SSE4_1,
78 T_SSE4_2,
79 T_SSE4A,
80 T_PCLMUL,
81 T_SHA,
82 T_AVX_128,
83 T_AVX2_128,
84 T_AVX_PCLMUL,
85 T_AVX_256,
86 T_256BITS = T_AVX_256,
87 T_AVX2_256,
88 T_MAX
89} BS3CPUINSTRX_INSTRTYPE_T;
90
91/** Memory or register rm variant. */
92enum {
93 RM_REG = 0,
94 RM_MEM,
95 RM_MEM8, /**< Memory operand is 8 bits. Hack for movss and similar. */
96 RM_MEM16, /**< Memory operand is 16 bits. Hack for movss and similar. */
97 RM_MEM32, /**< Memory operand is 32 bits. Hack for movss and similar. */
98 RM_MEM64 /**< Memory operand is 64 bits. Hack for movss and similar. */
99};
100
101/**
102 * Execution environment configuration.
103 */
104typedef struct BS3CPUINSTR4_CONFIG_T
105{
106 uint16_t fCr0Mp : 1;
107 uint16_t fCr0Em : 1;
108 uint16_t fCr0Ts : 1;
109 uint16_t fCr4OsFxSR : 1;
110 uint16_t fCr4OsXSave : 1;
111 uint16_t fCr4OsXmmExcpt : 1;
112 uint16_t fXcr0Sse : 1;
113 uint16_t fXcr0Avx : 1;
114 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
115 uint16_t fAlignCheck : 1;
116 uint16_t fMxCsrMM : 1; /**< AMD only */
117 uint8_t bXcptSse;
118 uint8_t bXcptAvx;
119} BS3CPUINSTR4_CONFIG_T;
120/** Pointer to an execution environment configuration. */
121typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
122
123/** State saved by bs3CpuInstr4ConfigReconfigure. */
124typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
125{
126 uint32_t uCr0;
127 uint32_t uCr4;
128 uint32_t uEfl;
129 uint16_t uFcw;
130 uint16_t uFsw;
131 uint32_t uMxCsr;
132} BS3CPUINSTRX_CONFIG_SAVED_T;
133typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
134typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
135
136/**
137 * YMM packed double-precision floating-point register.
138 * @todo move to x86.h?
139 */
140typedef union X86YMMFLOATPDREG
141{
142 /** Packed double-precision floating-point view. */
143 RTFLOAT64U ar64[4];
144 /** 256-bit integer view. */
145 RTUINT256U ymm;
146} X86YMMFLOATPDREG;
147# ifndef VBOX_FOR_DTRACE_LIB
148AssertCompileSize(X86YMMFLOATPDREG, 32);
149# endif
150/** Pointer to a YMM packed floating-point register. */
151typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
152/** Pointer to a const YMM packed floating-point register. */
153typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
154
155/**
156 * YMM packed single-precision floating-point register.
157 * @todo move to x86.h?
158 */
159typedef union X86YMMFLOATPSREG
160{
161 /** Packed single-precision floating-point view. */
162 RTFLOAT32U ar32[8];
163 /** 256-bit integer view. */
164 RTUINT256U ymm;
165} X86YMMFLOATPSREG;
166# ifndef VBOX_FOR_DTRACE_LIB
167AssertCompileSize(X86YMMFLOATPSREG, 32);
168# endif
169/** Pointer to a YMM packed single-precision floating-point register. */
170typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
171/** Pointer to a const YMM single-precision packed floating-point register. */
172typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
173
174/**
175 * YMM scalar quadruple-precision floating-point register.
176 * @todo move to x86.h?
177 */
178typedef union X86YMMFLOATSQREG
179{
180 /** Scalar quadruple-precision floating point view. */
181 RTFLOAT128U ar128[2];
182 /** 256-bit integer view. */
183 RTUINT256U ymm;
184} X86YMMFLOATSQREG;
185# ifndef VBOX_FOR_DTRACE_LIB
186AssertCompileSize(X86YMMFLOATSQREG, 32);
187# endif
188/** Pointer to a YMM scalar quadruple-precision floating-point register. */
189typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
190/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
191typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
192
193
194/*********************************************************************************************************************************
195* Global Variables *
196*********************************************************************************************************************************/
197static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
198static bool g_fAmdMisalignedSse = false;
199static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
200static bool g_fMxCsrDazSupported = false;
201
202/** Zero value (indexed by fSign). */
203RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
204RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
205
206/** One value (indexed by fSign). */
207RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
208 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
209RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
210 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
211
212/** Infinity (indexed by fSign). */
213RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
214RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
215
216/** Default QNaNs (indexed by fSign). */
217RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
218RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
219
220/** Size of g_pbBuf - at least three pages. */
221static uint32_t g_cbBuf;
222/** Buffer of g_cbBuf size. */
223static uint8_t BS3_FAR *g_pbBuf;
224/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
225static uint8_t BS3_FAR *g_pbBufAlias;
226/** The tiled allocation backing g_pbBufAlias in paged modes (see bs3CpuInstrXBufSetup). */
227static uint8_t BS3_FAR *g_pbBufAliasAlloc;
228
229/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
230static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
231{
232/*
233 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
234 * +AVX +AVX +AMD/SSE +AMD/SSE
235 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
236 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
237 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
238 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
239 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
240 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
241 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
242 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
243 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
244 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
245 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
246 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
247 /* Memory misalignment and alignment checks: */
248 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #10 */
249 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #11 */
250 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
251 /* AMD only: */
252 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
253 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
254};
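
/*
 * Reading the table: each row initializes a BS3CPUINSTR4_CONFIG_T in field
 * declaration order, i.e. fCr0Mp, fCr0Em, fCr0Ts, fCr4OsFxSR, fCr4OsXSave,
 * fCr4OsXmmExcpt, fXcr0Sse, fXcr0Avx, fAligned, fAlignCheck, fMxCsrMM,
 * bXcptSse, bXcptAvx.  For example, row #3 sets CR0.EM, so the legacy SSE
 * encoding is expected to raise #UD while the VEX encoding still executes;
 * X86_XCPT_DB here means "instruction completed normally", since the
 * instruction workers end with an icebp.
 */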
255
256
257/**
258 * Returns the name of an X86 exception given the vector.
259 *
260 * @returns Name of the exception.
261 * @param uVector The exception vector.
262 */
263static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
264{
265 switch (uVector)
266 {
267 case X86_XCPT_DE: return "#DE";
268 case X86_XCPT_DB: return "#DB";
269 case X86_XCPT_NMI: return "#NMI";
270 case X86_XCPT_BP: return "#BP";
271 case X86_XCPT_OF: return "#OF";
272 case X86_XCPT_BR: return "#BR";
273 case X86_XCPT_UD: return "#UD";
274 case X86_XCPT_NM: return "#NM";
275 case X86_XCPT_DF: return "#DF";
276 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
277 case X86_XCPT_TS: return "#TS";
278 case X86_XCPT_NP: return "#NP";
279 case X86_XCPT_SS: return "#SS";
280 case X86_XCPT_GP: return "#GP";
281 case X86_XCPT_PF: return "#PF";
282 case X86_XCPT_MF: return "#MF";
283 case X86_XCPT_AC: return "#AC";
284 case X86_XCPT_MC: return "#MC";
285 case X86_XCPT_XF: return "#XF";
286 case X86_XCPT_VE: return "#VE";
287 case X86_XCPT_CP: return "#CP";
288 case X86_XCPT_VC: return "#VC";
289 case X86_XCPT_SX: return "#SX";
290 }
291 return "UNKNOWN";
292}
293
294
295/**
296 * Gets the names of floating-point exception flags that are set for a given MXCSR.
297 *
298 * @returns Names of floating-point exception flags that are set.
299 * @param pszBuf Where to store the floating-point exception flags.
300 * @param cchBuf The size of the buffer.
301 * @param fMxCsr The MXCSR value.
302 */
303static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
304{
305 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
306 return Bs3StrPrintf(pszBuf, cchBuf, " None");
307 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
308 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
309 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
310}
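
/*
 * Usage sketch (illustrative; this is how the type #1 worker further down
 * reports SIMD FP flag mismatches):
 *
 *      char szFlags[BS3_FP_XCPT_NAMES_MAXLEN];
 *      bs3CpuInstr4GetXcptFlags(szFlags, sizeof(szFlags), X86_MXCSR_IE | X86_MXCSR_PE);
 *
 * szFlags now holds " IE PE"; an MXCSR without any exception flags set yields " None".
 */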
311
312
313/**
314 * Reconfigures the execution environment according to @a pConfig.
315 *
316 * Call bs3CpuInstrXConfigRestore to undo the changes.
317 *
318 * @returns true on success, false if the configuration cannot be applied. In
319 * the latter case, no context changes are made.
320 * @param pSavedCfg Where to save state we modify.
321 * @param pCtx The register context to modify.
322 * @param pExtCtx The extended register context to modify.
323 * @param pConfig The configuration to apply.
324 * @param bMode The target mode.
325 */
326static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
327 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
328{
329 /*
330 * Save context bits we may change here
331 */
332 pSavedCfg->uCr0 = pCtx->cr0.u32;
333 pSavedCfg->uCr4 = pCtx->cr4.u32;
334 pSavedCfg->uEfl = pCtx->rflags.u32;
335 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
336 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
337 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
338
339 /*
340 * Can we make these changes?
341 */
342 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
343 return false;
344
345 /*
346 * Modify the test context.
347 */
348 if (pConfig->fCr0Mp)
349 pCtx->cr0.u32 |= X86_CR0_MP;
350 else
351 pCtx->cr0.u32 &= ~X86_CR0_MP;
352 if (pConfig->fCr0Em)
353 pCtx->cr0.u32 |= X86_CR0_EM;
354 else
355 pCtx->cr0.u32 &= ~X86_CR0_EM;
356 if (pConfig->fCr0Ts)
357 pCtx->cr0.u32 |= X86_CR0_TS;
358 else
359 pCtx->cr0.u32 &= ~X86_CR0_TS;
360
361 if (pConfig->fCr4OsFxSR)
362 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
363 else
364 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
365
366 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
367 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
368 else
369 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
370
375
376 if (pConfig->fCr4OsXSave)
377 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
378 else
379 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
380
381 if (pConfig->fXcr0Sse)
382 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
383 else
384 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
385 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
386 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
387 else
388 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
389
390 if (pConfig->fAlignCheck)
391 {
392 pCtx->rflags.u32 |= X86_EFL_AC;
393 pCtx->cr0.u32 |= X86_CR0_AM;
394 }
395 else
396 {
397 pCtx->rflags.u32 &= ~X86_EFL_AC;
398 pCtx->cr0.u32 &= ~X86_CR0_AM;
399 }
400
401 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
402 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
403
404 if (pConfig->fMxCsrMM)
405 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
406 else
407 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
408 return true;
409}
410
411
412/**
413 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
414 */
415static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
416{
417 pCtx->cr0.u32 = pSavedCfg->uCr0;
418 pCtx->cr4.u32 = pSavedCfg->uCr4;
419 pCtx->rflags.u32 = pSavedCfg->uEfl;
420 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
421 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
422 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
423 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
424}
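
/*
 * Pairing sketch (illustrative): a successful bs3CpuInstr4ConfigReconfigure
 * call must be undone with bs3CpuInstrXConfigRestore, as the type #1 worker
 * below does for each configuration:
 *
 *      BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
 *      if (bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
 *      {
 *          ...run the micro-tests for this configuration...
 *          bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
 *      }
 *      else: the configuration is unsupported on this CPU and is skipped.
 */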
425
426
427/**
428 * Allocates three extended CPU contexts and initializes the first one
429 * with random data.
430 * @returns First extended context, initialized with randomish data. NULL on
431 * failure (complained).
432 * @param ppExtCtx2 Where to return the 2nd context.
433 */
434static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
435{
436 /* Allocate extended context structures. */
437 uint64_t fFlags;
438 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
439 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
440 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
441 if (pExtCtx1)
442 {
443 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
444 /** @todo populate with semi-random stuff. */
445
446 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
447 *ppExtCtx2 = pExtCtx2;
448 return pExtCtx1;
449 }
450 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 3);
451 *ppExtCtx2 = NULL;
452 return NULL;
453}
454
455
456/**
457 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
458 *
459 * @param pExtCtx1 The first extended context.
460 * @param pExtCtx2 The second extended context.
461 */
462static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
463{
464 RT_NOREF_PV(pExtCtx2);
465 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 3);
466}
467
468
469/**
470 * Sets up SSE and AVX bits relevant for FPU instructions.
471 */
472static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
473{
474 /* CR0: */
475 uint32_t cr0 = Bs3RegGetCr0();
476 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
477 cr0 |= X86_CR0_NE;
478 Bs3RegSetCr0(cr0);
479
480 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
481 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
482 pCtx->cr0.u32 |= X86_CR0_NE;
483
484 /* CR4: */
485 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
486 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
487 {
488 uint32_t cr4 = Bs3RegGetCr4();
489 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
490 {
491 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
492 Bs3RegSetCr4(cr4);
493 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
494 }
495 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
496 {
497 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
498 Bs3RegSetCr4(cr4);
499 }
500 pCtx->cr4.u32 = cr4;
501 }
502}
503
504
505/**
506 * Configures the buffer with electric fences in paged modes.
507 *
508 * @returns Adjusted buffer pointer.
509 * @param pbBuf The buffer pointer.
510 * @param pcbBuf Pointer to the buffer size (input & output).
511 * @param bMode The testing target mode.
512 */
513DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
514{
515 if (BS3_MODE_IS_PAGED(bMode))
516 {
517 int rc;
518 uint32_t cbBuf = *pcbBuf;
519 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
520 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
521 pbBuf += X86_PAGE_SIZE;
522 cbBuf -= X86_PAGE_SIZE * 2;
523 *pcbBuf = cbBuf;
524
525 g_pbBufAlias = g_pbBufAliasAlloc;
526 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
527 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
528 if (RT_FAILURE(rc))
529 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
530 }
531 else
532 g_pbBufAlias = pbBuf;
533 return pbBuf;
534}
535
536
537/**
538 * Undoes what bs3CpuInstrXBufSetup did.
539 *
540 * @param pbBuf The buffer pointer.
541 * @param cbBuf The buffer size.
542 * @param bMode The testing target mode.
543 */
544DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
545{
546 if (BS3_MODE_IS_PAGED(bMode))
547 {
548 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
549 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
550 }
551}
552
553
554/**
555 * Gets a buffer of a @a cbMemOp sized operand according to the given
556 * configuration and alignment restrictions.
557 *
558 * @returns Pointer to the buffer.
559 * @param pbBuf The buffer pointer.
560 * @param cbBuf The buffer size.
561 * @param cbMemOp The operand size.
562 * @param cbAlign The operand alignment restriction.
563 * @param pConfig The configuration.
564 * @param fPageFault The \#PF test setting.
565 */
566DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
567 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
568{
569 /* All allocations are at the tail end of the buffer, so that we've got a
570 guard page following the operand. When asked to consistently trigger
571 a #PF, we slide the buffer into that guard page. */
572 if (fPageFault)
573 cbBuf += X86_PAGE_SIZE;
574
575 if (pConfig->fAligned)
576 {
577 if (!pConfig->fAlignCheck)
578 return &pbBuf[cbBuf - cbMemOp];
579 return &pbBuf[cbBuf - cbMemOp - cbAlign];
580 }
581 return &pbBuf[cbBuf - cbMemOp - 1];
582}
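
/*
 * Operand placement sketch (illustrative): the operand is always taken from
 * the tail of the buffer so that the non-present guard page set up by
 * bs3CpuInstrXBufSetup follows it immediately:
 *
 *      aligned, no alignment check:  operand ends exactly at the guard page.
 *      aligned + alignment check:    backed off by a further cbAlign bytes (stays aligned).
 *      misaligned:                   backed off by a single byte.
 *      fPageFault:                   the window is shifted one page up, so the
 *                                    access lands in the guard page and raises #PF.
 */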
583
584
585/**
588 * Determines the size of memory operands.
587 */
588DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
589{
590 if (enmRm <= RM_MEM)
591 return cbOperand;
592 if (enmRm == RM_MEM8)
593 return sizeof(uint8_t);
594 if (enmRm == RM_MEM16)
595 return sizeof(uint16_t);
596 if (enmRm == RM_MEM32)
597 return sizeof(uint32_t);
598 if (enmRm == RM_MEM64)
599 return sizeof(uint64_t);
600 BS3_ASSERT(0);
601 return cbOperand;
602}
603
604
605/*
606 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
607 * skips a large fraction of the micro-tests. It is sufficiently random
608 * that over a large number of runs, all micro-tests will be hit.
609 *
610 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
611 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
612 * (on an Intel Core i7-10700, fwiw).
613 *
614 * To activate this 'developer's speed-testing mode', turn on
615 * `#define BS3_SKIPIT_DO_SKIP' here.
616 *
617 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
618 * skipped in a row; e.g. the default of 26 means about every 27th
619 * micro-test is run during a particular test run. (This is not 27x
620 * faster due to other activities which are not skipped!) Note this is
621 * only an average; the actual skips are random.
622 *
623 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
624 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
625 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
626 * 3% performance).
627 *
628 * Note! The skipping is not compatible with testing the native recompiler as
629 * it requires the test code to be run a number of times before it kicks
630 * in and does the native recompilation (currently around 16 times).
631 */
632#define BS3_SKIPIT_AVG_SKIP 26
633#define BS3_SKIPIT_REPORT_COUNT 150000
634#undef BS3_SKIPIT_DO_SKIP
635#undef BS3_SKIPIT_DO_ARGS
636
637#ifndef BS3_SKIPIT_DO_SKIP
638# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
639#else
640# include <iprt/asm-amd64-x86.h>
641# include <iprt/asm-math.h>
642
643DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
644{
645 /*
646 * A simple Lehmer linear congruential pseudo-random number
647 * generator using the constants suggested by Park & Miller:
648 *
649 * modulus = 2^31 - 1 (INT32_MAX)
650 * multiplier = 7^5 (16807)
651 *
652 * It produces numbers in the range [1..INT32_MAX-1] and is
653 * more chaotic in the higher bits.
654 *
655 * Note! Runtime/common/rand/randparkmiller.cpp also uses this algorithm,
656 * though the zero handling is different.
657 */
658 static uint32_t s_uSeedMemory = 0;
659 uint32_t uVal = s_uSeedMemory;
660 if (!uVal)
661 uVal = (uint32_t)ASMReadTSC();
662 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
663 s_uSeedMemory = uVal;
664 return uVal;
665}
666
667static unsigned g_cSeen, g_cSkipped;
668
669static void bs3CpuInstrX_ShowTallies(void)
670{
671 Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
672}
673
674# ifdef BS3_SKIPIT_DO_ARGS
675# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
676static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
677# else
678# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
679static bool bs3CpuInstrX_SkipIt(void)
680# endif
681{
682 static unsigned s_uTimes = 0;
683 bool fSkip;
684
685 /* Cache calls to the relatively expensive random routine */
686 if (!s_uTimes)
687 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
688 fSkip = --s_uTimes > 0;
689 if (fSkip)
690 ++g_cSkipped;
691
692 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
693 bs3CpuInstrX_ShowTallies();
694 return fSkip;
695}
696
697#endif /* BS3_SKIPIT_DO_SKIP */
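
/*
 * Skip-rate arithmetic (sketch): each draw of s_uTimes is uniform in
 * [1 .. BS3_SKIPIT_AVG_SKIP * 2 + 1], i.e. [1..53] with the default of 26,
 * and exactly one call per draw returns false (the one decrementing it to
 * zero).  The average draw is 27, so roughly 1 in 27 micro-tests executes,
 * which is where the "about every 27th micro-test" figure above comes from.
 */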
698
699/*
700 * Test type #1.
701 * Generic YMM registers.
702 */
703typedef struct BS3CPUINSTR4_TEST1_VALUES_T
704{
705 X86YMMREG uSrc2; /**< Second source operand. */
706 X86YMMREG uSrc1; /**< uDstIn for SSE */
707 X86YMMREG uDstOut; /**< Destination output. */
708 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
709 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
710 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
711 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
712 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
713} BS3CPUINSTR4_TEST1_VALUES_T;
714
715/*
716 * Test type #1.
717 * Packed single-precision.
718 */
719typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
720{
721 X86YMMFLOATPSREG uSrc2; /**< Second source operand. */
722 X86YMMFLOATPSREG uSrc1; /**< uDstIn for SSE */
723 X86YMMFLOATPSREG uDstOut; /**< Destination output. */
724 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
725 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
726 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
727 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
728 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
729} BS3CPUINSTR4_TEST1_VALUES_PS_T;
730AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
731AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
732AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
733AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
734AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
735AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
736AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
737AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
738AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
739
740/*
741 * Test type #1.
742 * Packed double-precision.
743 */
744typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
745{
746 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
747 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
748 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
749 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
750 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
751 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
752 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
753 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
754} BS3CPUINSTR4_TEST1_VALUES_PD_T;
755AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
756AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
757AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
758AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
759AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
760AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
761AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
762AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
763AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
764
765/*
766 * Test type #1.
767 * Scalar quadruple-precision.
768 */
769typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
770{
771 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
772 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
773 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
774 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
775 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
776 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
777 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
778 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
779} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
780AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
781AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
782AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
783AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
784AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
785AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
786AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
787AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
788AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
789
790typedef struct BS3CPUINSTR4_TEST1_T
791{
792 FPFNBS3FAR pfnWorker; /**< Test function worker. */
793 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
794 uint8_t enmRm; /**< R/M type. */
795 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
796 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
797 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
798 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
799 uint8_t cValues; /**< Number of test values in @c paValues. */
800 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
801} BS3CPUINSTR4_TEST1_T;
802
803typedef struct BS3CPUINSTR4_TEST1_MODE_T
804{
805 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
806 unsigned cTests;
807} BS3CPUINSTR4_TEST1_MODE_T;
808
809/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
810#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
811 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
812
813typedef struct BS3CPUINSTR4_TEST1_CTX_T
814{
815 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig;
816 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest;
817 unsigned iVal;
818 const char BS3_FAR *pszMode;
819 PBS3TRAPFRAME pTrapFrame;
820 PBS3REGCTX pCtx;
821 PBS3EXTCTX pExtCtx;
822 PBS3EXTCTX pExtCtxOut;
823 uint8_t BS3_FAR *puMemOp;
824 uint8_t BS3_FAR *puMemOpAlias;
825 uint8_t cbMemOp;
826 uint8_t cbOperand;
827 uint8_t cbInstr;
828 uint8_t bXcptExpect;
829 bool fSseInstr;
830 uint16_t idTestStep;
831} BS3CPUINSTR4_TEST1_CTX_T;
832/** Pointer to a test 1 context. */
833typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
834
835
836/**
837 * Worker for bs3CpuInstrX_WorkerTestType1.
838 */
839static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
840 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
841{
842 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
843 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
844 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
845 PBS3REGCTX pCtx = pTestCtx->pCtx;
846 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
847 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
848 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
849 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
850 uint8_t cbMemOp = pTestCtx->cbMemOp;
851 uint8_t const cbOperand = pTestCtx->cbOperand;
852 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
853 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
854 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
855 bool const fFpFlagsExpect = RT_BOOL(pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS);
856 uint32_t uMxCsr;
857 X86YMMREG uMemOpExpect;
858 uint16_t cErrors;
859
860 /*
861 * Set up the context and some expectations.
862 */
863 /* Destination. */
864 if (pTest->iRegDst == UINT8_MAX)
865 {
866 BS3_ASSERT(pTest->enmRm >= RM_MEM);
867 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
868 if (bXcptExpect == X86_XCPT_DB)
869 uMemOpExpect.ymm = pValues->uDstOut.ymm;
870 else
871 Bs3MemSet(&uMemOpExpect, 0xcc, sizeof(uMemOpExpect));
872 }
873
874 /* Source #1 (/ destination for SSE). */
875 if (pTest->iRegSrc1 == UINT8_MAX)
876 {
877 BS3_ASSERT(pTest->enmRm >= RM_MEM);
878 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
879 if (pTest->iRegDst == UINT8_MAX)
880 BS3_ASSERT(pTestCtx->fSseInstr);
881 else
882 uMemOpExpect.ymm = pValues->uSrc1.ymm;
883 }
884 else if (pTestCtx->fSseInstr)
885 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
886 else
887 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
888
889 /* Source #2. */
890 if (pTest->iRegSrc2 == UINT8_MAX)
891 {
892 BS3_ASSERT(pTest->enmRm >= RM_MEM);
893 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
894 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
895 uMemOpExpect.ymm = pValues->uSrc2.ymm;
896 }
897 else if (pTestCtx->fSseInstr)
898 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
899 else
900 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
901
902 /* Memory pointer. */
903 if (pTest->enmRm >= RM_MEM)
904 {
905 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
906 || pTest->iRegSrc1 == UINT8_MAX
907 || pTest->iRegSrc2 == UINT8_MAX);
908 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
909 }
910
911 /* Setup MXCSR for the current test. */
912 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
913 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
914 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
915 if ( pValues->fDenormalsAreZero
916 && g_fMxCsrDazSupported)
917 uMxCsr |= X86_MXCSR_DAZ;
918 if (pValues->fFlushToZero)
919 uMxCsr |= X86_MXCSR_FZ;
920 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
921
922 /*
923 * Prepare globals and execute.
924 */
925 g_uBs3TrapEipHint = pCtx->rip.u32;
926 if ( bXcptExpect == X86_XCPT_DB
927 && !fFpFlagsExpect)
928 g_uBs3TrapEipHint += cbInstr + 1;
929 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
930
931 /*
932 * Check the result.
933 */
934 cErrors = Bs3TestSubErrorCount();
935 if ( bXcptExpect == X86_XCPT_DB
936 && pTest->iRegDst != UINT8_MAX)
937 {
938 if (pTestCtx->fSseInstr)
939 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
940 else
941 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
942 }
943#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
944 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
945 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
946 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
947 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
948#endif
949 if (bXcptExpect == X86_XCPT_DB)
950 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
951 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
952 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
953
954 if (bXcptExpect == X86_XCPT_DB)
955 {
956 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
957
958 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
959 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
960 {
961 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
962 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
963 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
964 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
965 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
966 }
967
968 /* Check if the SIMD FP exception (or lack of) is as expected. */
969 if (fFpFlagsExpect)
970 {
971 if (pTrapFrame->bXcpt == bFpXcpt)
972 { /* likely */ }
973 else
974 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
975 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
976 }
977 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
978 { /* likely */ }
979 else
980 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
981 }
982 /* Check if non-FP exception is as expected. */
983 else if (pTrapFrame->bXcpt != bXcptExpect)
984 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
985
986 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
987 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
988 {
989 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
990 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
991 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
992 }
993 if (bXcptExpect == X86_XCPT_PF)
994 pCtx->cr2.u = (uintptr_t)puMemOp;
995 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
996 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
997 pTestCtx->pszMode, pTestCtx->idTestStep);
998 pCtx->cr2.u = 0;
999
1000 if ( pTest->enmRm >= RM_MEM
1001 && Bs3MemCmp(puMemOpAlias, &uMemOpExpect, cbMemOp) != 0)
1002 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &uMemOpExpect, cbMemOp, puMemOpAlias);
1003
1004 return cErrors;
1005}
1006
1007
1008/**
1009 * Test type #1 worker.
1010 */
1011static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1012 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1013{
1014 BS3REGCTX Ctx;
1015 BS3TRAPFRAME TrapFrame;
1016 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1017 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1018 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1019 uint32_t cbBuf = g_cbBuf;
1020 PBS3EXTCTX pExtCtxOut;
1021 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1022 if (pExtCtx)
1023 { /* likely */ }
1024 else
1025 return 0;
1026 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1027 { /* likely */ }
1028 else
1029 {
1030 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1031 return 0;
1032 }
1033
1034 /* Ensure the structures are allocated before we sample the stack pointer. */
1035 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1036 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1037
1038 /*
1039 * Create test context.
1040 */
1041 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1042 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1043 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1044
1045 /*
1046 * Run the tests in all rings since alignment issues may behave
1047 * differently in ring-3 compared to ring-0.
1048 */
1049 for (;;)
1050 {
1051 unsigned fPf = 0;
1052 do
1053 {
1054 unsigned iCfg;
1055 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1056 {
1057 unsigned iTest;
1058 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1059 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1060 continue; /* unsupported config */
1061
1062 /*
1063 * Iterate the tests.
1064 */
1065 for (iTest = 0; iTest < cTests; iTest++)
1066 {
1067 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1068 unsigned const cValues = pTest->cValues;
1069 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1070 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1071 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1072 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1073 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1074 uint8_t const cbAlign = cbMemOp;
1075 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1076 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1077 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1078 : fSseInstr ? paConfigs[iCfg].bXcptSse
1079 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1080 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1081 unsigned cRecompRuns = 0;
1082 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1083 unsigned iVal;
1084
1085 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1086 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1087 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1088 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1089 continue;
1090
1091 /* #AC is only raised in ring-3. */
1092 if (bXcptExpect == X86_XCPT_AC)
1093 {
1094 if (bRing != 3)
1095 bXcptExpect = X86_XCPT_DB;
1096 else if (fAvxInstr)
1097 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1098 }
1099
1100 if (fPf && bXcptExpect == X86_XCPT_DB)
1101 bXcptExpect = X86_XCPT_PF;
1102
1103 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1104
1105 /*
1106 * Iterate the test values and do the actual testing.
1107 */
1108 while (cRecompRuns < cMaxRecompRuns)
1109 {
1110 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1111 {
1112 uint16_t cErrors;
1113 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1114 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1115 continue;
1116
1117 /*
1118 * Setup the test instruction context and pass it to the worker.
1119 * A few of these can be figured out by the worker but initializing
1120 * it outside the innermost loop is more efficient.
1121 */
1122 TestCtx.pConfig = &paConfigs[iCfg];
1123 TestCtx.pTest = pTest;
1124 TestCtx.iVal = iVal;
1125 TestCtx.pszMode = pszMode;
1126 TestCtx.pTrapFrame = &TrapFrame;
1127 TestCtx.pCtx = &Ctx;
1128 TestCtx.pExtCtx = pExtCtx;
1129 TestCtx.pExtCtxOut = pExtCtxOut;
1130 TestCtx.puMemOp = (uint8_t *)puMemOp;
1131 TestCtx.puMemOpAlias = puMemOpAlias;
1132 TestCtx.cbMemOp = cbMemOp;
1133 TestCtx.cbOperand = cbOperand;
1134 TestCtx.bXcptExpect = bXcptExpect;
1135 TestCtx.fSseInstr = fSseInstr;
1136 TestCtx.idTestStep = idTestStep;
1137 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1138 if (cErrors != Bs3TestSubErrorCount())
1139 {
1140 if (paConfigs[iCfg].fAligned)
1141 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1142 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1143 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1144 else
1145 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1146 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1147 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1148 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1149 Bs3TestPrintf("\n");
1150 }
1151 }
1152 }
1153 }
1154 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1155 }
1156 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1157
1158 /*
1159 * Next ring.
1160 */
1161 bRing++;
1162 if (bRing > 3 || bMode == BS3_MODE_RM)
1163 break;
1164 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1165 }
1166
1167 /*
1168 * Cleanup.
1169 */
1170 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1171 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1172 return 0;
1173}
1174
1175
1176/*
1177 * [v]addpd.
1178 */
1179BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1180{
1181 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1182 {
1183 /* 0*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1184 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1185 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1186 /*mask */ X86_MXCSR_XCPT_MASK,
1187 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1188 /*flags */ 0 },
1189 /* 1*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1190 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1191 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1192 /*mask */ ~X86_MXCSR_XCPT_MASK,
1193 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1194 /*flags */ 0 },
1195 /* 2*/{ { /*src2 */ { RTFLOAT64U_INIT_C(0, 0, 0x409), /*1024*/ RTFLOAT64U_INIT_C(0, 0xb800000000000, 0x404) /*55*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_INF(1) } },
1196 { /*src1 */ { RTFLOAT64U_INIT_C(0, 0, 0x408), /* 512*/ RTFLOAT64U_INIT_C(0, 0xc000000000000, 0x401) /* 7*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1197 { /* => */ { RTFLOAT64U_INIT_C(0, 0x8000000000000, 0x409) /*1536*/, RTFLOAT64U_INIT_C(0, 0xf000000000000, 0x404) /*62*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_INF(1) } },
1198 /*mask */ X86_MXCSR_XCPT_MASK,
1199 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1200 /*flags */ 0 },
1201 /* 3*/{ { /*src2 */ { RTFLOAT64U_INIT_C(0, 0x26580b4800000, 0x41d), /* 1234567890*/ RTFLOAT64U_INIT_C(0, 0xd6f3458800000, 0x41c) /*987654321*/, RTFLOAT64U_INIT_SNAN(0), RTFLOAT64U_INIT_SNAN(1) } },
1202 { /*src1 */ { RTFLOAT64U_INIT_C(1, 0x26580b4800000, 0x41d), /*-1234567890*/ RTFLOAT64U_INIT_C(1, 0x9000000000000, 0x405) /* -100*/, RTFLOAT64U_INIT_SNAN(1), RTFLOAT64U_INIT_SNAN(0) } },
1203 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_C(0, 0xd6f3426800000, 0x41c) /*987654221*/, RTFLOAT64U_INIT_SNAN(0), RTFLOAT64U_INIT_SNAN(0) } },
1204 /*mask */ ~X86_MXCSR_XCPT_MASK,
1205 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1206 /*flags */ 0 },
1207 /* 4*/{ { /*src2 */ { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1208 { /*src1 */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1209 { /* => */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1210 /*mask */ ~X86_MXCSR_XCPT_MASK,
1211 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1212 /*flags */ X86_MXCSR_IE },
1213 };
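
    /*
     * Value encoding sketch: RTFLOAT64U_INIT_C(fSign, uFraction, uExponent)
     * builds an IEEE-754 double with exponent bias 0x3ff.  E.g.
     * RTFLOAT64U_INIT_C(0, 0, 0x409) is +1.0 * 2^(0x409 - 0x3ff) = 1024, and
     * RTFLOAT64U_INIT_C(0, 0x8000000000000, 0x409) sets the top fraction bit
     * (0.5), giving +1.5 * 2^10 = 1536 as in value row 2.  Only the low two
     * doubles matter for the 128-bit SSE form tested here; the upper lanes
     * would only come into play for 256-bit AVX variants.
     */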
1214
1215 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1216 {
1217 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1218 };
1219 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1220 {
1221 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1222 };
1223 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1224 {
1225 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1226 };
1227
1228 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1229 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1230 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1231 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1232}
1233
1234
1235/**
1236 * The 32-bit protected mode main function.
1237 *
1238 * The tests are driven by 32-bit test drivers, even for real-mode tests (though
1239 * we'll switch between PE32 and RM for each test step we perform). Given that
1240 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1241 *
1242 * Some extra steps need to be taken to properly handle extended state in LM64
1243 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1244 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1245 */
1246BS3_DECL(void) Main_pe32()
1247{
1248 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1249 {
1250#if 1 /*ndef DEBUG_bird*/
1251# define ALL_TESTS
1252#endif
1253#if defined(ALL_TESTS)
1254 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1255#endif
1256 };
1257 Bs3TestInit("bs3-cpu-instr-4");
1258
1259 /*
1260 * Initialize globals.
1261 */
1262 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1263 {
1264 uint32_t fEbx, fEcx, fEdx;
1265 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1266 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1267 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1268 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1269 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1270 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1271 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1272 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1273 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1274 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1275 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1276 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1277 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1278 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1279 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1280 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1281
1282 if (ASMCpuId_EAX(0) >= 7)
1283 {
1284 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1285 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1286 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1287 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1288 }
1289
1290 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1291 {
1292 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1293 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEdx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1294 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1295 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1296 }
1297 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1298
1299 /*
1300 * Figure out FPU save/restore method and support for DAZ bit.
1301 */
1302 {
1303 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1304 * alloc/free a context. Replicating the logic in the bs3kit here, though
1305 * doable, runs a risk of not updating this when the other logic is
1306 * changed. */
1307 uint64_t fFlags;
1308 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1309 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1310 if (pExtCtx)
1311 {
1312 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1313 g_enmExtCtxMethod = pExtCtx->enmMethod;
1314 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1315 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1316 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1317 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1318 g_fMxCsrDazSupported = true;
1319 }
1320 else
1321 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1322 }
1323
1324 /*
1325 * Allocate a buffer for testing.
1326 */
1327 g_cbBuf = X86_PAGE_SIZE * 4;
1328 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1329 if (g_pbBuf)
1330 {
1331 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1332 if (g_pbBufAliasAlloc)
1333 {
1334 /*
1335 * Do the tests.
1336 */
1337 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1338#ifdef BS3_SKIPIT_DO_SKIP
1339 bs3CpuInstrX_ShowTallies();
1340#endif
1341 }
1342 else
1343 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1344 }
1345 else
1346 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1347 }
1348
1349 Bs3TestTerm();
1350}
1351