VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104708

Last change on this file since 104708 was 104708, checked in by vboxsync, 7 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: addpd.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 53.0 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104708 2024-05-17 10:51:30Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code, 1 for 32-bit code and 2 for everything else.
 *
 * @param   a_bMode     The execution mode.  May be expanded more than once, so
 *                      it must not have side effects. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)

/** Maximum length for the names of all SIMD FP exception flags combined
 * (includes the string terminator). */
#define BS3_FP_XCPT_NAMES_MAXLEN        sizeof(" IE DE ZE OE UE PE ")
57
58
59/*********************************************************************************************************************************
60* Structures and Typedefs *
61*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The values are consecutive; T_128BITS and T_256BITS are aliases marking the
 * first 128-bit and first 256-bit capable type respectively.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID       = 0,
    T_MMX           = 1,
    T_MMX_SSE       = 2,            /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2      = 3,            /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3     = 4,            /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX         = 5,
    T_AXMMX_OR_SSE  = 6,
    T_SSE           = 7,
    T_128BITS       = T_SSE,        /**< Alias of T_SSE. */
    T_SSE2          = 8,
    T_SSE3          = 9,
    T_SSSE3         = 10,
    T_SSE4_1        = 11,
    T_SSE4_2        = 12,
    T_SSE4A         = 13,
    T_PCLMUL        = 14,
    T_SHA           = 15,
    T_AVX_128       = 16,
    T_AVX2_128      = 17,
    T_AVX_PCLMUL    = 18,
    T_AVX_256       = 19,
    T_256BITS       = T_AVX_256,    /**< Alias of T_AVX_256. */
    T_AVX2_256      = 20,
    T_MAX           = 21            /**< Number of types; used for sizing arrays. */
} BS3CPUINSTRX_INSTRTYPE_T;
90
/** Memory or register rm variant. */
enum
{
    RM_REG      = 0,    /**< Register operand. */
    RM_MEM      = 1,    /**< Memory operand of the full operand size. */
    RM_MEM8     = 2,    /**< Memory operand is 8 bits.  Hack for movss and similar. */
    RM_MEM16    = 3,    /**< Memory operand is 16 bits.  Hack for movss and similar. */
    RM_MEM32    = 4,    /**< Memory operand is 32 bits.  Hack for movss and similar. */
    RM_MEM64    = 5     /**< Memory operand is 64 bits.  Hack for movss and similar. */
};
100
101/**
102 * Execution environment configuration.
103 */
104typedef struct BS3CPUINSTR4_CONFIG_T
105{
106 uint16_t fCr0Mp : 1;
107 uint16_t fCr0Em : 1;
108 uint16_t fCr0Ts : 1;
109 uint16_t fCr4OsFxSR : 1;
110 uint16_t fCr4OsXSave : 1;
111 uint16_t fCr4OsXmmExcpt : 1;
112 uint16_t fXcr0Sse : 1;
113 uint16_t fXcr0Avx : 1;
114 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
115 uint16_t fAlignCheck : 1;
116 uint16_t fMxCsrMM : 1; /**< AMD only */
117 uint8_t bXcptSse;
118 uint8_t bXcptAvx;
119} BS3CPUINSTR4_CONFIG_T;
120/** Pointer to an execution environment configuration. */
121typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
122
123/** State saved by bs3CpuInstr4ConfigReconfigure. */
124typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
125{
126 uint32_t uCr0;
127 uint32_t uCr4;
128 uint32_t uEfl;
129 uint16_t uFcw;
130 uint16_t uFsw;
131 uint32_t uMxCsr;
132} BS3CPUINSTRX_CONFIG_SAVED_T;
133typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
134typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
135
136/**
137 * YMM packed double precision floating-point register.
138 * @todo move to x86.h?
139 */
140typedef union X86YMMFLOATPDREG
141{
142 /** Double precision packed floating point view. */
143 RTFLOAT64U ar64[4];
144 /** Single precision packed floating point view. */
145 RTFLOAT32U ar32[8];
146 /** 256-bit integer view. */
147 RTUINT256U ymm;
148} X86YMMFLOATPDREG;
149# ifndef VBOX_FOR_DTRACE_LIB
150AssertCompileSize(X86YMMFLOATPDREG, 32);
151# endif
152/** Pointer to a YMM packed floating-point register. */
153typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
154/** Pointer to a const YMM packed floating-point register. */
155typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
156
157/**
158 * YMM scalar floating-point register.
159 * @todo move to x86.h?
160 */
161typedef union X86YMMSFLOATREG
162{
163 /** Double precision scalar floating point view. */
164 RTFLOAT128U ar128[2];
165 /** 256-bit integer view. */
166 RTUINT256U ymm;
167} X86YMMSFLOATREG;
168# ifndef VBOX_FOR_DTRACE_LIB
169AssertCompileSize(X86YMMSFLOATREG, 32);
170# endif
171/** Pointer to a YMM scalar floating-point register. */
172typedef X86YMMSFLOATREG *PX86YMMSFLOATREG;
173/** Pointer to a const YMM scalar floating-point register. */
174typedef X86YMMSFLOATREG const *PCX86YMMSFLOATREG;
175
176
177/*********************************************************************************************************************************
178* Global Variables *
179*********************************************************************************************************************************/
180static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
181static bool g_fAmdMisalignedSse = false;
182static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
183static bool g_fMxCsrDazSupported = false;
184
185/** Zero value (indexed by fSign). */
186RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
187RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
188
189/** One value (indexed by fSign). */
190RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
191 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
192RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
193 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
194
195/** Infinity (indexed by fSign). */
196RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
197RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
198
199/** Default QNaNs (indexed by fSign). */
200RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
201RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
202
203/** Size of g_pbBuf - at least three pages. */
204static uint32_t g_cbBuf;
205/** Buffer of g_cbBuf size. */
206static uint8_t BS3_FAR *g_pbBuf;
207/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
208static uint8_t BS3_FAR *g_pbBufAlias;
209/** RW alias for the memory at g_pbBuf. */
210static uint8_t BS3_FAR *g_pbBufAliasAlloc;
211
212/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
213static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
214{
215/*
216 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
217 * +AVX +AVX +AMD/SSE +AMD/SSE
218 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
219 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
220 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
221 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
222 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
223 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
224 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
225 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
226 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
227 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
228 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
229 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
230 /* Memory misalignment and alignment checks: */
231 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #10 */
232 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #11 */
233 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
234 /* AMD only: */
235 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
236 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
237};
238
239
240/**
241 * Returns the name of an X86 exception given the vector.
242 *
243 * @returns Name of the exception.
244 * @param uVector The exception vector.
245 */
246static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
247{
248 switch (uVector)
249 {
250 case X86_XCPT_DE: return "#DE";
251 case X86_XCPT_DB: return "#DB";
252 case X86_XCPT_NMI: return "#NMI";
253 case X86_XCPT_BP: return "#BP";
254 case X86_XCPT_OF: return "#OF";
255 case X86_XCPT_BR: return "#BR";
256 case X86_XCPT_UD: return "#UD";
257 case X86_XCPT_NM: return "#NM";
258 case X86_XCPT_DF: return "#DF";
259 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
260 case X86_XCPT_TS: return "#TS";
261 case X86_XCPT_NP: return "#NP";
262 case X86_XCPT_SS: return "#SS";
263 case X86_XCPT_GP: return "#GP";
264 case X86_XCPT_PF: return "#PF";
265 case X86_XCPT_MF: return "#MF";
266 case X86_XCPT_AC: return "#AC";
267 case X86_XCPT_MC: return "#MC";
268 case X86_XCPT_XF: return "#XF";
269 case X86_XCPT_VE: return "#VE";
270 case X86_XCPT_CP: return "#CP";
271 case X86_XCPT_VC: return "#VC";
272 case X86_XCPT_SX: return "#SX";
273 }
274 return "UNKNOWN";
275}
276
277
278/**
279 * Gets the names of floating-point exception flags that are set for a given MXCSR.
280 *
281 * @returns Names of floating-point exception flags that are set.
282 * @param pszBuf Where to store the floating-point exception flags.
283 * @param cchBuf The size of the buffer.
284 * @param fMxCsr The MXCSR value.
285 */
286static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
287{
288 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
289 return Bs3StrPrintf(pszBuf, cchBuf, " None");
290 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
291 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
292 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
293}
294
295
296/**
297 * Reconfigures the execution environment according to @a pConfig.
298 *
299 * Call bs3CpuInstrXConfigRestore to undo the changes.
300 *
301 * @returns true on success, false if the configuration cannot be applied. In
302 * the latter case, no context changes are made.
303 * @param pSavedCfg Where to save state we modify.
304 * @param pCtx The register context to modify.
305 * @param pExtCtx The extended register context to modify.
306 * @param pConfig The configuration to apply.
307 * @param bMode The target mode.
308 */
309static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
310 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
311{
312 /*
313 * Save context bits we may change here
314 */
315 pSavedCfg->uCr0 = pCtx->cr0.u32;
316 pSavedCfg->uCr4 = pCtx->cr4.u32;
317 pSavedCfg->uEfl = pCtx->rflags.u32;
318 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
319 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
320 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
321
322 /*
323 * Can we make these changes?
324 */
325 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
326 return false;
327
328 /*
329 * Modify the test context.
330 */
331 if (pConfig->fCr0Mp)
332 pCtx->cr0.u32 |= X86_CR0_MP;
333 else
334 pCtx->cr0.u32 &= ~X86_CR0_MP;
335 if (pConfig->fCr0Em)
336 pCtx->cr0.u32 |= X86_CR0_EM;
337 else
338 pCtx->cr0.u32 &= ~X86_CR0_EM;
339 if (pConfig->fCr0Ts)
340 pCtx->cr0.u32 |= X86_CR0_TS;
341 else
342 pCtx->cr0.u32 &= ~X86_CR0_TS;
343
344 if (pConfig->fCr4OsFxSR)
345 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
346 else
347 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
348
349 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
350 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
351 else
352 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
353
354 if (pConfig->fCr4OsFxSR)
355 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
356 else
357 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
358
359 if (pConfig->fCr4OsXSave)
360 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
361 else
362 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
363
364 if (pConfig->fXcr0Sse)
365 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
366 else
367 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
368 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
369 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
370 else
371 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
372
373 if (pConfig->fAlignCheck)
374 {
375 pCtx->rflags.u32 |= X86_EFL_AC;
376 pCtx->cr0.u32 |= X86_CR0_AM;
377 }
378 else
379 {
380 pCtx->rflags.u32 &= ~X86_EFL_AC;
381 pCtx->cr0.u32 &= ~X86_CR0_AM;
382 }
383
384 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
385 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
386
387 if (pConfig->fMxCsrMM)
388 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
389 else
390 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
391 return true;
392}
393
394
395/**
396 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
397 */
398static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
399{
400 pCtx->cr0.u32 = pSavedCfg->uCr0;
401 pCtx->cr4.u32 = pSavedCfg->uCr4;
402 pCtx->rflags.u32 = pSavedCfg->uEfl;
403 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
404 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
405 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
406 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
407}
408
409
410/**
411 * Allocates three extended CPU contexts and initializes the first one
412 * with random data.
413 * @returns First extended context, initialized with randomish data. NULL on
414 * failure (complained).
415 * @param ppExtCtx2 Where to return the 2nd context.
416 */
417static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
418{
419 /* Allocate extended context structures. */
420 uint64_t fFlags;
421 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
422 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
423 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
424 if (pExtCtx1)
425 {
426 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
427 /** @todo populate with semi-random stuff. */
428
429 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
430 *ppExtCtx2 = pExtCtx2;
431 return pExtCtx1;
432 }
433 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
434 *ppExtCtx2 = NULL;
435 return NULL;
436}
437
438
439/**
440 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
441 *
442 * @param pExtCtx1 The first extended context.
443 * @param pExtCtx2 The second extended context.
444 */
445static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
446{
447 RT_NOREF_PV(pExtCtx2);
448 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
449}
450
451
452/**
453 * Sets up SSE and AVX bits relevant for FPU instructions.
454 */
455static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
456{
457 /* CR0: */
458 uint32_t cr0 = Bs3RegGetCr0();
459 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
460 cr0 |= X86_CR0_NE;
461 Bs3RegSetCr0(cr0);
462
463 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
464 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
465 pCtx->cr0.u32 |= X86_CR0_NE;
466
467 /* CR4: */
468 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
469 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
470 {
471 uint32_t cr4 = Bs3RegGetCr4();
472 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
473 {
474 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
475 Bs3RegSetCr4(cr4);
476 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
477 }
478 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
479 {
480 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
481 Bs3RegSetCr4(cr4);
482 }
483 pCtx->cr4.u32 = cr4;
484 }
485}
486
487
488/**
489 * Configures the buffer with electric fences in paged modes.
490 *
491 * @returns Adjusted buffer pointer.
492 * @param pbBuf The buffer pointer.
493 * @param pcbBuf Pointer to the buffer size (input & output).
494 * @param bMode The testing target mode.
495 */
496DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
497{
498 if (BS3_MODE_IS_PAGED(bMode))
499 {
500 int rc;
501 uint32_t cbBuf = *pcbBuf;
502 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
503 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
504 pbBuf += X86_PAGE_SIZE;
505 cbBuf -= X86_PAGE_SIZE * 2;
506 *pcbBuf = cbBuf;
507
508 g_pbBufAlias = g_pbBufAliasAlloc;
509 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
510 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
511 if (RT_FAILURE(rc))
512 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
513 }
514 else
515 g_pbBufAlias = pbBuf;
516 return pbBuf;
517}
518
519
520/**
521 * Undoes what bs3CpuInstrXBufSetup did.
522 *
523 * @param pbBuf The buffer pointer.
524 * @param cbBuf The buffer size.
525 * @param bMode The testing target mode.
526 */
527DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
528{
529 if (BS3_MODE_IS_PAGED(bMode))
530 {
531 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
532 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
533 }
534}
535
536
537/**
538 * Gets a buffer of a @a cbMemOp sized operand according to the given
539 * configuration and alignment restrictions.
540 *
541 * @returns Pointer to the buffer.
542 * @param pbBuf The buffer pointer.
543 * @param cbBuf The buffer size.
544 * @param cbMemOp The operand size.
545 * @param cbAlign The operand alignment restriction.
546 * @param pConfig The configuration.
547 * @param fPageFault The \#PF test setting.
548 */
549DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
550 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
551{
552 /* All allocations are at the tail end of the buffer, so that we've got a
553 guard page following the operand. When asked to consistenly trigger
554 a #PF, we slide the buffer into that guard page. */
555 if (fPageFault)
556 cbBuf += X86_PAGE_SIZE;
557
558 if (pConfig->fAligned)
559 {
560 if (!pConfig->fAlignCheck)
561 return &pbBuf[cbBuf - cbMemOp];
562 return &pbBuf[cbBuf - cbMemOp - cbAlign];
563 }
564 return &pbBuf[cbBuf - cbMemOp - 1];
565}
566
567
568/**
569 * Determins the size of memory operands.
570 */
571DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
572{
573 if (enmRm <= RM_MEM)
574 return cbOperand;
575 if (enmRm == RM_MEM8)
576 return sizeof(uint8_t);
577 if (enmRm == RM_MEM16)
578 return sizeof(uint16_t);
579 if (enmRm == RM_MEM32)
580 return sizeof(uint32_t);
581 if (enmRm == RM_MEM64)
582 return sizeof(uint64_t);
583 BS3_ASSERT(0);
584 return cbOperand;
585}
586
587
588/*
589 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
590 * skips a large fraction of the micro-tests. It is sufficiently random
591 * that over a large number of runs, all micro-tests will be hit.
592 *
593 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
594 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
595 * (on an Intel Core i7-10700, fwiw).
596 *
597 * To activate this 'developer's speed-testing mode', turn on
598 * `#define BS3_SKIPIT_DO_SKIP' here.
599 *
600 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
601 * skipped in a row; e.g. the default of 26 means about every 27th
602 * micro-test is run during a particular test run. (This is not 27x
603 * faster due to other activities which are not skipped!) Note this is
604 * only an average; the actual skips are random.
605 *
606 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
607 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
608 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
609 * 3% performance).
610 *
611 * Note! The skipping is not compatible with testing the native recompiler as
612 * it requires the test code to be run a number of times before it kicks
613 * in and does the native recompilation (currently around 16 times).
614 */
615#define BS3_SKIPIT_AVG_SKIP 26
616#undef BS3_SKIPIT_DO_SKIP
617#undef BS3_SKIPIT_DO_ARGS
618
619#ifndef BS3_SKIPIT_DO_SKIP
620# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
621#else
622# include <iprt/asm-amd64-x86.h>
623# include <iprt/asm-math.h>
624
625DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
626{
627 /*
628 * A simple Lehmer linear congruential pseudo-random number
629 * generator using the constants suggested by Park & Miller:
630 *
631 * modulus = 2^31 - 1 (INT32_MAX)
632 * multiplier = 7^5 (16807)
633 *
634 * It produces numbers in the range [1..INT32_MAX-1] and is
635 * more chaotic in the higher bits.
636 *
637 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
638 * though the zero handling is different.
639 */
640 static uint32_t s_uSeedMemory = 0;
641 uint32_t uVal = s_uSeedMemory;
642 if (!uVal)
643 uVal = (uint32_t)ASMReadTSC();
644 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
645 s_uSeedMemory = uVal;
646 return uVal;
647}
648
/** Number of micro-tests seen by bs3CpuInstrX_SkipIt. */
static unsigned g_cSeen;
/** Number of micro-tests bs3CpuInstrX_SkipIt decided to skip. */
static unsigned g_cSkipped;

/**
 * Prints how many micro-tests were seen, tested and skipped so far.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    unsigned const cTested = g_cSeen - g_cSkipped;
    Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, cTested, g_cSkipped);
}
655
656# ifdef BS3_SKIPIT_DO_ARGS
657# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
658static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
659# else
660# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
661static bool bs3CpuInstrX_SkipIt(void)
662# endif
663{
664 static unsigned s_uTimes = 0;
665 bool fSkip;
666
667 /* Cache calls to the relatively expensive random routine */
668 if (!s_uTimes)
669 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
670 fSkip = --s_uTimes > 0;
671 if (fSkip)
672 ++g_cSkipped;
673
674 if (++g_cSeen % 25000 == 0)
675 bs3CpuInstrX_ShowTallies();
676 return fSkip;
677}
678
679#endif /* BS3_SKIPIT_DO_SKIP */
680
681
682/*
683 * Test type #1.
684 * Packed double-precision.
685 */
686typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
687{
688 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
689 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
690 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
691 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
692 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
693 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
694 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
695 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
696} BS3CPUINSTR4_TEST1_VALUES_PD_T;
697
698typedef struct BS3CPUINSTR4_TEST1_T
699{
700 FPFNBS3FAR pfnWorker; /**< Test function worker. */
701 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
702 uint8_t enmRm; /**< R/M type. */
703 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
704 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
705 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
706 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
707 uint8_t cValues; /**< Number of test values in @c paValues. */
708 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *paValues; /**< Test values. */
709} BS3CPUINSTR4_TEST1_T;
710
711typedef struct BS3CPUINSTR4_TEST1_MODE_T
712{
713 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
714 unsigned cTests;
715} BS3CPUINSTR4_TEST1_MODE_T;
716
717/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
718#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
719 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
720
721typedef struct BS3CPUINSTR4_TEST1_CTX_T
722{
723 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig;
724 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest;
725 unsigned iVal;
726 const char BS3_FAR *pszMode;
727 PBS3TRAPFRAME pTrapFrame;
728 PBS3REGCTX pCtx;
729 PBS3EXTCTX pExtCtx;
730 PBS3EXTCTX pExtCtxOut;
731 uint8_t BS3_FAR *puMemOp;
732 uint8_t BS3_FAR *puMemOpAlias;
733 uint8_t cbMemOp;
734 uint8_t cbOperand;
735 uint8_t cbInstr;
736 uint8_t bXcptExpect;
737 bool fSseInstr;
738 uint16_t idTestStep;
739} BS3CPUINSTR4_TEST1_CTX_T;
740/** Pointer to a test 1 context. */
741typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
742
743
744/**
745 * Worker for bs3CpuInstrX_WorkerTestType1.
 *
 * Runs the test value selected by pTestCtx->iVal once: loads the operands
 * into registers / the memory operand, programs MXCSR from the test value,
 * executes via Bs3TrapSetJmpAndRestoreWithExtCtxAndRm and then checks the
 * extended context, the MXCSR exception flags, the raised exception, the
 * register context and the memory operand.
 *
 * In this function X86_XCPT_DB as expected exception stands for "instruction
 * executed", see the "Expected no xcpt" check below.
 *
 * @returns The Bs3TestSubErrorCount() value sampled after executing, but
 *          before any of the checks.  Presumably the caller compares it with
 *          a later count to detect failures - confirm against the caller.
 * @param   bMode       The testing target mode.
 * @param   pTestCtx    The test context (test, value index, contexts, memory
 *                      operand and expectations).
 * @param   pSavedCfg   The saved configuration; its MXCSR value is the base
 *                      for the MXCSR used by the test.
746 */
747static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
748 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
749{
750 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
751 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
752 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
753 PBS3REGCTX pCtx = pTestCtx->pCtx;
754 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
755 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
756 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
757 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
758 uint8_t cbMemOp = pTestCtx->cbMemOp;
759 uint8_t const cbOperand = pTestCtx->cbOperand;
 /* The instruction length is read from the byte preceding the worker code
    (not from pTestCtx->cbInstr); assumes the assembly stubs store it there
    - TODO confirm. */
760 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
761 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
 /* SIMD FP exceptions are expected as #XF when the config sets
    CR4.OSXMMEXCPT and as #UD otherwise. */
762 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
763 bool const fFpFlagsExpect = RT_BOOL(pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS);
764 uint32_t uMxCsr;
 /* Only assigned and compared when the test has a memory operand. */
765 X86YMMREG uMemOpExpect;
766 uint16_t cErrors;
767
768 /*
769 * Set up the context and some expectations.
770 */
771 /* Destination. */
772 if (pTest->iRegDst == UINT8_MAX)
773 {
774 BS3_ASSERT(pTest->enmRm >= RM_MEM);
775 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
776 if (bXcptExpect == X86_XCPT_DB)
777 uMemOpExpect.ymm = pValues->uDstOut.ymm;
778 else
779 Bs3MemSet(&uMemOpExpect, 0xcc, sizeof(uMemOpExpect));
780 }
781
782 /* Source #1 (/ destination for SSE). */
783 if (pTest->iRegSrc1 == UINT8_MAX)
784 {
785 BS3_ASSERT(pTest->enmRm >= RM_MEM);
786 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
787 if (pTest->iRegDst == UINT8_MAX)
788 BS3_ASSERT(pTestCtx->fSseInstr);
789 else
790 uMemOpExpect.ymm = pValues->uSrc1.ymm;
791 }
792 else if (pTestCtx->fSseInstr)
793 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
794 else
795 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
796
797 /* Source #2. */
798 if (pTest->iRegSrc2 == UINT8_MAX)
799 {
800 BS3_ASSERT(pTest->enmRm >= RM_MEM);
801 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
802 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
803 uMemOpExpect.ymm = pValues->uSrc2.ymm;
804 }
805 else if (pTestCtx->fSseInstr)
806 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
807 else
808 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
809
810 /* Memory pointer. */
811 if (pTest->enmRm >= RM_MEM)
812 {
813 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
814 || pTest->iRegSrc1 == UINT8_MAX
815 || pTest->iRegSrc2 == UINT8_MAX);
816 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
817 }
818
819 /* Setup MXCSR for the current test. */
 /* Exception mask and rounding control come from the test value, all other
    bits from the saved MXCSR.  DAZ is only set when flagged as supported. */
820 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
821 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
822 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
823 if ( pValues->fDenormalsAreZero
824 && g_fMxCsrDazSupported)
825 uMxCsr |= X86_MXCSR_DAZ;
826 if (pValues->fFlushToZero)
827 uMxCsr |= X86_MXCSR_FZ;
828 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
829
830 /*
831 * Prepare globals and execute.
832 */
 /* When the instruction is expected to complete without FP exception flags,
    the hint is advanced by cbInstr + 1, matching the RIP advance passed to
    Bs3TestCheckRegCtxEx below.  NOTE(review): the extra byte is presumably
    a one-byte instruction following the tested one - confirm against the
    assembly stubs. */
833 g_uBs3TrapEipHint = pCtx->rip.u32;
834 if ( bXcptExpect == X86_XCPT_DB
835 && !fFpFlagsExpect)
836 g_uBs3TrapEipHint += cbInstr + 1;
837 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
838
839 /*
840 * Check the result.
841 */
 /* From here on pExtCtx is turned into the expected output state and then
    compared with pExtCtxOut. */
842 cErrors = Bs3TestSubErrorCount();
843 if ( bXcptExpect == X86_XCPT_DB
844 && pTest->iRegDst != UINT8_MAX)
845 {
846 if (pTestCtx->fSseInstr)
847 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
848 else
849 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
850 }
851#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
852 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
853 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
854 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
855 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
856#endif
857 if (bXcptExpect == X86_XCPT_DB)
858 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
859 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
860 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
861
862 if (bXcptExpect == X86_XCPT_DB)
863 {
864 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
865
866 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
867 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
868 {
869 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
870 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
871 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
872 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
873 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
874 }
875
876 /* Check if the SIMD FP exception (or lack of) is as expected. */
877 if (fFpFlagsExpect)
878 {
879 if (pTrapFrame->bXcpt == bFpXcpt)
880 { /* likely */ }
881 else
882 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
883 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
884 }
885 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
886 { /* likely */ }
887 else
888 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
889 }
890 /* Check if non-FP exception is as expected. */
891 else if (pTrapFrame->bXcpt != bXcptExpect)
892 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
893
894 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
895 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
896 {
897 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
898 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
899 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
900 }
 /* CR2 is temporarily set to the memory operand address for the register
    context comparison when a #PF is expected, and cleared again afterwards. */
901 if (bXcptExpect == X86_XCPT_PF)
902 pCtx->cr2.u = (uintptr_t)puMemOp;
903 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
904 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
905 pTestCtx->pszMode, pTestCtx->idTestStep);
906 pCtx->cr2.u = 0;
907
 /* Finally the memory operand, read back via the alias mapping. */
908 if ( pTest->enmRm >= RM_MEM
909 && Bs3MemCmp(puMemOpAlias, &uMemOpExpect, cbMemOp) != 0)
910 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &uMemOpExpect, cbMemOp, puMemOpAlias);
911
912 return cErrors;
913}
914
915
916/**
917 * Test type #1 worker.
918 */
919static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
920 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
921{
922 BS3REGCTX Ctx;
923 BS3TRAPFRAME TrapFrame;
924 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
925 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
926 uint8_t BS3_FAR *pbBuf = g_pbBuf;
927 uint32_t cbBuf = g_cbBuf;
928 PBS3EXTCTX pExtCtxOut;
929 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
930 if (pExtCtx)
931 { /* likely */ }
932 else
933 return 0;
934 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
935 { /* likely */ }
936 else
937 {
938 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
939 return 0;
940 }
941
942 /* Ensure the structures are allocated before we sample the stack pointer. */
943 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
944 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
945
946 /*
947 * Create test context.
948 */
949 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
950 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
951 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
952
953 /*
954 * Run the tests in all rings since alignment issues may behave
955 * differently in ring-3 compared to ring-0.
956 */
957 for (;;)
958 {
959 unsigned fPf = 0;
960 do
961 {
962 unsigned iCfg;
963 for (iCfg = 0; iCfg < cConfigs; iCfg++)
964 {
965 unsigned iTest;
966 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
967 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
968 continue; /* unsupported config */
969
970 /*
971 * Iterate the tests.
972 */
973 for (iTest = 0; iTest < cTests; iTest++)
974 {
975 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
976 unsigned const cValues = pTest->cValues;
977 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
978 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
979 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
980 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
981 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
982 uint8_t const cbAlign = cbMemOp;
983 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
984 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
985 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
986 : fSseInstr ? paConfigs[iCfg].bXcptSse
987 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
988 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
989 unsigned cRecompRuns = 0;
990 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
991 unsigned iVal;
992
993 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
994 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
995 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
996 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
997 continue;
998
999 /* #AC is only raised in ring-3. */
1000 if (bXcptExpect == X86_XCPT_AC)
1001 {
1002 if (bRing != 3)
1003 bXcptExpect = X86_XCPT_DB;
1004 else if (fAvxInstr)
1005 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1006 }
1007
1008 if (fPf && bXcptExpect == X86_XCPT_DB)
1009 bXcptExpect = X86_XCPT_PF;
1010
1011 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1012
1013 /*
1014 * Iterate the test values and do the actual testing.
1015 */
1016 while (cRecompRuns < cMaxRecompRuns)
1017 {
1018 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1019 {
1020 uint16_t cErrors;
1021 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1022
1023 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1024 continue;
1025
1026 /*
1027 * Setup the test instruction context and pass it to the worker.
1028 * A few of these can be figured out by the worker but initializing
1029 * it outside the inner most loop is more optimal.
1030 */
1031 TestCtx.pConfig = &paConfigs[iCfg];
1032 TestCtx.pTest = pTest;
1033 TestCtx.iVal = iVal;
1034 TestCtx.pszMode = pszMode;
1035 TestCtx.pTrapFrame = &TrapFrame;
1036 TestCtx.pCtx = &Ctx;
1037 TestCtx.pExtCtx = pExtCtx;
1038 TestCtx.pExtCtxOut = pExtCtxOut;
1039 TestCtx.puMemOp = (uint8_t *)puMemOp;
1040 TestCtx.puMemOpAlias = puMemOpAlias;
1041 TestCtx.cbMemOp = cbMemOp;
1042 TestCtx.cbOperand = cbOperand;
1043 TestCtx.bXcptExpect = bXcptExpect;
1044 TestCtx.fSseInstr = fSseInstr;
1045 TestCtx.idTestStep = idTestStep;
1046 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1047
1048 if (cErrors != Bs3TestSubErrorCount())
1049 {
1050 if (paConfigs[iCfg].fAligned)
1051 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1052 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1053 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1054 else
1055 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1056 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1057 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1058 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1059 Bs3TestPrintf("\n");
1060 }
1061 }
1062 }
1063 }
1064 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1065 }
1066 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1067
1068 /*
1069 * Next ring.
1070 */
1071 bRing++;
1072 if (bRing > 3 || bMode == BS3_MODE_RM)
1073 break;
1074 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1075 }
1076
1077 /*
1078 * Cleanup.
1079 */
1080 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1081 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1082 return 0;
1083}
1084
1085
1086/*
1087 * [v]addpd.
1088 */
1089BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1090{
1091 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1092 {
1093 /* 0*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1094 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1095 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1096 /*mask */ X86_MXCSR_XCPT_MASK,
1097 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1098 /*flags */ 0 },
1099 /* 1*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1100 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1101 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1102 /*mask */ ~X86_MXCSR_XCPT_MASK,
1103 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1104 /*flags */ 0 },
1105 /* 2*/{ { /*src2 */ { RTFLOAT64U_INIT_C(0, 0, 0x409), /*1024*/ RTFLOAT64U_INIT_C(0, 0xb800000000000, 0x404) /*55*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_INF(1) } },
1106 { /*src1 */ { RTFLOAT64U_INIT_C(0, 0, 0x408), /* 512*/ RTFLOAT64U_INIT_C(0, 0xc000000000000, 0x401) /* 7*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1107 { /* => */ { RTFLOAT64U_INIT_C(0, 0x8000000000000, 0x409) /*1536*/, RTFLOAT64U_INIT_C(0, 0xf000000000000, 0x404) /*62*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_INF(1) } },
1108 /*mask */ X86_MXCSR_XCPT_MASK,
1109 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1110 /*flags */ 0 },
1111 /* 3*/{ { /*src2 */ { RTFLOAT64U_INIT_C(0, 0x26580b4800000, 0x41d), /* 1234567890*/ RTFLOAT64U_INIT_C(0, 0xd6f3458800000, 0x41c) /*987654321*/, RTFLOAT64U_INIT_SNAN(0), RTFLOAT64U_INIT_SNAN(1) } },
1112 { /*src1 */ { RTFLOAT64U_INIT_C(1, 0x26580b4800000, 0x41d), /*-1234567890*/ RTFLOAT64U_INIT_C(1, 0x9000000000000, 0x405) /* -100*/, RTFLOAT64U_INIT_SNAN(1), RTFLOAT64U_INIT_SNAN(0) } },
1113 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_C(0, 0xd6f3426800000, 0x41c) /*987654221*/, RTFLOAT64U_INIT_SNAN(0), RTFLOAT64U_INIT_SNAN(0) } },
1114 /*mask */ ~X86_MXCSR_XCPT_MASK,
1115 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1116 /*flags */ 0 },
1117 /* 4*/{ { /*src2 */ { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1118 { /*src1 */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1119 { /* => */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1120 /*mask */ ~X86_MXCSR_XCPT_MASK,
1121 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1122 /*flags */ X86_MXCSR_IE },
1123 };
1124
1125 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1126 {
1127 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1128 };
1129 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1130 {
1131 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1132 };
1133 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1134 {
1135 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1136 };
1137
1138 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1139 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1140 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1141 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1142}
1143
1144
1145/**
1146 * The 32-bit protected mode main function.
1147 *
1148 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1149 * we'll switch between PE32 and RM for each test step we perform). Given that
1150 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1151 *
1152 * Some extra steps needs to be taken to properly handle extended state in LM64
1153 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1154 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1155 */
1156BS3_DECL(void) Main_pe32()
1157{
1158 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1159 {
1160#if 1 /*ndef DEBUG_bird*/
1161# define ALL_TESTS
1162#endif
1163#if defined(ALL_TESTS)
1164 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1165#endif
1166 };
1167 Bs3TestInit("bs3-cpu-instr-4");
1168
1169 /*
1170 * Initialize globals.
1171 */
1172 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1173 {
1174 uint32_t fEbx, fEcx, fEdx;
1175 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1176 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1177 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1178 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1179 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1180 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1181 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1182 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1183 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1184 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1185 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1186 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1187 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1188 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1189 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1190 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1191
1192 if (ASMCpuId_EAX(0) >= 7)
1193 {
1194 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1195 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1196 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1197 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1198 }
1199
1200 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1201 {
1202 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1203 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1204 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1205 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1206 }
1207 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1208
1209 /*
1210 * Figure out FPU save/restore method and support for DAZ bit.
1211 */
1212 {
1213 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1214 * alloc/free a context. Replicating the logic in the bs3kit here, though
1215 * doable, runs a risk of not updating this when the other logic is
1216 * changed. */
1217 uint64_t fFlags;
1218 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1219 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1220 if (pExtCtx)
1221 {
1222 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1223 g_enmExtCtxMethod = pExtCtx->enmMethod;
1224 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1225 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1226 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1227 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1228 g_fMxCsrDazSupported = true;
1229 }
1230 else
1231 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1232 }
1233
1234 /*
1235 * Allocate a buffer for testing.
1236 */
1237 g_cbBuf = X86_PAGE_SIZE * 4;
1238 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1239 if (g_pbBuf)
1240 {
1241 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1242 if (g_pbBufAliasAlloc)
1243 {
1244 /*
1245 * Do the tests.
1246 */
1247 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1248#ifdef BS3_SKIPIT_DO_SKIP
1249 bs3CpuInstrX_ShowTallies();
1250#endif
1251 }
1252 else
1253 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1254 }
1255 else
1256 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1257 }
1258
1259 Bs3TestTerm();
1260}
1261
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette