VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104645

Last change on this file since 104645 was 104645, checked in by vboxsync, 7 months ago

bs3-cpu-instr-4: SIMD FP instruction testcase basic but functioning skeleton, work-in-progress.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 53.2 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104645 2024-05-16 07:06:33Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE and AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* External Symbols *
50*********************************************************************************************************************************/
51BS3TRAPFRAME volatile g_Bs3CpuInstr4TrapFrame;
52/** The extended CPU context to save from the trap handler. */
53BS3EXTCTX volatile g_Bs3CpuInstr4ExtCtxTrap;
54/** Whether the trap handler was called. */
55bool volatile g_Bs3CpuInstr4TrapRaised;
56
57extern FNBS3TRAPHANDLER16 bs3CpuInstr4TrapHandler_c16;
58extern FNBS3TRAPHANDLER32 bs3CpuInstr4TrapHandler_c32;
59extern FNBS3TRAPHANDLER64 bs3CpuInstr4TrapHandler_c64;
60
61
62/*********************************************************************************************************************************
63* Defined Constants And Macros *
64*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * @returns 0 for 16-bit code, 1 for 32-bit code, 2 for everything else.
 * @param   a_bMode     The execution mode.  Note: evaluated more than once.
 */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
68
/** Size of a buffer (terminator included) able to hold the names of all the
 * SIMD FP exception flags combined. */
#define BS3_FP_XCPT_NAMES_MAXLEN        (sizeof(" IE DE ZE OE UE PE "))
71
72
73/*********************************************************************************************************************************
74* Structures and Typedefs *
75*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The enumerator order matters: the values double as indexes into
 * g_afTypeSupports, and T_128BITS / T_256BITS alias the first SSE and the
 * first 256-bit AVX entry respectively.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID,
    T_MMX,
    /** MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE,
    /** MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSE2,
    /** MMX instruction, but require the SSSE3 CPUID to work. */
    T_MMX_SSSE3,
    T_AXMMX,
    T_AXMMX_OR_SSE,
    T_SSE,
    /** Alias of T_SSE. */
    T_128BITS = T_SSE,
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    T_AVX_256,
    /** Alias of T_AVX_256. */
    T_256BITS = T_AVX_256,
    T_AVX2_256,
    /** Number of distinct values; used as array size. */
    T_MAX
} BS3CPUINSTRX_INSTRTYPE_T;
104
/**
 * Memory or register rm variant.
 *
 * The RM_MEMxx values are a hack for movss and similar instructions whose
 * memory operand is narrower than the register operand.
 */
enum
{
    RM_REG   = 0,
    RM_MEM   = 1,
    RM_MEM8  = 2,   /**< Memory operand is 8 bits. */
    RM_MEM16 = 3,   /**< Memory operand is 16 bits. */
    RM_MEM32 = 4,   /**< Memory operand is 32 bits. */
    RM_MEM64 = 5    /**< Memory operand is 64 bits. */
};
114
115/**
116 * Execution environment configuration.
117 */
118typedef struct BS3CPUINSTR4_CONFIG_T
119{
120 uint16_t fCr0Mp : 1;
121 uint16_t fCr0Em : 1;
122 uint16_t fCr0Ts : 1;
123 uint16_t fCr4OsFxSR : 1;
124 uint16_t fCr4OsXSave : 1;
125 uint16_t fCr4OsXmmExcpt : 1;
126 uint16_t fXcr0Sse : 1;
127 uint16_t fXcr0Avx : 1;
128 uint16_t fAligned : 1; /**< Aligned mem operands. If 0, they will be misaligned and tests w/o mem operands skipped. */
129 uint16_t fAlignCheck : 1;
130 uint16_t fMxCsrMM : 1; /**< AMD only */
131 uint8_t bXcptSse;
132 uint8_t bXcptAvx;
133} BS3CPUINSTR4_CONFIG_T;
134/** Pointer to an execution environment configuration. */
135typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
136
137/** State saved by bs3CpuInstr4ConfigReconfigure. */
138typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
139{
140 uint32_t uCr0;
141 uint32_t uCr4;
142 uint32_t uEfl;
143 uint16_t uFcw;
144 uint16_t uFsw;
145 uint32_t uMxCsr;
146} BS3CPUINSTRX_CONFIG_SAVED_T;
147typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
148typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
149
150/**
151 * YMM packed double precision floating-point register.
152 * @todo move to x86.h?
153 */
154typedef union X86YMMFLOATPDREG
155{
156 /** Double precision packed floating point view. */
157 RTFLOAT64U ar64[4];
158 /** Single precision packed floating point view. */
159 RTFLOAT32U ar32[8];
160 /** 256-bit integer view. */
161 RTUINT256U ymm;
162} X86YMMFLOATPDREG;
163# ifndef VBOX_FOR_DTRACE_LIB
164AssertCompileSize(X86YMMFLOATPDREG, 32);
165# endif
166/** Pointer to a YMM packed floating-point register. */
167typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
168/** Pointer to a const YMM packed floating-point register. */
169typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
170
171/**
172 * YMM scalar floating-point register.
173 * @todo move to x86.h?
174 */
175typedef union X86YMMSFLOATREG
176{
177 /** Double precision scalar floating point view. */
178 RTFLOAT128U ar128[2];
179 /** 256-bit integer view. */
180 RTUINT256U ymm;
181} X86YMMSFLOATREG;
182# ifndef VBOX_FOR_DTRACE_LIB
183AssertCompileSize(X86YMMSFLOATREG, 32);
184# endif
185/** Pointer to a YMM scalar floating-point register. */
186typedef X86YMMSFLOATREG *PX86YMMSFLOATREG;
187/** Pointer to a const YMM scalar floating-point register. */
188typedef X86YMMSFLOATREG const *PCX86YMMSFLOATREG;
189
190
191/*********************************************************************************************************************************
192* Global Variables *
193*********************************************************************************************************************************/
194static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
195static bool g_fAmdMisalignedSse = false;
196static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
197static bool g_fMxCsrDazSupported = false;
198
199/** Zero value (indexed by fSign). */
200RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
201RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
202
203/** One value (indexed by fSign). */
204RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
205 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
206RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
207 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
208
209/** Infinity (indexed by fSign). */
210RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
211RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
212
213/** Default QNaNs (indexed by fSign). */
214RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
215RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
216
217/** Size of g_pbBuf - at least three pages. */
218static uint32_t g_cbBuf;
219/** Buffer of g_cbBuf size. */
220static uint8_t BS3_FAR *g_pbBuf;
221/** RW alias for the buffer memory at g_pbBuf. Set up by bs3CpuInstrXBufSetup. */
222static uint8_t BS3_FAR *g_pbBufAlias;
223/** RW alias for the memory at g_pbBuf. */
224static uint8_t BS3_FAR *g_pbBufAliasAlloc;
225
226/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned. */
227static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
228{
229/*
230 * X87 SSE SSE SSE AVX SSE AVX AVX SSE AVX AMD/SSE <-- applies to
231 * +AVX +AVX +AMD/SSE +AMD/SSE
232 * CR0 CR0 CR0 CR4 CR4 CR4 XCR0 XCR0 MXCSR
233 * MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM, MM, bXcptSse, bXcptAvx */
234 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
235 { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */
236 { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 */
237 { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 */
238 { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 */
239 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 */
240 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 */
241 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */
242 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */
243 { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 */
244 /* Memory misalignment and alignment checks: */
245 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #10 */
246 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_GP }, /* #11 */
247 { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 */
248 /* AMD only: */
249 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 */
250 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 */
251};
252
253
254static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
255{
256 switch (uVector)
257 {
258 case X86_XCPT_DE: return "#DE";
259 case X86_XCPT_DB: return "#DB";
260 case X86_XCPT_NMI: return "#NMI";
261 case X86_XCPT_BP: return "#BP";
262 case X86_XCPT_OF: return "#OF";
263 case X86_XCPT_BR: return "#BR";
264 case X86_XCPT_UD: return "#UD";
265 case X86_XCPT_NM: return "#NM";
266 case X86_XCPT_DF: return "#DF";
267 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
268 case X86_XCPT_TS: return "#TS";
269 case X86_XCPT_NP: return "#NP";
270 case X86_XCPT_SS: return "#SS";
271 case X86_XCPT_GP: return "#GP";
272 case X86_XCPT_PF: return "#PF";
273 case X86_XCPT_MF: return "#MF";
274 case X86_XCPT_AC: return "#AC";
275 case X86_XCPT_MC: return "#MC";
276 case X86_XCPT_XF: return "#XF";
277 case X86_XCPT_VE: return "#VE";
278 case X86_XCPT_CP: return "#CP";
279 case X86_XCPT_VC: return "#VC";
280 case X86_XCPT_SX: return "#SX";
281 }
282 return "UNKNOWN";
283}
284
285
286static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
287{
288 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
289 return Bs3StrPrintf(pszBuf, cchBuf, " None");
290 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "",
291 fMxCsr & X86_MXCSR_DE ? " DE" : "",
292 fMxCsr & X86_MXCSR_ZE ? " ZE" : "",
293 fMxCsr & X86_MXCSR_OE ? " OE" : "",
294 fMxCsr & X86_MXCSR_UE ? " UE" : "",
295 fMxCsr & X86_MXCSR_PE ? " PE" : "");
296}
297
298
299/**
300 * Reconfigures the execution environment according to @a pConfig.
301 *
302 * Call bs3CpuInstrXConfigRestore to undo the changes.
303 *
304 * @returns true on success, false if the configuration cannot be applied. In
305 * the latter case, no context changes are made.
306 * @param pSavedCfg Where to save state we modify.
307 * @param pCtx The register context to modify.
308 * @param pExtCtx The extended register context to modify.
309 * @param pConfig The configuration to apply.
310 * @param bMode The target mode.
311 */
312static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
313 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
314{
315 /*
316 * Save context bits we may change here
317 */
318 pSavedCfg->uCr0 = pCtx->cr0.u32;
319 pSavedCfg->uCr4 = pCtx->cr4.u32;
320 pSavedCfg->uEfl = pCtx->rflags.u32;
321 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
322 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
323 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
324
325 /*
326 * Can we make these changes?
327 */
328 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
329 return false;
330
331 /*
332 * Modify the test context.
333 */
334 if (pConfig->fCr0Mp)
335 pCtx->cr0.u32 |= X86_CR0_MP;
336 else
337 pCtx->cr0.u32 &= ~X86_CR0_MP;
338 if (pConfig->fCr0Em)
339 pCtx->cr0.u32 |= X86_CR0_EM;
340 else
341 pCtx->cr0.u32 &= ~X86_CR0_EM;
342 if (pConfig->fCr0Ts)
343 pCtx->cr0.u32 |= X86_CR0_TS;
344 else
345 pCtx->cr0.u32 &= ~X86_CR0_TS;
346
347 if (pConfig->fCr4OsFxSR)
348 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
349 else
350 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
351
352 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
353 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
354 else
355 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
356
357 if (pConfig->fCr4OsFxSR)
358 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
359 else
360 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
361
362 if (pConfig->fCr4OsXSave)
363 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
364 else
365 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
366
367 if (pConfig->fXcr0Sse)
368 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
369 else
370 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
371 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
372 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
373 else
374 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
375
376 if (pConfig->fAlignCheck)
377 {
378 pCtx->rflags.u32 |= X86_EFL_AC;
379 pCtx->cr0.u32 |= X86_CR0_AM;
380 }
381 else
382 {
383 pCtx->rflags.u32 &= ~X86_EFL_AC;
384 pCtx->cr0.u32 &= ~X86_CR0_AM;
385 }
386
387 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
388 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
389
390 if (pConfig->fMxCsrMM)
391 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
392 else
393 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
394 return true;
395}
396
397
398/**
399 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
400 */
401static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
402{
403 pCtx->cr0.u32 = pSavedCfg->uCr0;
404 pCtx->cr4.u32 = pSavedCfg->uCr4;
405 pCtx->rflags.u32 = pSavedCfg->uEfl;
406 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
407 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
408 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
409 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
410}
411
412
413/**
414 * Allocates three extended CPU contexts and initializes the first one
415 * with random data.
416 * @returns First extended context, initialized with randomish data. NULL on
417 * failure (complained).
418 * @param ppExtCtx2 Where to return the 2nd context.
419 */
420static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
421{
422 /* Allocate extended context structures. */
423 uint64_t fFlags;
424 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
425 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
426 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
427 if (pExtCtx1)
428 {
429 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
430 /** @todo populate with semi-random stuff. */
431
432 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
433 *ppExtCtx2 = pExtCtx2;
434 return pExtCtx1;
435 }
436 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
437 *ppExtCtx2 = NULL;
438 return NULL;
439}
440
441
442static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
443{
444 RT_NOREF_PV(pExtCtx2);
445 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
446}
447
448
449/**
450 * Sets up SSE and AVX bits relevant for FPU instructions.
451 */
452static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
453{
454 /* CR0: */
455 uint32_t cr0 = Bs3RegGetCr0();
456 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
457 cr0 |= X86_CR0_NE;
458 Bs3RegSetCr0(cr0);
459
460 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
461 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
462 pCtx->cr0.u32 |= X86_CR0_NE;
463
464 /* CR4: */
465 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
466 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
467 {
468 uint32_t cr4 = Bs3RegGetCr4();
469 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
470 {
471 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
472 Bs3RegSetCr4(cr4);
473 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
474 }
475 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
476 {
477 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
478 Bs3RegSetCr4(cr4);
479 }
480 pCtx->cr4.u32 = cr4;
481 }
482}
483
484
485
486/**
487 * Configures the buffer with electrict fences in paged modes.
488 *
489 * @returns Adjusted buffer pointer.
490 * @param pbBuf The buffer pointer.
491 * @param pcbBuf Pointer to the buffer size (input & output).
492 * @param bMode The testing target mode.
493 */
494DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
495{
496 if (BS3_MODE_IS_PAGED(bMode))
497 {
498 int rc;
499 uint32_t cbBuf = *pcbBuf;
500 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
501 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
502 pbBuf += X86_PAGE_SIZE;
503 cbBuf -= X86_PAGE_SIZE * 2;
504 *pcbBuf = cbBuf;
505
506 g_pbBufAlias = g_pbBufAliasAlloc;
507 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
508 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
509 if (RT_FAILURE(rc))
510 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
511 }
512 else
513 g_pbBufAlias = pbBuf;
514 return pbBuf;
515}
516
517
518/**
519 * Undoes what bs3CpuInstrXBufSetup did.
520 *
521 * @param pbBuf The buffer pointer.
522 * @param cbBuf The buffer size.
523 * @param bMode The testing target mode.
524 */
525DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
526{
527 if (BS3_MODE_IS_PAGED(bMode))
528 {
529 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
530 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
531 }
532}
533
534
535/**
536 * Gets a buffer of a @a cbMemOp sized operand according to the given
537 * configuration and alignment restrictions.
538 *
539 * @returns Pointer to the buffer.
540 * @param pbBuf The buffer pointer.
541 * @param cbBuf The buffer size.
542 * @param cbMemOp The operand size.
543 * @param cbAlign The operand alignment restriction.
544 * @param pConfig The configuration.
545 * @param fPageFault The \#PF test setting.
546 */
547DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
548 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
549{
550 /* All allocations are at the tail end of the buffer, so that we've got a
551 guard page following the operand. When asked to consistenly trigger
552 a #PF, we slide the buffer into that guard page. */
553 if (fPageFault)
554 cbBuf += X86_PAGE_SIZE;
555
556 if (pConfig->fAligned)
557 {
558 if (!pConfig->fAlignCheck)
559 return &pbBuf[cbBuf - cbMemOp];
560 return &pbBuf[cbBuf - cbMemOp - cbAlign];
561 }
562 return &pbBuf[cbBuf - cbMemOp - 1];
563}
564
565
566/**
567 * Determins the size of memory operands.
568 */
569DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
570{
571 if (enmRm <= RM_MEM)
572 return cbOperand;
573 if (enmRm == RM_MEM8)
574 return sizeof(uint8_t);
575 if (enmRm == RM_MEM16)
576 return sizeof(uint16_t);
577 if (enmRm == RM_MEM32)
578 return sizeof(uint32_t);
579 if (enmRm == RM_MEM64)
580 return sizeof(uint64_t);
581 BS3_ASSERT(0);
582 return cbOperand;
583}
584
585
/*
 * Developer aid for testing the tests faster.
 *
 * `bs3CpuInstrX_SkipIt()' randomly skips a large fraction of the micro-tests.
 * It is sufficiently random that over a large number of runs, all micro-tests
 * will be hit.
 *
 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
 * (on an Intel Core i7-10700, fwiw).
 *
 * Knobs:
 *   - BS3_SKIPIT_DO_SKIP: Define it here to activate this 'developer's
 *     speed-testing mode'.
 *   - BS3_SKIPIT_AVG_SKIP: Governs approximately how many micro-tests are
 *     skipped in a row; e.g. the default of 26 means about every 27th
 *     micro-test is run during a particular test run.  (This is not 27x
 *     faster due to other activities which are not skipped!)  This is only
 *     an average; the actual skips are random.
 *   - BS3_SKIPIT_DO_ARGS: Define it to pass the (currently ignored) `bRing,
 *     iCfg, iTest, iVal, iVariant' arguments on to bs3CpuInstrX_SkipIt(), so
 *     that it can be modified to focus on specific sub-tests.  This costs
 *     about 3% performance.
 *
 * Note! The skipping is not compatible with testing the native recompiler as
 *       it requires the test code to be run a number of times before it kicks
 *       in and does the native recompilation (currently around 16 times).
 */
#define BS3_SKIPIT_AVG_SKIP 26
#undef  BS3_SKIPIT_DO_SKIP
#undef  BS3_SKIPIT_DO_ARGS
616
#ifndef BS3_SKIPIT_DO_SKIP
/* Skipping is disabled: every micro-test is run. */
# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
#else
# include <iprt/asm-amd64-x86.h>
# include <iprt/asm-math.h>

/**
 * A simple Lehmer linear congruential pseudo-random number generator using
 * the constants suggested by Park & Miller:
 *
 *      modulus    = 2^31 - 1 (INT32_MAX)
 *      multiplier = 7^5      (16807)
 *
 * It produces numbers in the range [1..INT32_MAX-1] and is more chaotic in
 * the higher bits.  The state is seeded from the TSC whenever it is zero.
 *
 * @note Runtime/common/rand/randparkmiller.cpp also uses this algorithm,
 *       though the zero handling is different.
 *
 * @returns The next pseudo-random number.
 */
DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
{
    static uint32_t s_uSeedMemory = 0;
    uint32_t        uState        = s_uSeedMemory;
    uint64_t        uProduct;

    if (uState == 0)
        uState = (uint32_t)ASMReadTSC();
    uProduct      = ASMMult2xU32RetU64(uState, 16807);
    uState        = ASMModU64ByU32RetU32(uProduct, INT32_MAX);
    s_uSeedMemory = uState;
    return uState;
}

/** Number of micro-tests seen. */
static unsigned g_cSeen;
/** Number of micro-tests skipped. */
static unsigned g_cSkipped;

/**
 * Prints the seen / tested / skipped micro-test counts.
 */
static void bs3CpuInstrX_ShowTallies(void)
{
    Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, g_cSeen - g_cSkipped, g_cSkipped);
}

/**
 * Decides whether the current micro-test should be skipped.
 *
 * One micro-test is run, then a random number of them (0 thru
 * BS3_SKIPIT_AVG_SKIP * 2) is skipped before the next one is run.  The
 * tallies are printed for every 25000 micro-tests seen.
 *
 * @returns true if the micro-test should be skipped, false if it should run.
 */
# ifdef BS3_SKIPIT_DO_ARGS
#  define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
# else
#  define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
static bool bs3CpuInstrX_SkipIt(void)
# endif
{
    static unsigned s_uTimes = 0;
    bool            fSkip;

    /* Cache calls to the relatively expensive random routine */
    if (s_uTimes == 0)
        s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;

    /* Skip for as long as the countdown has not run out. */
    s_uTimes -= 1;
    fSkip = s_uTimes != 0;
    if (fSkip)
        g_cSkipped += 1;

    g_cSeen += 1;
    if (g_cSeen % 25000 == 0)
        bs3CpuInstrX_ShowTallies();
    return fSkip;
}

#endif /* BS3_SKIPIT_DO_SKIP */
678
679
680/*
681 * Test type #1.
682 * Packed double-precision.
683 */
684typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
685{
686 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
687 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
688 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
689 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
690 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
691 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
692 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
693 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
694} BS3CPUINSTR4_TEST1_VALUES_PD_T;
695
696typedef struct BS3CPUINSTR4_TEST1_T
697{
698 FPFNBS3FAR pfnWorker; /**< Test function worker. */
699 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
700 uint8_t enmRm; /**< R/M type. */
701 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
702 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
703 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
704 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
705 uint8_t cValues; /**< Number of test values in @c paValues. */
706 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *paValues; /**< Test values. */
707} BS3CPUINSTR4_TEST1_T;
708
709typedef struct BS3CPUINSTR4_TEST1_MODE_T
710{
711 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
712 unsigned cTests;
713} BS3CPUINSTR4_TEST1_MODE_T;
714
715/** Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries). */
716#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
717 { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
718
719typedef struct BS3CPUINSTR4_TEST1_CTX_T
720{
721 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig;
722 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest;
723 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *pValues;
724 const char BS3_FAR *pszMode;
725 PBS3TRAPFRAME pTrapFrame;
726 PBS3REGCTX pCtx;
727 PBS3EXTCTX pExtCtx;
728 PBS3EXTCTX pExtCtxOut;
729 uint8_t BS3_FAR *puMemOp;
730 uint8_t BS3_FAR *puMemOpAlias;
731 uint8_t cbMemOp;
732 uint8_t cbOperand;
733 uint8_t cbInstr;
734 uint8_t bXcptExpect;
735 bool fSseInstr;
736 uint16_t idTestStep;
737} BS3CPUINSTR4_TEST1_CTX_T;
738/** Pointer to a test 1 context. */
739typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
740
741
742static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
743 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
744{
745 BS3CPUINSTR4_TEST1_T const BS3_FAR * pTest = pTestCtx->pTest;
746 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *pValues = pTestCtx->pValues;
747 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
748 PBS3REGCTX pCtx = pTestCtx->pCtx;
749 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
750 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
751 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
752 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
753 uint8_t cbMemOp = pTestCtx->cbMemOp;
754 uint8_t const cbOperand = pTestCtx->cbOperand;
755 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
756 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
757 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
758 bool const fFpFlagsExpect = RT_BOOL(pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS);
759 uint32_t uMxCsr;
760 X86YMMREG uMemOpExpect;
761 uint16_t cErrors;
762
763 /*
764 * Set up the context and some expectations.
765 */
766 /* Destination. */
767 if (pTest->iRegDst == UINT8_MAX)
768 {
769 BS3_ASSERT(pTest->enmRm >= RM_MEM);
770 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
771 if (bXcptExpect == X86_XCPT_DB)
772 uMemOpExpect.ymm = pValues->uDstOut.ymm;
773 else
774 Bs3MemSet(&uMemOpExpect, 0xcc, sizeof(uMemOpExpect));
775 }
776
777 /* Source #1 (/ destination for SSE). */
778 if (pTest->iRegSrc1 == UINT8_MAX)
779 {
780 BS3_ASSERT(pTest->enmRm >= RM_MEM);
781 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
782 if (pTest->iRegDst == UINT8_MAX)
783 BS3_ASSERT(pTestCtx->fSseInstr);
784 else
785 uMemOpExpect.ymm = pValues->uSrc1.ymm;
786 }
787 else if (pTestCtx->fSseInstr)
788 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
789 else
790 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
791
792 /* Source #2. */
793 if (pTest->iRegSrc2 == UINT8_MAX)
794 {
795 BS3_ASSERT(pTest->enmRm >= RM_MEM);
796 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
797 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
798 uMemOpExpect.ymm = pValues->uSrc2.ymm;
799 }
800 else if (pTestCtx->fSseInstr)
801 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
802 else
803 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
804
805 /* Memory pointer. */
806 if (pTest->enmRm >= RM_MEM)
807 {
808 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
809 || pTest->iRegSrc1 == UINT8_MAX
810 || pTest->iRegSrc2 == UINT8_MAX);
811 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
812 }
813
814 /* Setup MXCSR for the current test. */
815 {
816 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
817 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
818 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
819 if ( pValues->fDenormalsAreZero
820 && g_fMxCsrDazSupported)
821 uMxCsr |= X86_MXCSR_DAZ;
822 if (pValues->fFlushToZero)
823 uMxCsr |= X86_MXCSR_FZ;
824 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
825 }
826
827 /*
828 * Prepare globals and execute.
829 */
830 g_Bs3CpuInstr4TrapRaised = false;
831 g_uBs3TrapEipHint = pCtx->rip.u32;
832 if ( bXcptExpect == X86_XCPT_DB
833 && !fFpFlagsExpect)
834 g_uBs3TrapEipHint += cbInstr + 1;
835 //Bs3MemZero((void *)&g_Bs3CpuInstr4TrapFrame, sizeof(g_Bs3CpuInstr4TrapFrame));
836 //Bs3TrapSetHandlerEx(bFpXcpt, bs3CpuInstr4TrapHandler_c16, bs3CpuInstr4TrapHandler_c32, bs3CpuInstr4TrapHandler_c64);
837 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
838
839 /*
840 * Check the result.
841 */
842 cErrors = Bs3TestSubErrorCount();
843 if ( bXcptExpect == X86_XCPT_DB
844 && pTest->iRegDst != UINT8_MAX)
845 {
846 if (pTestCtx->fSseInstr)
847 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
848 else
849 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
850 }
851#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
852 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
853 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
854 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
855 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
856#endif
857 if (bXcptExpect == X86_XCPT_DB)
858 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
859 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
860 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
861
862 if (bXcptExpect == X86_XCPT_DB)
863 {
864 PBS3TRAPFRAME volatile pTrapFrameXcpt = pTrapFrame;
865 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
866
867 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
868 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
869 {
870 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
871 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
872 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
873 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
874 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
875 }
876
877 /* Check if the SIMD FP exception (or lack of) is as expected. */
878 if (fFpFlagsExpect)
879 {
880 if (pTrapFrameXcpt->bXcpt == bFpXcpt)
881 { /* likely */ }
882 else
883 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
884 bs3CpuInstr4XcptName(pTrapFrameXcpt->bXcpt));
885 }
886 else if (pTrapFrameXcpt->bXcpt == X86_XCPT_DB)
887 { /* likely */ }
888 else
889 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrameXcpt->bXcpt));
890 }
891 /* Check if non-FP exception is as expected. */
892 else if (pTrapFrame->bXcpt != bXcptExpect)
893 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
894
895 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
896 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
897 {
898 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
899 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
900 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
901 }
902 if (bXcptExpect == X86_XCPT_PF)
903 pCtx->cr2.u = (uintptr_t)puMemOp;
904 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0,
905 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
906 pTestCtx->pszMode, pTestCtx->idTestStep);
907 pCtx->cr2.u = 0;
908
909 if ( pTest->enmRm >= RM_MEM
910 && Bs3MemCmp(puMemOpAlias, &uMemOpExpect, cbMemOp) != 0)
911 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &uMemOpExpect, cbMemOp, puMemOpAlias);
912
913 return cErrors;
914}
915
916
917/**
918 * Test type #1 worker.
919 */
920static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
921 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
922{
923 BS3REGCTX Ctx;
924 BS3TRAPFRAME TrapFrame;
925 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
926 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
927 uint8_t BS3_FAR *pbBuf = g_pbBuf;
928 uint32_t cbBuf = g_cbBuf;
929 PBS3EXTCTX pExtCtxOut;
930 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
931 if (pExtCtx)
932 { /* likely */ }
933 else
934 return 0;
935 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
936 { /* likely */ }
937 else
938 {
939 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
940 return 0;
941 }
942
943 /* Ensure the structures are allocated before we sample the stack pointer. */
944 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
945 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
946
947 /*
948 * Create test context.
949 */
950 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
951 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
952 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
953 //Bs3TestPrintf("FTW=%#x mm1/st1=%.16Rhxs\n", pExtCtx->Ctx.x87.FTW, &pExtCtx->Ctx.x87.aRegs[1]);
954
955 /*
956 * Run the tests in all rings since alignment issues may behave
957 * differently in ring-3 compared to ring-0.
958 */
959 for (;;)
960 {
961 unsigned fPf = 0;
962 do
963 {
964 unsigned iCfg;
965 for (iCfg = 0; iCfg < cConfigs; iCfg++)
966 {
967 unsigned iTest;
968 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
969 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
970 continue; /* unsupported config */
971
972 /*
973 * Iterate the tests.
974 */
975 for (iTest = 0; iTest < cTests; iTest++)
976 {
977 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
978 unsigned const cValues = pTest->cValues;
979 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
980 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
981 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
982 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
983 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
984 uint8_t const cbAlign = cbMemOp;
985 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
986 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
987 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
988 : fSseInstr ? paConfigs[iCfg].bXcptSse
989 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
990 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
991 unsigned cRecompRuns = 0;
992 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
993 unsigned iVal;
994
995 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
996 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
997 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
998 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
999 continue;
1000
1001 /* #AC is only raised in ring-3. */
1002 if (bXcptExpect == X86_XCPT_AC)
1003 {
1004 if (bRing != 3)
1005 bXcptExpect = X86_XCPT_DB;
1006 else if (fAvxInstr)
1007 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1008 }
1009
1010 if (fPf && bXcptExpect == X86_XCPT_DB)
1011 bXcptExpect = X86_XCPT_PF;
1012
1013 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1014
1015 /*
1016 * Iterate the test values and do the actual testing.
1017 */
1018 while (cRecompRuns < cMaxRecompRuns)
1019 {
1020 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1021 {
1022 uint16_t cErrors;
1023 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1024 BS3CPUINSTR4_TEST1_VALUES_PD_T const BS3_FAR *pValues = &pTest->paValues[iVal];
1025
1026 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1027 continue;
1028
1029 /*
1030 * Setup the test instruction context and pass it to the worker.
1031 * A few of these can be figured out by the worker but initializing
1032 * it outside the inner most loop is more optimal.
1033 */
1034 TestCtx.pConfig = &paConfigs[iCfg];
1035 TestCtx.pTest = pTest;
1036 TestCtx.pValues = pValues;
1037 TestCtx.pszMode = pszMode;
1038 TestCtx.pTrapFrame = &TrapFrame;
1039 TestCtx.pCtx = &Ctx;
1040 TestCtx.pExtCtx = pExtCtx;
1041 TestCtx.pExtCtxOut = pExtCtxOut;
1042 TestCtx.puMemOp = (uint8_t *)puMemOp;
1043 TestCtx.puMemOpAlias = puMemOpAlias;
1044 TestCtx.cbMemOp = cbMemOp;
1045 TestCtx.cbOperand = cbOperand;
1046 TestCtx.bXcptExpect = bXcptExpect;
1047 TestCtx.fSseInstr = fSseInstr;
1048 TestCtx.idTestStep = idTestStep;
1049 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1050
1051 if (cErrors != Bs3TestSubErrorCount())
1052 {
1053 if (paConfigs[iCfg].fAligned)
1054 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1055 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1056 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1057 else
1058 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1059 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1060 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1061 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1062 Bs3TestPrintf("\n");
1063 }
1064 }
1065 }
1066 }
1067 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1068 }
1069 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1070
1071 /*
1072 * Next ring.
1073 */
1074 bRing++;
1075 if (bRing > 3 || bMode == BS3_MODE_RM)
1076 break;
1077 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1078 }
1079
1080 /*
1081 * Cleanup.
1082 */
1083 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1084 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1085 return 0;
1086}
1087
1088
1089/*
1090 * [v]addpd.
1091 */
1092BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1093{
1094 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1095 {
1096 /* 0*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1097 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1098 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1099 /*mask */ X86_MXCSR_XCPT_MASK,
1100 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1101 /*flags */ 0 },
1102 /* 1*/{ { /*src2 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1103 { /*src1 */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1104 { /* => */ { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1105 /*mask */ ~X86_MXCSR_XCPT_MASK,
1106 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1107 /*flags */ 0 },
1108 /* 2*/{ { /*src2 */ { RTFLOAT64U_INIT_C(0, 0, 0x409), /*1024*/ RTFLOAT64U_INIT_C(0, 0xb800000000000, 0x404) /*55*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1109 { /*src1 */ { RTFLOAT64U_INIT_C(0, 0, 0x408), /* 512*/ RTFLOAT64U_INIT_C(0, 0xc000000000000, 0x401) /* 7*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1110 { /* => */ { RTFLOAT64U_INIT_C(0, 0x8000000000000, 0x409) /*1536*/, RTFLOAT64U_INIT_C(0, 0xf000000000000, 0x404) /*62*/, RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1111 /*mask */ X86_MXCSR_XCPT_MASK,
1112 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1113 /*flags */ 0 },
1114 /* 3*/{ { /*src2 */ { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1115 { /*src1 */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1116 { /* => */ { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
1117 /*mask */ ~X86_MXCSR_XCPT_MASK,
1118 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1119 /*flags */ X86_MXCSR_IE | X86_MXCSR_DE | X86_MXCSR_ZE | X86_MXCSR_OE | X86_MXCSR_UE | X86_MXCSR_PE },
1120 };
1121
1122 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1123 {
1124 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1125 };
1126 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1127 {
1128 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1129 };
1130 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1131 {
1132 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), s_aValues },
1133 };
1134
1135 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1136 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1137 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1138 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1139}
1140
1141
1142/**
1143 * The 32-bit protected mode main function.
1144 *
1145 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1146 * we'll switch between PE32 and RM for each test step we perform). Given that
1147 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1148 *
1149 * Some extra steps needs to be taken to properly handle extended state in LM64
1150 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1151 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1152 */
1153BS3_DECL(void) Main_pe32()
1154{
1155 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1156 {
1157#if 1 /*ndef DEBUG_bird*/
1158# define ALL_TESTS
1159#endif
1160#if defined(ALL_TESTS)
1161 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1162#endif
1163 };
1164 Bs3TestInit("bs3-cpu-instr-4");
1165
1166 /*
1167 * Initialize globals.
1168 */
1169 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1170 {
1171 uint32_t fEbx, fEcx, fEdx;
1172 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1173 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1174 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1175 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1176 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1177 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1178 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1179 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1180 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1181 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1182 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1183 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1184 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1185 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1186 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1187 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1188
1189 if (ASMCpuId_EAX(0) >= 7)
1190 {
1191 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1192 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1193 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1194 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1195 }
1196
1197 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1198 {
1199 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1200 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1201 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1202 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1203 }
1204 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1205
1206 /*
1207 * Figure out FPU save/restore method and support for DAZ bit.
1208 */
1209 {
1210 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1211 * alloc/free a context. Replicating the logic in the bs3kit here, though
1212 * doable, runs a risk of not updating this when the other logic is
1213 * changed. */
1214 uint64_t fFlags;
1215 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1216 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1217 if (pExtCtx)
1218 {
1219 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1220 g_enmExtCtxMethod = pExtCtx->enmMethod;
1221 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1222 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1223 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1224 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1225 g_fMxCsrDazSupported = true;
1226 }
1227 else
1228 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1229 }
1230
1231 /*
1232 * Allocate a buffer for testing.
1233 */
1234 g_cbBuf = X86_PAGE_SIZE * 4;
1235 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1236 if (g_pbBuf)
1237 {
1238 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1239 if (g_pbBufAliasAlloc)
1240 {
1241 /*
1242 * Do the tests.
1243 */
1244 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1245#ifdef BS3_SKIPIT_DO_SKIP
1246 bs3CpuInstrX_ShowTallies();
1247#endif
1248 }
1249 else
1250 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1251 }
1252 else
1253 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1254 }
1255
1256 Bs3TestTerm();
1257}
1258
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette