Changeset 95373 in vbox
- Timestamp:
- Jun 26, 2022 1:22:03 AM (2 years ago)
- Location:
- trunk/src/VBox/ValidationKit/bootsectors
- Files:
-
- 1 edited
- 4 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/ValidationKit/bootsectors/Makefile.kmk
r93115 r95373 351 351 352 352 # 353 # CPU instructions #3 - SSE, ++. 354 # 355 MISCBINS += bs3-cpu-instr-3 356 bs3-cpu-instr-3_TEMPLATE = VBoxBS3KitImg 357 bs3-cpu-instr-3_INCS = . 358 bs3-cpu-instr-3_DEFS = BS3_CMN_INSTANTIATE_FILE1=bs3-cpu-instr-3-template.c 359 bs3-cpu-instr-3_DEFS += BS3_MODE_INSTANTIATE_FILE1=bs3-cpu-instr-3-template.c 360 bs3-cpu-instr-3_SOURCES = \ 361 bs3kit/bs3-first-rm.asm \ 362 bs3-cpu-instr-3.c \ 363 bs3-cpu-instr-3-asm.asm \ 364 bs3kit/bs3-cmn-instantiate-x0.c16 \ 365 bs3kit/bs3-cmn-instantiate.c32 \ 366 bs3kit/bs3-cmn-instantiate.c64 367 bs3-cpu-instr-3-template.o:: \ 368 $$(bs3-cpu-instr-3_0_OUTDIR)/bs3kit/bs3-cmn-instantiate-x0.o16 \ 369 $$(bs3-cpu-instr-3_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o32 \ 370 $$(bs3-cpu-instr-3_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o64 \ 371 $$(bs3-cpu-instr-3_0_OUTDIR)/bs3-cpu-instr-3-asm.o16 372 373 # 353 374 # CPU generated instruction tests #1 354 375 # -
trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-asm.asm
r95361 r95373 1 1 ; $Id$ 2 2 ;; @file 3 ; BS3Kit - bs3-cpu-instr- 23 ; BS3Kit - bs3-cpu-instr-3 4 4 ; 5 5 … … 31 31 %include "bs3kit.mac" 32 32 33 34 ;*********************************************************************************************************************************35 ;* Global Variables *36 ;*********************************************************************************************************************************37 ;BS3_BEGIN_DATA1638 ;BS3_GLOBAL_DATA g_bs3CpuBasic2_ud2_FlatAddr, 439 ; dd _bs3CpuBasic2_ud2 wrt FLAT40 41 42 43 ;44 ; CPU mode agnostic test code snippets.45 ;46 BS3_BEGIN_TEXT1647 48 BS3_PROC_BEGIN _bs3CpuInstr2_imul_bl_ud249 imul bl50 .again:51 ud252 jmp .again53 BS3_PROC_END _bs3CpuInstr2_imul_bl_ud254 55 56 57 33 ; 58 34 ; Instantiate code templates. 59 35 ; 60 BS3_INSTANTIATE_COMMON_TEMPLATE "bs3-cpu-instr- 2-template.mac"61 BS3_INSTANTIATE_TEMPLATE_WITH_WEIRD_ONES "bs3-cpu-instr- 2-template.mac"36 BS3_INSTANTIATE_COMMON_TEMPLATE "bs3-cpu-instr-3-template.mac" 37 BS3_INSTANTIATE_TEMPLATE_WITH_WEIRD_ONES "bs3-cpu-instr-3-template.mac" 62 38 -
trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.c
r95361 r95373 1 1 /* $Id$ */ 2 2 /** @file 3 * BS3Kit - bs3-cpu-instr- 2, C code template.3 * BS3Kit - bs3-cpu-instr-3, SSE and AVX instructions, C code template. 4 4 */ 5 5 … … 37 37 *********************************************************************************************************************************/ 38 38 #ifdef BS3_INSTANTIATING_CMN 39 # if ARCH_BITS == 64 40 typedef struct BS3CI2FSGSBASE 41 { 42 const char *pszDesc; 43 bool f64BitOperand; 44 FPFNBS3FAR pfnWorker; 45 uint8_t offWorkerUd2; 46 FPFNBS3FAR pfnVerifyWorker; 47 uint8_t offVerifyWorkerUd2; 48 } BS3CI2FSGSBASE; 49 # endif 39 /** Instruction set type and operand width. */ 40 typedef enum { T_INVALID, T_SSE, T_SSE2, T_SSE3, T_SSSE3, T_SSE4_1, T_SSE4_2, T_SSE4A, T_AVX_128, T_AVX_256, T_MAX } INPUT_TYPE_T; 41 42 /** Memory or register rm variant. */ 43 enum { RM_REG, RM_MEM }; 44 45 /** 46 * Execution environment configuration. 47 */ 48 typedef struct BS3CPUINSTR3_CONFIG_T 49 { 50 uint16_t fCr0Mp : 1; 51 uint16_t fCr0Em : 1; 52 uint16_t fCr0Ts : 1; 53 uint16_t fCr4OsFxSR : 1; 54 uint16_t fCr4OsXSave : 1; 55 uint16_t fXcr0Sse : 1; 56 uint16_t fXcr0Avx : 1; 57 uint16_t fAligned : 1; /**< Aligned memory operands. If zero, they will be misaligned and tests w/o memory ops skipped. */ 58 uint16_t fAlignCheck : 1; 59 uint16_t fMxCsrMM : 1; /**< AMD only */ 60 uint8_t bXcptSse; 61 uint8_t bXcptAvx; 62 } BS3CPUINSTR3_CONFIG_T; 63 /** Pointer to an execution environment configuration. */ 64 typedef BS3CPUINSTR3_CONFIG_T const BS3_FAR *PCBS3CPUINSTR3_CONFIG_T; 65 66 /** State saved by bs3CpuInstr3ConfigReconfigure. 
*/ 67 typedef struct BS3CPUINSTR3_CONFIG_SAVED_T 68 { 69 uint32_t uCr0; 70 uint32_t uCr4; 71 uint32_t uEfl; 72 uint32_t uMxCsr; 73 } BS3CPUINSTR3_CONFIG_SAVED_T; 74 typedef BS3CPUINSTR3_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTR3_CONFIG_SAVED_T; 75 typedef BS3CPUINSTR3_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTR3_CONFIG_SAVED_T; 76 50 77 #endif 51 78 … … 55 82 *********************************************************************************************************************************/ 56 83 #ifdef BS3_INSTANTIATING_CMN 57 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mul_xBX_ud2); 58 59 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_imul_xBX_ud2); 60 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_imul_xCX_xBX_ud2); 61 62 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_div_xBX_ud2); 63 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_idiv_xBX_ud2); 64 65 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_AX_BX_ud2); 66 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_EAX_EBX_ud2); 67 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_RAX_RBX_ud2); 68 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_AX_FSxBX_ud2); 69 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_EAX_FSxBX_ud2); 70 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsf_RAX_FSxBX_ud2); 71 72 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_AX_BX_ud2); 73 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_EAX_EBX_ud2); 74 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_RAX_RBX_ud2); 75 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_AX_FSxBX_ud2); 76 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_EAX_FSxBX_ud2); 77 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsf_RAX_FSxBX_ud2); 78 79 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_AX_BX_ud2); 80 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_EAX_EBX_ud2); 81 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_RAX_RBX_ud2); 82 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_AX_FSxBX_ud2); 83 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_EAX_FSxBX_ud2); 84 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_tzcnt_RAX_FSxBX_ud2); 85 86 extern FNBS3FAR 
BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_AX_BX_ud2); 87 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_EAX_EBX_ud2); 88 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_RAX_RBX_ud2); 89 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_AX_FSxBX_ud2); 90 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_EAX_FSxBX_ud2); 91 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_RAX_FSxBX_ud2); 92 93 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_AX_BX_ud2); 94 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_EAX_EBX_ud2); 95 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_RAX_RBX_ud2); 96 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_AX_FSxBX_ud2); 97 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_EAX_FSxBX_ud2); 98 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bsr_RAX_FSxBX_ud2); 99 100 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_AX_BX_ud2); 101 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_EAX_EBX_ud2); 102 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_RAX_RBX_ud2); 103 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_AX_FSxBX_ud2); 104 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_EAX_FSxBX_ud2); 105 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_bsr_RAX_FSxBX_ud2); 106 107 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_AX_BX_ud2); 108 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_EAX_EBX_ud2); 109 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_RAX_RBX_ud2); 110 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_AX_FSxBX_ud2); 111 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_EAX_FSxBX_ud2); 112 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lzcnt_RAX_FSxBX_ud2); 113 114 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_AX_BX_ud2); 115 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_EAX_EBX_ud2); 116 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_RAX_RBX_ud2); 117 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_AX_FSxBX_ud2); 118 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_EAX_FSxBX_ud2); 119 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_RAX_FSxBX_ud2); 120 121 extern FNBS3FAR 
BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_RDX_2_icebp); 122 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp); 123 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_L1); 124 # if ARCH_BITS == 64 125 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_X1); 126 # endif 127 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V1); 128 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V15); 129 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_DSxDI_68_icebp); 130 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_DSxDI_36_icebp); 131 132 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_RBX_icebp); 133 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_FSxBX_icebp); 134 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_EBX_icebp); 135 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_FSxBX_icebp); 136 137 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bextr_RAX_RBX_RCX_icebp); 138 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bextr_RAX_FSxBX_RCX_icebp); 139 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bextr_EAX_EBX_ECX_icebp); 140 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bextr_EAX_FSxBX_ECX_icebp); 141 142 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bzhi_RAX_RBX_RCX_icebp); 143 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bzhi_RAX_FSxBX_RCX_icebp); 144 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bzhi_EAX_EBX_ECX_icebp); 145 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_bzhi_EAX_FSxBX_ECX_icebp); 146 147 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pdep_RAX_RCX_RBX_icebp); 148 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pdep_RAX_RCX_FSxBX_icebp); 149 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pdep_EAX_ECX_EBX_icebp); 150 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pdep_EAX_ECX_FSxBX_icebp); 151 152 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pext_RAX_RCX_RBX_icebp); 153 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pext_RAX_RCX_FSxBX_icebp); 154 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pext_EAX_ECX_EBX_icebp); 155 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_pext_EAX_ECX_FSxBX_icebp); 156 
157 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shlx_RAX_RBX_RCX_icebp); 158 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shlx_RAX_FSxBX_RCX_icebp); 159 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shlx_EAX_EBX_ECX_icebp); 160 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shlx_EAX_FSxBX_ECX_icebp); 161 162 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_sarx_RAX_RBX_RCX_icebp); 163 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_sarx_RAX_FSxBX_RCX_icebp); 164 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_sarx_EAX_EBX_ECX_icebp); 165 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_sarx_EAX_FSxBX_ECX_icebp); 166 167 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shrx_RAX_RBX_RCX_icebp); 168 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shrx_RAX_FSxBX_RCX_icebp); 169 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shrx_EAX_EBX_ECX_icebp); 170 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_shrx_EAX_FSxBX_ECX_icebp); 171 172 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsr_RAX_RBX_icebp); 173 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsr_RAX_FSxBX_icebp); 174 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsr_EAX_EBX_icebp); 175 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsr_EAX_FSxBX_icebp); 176 177 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsmsk_RAX_RBX_icebp); 178 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsmsk_RAX_FSxBX_icebp); 179 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsmsk_EAX_EBX_icebp); 180 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsmsk_EAX_FSxBX_icebp); 181 182 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsi_RAX_RBX_icebp); 183 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsi_RAX_FSxBX_icebp); 184 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsi_EAX_EBX_icebp); 185 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_blsi_EAX_FSxBX_icebp); 186 187 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp); 188 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mulx_RCX_RCX_RBX_RDX_icebp); 189 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp); 190 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp); 191 extern FNBS3FAR 
BS3_CMN_NM(bs3CpuInstr2_mulx_ECX_ECX_EBX_EDX_icebp); 192 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp); 193 194 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_BX_icebp); 195 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_EBX_icebp); 196 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_RBX_icebp); 197 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_FSxBX_icebp); 198 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_FSxBX_icebp); 199 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_FSxBX_icebp); 200 201 # if ARCH_BITS == 64 202 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b_rdi_ud2); 203 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2); 204 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2); 205 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2); 206 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2); 207 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2); 208 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2); 209 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2); 210 211 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrfsbase_rbx_ud2); 212 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrfsbase_ebx_ud2); 213 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrfsbase_rbx_rdfsbase_rcx_ud2); 214 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrfsbase_ebx_rdfsbase_ecx_ud2); 215 216 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrgsbase_rbx_ud2); 217 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrgsbase_ebx_ud2); 218 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrgsbase_rbx_rdgsbase_rcx_ud2); 219 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_wrgsbase_ebx_rdgsbase_ecx_ud2); 220 221 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rdfsbase_rbx_ud2); 222 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rdfsbase_ebx_ud2); 223 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rdgsbase_rbx_ud2); 224 extern FNBS3FAR BS3_CMN_NM(bs3CpuInstr2_rdgsbase_ebx_ud2); 225 # endif 84 85 # define 
BS3_FNBS3FAR_PROTOTYPES_CMN(a_BaseNm) \ 86 extern FNBS3FAR RT_CONCAT(a_BaseNm, _c16); \ 87 extern FNBS3FAR RT_CONCAT(a_BaseNm, _c32); \ 88 extern FNBS3FAR RT_CONCAT(a_BaseNm, _c64) 89 90 BS3_FNBS3FAR_PROTOTYPES_CMN(bs3CpuInstr3_xorps_XMM1_XMM2_icebp); 91 BS3_FNBS3FAR_PROTOTYPES_CMN(bs3CpuInstr3_xorps_XMM1_FSxBX_icebp); 92 BS3_FNBS3FAR_PROTOTYPES_CMN(bs3CpuInstr3_vxorps_XMM1_XMM1_XMM2_icebp); 93 BS3_FNBS3FAR_PROTOTYPES_CMN(bs3CpuInstr3_vxorps_XMM1_XMM1_FSxBX_icebp); 226 94 #endif 227 95 … … 231 99 *********************************************************************************************************************************/ 232 100 #ifdef BS3_INSTANTIATING_CMN 233 # if ARCH_BITS == 64 234 static BS3CI2FSGSBASE const s_aWrFsBaseWorkers[] = 235 { 236 { "wrfsbase rbx", true, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_rbx_ud2), 5, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_rbx_rdfsbase_rcx_ud2), 15 }, 237 { "wrfsbase ebx", false, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_ebx_ud2), 4, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_ebx_rdfsbase_ecx_ud2), 13 }, 101 static bool g_fGlobalInitialized = false; 102 static bool g_fAmdMisalignedSse = false; 103 static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false }; 104 105 /** Exception type #4 test configurations. 
*/ 106 static const BS3CPUINSTR3_CONFIG_T g_aXcptConfig4[] = 107 { 108 /* fCr0Mp, Cr0Em, fCr0Ts, fCr4OsFxSR, fCr4OsXSave, fXcr0Sse, fXcr0Avx, fAligned, fAlignCheck, fMxCsrMM, bXcptSse, bXcptAvx */ 109 /* X87 SSE SSE SSE AVX AVX AVX SSE+AVX AVX+AMD/SSE AMD/SSE */ 110 { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */ 111 { 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 */ 112 { 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #2 */ 113 { 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_DB }, /* #3 */ 114 { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #4 */ 115 { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #5 */ 116 { 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #6 */ 117 { 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 */ 118 { 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 */ 119 /* Memory misalignment: */ 120 { 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #9 */ 121 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_AC }, /* #10 */ 122 /* AMD only: */ 123 { 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_DB }, /* #11 */ 124 { 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_AC }, /* #12 */ 238 125 }; 239 126 240 static BS3CI2FSGSBASE const s_aWrGsBaseWorkers[] = 241 { 242 { "wrgsbase rbx", true, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_rbx_ud2), 5, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_rbx_rdgsbase_rcx_ud2), 15 }, 243 { "wrgsbase ebx", false, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_ebx_ud2), 4, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_ebx_rdgsbase_ecx_ud2), 13 }, 244 }; 245 246 static BS3CI2FSGSBASE const s_aRdFsBaseWorkers[] = 247 { 248 { "rdfsbase rbx", true, BS3_CMN_NM(bs3CpuInstr2_rdfsbase_rbx_ud2), 5, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_rbx_rdfsbase_rcx_ud2), 15 }, 249 { "rdfsbase ebx", false, BS3_CMN_NM(bs3CpuInstr2_rdfsbase_ebx_ud2), 4, BS3_CMN_NM(bs3CpuInstr2_wrfsbase_ebx_rdfsbase_ecx_ud2), 13 }, 250 }; 251 252 static 
BS3CI2FSGSBASE const s_aRdGsBaseWorkers[] = 253 { 254 { "rdgsbase rbx", true, BS3_CMN_NM(bs3CpuInstr2_rdgsbase_rbx_ud2), 5, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_rbx_rdgsbase_rcx_ud2), 15 }, 255 { "rdgsbase ebx", false, BS3_CMN_NM(bs3CpuInstr2_rdgsbase_ebx_ud2), 4, BS3_CMN_NM(bs3CpuInstr2_wrgsbase_ebx_rdgsbase_ecx_ud2), 13 }, 256 }; 257 # endif 258 #endif /* BS3_INSTANTIATING_CMN - global */ 127 #endif 259 128 260 129 … … 266 135 #ifdef BS3_INSTANTIATING_CMN 267 136 268 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_mul)(uint8_t bMode) 269 { 270 #define MUL_CHECK_EFLAGS_ZERO (uint16_t)(X86_EFL_AF | X86_EFL_ZF) 271 #define MUL_CHECK_EFLAGS (uint16_t)(X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_PF) 272 273 static const struct 274 { 275 RTCCUINTREG uInAX; 276 RTCCUINTREG uInBX; 277 RTCCUINTREG uOutDX; 278 RTCCUINTREG uOutAX; 279 uint16_t fFlags; 280 } s_aTests[] = 281 { 282 { 1, 1, 283 0, 1, 0 }, 284 { 2, 2, 285 0, 4, 0 }, 286 { RTCCUINTREG_MAX, RTCCUINTREG_MAX, 287 RTCCUINTREG_MAX-1, 1, X86_EFL_CF | X86_EFL_OF }, 288 { RTCCINTREG_MAX, RTCCINTREG_MAX, 289 RTCCINTREG_MAX / 2, 1, X86_EFL_CF | X86_EFL_OF }, 290 { 1, RTCCUINTREG_MAX, 291 0, RTCCUINTREG_MAX, X86_EFL_PF | X86_EFL_SF }, 292 { 1, RTCCINTREG_MAX, 293 0, RTCCINTREG_MAX, X86_EFL_PF }, 294 { 2, RTCCINTREG_MAX, 295 0, RTCCUINTREG_MAX - 1, X86_EFL_SF }, 296 { (RTCCUINTREG)RTCCINTREG_MAX + 1, 2, 297 1, 0, X86_EFL_PF | X86_EFL_CF | X86_EFL_OF }, 298 { (RTCCUINTREG)RTCCINTREG_MAX / 2 + 1, 3, 299 0, ((RTCCUINTREG)RTCCINTREG_MAX / 2 + 1) * 3, X86_EFL_PF | X86_EFL_SF }, 300 }; 301 302 BS3REGCTX Ctx; 303 BS3TRAPFRAME TrapFrame; 304 unsigned i, j, k; 137 /** Initializes global variables. 
*/ 138 static void bs3CpuInstr3InitGlobals(void) 139 { 140 if (!g_fGlobalInitialized) 141 { 142 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 143 { 144 uint32_t fEcx, fEdx; 145 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx); 146 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE); 147 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2); 148 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3); 149 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3); 150 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1); 151 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2); 152 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX); 153 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX); 154 155 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES) 156 { 157 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, NULL); 158 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A); 159 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE); 160 } 161 } 162 163 g_fGlobalInitialized = true; 164 } 165 } 166 167 168 /** 169 * Reconfigures the execution environment according to @a pConfig. 170 * 171 * Call bs3CpuInstr3ConfigRestore to undo the changes. 172 * 173 * @returns true on success, false if the configuration cannot be applied. In 174 * the latter case, no context changes are made. 175 * @param pSavedCfg Where to save state we modify. 176 * @param pCtx The register context to modify. 177 * @param pExtCtx The extended register context to modify. 178 * @param pConfig The configuration to apply. 
179 */ 180 static bool bs3CpuInstr3ConfigReconfigure(PBS3CPUINSTR3_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx, 181 PCBS3CPUINSTR3_CONFIG_T pConfig) 182 { 183 /* 184 * Save context bits we may change here 185 */ 186 pSavedCfg->uCr0 = pCtx->cr0.u32; 187 pSavedCfg->uCr4 = pCtx->cr4.u32; 188 pSavedCfg->uEfl = pCtx->rflags.u32; 189 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx); 190 191 /* 192 * Can we make these changes? 193 */ 194 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse) 195 return false; 196 197 /* 198 * Modify the test context. 199 */ 200 if (pConfig->fCr0Mp) 201 pCtx->cr0.u32 |= X86_CR0_MP; 202 else 203 pCtx->cr0.u32 &= ~X86_CR0_MP; 204 if (pConfig->fCr0Em) 205 pCtx->cr0.u32 |= X86_CR0_EM; 206 else 207 pCtx->cr0.u32 &= ~X86_CR0_EM; 208 if (pConfig->fCr0Ts) 209 pCtx->cr0.u32 |= X86_CR0_TS; 210 else 211 pCtx->cr0.u32 &= ~X86_CR0_TS; 212 213 if (pConfig->fCr4OsFxSR) 214 pCtx->cr4.u32 |= X86_CR4_OSFXSR; 215 else 216 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR; 217 /** @todo X86_CR4_OSXMMEEXCPT? */ 218 if (pConfig->fCr4OsXSave) 219 pCtx->cr4.u32 |= X86_CR4_OSXSAVE; 220 else 221 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE; 222 223 if (pConfig->fXcr0Sse) 224 pExtCtx->fXcr0Saved |= XSAVE_C_SSE; 225 else 226 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE; 227 if (pConfig->fXcr0Avx) 228 pExtCtx->fXcr0Saved |= XSAVE_C_YMM; 229 else 230 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM; 231 232 if (pConfig->fAlignCheck) 233 { 234 pCtx->rflags.u32 |= X86_EFL_AC; 235 pCtx->cr0.u32 |= X86_CR0_AM; 236 } 237 else 238 { 239 pCtx->rflags.u32 &= ~X86_EFL_AC; 240 pCtx->cr0.u32 &= ~X86_CR0_AM; 241 } 242 243 if (pConfig->fMxCsrMM) 244 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM); 245 else 246 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM); 247 return true; 248 } 249 250 251 /** 252 * Undoes changes made by bs3CpuInstr3ConfigReconfigure. 
253 */ 254 static void bs3CpuInstr3ConfigRestore(PCBS3CPUINSTR3_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx) 255 { 256 pCtx->cr0.u32 = pSavedCfg->uCr0; 257 pCtx->cr4.u32 = pSavedCfg->uCr4; 258 pCtx->rflags.u32 = pSavedCfg->uEfl; 259 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal; 260 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr); 261 } 262 263 264 static bool Bs3TestCheckExtCtx(PCBS3EXTCTX pActualExtCtx, PCBS3EXTCTX pExpectedExtCtx, uint16_t fFlags, 265 const char BS3_FAR *pszMode, uint16_t idTestStep) 266 { 267 /* 268 * Make sure the context of a similar and valid before starting. 269 */ 270 if (!pActualExtCtx || pActualExtCtx->u16Magic != BS3EXTCTX_MAGIC) 271 return Bs3TestFailedF("%u - %s: invalid actual context pointer: %p", idTestStep, pszMode, pActualExtCtx); 272 if (!pExpectedExtCtx || pExpectedExtCtx->u16Magic != BS3EXTCTX_MAGIC) 273 return Bs3TestFailedF("%u - %s: invalid expected context pointer: %p", idTestStep, pszMode, pExpectedExtCtx); 274 if ( pActualExtCtx->enmMethod != pExpectedExtCtx->enmMethod 275 || pActualExtCtx->enmMethod == BS3EXTCTXMETHOD_INVALID 276 || pActualExtCtx->enmMethod >= BS3EXTCTXMETHOD_END) 277 return Bs3TestFailedF("%u - %s: mismatching or/and invalid context methods: %d vs %d", 278 idTestStep, pszMode, pActualExtCtx->enmMethod, pExpectedExtCtx->enmMethod); 279 if (pActualExtCtx->cb != pExpectedExtCtx->cb) 280 return Bs3TestFailedF("%u - %s: mismatching context sizes: %#x vs %#x", 281 idTestStep, pszMode, pActualExtCtx->cb, pExpectedExtCtx->cb); 282 283 /* 284 * Try get the job done quickly with a memory compare. 
285 */ 286 if (Bs3MemCmp(pActualExtCtx, pExpectedExtCtx, pActualExtCtx->cb) == 0) 287 return true; 288 289 Bs3TestFailedF("%u - %s: context memory differs", idTestStep, pszMode); // debug 290 { 291 uint8_t const BS3_FAR *pb1 = (uint8_t const BS3_FAR *)pActualExtCtx; 292 uint8_t const BS3_FAR *pb2 = (uint8_t const BS3_FAR *)pExpectedExtCtx; 293 unsigned const cb = pActualExtCtx->cb; 294 unsigned off; 295 for (off = 0; off < cb; off++) 296 if (pb1[off] != pb2[off]) 297 { 298 unsigned cbDiff; 299 unsigned const offStart = off++; 300 while (off < cb && pb1[off] != pb2[off]) 301 off++; 302 cbDiff = off - offStart; 303 switch (cbDiff) 304 { 305 case 1: 306 Bs3TestFailedF("%u - %s: Byte difference at %#x: %#04x, expected %#04x", idTestStep, pszMode, offStart, 307 pb1[offStart], pb2[offStart]); 308 break; 309 case 2: 310 Bs3TestFailedF("%u - %s: Word difference at %#x: %#06x, expected %#06x", idTestStep, pszMode, offStart, 311 RT_MAKE_U16(pb1[offStart], pb1[offStart + 1]), 312 RT_MAKE_U16(pb2[offStart], pb2[offStart + 1])); 313 break; 314 case 4: 315 Bs3TestFailedF("%u - %s: Dword difference at %#x: %#010RX32, expected %#010RX32", 316 idTestStep, pszMode, offStart, 317 RT_MAKE_U32_FROM_U8(pb1[offStart], pb1[offStart + 1], pb1[offStart + 2], pb1[offStart + 3]), 318 RT_MAKE_U32_FROM_U8(pb2[offStart], pb2[offStart + 1], pb2[offStart + 2], pb2[offStart + 3])); 319 break; 320 default: 321 Bs3TestFailedF("%u - %s: %#x..%#x differs", idTestStep, pszMode, offStart, off - 1); 322 Bs3TestFailedF("got %.*Rhxs", off - offStart, &pb1[offStart]); 323 Bs3TestFailedF("expected %.*Rhxs", off - offStart, &pb2[offStart]); 324 break; 325 } 326 } 327 } 328 329 if (pActualExtCtx->enmMethod == BS3EXTCTXMETHOD_ANCIENT) 330 return Bs3TestFailedF("%u - %s: BS3EXTCTXMETHOD_ANCIENT not implemented", idTestStep, pszMode); 331 332 /* 333 * Check the x87 state. 
334 */ 335 if ( pActualExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE 336 || ( pActualExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE 337 && (pActualExtCtx->fXcr0Nominal & XSAVE_C_X87)) ) 338 { 339 340 } 341 342 /* 343 * 344 */ 345 346 return false; 347 } 348 349 /** 350 * Allocates two extended CPU contexts and initializes the first one 351 * with random data. 352 * @returns First extended context, initialized with randomish data. NULL on 353 * failure (complained). 354 * @param ppExtCtx2 Where to return the 2nd context. 355 */ 356 static PBS3EXTCTX bs3CpuInstr3AllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2) 357 { 358 /* Allocate extended context structures. */ 359 uint64_t fFlags; 360 uint16_t cb = Bs3ExtCtxGetSize(&fFlags); 361 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 2); 362 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb); 363 if (pExtCtx1) 364 { 365 Bs3ExtCtxInit(pExtCtx1, cb, fFlags); 366 /** @todo populate with semi-random stuff. */ 367 368 Bs3ExtCtxInit(pExtCtx2, cb, fFlags); 369 *ppExtCtx2 = pExtCtx2; 370 return pExtCtx1; 371 } 372 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2); 373 *ppExtCtx2 = NULL; 374 return NULL; 375 } 376 377 static void bs3CpuInstr3FreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2) 378 { 379 RT_NOREF_PV(pExtCtx2); 380 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2); 381 } 382 383 /** 384 * Sets up SSE and maybe AVX. 
385 */ 386 static void bs3CpuInstr3SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx) 387 { 388 uint32_t cr0 = Bs3RegGetCr0(); 389 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM); 390 cr0 |= X86_CR0_NE; 391 pCtx->cr0.u32 = cr0; 392 Bs3RegSetCr0(cr0); 393 394 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT) 395 { 396 uint32_t cr4 = Bs3RegGetCr4(); 397 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE) 398 { 399 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE; 400 Bs3RegSetCr4(cr4); 401 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal); 402 } 403 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE) 404 { 405 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT; 406 Bs3RegSetCr4(cr4); 407 } 408 pCtx->cr4.u32 = cr4; 409 } 410 } 411 412 typedef struct BS3CPUINSTR3_TEST1_VALUES_T 413 { 414 RTUINT256U uSrc2; 415 RTUINT256U uSrc1; /**< uDstIn for SSE */ 416 RTUINT256U uDstOut; 417 } BS3CPUINSTR3_TEST1_VALUES_T; 418 419 typedef struct BS3CPUINSTR3_TEST1_T 420 { 421 FPFNBS3FAR pfnWorker; 422 uint8_t enmRm; 423 uint8_t enmType; 424 uint8_t cbInstr; 425 uint8_t cValues; 426 uint8_t iRegDst; 427 uint8_t iRegSrc1; 428 uint8_t iRegSrc2; 429 BS3CPUINSTR3_TEST1_VALUES_T const BS3_FAR *paValues; 430 } BS3CPUINSTR3_TEST1_T; 431 432 typedef struct BS3CPUINSTR3_TEST1_MODE_T 433 { 434 BS3CPUINSTR3_TEST1_T const BS3_FAR *paTests; 435 unsigned cTests; 436 } BS3CPUINSTR3_TEST1_MODE_T; 437 438 /** Initializer for a BS3CPUINSTR3_TEST1_MODE_T array (three entries). 
*/
#if ARCH_BITS == 16
# define BS3CPUINSTR3_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
            { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { NULL, 0 }, { NULL, 0 } }
#elif ARCH_BITS == 32
# define BS3CPUINSTR3_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
            { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { NULL, 0 } }
#else
# define BS3CPUINSTR3_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
            { { a_aTests16, RT_ELEMENTS(a_aTests16) }, { a_aTests32, RT_ELEMENTS(a_aTests32) }, { a_aTests64, RT_ELEMENTS(a_aTests64) } }
#endif

/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR3_TEST1_MODES_INIT.
 *
 * @returns 0 for 16-bit code, 1 for 32-bit code, 2 otherwise.
 * @param   a_bMode     The execution mode (BS3_MODE_XXX).
 * @note    The argument is evaluated more than once. */
#define BS3CPUINSTR3_TEST1_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)


/**
 * Test type #1 worker.
 *
 * For every ring, every configuration and every test entry this loads the
 * source operands (registers via the extended context, memory via FS:xBX),
 * executes the instruction worker and then checks the extended context, the
 * register context, the exception and - for memory forms - the memory operand.
 *
 * @returns Always 0; failures are reported via Bs3TestFailedF.
 * @param   bMode       The execution mode (BS3_MODE_XXX).
 * @param   paTests     The test table for this mode.
 * @param   cTests      Number of entries in @a paTests.
 * @param   paConfigs   The configurations to run each test under.
 * @param   cConfigs    Number of entries in @a paConfigs.
 */
static uint8_t bs3CpuInstr3_WorkerTestType1(uint8_t bMode, BS3CPUINSTR3_TEST1_T const BS3_FAR *paTests, unsigned cTests,
                                            PCBS3CPUINSTR3_CONFIG_T paConfigs, unsigned cConfigs)
{
    const char BS3_FAR * const  pszMode = Bs3GetModeName(bMode);
    BS3REGCTX                   Ctx;
    BS3TRAPFRAME                TrapFrame;
    uint8_t                     bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
    PBS3EXTCTX                  pExtCtxOut;
    PBS3EXTCTX                  pExtCtx = bs3CpuInstr3AllocExtCtxs(&pExtCtxOut);
    if (!pExtCtx)
        return 0;

    /* Ensure the structures are allocated before we sample the stack pointer. */
    Bs3MemSet(&Ctx, 0, sizeof(Ctx));
    Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));

    /* Ensure that the globals we use here have been initialized. */
    bs3CpuInstr3InitGlobals();

    /*
     * Create test context.
     */
    Bs3RegCtxSaveEx(&Ctx, bMode, 1024);
    bs3CpuInstr3SetupSseAndAvx(&Ctx, pExtCtx);

    /*
     * Run the tests in all rings since alignment issues may behave
     * differently in ring-3 compared to ring-0.
     */
    for (;;)
    {
        unsigned iCfg;
        for (iCfg = 0; iCfg < cConfigs; iCfg++)
        {
            unsigned                    iTest;
            BS3CPUINSTR3_CONFIG_SAVED_T SavedCfg;
            if (!bs3CpuInstr3ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg]))
                continue; /* unsupported config */

            /*
             * Iterate the tests.
             */
            for (iTest = 0; iTest < cTests; iTest++)
            {
                BS3CPUINSTR3_TEST1_VALUES_T const BS3_FAR *paValues = paTests[iTest].paValues;
                unsigned const  cValues     = paTests[iTest].cValues;
                bool const      fSseInstr   = paTests[iTest].enmType >= T_SSE && paTests[iTest].enmType < T_AVX_128;
                uint8_t const   cbOperand   = paTests[iTest].enmType <= T_AVX_128 ? 128/8 : 256/8;
                uint8_t const   cbAlign     = 16;
                uint8_t         bXcptExpect = !g_afTypeSupports[paTests[iTest].enmType] ? X86_XCPT_UD
                                            : fSseInstr ? paConfigs[iCfg].bXcptSse : paConfigs[iCfg].bXcptAvx;
                uint16_t        idTestStep  = bRing * 10000 + iCfg * 100 + iTest * 10;
                unsigned        iVal;
                uint8_t         abPadding[sizeof(RTUINT256U) * 2];
                /* Locate a cbAlign aligned spot inside abPadding.  The index must be
                   relative to the start of the buffer, so the buffer's own offset is
                   subtracted again after rounding down.  The result is in the range
                   17..32 (16..31 when misaligned), leaving room for a full 256-bit
                   operand within the 64 byte buffer. */
                size_t const    offBuf      = BS3_FP_OFF(&abPadding[0]);
                size_t const    offAligned  = (offBuf + sizeof(RTUINT256U)) & ~(size_t)(cbAlign - 1);
                PRTUINT256U     puMemOp     = (PRTUINT256U)&abPadding[offAligned - offBuf - !paConfigs[iCfg].fAligned];

                /* If testing unaligned memory accesses, skip register-only tests.  This
                   allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
                if (!paConfigs[iCfg].fAligned && paTests[iTest].enmRm != RM_MEM)
                    continue;

                /* #AC is only raised in ring-3: */
                if (bXcptExpect == X86_XCPT_AC && bRing != 3)
                    bXcptExpect = X86_XCPT_DB;

                Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paTests[iTest].pfnWorker);

                /*
                 * Iterate the test values and do the actual testing.
                 */
                for (iVal = 0; iVal < cValues; iVal++, idTestStep++)
                {
                    uint16_t   cErrors;
                    RTUINT256U uMemOpExpect;

                    /*
                     * Set up the context and some expectations.
                     */
                    if (paTests[iTest].iRegDst == UINT8_MAX)
                    {
                        /* Memory destination: poison it so that a missing store is detected. */
                        BS3_ASSERT(paTests[iTest].enmRm == RM_MEM);
                        Bs3MemSet(puMemOp, 0xcc, sizeof(*puMemOp));
                        if (bXcptExpect == X86_XCPT_DB)
                            uMemOpExpect = paValues[iVal].uDstOut;
                        else
                            uMemOpExpect = *puMemOp; /* faulting instruction must leave memory untouched */
                    }

                    if (paTests[iTest].iRegSrc1 != UINT8_MAX)
                        Bs3ExtCtxSetYmm(pExtCtx, paTests[iTest].iRegSrc1, &paValues[iVal].uSrc1, fSseInstr);
                    else
                    {
                        BS3_ASSERT(paTests[iTest].enmRm == RM_MEM);
                        *puMemOp = paValues[iVal].uSrc1;
                        if (paTests[iTest].iRegDst == UINT8_MAX)
                        {
                            BS3_ASSERT(fSseInstr);
                        }
                        else
                            uMemOpExpect = paValues[iVal].uSrc1;
                    }

                    if (paTests[iTest].iRegSrc2 != UINT8_MAX)
                        Bs3ExtCtxSetYmm(pExtCtx, paTests[iTest].iRegSrc2, &paValues[iVal].uSrc2, fSseInstr);
                    else
                    {
                        /* Memory is the second source operand and must not be modified. */
                        BS3_ASSERT(paTests[iTest].enmRm == RM_MEM);
                        BS3_ASSERT(paTests[iTest].iRegDst != UINT8_MAX && paTests[iTest].iRegSrc1 != UINT8_MAX);
                        *puMemOp = uMemOpExpect = paValues[iVal].uSrc2;
                    }
                    if (paTests[iTest].enmRm == RM_MEM)
                    {
                        BS3_ASSERT(   paTests[iTest].iRegDst  == UINT8_MAX
                                   || paTests[iTest].iRegSrc1 == UINT8_MAX
                                   || paTests[iTest].iRegSrc2 == UINT8_MAX);
                        Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, puMemOp);
                    }

                    /*
                     * Execute.
                     */
                    Bs3ExtCtxRestore(pExtCtx);
                    Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame);
                    Bs3ExtCtxSave(pExtCtxOut);

                    /*
                     * Check the result:
                     */
                    cErrors = Bs3TestSubErrorCount();

                    /* On success the expected extended context holds the result in the destination register. */
                    if (bXcptExpect == X86_XCPT_DB && paTests[iTest].iRegDst != UINT8_MAX)
                        Bs3ExtCtxSetYmm(pExtCtx, paTests[iTest].iRegDst, &paValues[iVal].uDstOut, fSseInstr);
                    Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pszMode, idTestStep);

                    if (TrapFrame.bXcpt != bXcptExpect)
                        Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bXcptExpect, TrapFrame.bXcpt);
                    Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &Ctx, bXcptExpect == X86_XCPT_DB ? paTests[iTest].cbInstr + 1 : 0, 0,
                                         bXcptExpect == X86_XCPT_DB || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
                                         pszMode, idTestStep);
                    if (   paTests[iTest].enmRm == RM_MEM
                        && Bs3MemCmp(puMemOp, &uMemOpExpect, cbOperand) != 0)
                        Bs3TestFailedF("Expected uMemOp %*.Rhxs, got %*.Rhxs", cbOperand, &uMemOpExpect, cbOperand, puMemOp);

                    if (cErrors != Bs3TestSubErrorCount())
                        Bs3TestFailedF("ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%#x)",
                                       bRing, iCfg, iTest, iVal, bXcptExpect);
                }
            }

            bs3CpuInstr3ConfigRestore(&SavedCfg, &Ctx, pExtCtx);
        }

        /*
         * Next ring.
         */
        bRing++;
        if (bRing > 3 || bMode == BS3_MODE_RM)
            break;
        Bs3RegCtxConvertToRingX(&Ctx, bRing);
    }

    /*
     * Cleanup.
     */
    bs3CpuInstr3FreeExtCtxs(pExtCtx, pExtCtxOut);
    return 0;
}
*/ 413 { -4, -63, 414 0, 252, X86_EFL_PF }, 415 { RTCCINTREG_MIN, RTCCINTREG_MIN, 416 RTCCUINTREG_MAX / 4 + 1, 0, X86_EFL_CF | X86_EFL_OF | X86_EFL_PF }, 417 { RTCCINTREG_MIN, RTCCINTREG_MIN + 1, 418 RTCCUINTREG_MAX / 4, RTCCINTREG_MIN, X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_PF}, 419 { RTCCINTREG_MIN + 1, RTCCINTREG_MIN + 1, 420 RTCCUINTREG_MAX / 4, 1, X86_EFL_CF | X86_EFL_OF }, 421 628 /* 629 * XORPS, 128-bit VXORPS 630 */ 631 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr3_xorps)(uint8_t bMode) 632 { 633 /** Input values for 128 bit wide operations: */ 634 static BS3CPUINSTR3_TEST1_VALUES_T const s_aValues128[] = 635 { 636 { RTUINT256_INIT_C(0, 0, 0x1111222233334444, 0x5555666677778888), 637 /* ^ */ RTUINT256_INIT_C(0, 0, 0x9999aaaabbbbcccc, 0xddddeeeeffff0000), 638 /* = */ RTUINT256_INIT_C(0, 0, 0x8888888888888888, 0x8888888888888888) }, 422 639 }; 423 640 424 BS3REGCTX Ctx; 425 BS3TRAPFRAME TrapFrame; 426 unsigned i, j, k; 427 428 /* Ensure the structures are allocated before we sample the stack pointer. */ 429 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 430 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 431 432 /* 433 * Create test context. 
434 */ 435 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 436 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, BS3_CMN_NM(bs3CpuInstr2_imul_xBX_ud2)); 437 438 for (k = 0; k < 2; k++) 439 { 440 Ctx.rflags.u16 |= MUL_CHECK_EFLAGS | MUL_CHECK_EFLAGS_ZERO; 441 for (j = 0; j < 2; j++) 442 { 443 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 444 { 445 if (k == 0) 446 { 447 Ctx.rax.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 448 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 449 } 450 else 451 { 452 Ctx.rax.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 453 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 454 } 455 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 456 if (TrapFrame.bXcpt != X86_XCPT_UD) 457 Bs3TestFailedF("Expected #UD got %#x", TrapFrame.bXcpt); 458 else if ( TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX 459 || TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX 460 || (TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)) 461 != (s_aTests[i].fFlags & IMUL_CHECK_EFLAGS) ) 462 { 463 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTREG_XFMT " * %#" RTCCUINTREG_XFMT, 464 i, s_aTests[i].uInAX, s_aTests[i].uInBX); 465 466 if (TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX) 467 Bs3TestFailedF("Expected xAX = %#RX" RT_XSTR(ARCH_BITS) " got %#RX" RT_XSTR(ARCH_BITS), 468 s_aTests[i].uOutAX, TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS)); 469 if (TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX) 470 Bs3TestFailedF("Expected xDX = %#RX" RT_XSTR(ARCH_BITS) " got %#RX" RT_XSTR(ARCH_BITS), 471 s_aTests[i].uOutDX, TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS)); 472 if ( (TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)) 473 != (s_aTests[i].fFlags & IMUL_CHECK_EFLAGS) ) 474 Bs3TestFailedF("Expected EFLAGS = %#06RX16, got %#06RX16", s_aTests[i].fFlags & IMUL_CHECK_EFLAGS, 475 TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)); 476 } 477 } 478 } 479 } 480 481 /* 482 * Repeat for the 
truncating two operand version. 483 */ 484 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, BS3_CMN_NM(bs3CpuInstr2_imul_xCX_xBX_ud2)); 485 486 for (k = 0; k < 2; k++) 487 { 488 Ctx.rflags.u16 |= MUL_CHECK_EFLAGS | MUL_CHECK_EFLAGS_ZERO; 489 for (j = 0; j < 2; j++) 490 { 491 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 492 { 493 if (k == 0) 494 { 495 Ctx.rcx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 496 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 497 } 498 else 499 { 500 Ctx.rcx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 501 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 502 } 503 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 504 if (TrapFrame.bXcpt != X86_XCPT_UD) 505 Bs3TestFailedF("Expected #UD got %#x", TrapFrame.bXcpt); 506 else if ( TrapFrame.Ctx.rcx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX 507 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 508 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 509 || (TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)) 510 != (s_aTests[i].fFlags & IMUL_CHECK_EFLAGS) ) 511 { 512 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTREG_XFMT " * %#" RTCCUINTREG_XFMT, 513 i, s_aTests[i].uInAX, s_aTests[i].uInBX); 514 515 if (TrapFrame.Ctx.rcx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX) 516 Bs3TestFailedF("Expected xAX = %#RX" RT_XSTR(ARCH_BITS) " got %#RX" RT_XSTR(ARCH_BITS), 517 s_aTests[i].uOutAX, TrapFrame.Ctx.rcx.RT_CONCAT(u,ARCH_BITS)); 518 if ( (TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)) 519 != (s_aTests[i].fFlags & IMUL_CHECK_EFLAGS) ) 520 Bs3TestFailedF("Expected EFLAGS = %#06RX16, got %#06RX16", s_aTests[i].fFlags & IMUL_CHECK_EFLAGS, 521 TrapFrame.Ctx.rflags.u16 & (IMUL_CHECK_EFLAGS | IMUL_CHECK_EFLAGS_ZERO)); 522 } 523 } 524 } 525 } 526 527 return 0; 528 } 529 530 531 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_div)(uint8_t bMode) 532 { 533 #define DIV_CHECK_EFLAGS (uint16_t)(X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 534 static const struct 535 { 536 
RTCCUINTREG uInDX; 537 RTCCUINTREG uInAX; 538 RTCCUINTREG uInBX; 539 RTCCUINTREG uOutAX; 540 RTCCUINTREG uOutDX; 541 uint8_t bXcpt; 542 } s_aTests[] = 543 { 544 { 0, 1, 1, 545 1, 0, X86_XCPT_UD }, 546 { 0, 5, 2, 547 2, 1, X86_XCPT_UD }, 548 { 0, 0, 0, 549 0, 0, X86_XCPT_DE }, 550 { RTCCUINTREG_MAX, RTCCUINTREG_MAX, 0, 551 0, 0, X86_XCPT_DE }, 552 { RTCCUINTREG_MAX, RTCCUINTREG_MAX, 1, 553 0, 0, X86_XCPT_DE }, 554 { RTCCUINTREG_MAX, RTCCUINTREG_MAX, RTCCUINTREG_MAX, 555 0, 0, X86_XCPT_DE }, 556 { RTCCUINTREG_MAX - 1, RTCCUINTREG_MAX, RTCCUINTREG_MAX, 557 RTCCUINTREG_MAX, RTCCUINTREG_MAX - 1, X86_XCPT_UD }, 641 static BS3CPUINSTR3_TEST1_T const s_aTests16[] = 642 { 643 { bs3CpuInstr3_xorps_XMM1_XMM2_icebp_c16, RM_REG, T_SSE2, 3, 1, 1, 2, RT_ELEMENTS(s_aValues128), s_aValues128 }, 558 644 }; 559 645 560 BS3REGCTX Ctx; 561 BS3TRAPFRAME TrapFrame; 562 unsigned i, j; 563 564 /* Ensure the structures are allocated before we sample the stack pointer. */ 565 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 566 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 567 568 /* 569 * Create test context. 570 */ 571 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 572 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, BS3_CMN_NM(bs3CpuInstr2_div_xBX_ud2)); 573 574 /* 575 * Do the tests twice, first with all flags set, then once again with 576 * flags cleared. The flags are not touched by my intel skylake CPU. 577 */ 578 Ctx.rflags.u16 |= DIV_CHECK_EFLAGS; 579 for (j = 0; j < 2; j++) 580 { 581 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 582 { 583 Ctx.rax.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 584 Ctx.rdx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInDX; 585 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 586 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 587 588 if ( TrapFrame.bXcpt != s_aTests[i].bXcpt 589 || ( s_aTests[i].bXcpt == X86_XCPT_UD 590 ? 
TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX 591 || TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX 592 || (TrapFrame.Ctx.rflags.u16 & DIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & DIV_CHECK_EFLAGS) 593 : TrapFrame.Ctx.rax.u != Ctx.rax.u 594 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 595 || (TrapFrame.Ctx.rflags.u16 & DIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & DIV_CHECK_EFLAGS) ) ) 596 { 597 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTREG_XFMT ":%" RTCCUINTREG_XFMT " / %#" RTCCUINTREG_XFMT, 598 i, s_aTests[i].uInDX, s_aTests[i].uInAX, s_aTests[i].uInBX); 599 if (TrapFrame.bXcpt != s_aTests[i].bXcpt) 600 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", s_aTests[i].bXcpt, TrapFrame.bXcpt); 601 if (s_aTests[i].bXcpt == X86_XCPT_UD) 602 { 603 if (TrapFrame.Ctx.rax.RT_CONCAT(u, ARCH_BITS) != s_aTests[i].uOutAX) 604 Bs3TestFailedF("Expected xAX = %#" RTCCUINTREG_XFMT ", got %#" RTCCUINTREG_XFMT, 605 s_aTests[i].uOutAX, TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS)); 606 if (TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX) 607 Bs3TestFailedF("Expected xDX = %#" RTCCUINTREG_XFMT ", got %#" RTCCUINTREG_XFMT, 608 s_aTests[i].uOutDX, TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS)); 609 if ((TrapFrame.Ctx.rflags.u16 & DIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & DIV_CHECK_EFLAGS)) 610 Bs3TestFailedF("Expected EFLAGS = %#06RX16, got %#06RX16", 611 Ctx.rflags.u16 & DIV_CHECK_EFLAGS, TrapFrame.Ctx.rflags.u16 & DIV_CHECK_EFLAGS); 612 } 613 } 614 } 615 Ctx.rflags.u16 &= ~DIV_CHECK_EFLAGS; 616 } 617 618 return 0; 619 } 620 621 622 623 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_idiv)(uint8_t bMode) 624 { 625 #define IDIV_CHECK_EFLAGS (uint16_t)(X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 626 static const struct 627 { 628 RTCCUINTREG uInDX; 629 RTCCUINTREG uInAX; 630 RTCCUINTREG uInBX; 631 RTCCUINTREG uOutAX; 632 RTCCUINTREG uOutDX; 633 uint8_t bXcpt; 634 } s_aTests[] = 635 { 636 { 0, 0, 0, 637 0, 0, X86_XCPT_DE }, 638 { 
RTCCINTREG_MAX, RTCCINTREG_MAX, 0, 639 0, 0, X86_XCPT_DE }, 640 /* two positive values. */ 641 { 0, 1, 1, 642 1, 0, X86_XCPT_UD }, 643 { 0, 5, 2, 644 2, 1, X86_XCPT_UD }, 645 { RTCCINTREG_MAX / 2, RTCCUINTREG_MAX / 2, RTCCINTREG_MAX, 646 RTCCINTREG_MAX, RTCCINTREG_MAX - 1, X86_XCPT_UD }, 647 { RTCCINTREG_MAX / 2, RTCCUINTREG_MAX / 2 + 1, RTCCINTREG_MAX, 648 RTCCINTREG_MAX, RTCCINTREG_MAX - 1, X86_XCPT_DE }, 649 /* negative dividend, positive divisor. */ 650 { -1, -7, 2, 651 -3, -1, X86_XCPT_UD }, 652 { RTCCINTREG_MIN / 2 + 1, 0, RTCCINTREG_MAX, 653 RTCCINTREG_MIN + 2, RTCCINTREG_MIN + 2, X86_XCPT_UD }, 654 { RTCCINTREG_MIN / 2, 0, RTCCINTREG_MAX, 655 0, 0, X86_XCPT_DE }, 656 /* positive dividend, negative divisor. */ 657 { 0, 7, -2, 658 -3, 1, X86_XCPT_UD }, 659 { RTCCINTREG_MAX / 2 + 1, RTCCINTREG_MAX, RTCCINTREG_MIN, 660 RTCCINTREG_MIN, RTCCINTREG_MAX, X86_XCPT_UD }, 661 { RTCCINTREG_MAX / 2 + 1, (RTCCUINTREG)RTCCINTREG_MAX+1, RTCCINTREG_MIN, 662 0, 0, X86_XCPT_DE }, 663 /* negative dividend, negative divisor. */ 664 { -1, -7, -2, 665 3, -1, X86_XCPT_UD }, 666 { RTCCINTREG_MIN / 2, 1, RTCCINTREG_MIN, 667 RTCCINTREG_MAX, RTCCINTREG_MIN + 1, X86_XCPT_UD }, 668 { RTCCINTREG_MIN / 2, 2, RTCCINTREG_MIN, 669 RTCCINTREG_MAX, RTCCINTREG_MIN + 2, X86_XCPT_UD }, 670 { RTCCINTREG_MIN / 2, 0, RTCCINTREG_MIN, 671 0, 0, X86_XCPT_DE }, 672 }; 673 674 BS3REGCTX Ctx; 675 BS3TRAPFRAME TrapFrame; 676 unsigned i, j; 677 678 /* Ensure the structures are allocated before we sample the stack pointer. */ 679 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 680 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 681 682 /* 683 * Create test context. 684 */ 685 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 686 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, BS3_CMN_NM(bs3CpuInstr2_idiv_xBX_ud2)); 687 688 /* 689 * Do the tests twice, first with all flags set, then once again with 690 * flags cleared. The flags are not touched by my intel skylake CPU. 
691 */ 692 Ctx.rflags.u16 |= IDIV_CHECK_EFLAGS; 693 for (j = 0; j < 2; j++) 694 { 695 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 696 { 697 Ctx.rax.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInAX; 698 Ctx.rdx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInDX; 699 Ctx.rbx.RT_CONCAT(u,ARCH_BITS) = s_aTests[i].uInBX; 700 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 701 702 if ( TrapFrame.bXcpt != s_aTests[i].bXcpt 703 || ( s_aTests[i].bXcpt == X86_XCPT_UD 704 ? TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutAX 705 || TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX 706 || (TrapFrame.Ctx.rflags.u16 & IDIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & IDIV_CHECK_EFLAGS) 707 : TrapFrame.Ctx.rax.u != Ctx.rax.u 708 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 709 || (TrapFrame.Ctx.rflags.u16 & IDIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & IDIV_CHECK_EFLAGS) ) ) 710 { 711 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTREG_XFMT ":%" RTCCUINTREG_XFMT " / %#" RTCCUINTREG_XFMT, 712 i, s_aTests[i].uInDX, s_aTests[i].uInAX, s_aTests[i].uInBX); 713 if (TrapFrame.bXcpt != s_aTests[i].bXcpt) 714 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", s_aTests[i].bXcpt, TrapFrame.bXcpt); 715 if (s_aTests[i].bXcpt == X86_XCPT_UD) 716 { 717 if (TrapFrame.Ctx.rax.RT_CONCAT(u, ARCH_BITS) != s_aTests[i].uOutAX) 718 Bs3TestFailedF("Expected xAX = %#" RTCCUINTREG_XFMT ", got %#" RTCCUINTREG_XFMT, 719 s_aTests[i].uOutAX, TrapFrame.Ctx.rax.RT_CONCAT(u,ARCH_BITS)); 720 if (TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS) != s_aTests[i].uOutDX) 721 Bs3TestFailedF("Expected xDX = %#" RTCCUINTREG_XFMT ", got %#" RTCCUINTREG_XFMT, 722 s_aTests[i].uOutDX, TrapFrame.Ctx.rdx.RT_CONCAT(u,ARCH_BITS)); 723 if ((TrapFrame.Ctx.rflags.u16 & IDIV_CHECK_EFLAGS) != (Ctx.rflags.u16 & IDIV_CHECK_EFLAGS)) 724 Bs3TestFailedF("Expected EFLAGS = %#06RX16, got %#06RX16", 725 Ctx.rflags.u16 & IDIV_CHECK_EFLAGS, TrapFrame.Ctx.rflags.u16 & IDIV_CHECK_EFLAGS); 726 } 727 } 728 } 729 Ctx.rflags.u16 &= ~IDIV_CHECK_EFLAGS; 730 } 731 732 return 0; 
733 } 734 735 736 /* 737 * BSF/BSR (386+) & TZCNT/LZCNT (BMI1,ABM) 738 */ 739 740 typedef struct BS3CPUINSTR2_SUBTEST_BITSCAN_T 741 { 742 RTCCUINTXREG uSrc; 743 RTCCUINTXREG uOut; 744 bool fOutNotSet; 745 uint16_t fEflOut; 746 } BS3CPUINSTR2_SUBTEST_BITSCAN_T; 747 748 typedef struct BS3CPUINSTR2_TEST_BITSCAN_T 749 { 750 FPFNBS3FAR pfnWorker; 751 bool fMemSrc; 752 uint8_t cbInstr; 753 uint8_t cOpBits; 754 uint16_t fEflCheck; 755 uint8_t cSubTests; 756 BS3CPUINSTR2_SUBTEST_BITSCAN_T const *paSubTests; 757 } BS3CPUINSTR2_TEST_BITSCAN_T; 758 759 static uint8_t bs3CpuInstr2_BitScan(uint8_t bMode, BS3CPUINSTR2_TEST_BITSCAN_T const *paTests, unsigned cTests) 760 { 761 BS3REGCTX Ctx; 762 BS3TRAPFRAME TrapFrame; 763 unsigned i, j, k; 764 765 /* Ensure the structures are allocated before we sample the stack pointer. */ 766 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 767 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 768 769 /* 770 * Create test context. 771 */ 772 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 773 774 /* 775 * Do the tests twice, first with all flags set, then once again with 776 * flags cleared. The flags are not supposed to be touched at all. 
777 */ 778 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 779 for (j = 0; j < 2; j++) 780 { 781 for (i = 0; i < cTests; i++) 782 { 783 for (k = 0; k < paTests[i].cSubTests; k++) 784 { 785 uint64_t uExpectRax, uExpectRip; 786 RTCCUINTXREG uMemSrc, uMemSrcExpect; 787 788 Ctx.rax.uCcXReg = RTCCUINTXREG_MAX * 1019; 789 if (!paTests[i].fMemSrc) 790 { 791 Ctx.rbx.uCcXReg = paTests[i].paSubTests[k].uSrc; 792 uMemSrcExpect = uMemSrc = ~paTests[i].paSubTests[k].uSrc; 793 } 794 else 795 { 796 uMemSrcExpect = uMemSrc = paTests[i].paSubTests[k].uSrc; 797 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc); 798 } 799 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paTests[i].pfnWorker); 800 if (paTests[i].paSubTests[k].fOutNotSet) 801 uExpectRax = Ctx.rax.u; 802 else if (paTests[i].cOpBits != 16) 803 uExpectRax = paTests[i].paSubTests[k].uOut; 804 else 805 uExpectRax = paTests[i].paSubTests[k].uOut | (Ctx.rax.u & UINT64_C(0xffffffffffff0000)); 806 uExpectRip = Ctx.rip.u + paTests[i].cbInstr; 807 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 808 809 if ( TrapFrame.bXcpt != X86_XCPT_UD 810 || TrapFrame.Ctx.rip.u != uExpectRip 811 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 812 || TrapFrame.Ctx.rax.u != uExpectRax 813 || (TrapFrame.Ctx.rflags.u16 & paTests[i].fEflCheck) 814 != (paTests[i].paSubTests[k].fEflOut & paTests[i].fEflCheck) 815 /* check that nothing else really changed: */ 816 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 817 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 818 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 819 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 820 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 821 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 822 || uMemSrc != uMemSrcExpect 823 ) 824 { 825 Bs3TestFailedF("test #%i/%i failed: input %#" RTCCUINTXREG_XFMT, 826 i, k, paTests[i].paSubTests[k].uSrc); 827 if (TrapFrame.bXcpt != X86_XCPT_UD) 828 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", X86_XCPT_UD, TrapFrame.bXcpt); 829 if (TrapFrame.Ctx.rip.u != uExpectRip) 830 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", 
uExpectRip, TrapFrame.Ctx.rip.u); 831 if (TrapFrame.Ctx.rax.u != uExpectRax) 832 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", uExpectRax, TrapFrame.Ctx.rax.u); 833 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 834 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 835 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 836 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 837 if ( (TrapFrame.Ctx.rflags.u16 & paTests[i].fEflCheck) 838 != (paTests[i].paSubTests[k].fEflOut & paTests[i].fEflCheck)) 839 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (output)", 840 paTests[i].paSubTests[k].fEflOut & paTests[i].fEflCheck, 841 TrapFrame.Ctx.rflags.u16 & paTests[i].fEflCheck); 842 843 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 844 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64 (src)", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 845 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 846 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 847 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 848 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 849 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 850 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 851 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 852 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 853 if (uMemSrc != uMemSrcExpect) 854 Bs3TestFailedF("Expected uMemSrc = %#06RX64, got %#06RX64", (uint64_t)uMemSrcExpect, (uint64_t)uMemSrc); 855 } 856 } 857 } 858 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 859 } 860 861 return 0; 862 } 863 864 865 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_bsf_tzcnt)(uint8_t bMode) 866 { 867 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsf16[] = 868 { 869 { 0, /* -> */ 0, true, X86_EFL_ZF }, 870 { ~(RTCCUINTXREG)UINT16_MAX, /* -> */ 0, true, X86_EFL_ZF }, 871 { ~(RTCCUINTXREG)0, /* -> */ 0, false, 0 }, 
872 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 873 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 874 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 875 }; 876 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsTzCnt16[] = 877 { 878 { 0, /* -> */ 16, false, X86_EFL_CF }, 879 { ~(RTCCUINTXREG)UINT16_MAX, /* -> */ 16, false, X86_EFL_CF }, 880 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 881 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 882 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 883 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 884 }; 885 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsf32[] = 886 { 887 { 0, /* -> */ 0, true, X86_EFL_ZF }, 888 #if ARCH_BITS == 64 889 { ~(RTCCUINTXREG)UINT32_MAX, /* -> */ 0, true, X86_EFL_ZF }, 890 #endif 891 { ~(RTCCUINTXREG)0, /* -> */ 0, false, 0 }, 892 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 893 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 894 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 895 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 896 { UINT32_C(0x45600000), /* -> */ 21, false, 0 }, 897 }; 898 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsTzCnt32[] = 899 { 900 { 0, /* -> */ 32, false, X86_EFL_CF }, 901 #if ARCH_BITS == 64 902 { ~(RTCCUINTXREG)UINT32_MAX, /* -> */ 32, false, X86_EFL_CF }, 903 #endif 904 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 905 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 906 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 907 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 908 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 909 { UINT32_C(0x45600000), /* -> */ 21, false, 0 }, 910 }; 911 #if ARCH_BITS == 64 912 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsf64[] = 913 { 914 { 0, /* -> */ 0, true, X86_EFL_ZF }, 915 { ~(RTCCUINTXREG)0, /* -> */ 0, false, 0 }, 916 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 917 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 918 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 919 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 920 { 
UINT32_C(0x45600000), /* -> */ 21, false, 0 }, 921 { UINT64_C(0x8000000000000000), /* -> */ 63, false, 0 }, 922 { UINT64_C(0x4560000000000000), /* -> */ 53, false, 0 }, 923 }; 924 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsTzCnt64[] = 925 { 926 { 0, /* -> */ 64, false, X86_EFL_CF }, 927 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 928 { ~(RTCCUINTXREG)1, /* -> */ 1, false, 0 }, 929 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 930 { UINT16_C(0x4560), /* -> */ 5, false, 0 }, 931 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 932 { UINT32_C(0x45600000), /* -> */ 21, false, 0 }, 933 { UINT64_C(0x8000000000000000), /* -> */ 63, false, 0 }, 934 { UINT64_C(0x4560000000000000), /* -> */ 53, false, 0 }, 646 #if ARCH_BITS >= 32 647 static BS3CPUINSTR3_TEST1_T const s_aTests32[] = 648 { 649 { bs3CpuInstr3_xorps_XMM1_XMM2_icebp_c32, RM_REG, T_SSE2, 3, 1, 1, 2, RT_ELEMENTS(s_aValues128), s_aValues128 }, 935 650 }; 936 651 #endif 937 static BS3CPUINSTR2_TEST_BITSCAN_T s_aTests[] = 938 { 939 { BS3_CMN_NM(bs3CpuInstr2_bsf_AX_BX_ud2), false, 3 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 940 RT_ELEMENTS(s_aSubTestsBsf16), s_aSubTestsBsf16 }, 941 { BS3_CMN_NM(bs3CpuInstr2_bsf_AX_FSxBX_ud2), true, 4 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 942 RT_ELEMENTS(s_aSubTestsBsf16), s_aSubTestsBsf16 }, 943 { BS3_CMN_NM(bs3CpuInstr2_bsf_EAX_EBX_ud2), false, 3 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 944 RT_ELEMENTS(s_aSubTestsBsf32), s_aSubTestsBsf32 }, 945 { BS3_CMN_NM(bs3CpuInstr2_bsf_EAX_FSxBX_ud2), true, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 946 RT_ELEMENTS(s_aSubTestsBsf32), s_aSubTestsBsf32 }, 947 #if ARCH_BITS == 64 948 { BS3_CMN_NM(bs3CpuInstr2_bsf_RAX_RBX_ud2), false, 4, 64, X86_EFL_ZF, 949 RT_ELEMENTS(s_aSubTestsBsf64), s_aSubTestsBsf64 }, 950 { BS3_CMN_NM(bs3CpuInstr2_bsf_RAX_FSxBX_ud2), true, 5, 64, X86_EFL_ZF, 951 RT_ELEMENTS(s_aSubTestsBsf64), s_aSubTestsBsf64 }, 952 #endif 953 /* f2 prefixed variant: */ 954 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_AX_BX_ud2), false, 4 + 
(ARCH_BITS != 16), 16, X86_EFL_ZF, 955 RT_ELEMENTS(s_aSubTestsBsf16), s_aSubTestsBsf16 }, 956 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_AX_FSxBX_ud2), true, 5 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 957 RT_ELEMENTS(s_aSubTestsBsf16), s_aSubTestsBsf16 }, 958 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_EAX_EBX_ud2), false, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 959 RT_ELEMENTS(s_aSubTestsBsf32), s_aSubTestsBsf32 }, 960 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_EAX_FSxBX_ud2), true, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 961 RT_ELEMENTS(s_aSubTestsBsf32), s_aSubTestsBsf32 }, 962 #if ARCH_BITS == 64 963 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_RAX_RBX_ud2), false, 5, 64, X86_EFL_ZF, 964 RT_ELEMENTS(s_aSubTestsBsf64), s_aSubTestsBsf64 }, 965 { BS3_CMN_NM(bs3CpuInstr2_f2_bsf_RAX_FSxBX_ud2), true, 6, 64, X86_EFL_ZF, 966 RT_ELEMENTS(s_aSubTestsBsf64), s_aSubTestsBsf64 }, 967 #endif 968 969 /* tzcnt: */ 970 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_AX_BX_ud2), false, 4 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 971 RT_ELEMENTS(s_aSubTestsTzCnt16), s_aSubTestsTzCnt16 }, 972 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_AX_FSxBX_ud2), true, 5 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 973 RT_ELEMENTS(s_aSubTestsTzCnt16), s_aSubTestsTzCnt16 }, 974 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_EAX_EBX_ud2), false, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 975 RT_ELEMENTS(s_aSubTestsTzCnt32), s_aSubTestsTzCnt32 }, 976 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_EAX_FSxBX_ud2), true, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 977 RT_ELEMENTS(s_aSubTestsTzCnt32), s_aSubTestsTzCnt32 }, 978 #if ARCH_BITS == 64 979 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_RAX_RBX_ud2), false, 5, 64, X86_EFL_ZF | X86_EFL_CF, 980 RT_ELEMENTS(s_aSubTestsTzCnt64), s_aSubTestsTzCnt64 }, 981 { BS3_CMN_NM(bs3CpuInstr2_tzcnt_RAX_FSxBX_ud2), true, 6, 64, X86_EFL_ZF | X86_EFL_CF, 982 RT_ELEMENTS(s_aSubTestsTzCnt64), s_aSubTestsTzCnt64 }, 983 #endif 984 /* f2 prefixed tzcnt variant (last prefix (f3) should prevail): */ 985 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_AX_BX_ud2), false, 
5 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 986 RT_ELEMENTS(s_aSubTestsTzCnt16), s_aSubTestsTzCnt16 }, 987 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_AX_FSxBX_ud2), true, 6 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 988 RT_ELEMENTS(s_aSubTestsTzCnt16), s_aSubTestsTzCnt16 }, 989 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_EAX_EBX_ud2), false, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 990 RT_ELEMENTS(s_aSubTestsTzCnt32), s_aSubTestsTzCnt32 }, 991 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_EAX_FSxBX_ud2),true, 6 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 992 RT_ELEMENTS(s_aSubTestsTzCnt32), s_aSubTestsTzCnt32 }, 993 #if ARCH_BITS == 64 994 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_RAX_RBX_ud2), false, 6, 64, X86_EFL_ZF | X86_EFL_CF, 995 RT_ELEMENTS(s_aSubTestsTzCnt64), s_aSubTestsTzCnt64 }, 996 { BS3_CMN_NM(bs3CpuInstr2_f2_tzcnt_RAX_FSxBX_ud2),true, 7, 64, X86_EFL_ZF | X86_EFL_CF, 997 RT_ELEMENTS(s_aSubTestsTzCnt64), s_aSubTestsTzCnt64 }, 998 #endif 999 }; 1000 1001 uint32_t uStdExtFeatEbx = 0; 1002 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 1003 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 1004 if (!(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_BMI1)) 1005 { 1006 unsigned i = RT_ELEMENTS(s_aTests); 1007 while (i-- > 0) 1008 if (s_aTests[i].fEflCheck & X86_EFL_CF) 1009 { 1010 s_aTests[i].fEflCheck = X86_EFL_ZF; 1011 switch (s_aTests[i].cOpBits) 1012 { 1013 case 16: 1014 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsf16); 1015 s_aTests[i].paSubTests = s_aSubTestsBsf16; 1016 break; 1017 case 32: 1018 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsf32); 1019 s_aTests[i].paSubTests = s_aSubTestsBsf32; 1020 break; 1021 #if ARCH_BITS == 64 1022 case 64: 1023 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsf64); 1024 s_aTests[i].paSubTests = s_aSubTestsBsf64; 1025 break; 1026 #endif 1027 } 1028 } 1029 Bs3TestPrintf("tzcnt not supported\n"); 1030 } 1031 1032 return bs3CpuInstr2_BitScan(bMode, s_aTests, RT_ELEMENTS(s_aTests)); 1033 } 1034 1035 1036 
BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_bsr_lzcnt)(uint8_t bMode) 1037 { 1038 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsr16[] = 1039 { 1040 { 0, /* -> */ 0, true, X86_EFL_ZF }, 1041 { ~(RTCCUINTXREG)UINT16_MAX, /* -> */ 0, true, X86_EFL_ZF }, 1042 { ~(RTCCUINTXREG)0, /* -> */ 15, false, 0 }, 1043 { ~(RTCCUINTXREG)1, /* -> */ 15, false, 0 }, 1044 { UINT16_C(0x0001), /* -> */ 0, false, 0 }, 1045 { UINT16_C(0x0002), /* -> */ 1, false, 0 }, 1046 { UINT16_C(0x4560), /* -> */ 14, false, 0 }, 1047 }; 1048 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsLzCnt16[] = 1049 { 1050 { 0, /* -> */ 16, false, X86_EFL_CF }, 1051 { ~(RTCCUINTXREG)UINT16_MAX, /* -> */ 16, false, X86_EFL_CF }, 1052 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 1053 { ~(RTCCUINTXREG)1, /* -> */ 0, false, X86_EFL_ZF }, 1054 { UINT16_C(0x8000), /* -> */ 0, false, X86_EFL_ZF }, 1055 { UINT16_C(0x4560), /* -> */ 1, false, 0 }, 1056 { UINT16_C(0x003f), /* -> */ 10, false, 0 }, 1057 { UINT16_C(0x0001), /* -> */ 15, false, 0 }, 1058 }; 1059 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsr32[] = 1060 { 1061 { 0, /* -> */ 0, true, X86_EFL_ZF }, 1062 #if ARCH_BITS == 64 1063 { ~(RTCCUINTXREG)UINT32_MAX, /* -> */ 0, true, X86_EFL_ZF }, 1064 #endif 1065 { ~(RTCCUINTXREG)0, /* -> */ 31, false, 0 }, 1066 { ~(RTCCUINTXREG)1, /* -> */ 31, false, 0 }, 1067 { 1, /* -> */ 0, false, 0 }, 1068 { 2, /* -> */ 1, false, 0 }, 1069 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 1070 { UINT16_C(0x4560), /* -> */ 14, false, 0 }, 1071 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 1072 { UINT32_C(0x45600000), /* -> */ 30, false, 0 }, 1073 }; 1074 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsLzCnt32[] = 1075 { 1076 { 0, /* -> */ 32, false, X86_EFL_CF }, 1077 #if ARCH_BITS == 64 1078 { ~(RTCCUINTXREG)UINT32_MAX, /* -> */ 32, false, X86_EFL_CF }, 1079 #endif 1080 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 1081 { ~(RTCCUINTXREG)1, /* -> */ 0, false, X86_EFL_ZF }, 1082 { 
1, /* -> */ 31, false, 0 }, 1083 { 2, /* -> */ 30, false, 0}, 1084 { UINT16_C(0x8000), /* -> */ 16, false, 0 }, 1085 { UINT16_C(0x4560), /* -> */ 17, false, 0 }, 1086 { UINT32_C(0x80000000), /* -> */ 0, false, X86_EFL_ZF }, 1087 { UINT32_C(0x45600000), /* -> */ 1, false, 0 }, 1088 { UINT32_C(0x0000ffff), /* -> */ 16, false, 0 }, 1089 }; 1090 #if ARCH_BITS == 64 1091 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsBsr64[] = 1092 { 1093 { 0, /* -> */ 0, true, X86_EFL_ZF }, 1094 { ~(RTCCUINTXREG)0, /* -> */ 63, false, 0 }, 1095 { ~(RTCCUINTXREG)1, /* -> */ 63, false, 0 }, 1096 { 1, /* -> */ 0, false, 0 }, 1097 { 2, /* -> */ 1, false, 0 }, 1098 { UINT16_C(0x8000), /* -> */ 15, false, 0 }, 1099 { UINT16_C(0x4560), /* -> */ 14, false, 0 }, 1100 { UINT32_C(0x80000000), /* -> */ 31, false, 0 }, 1101 { UINT32_C(0x45600000), /* -> */ 30, false, 0 }, 1102 { UINT64_C(0x8000000000000000), /* -> */ 63, false, 0 }, 1103 { UINT64_C(0x0045600000000000), /* -> */ 54, false, 0 }, 1104 }; 1105 static BS3CPUINSTR2_SUBTEST_BITSCAN_T const s_aSubTestsLzCnt64[] = 1106 { 1107 { 0, /* -> */ 64, false, X86_EFL_CF }, 1108 { ~(RTCCUINTXREG)0, /* -> */ 0, false, X86_EFL_ZF }, 1109 { ~(RTCCUINTXREG)1, /* -> */ 0, false, X86_EFL_ZF }, 1110 { 1, /* -> */ 63, false, 0 }, 1111 { 2, /* -> */ 62, false, 0 }, 1112 { UINT16_C(0x8000), /* -> */ 48, false, 0 }, 1113 { UINT16_C(0x4560), /* -> */ 49, false, 0 }, 1114 { UINT32_C(0x80000000), /* -> */ 32, false, 0 }, 1115 { UINT32_C(0x45600000), /* -> */ 33, false, 0 }, 1116 { UINT64_C(0x8000000000000000), /* -> */ 0, false, X86_EFL_ZF }, 1117 { UINT64_C(0x4560000000000000), /* -> */ 1, false, 0 }, 1118 { UINT64_C(0x0045600000000000), /* -> */ 9, false, 0 }, 652 653 #if ARCH_BITS >= 64 654 static BS3CPUINSTR3_TEST1_T const s_aTests64[] = 655 { 656 { bs3CpuInstr3_xorps_XMM1_XMM2_icebp_c64, RM_REG, T_SSE2, 3, 1, 1, 2, RT_ELEMENTS(s_aValues128), s_aValues128 }, 1119 657 }; 1120 658 #endif 1121 static BS3CPUINSTR2_TEST_BITSCAN_T s_aTests[] = 1122 { 1123 { 
BS3_CMN_NM(bs3CpuInstr2_bsr_AX_BX_ud2), false, 3 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 1124 RT_ELEMENTS(s_aSubTestsBsr16), s_aSubTestsBsr16 }, 1125 { BS3_CMN_NM(bs3CpuInstr2_bsr_AX_FSxBX_ud2), true, 4 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 1126 RT_ELEMENTS(s_aSubTestsBsr16), s_aSubTestsBsr16 }, 1127 { BS3_CMN_NM(bs3CpuInstr2_bsr_EAX_EBX_ud2), false, 3 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 1128 RT_ELEMENTS(s_aSubTestsBsr32), s_aSubTestsBsr32 }, 1129 { BS3_CMN_NM(bs3CpuInstr2_bsr_EAX_FSxBX_ud2), true, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 1130 RT_ELEMENTS(s_aSubTestsBsr32), s_aSubTestsBsr32 }, 1131 #if ARCH_BITS == 64 1132 { BS3_CMN_NM(bs3CpuInstr2_bsr_RAX_RBX_ud2), false, 4, 64, X86_EFL_ZF, 1133 RT_ELEMENTS(s_aSubTestsBsr64), s_aSubTestsBsr64 }, 1134 { BS3_CMN_NM(bs3CpuInstr2_bsr_RAX_FSxBX_ud2), true, 5, 64, X86_EFL_ZF, 1135 RT_ELEMENTS(s_aSubTestsBsr64), s_aSubTestsBsr64 }, 1136 #endif 1137 /* f2 prefixed variant: */ 1138 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_AX_BX_ud2), false, 4 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 1139 RT_ELEMENTS(s_aSubTestsBsr16), s_aSubTestsBsr16 }, 1140 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_AX_FSxBX_ud2), true, 5 + (ARCH_BITS != 16), 16, X86_EFL_ZF, 1141 RT_ELEMENTS(s_aSubTestsBsr16), s_aSubTestsBsr16 }, 1142 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_EAX_EBX_ud2), false, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 1143 RT_ELEMENTS(s_aSubTestsBsr32), s_aSubTestsBsr32 }, 1144 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_EAX_FSxBX_ud2), true, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF, 1145 RT_ELEMENTS(s_aSubTestsBsr32), s_aSubTestsBsr32 }, 1146 #if ARCH_BITS == 64 1147 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_RAX_RBX_ud2), false, 5, 64, X86_EFL_ZF, 1148 RT_ELEMENTS(s_aSubTestsBsr64), s_aSubTestsBsr64 }, 1149 { BS3_CMN_NM(bs3CpuInstr2_f2_bsr_RAX_FSxBX_ud2), true, 6, 64, X86_EFL_ZF, 1150 RT_ELEMENTS(s_aSubTestsBsr64), s_aSubTestsBsr64 }, 1151 #endif 1152 1153 /* lzcnt: */ 1154 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_AX_BX_ud2), false, 4 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 1155 
RT_ELEMENTS(s_aSubTestsLzCnt16), s_aSubTestsLzCnt16 }, 1156 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_AX_FSxBX_ud2), true, 5 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 1157 RT_ELEMENTS(s_aSubTestsLzCnt16), s_aSubTestsLzCnt16 }, 1158 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_EAX_EBX_ud2), false, 4 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 1159 RT_ELEMENTS(s_aSubTestsLzCnt32), s_aSubTestsLzCnt32 }, 1160 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_EAX_FSxBX_ud2), true, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 1161 RT_ELEMENTS(s_aSubTestsLzCnt32), s_aSubTestsLzCnt32 }, 1162 #if ARCH_BITS == 64 1163 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_RAX_RBX_ud2), false, 5, 64, X86_EFL_ZF | X86_EFL_CF, 1164 RT_ELEMENTS(s_aSubTestsLzCnt64), s_aSubTestsLzCnt64 }, 1165 { BS3_CMN_NM(bs3CpuInstr2_lzcnt_RAX_FSxBX_ud2), true, 6, 64, X86_EFL_ZF | X86_EFL_CF, 1166 RT_ELEMENTS(s_aSubTestsLzCnt64), s_aSubTestsLzCnt64 }, 1167 #endif 1168 /* f2 prefixed lzcnt variant (last prefix (f3) should prevail): */ 1169 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_AX_BX_ud2), false, 5 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 1170 RT_ELEMENTS(s_aSubTestsLzCnt16), s_aSubTestsLzCnt16 }, 1171 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_AX_FSxBX_ud2), true, 6 + (ARCH_BITS != 16), 16, X86_EFL_ZF | X86_EFL_CF, 1172 RT_ELEMENTS(s_aSubTestsLzCnt16), s_aSubTestsLzCnt16 }, 1173 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_EAX_EBX_ud2), false, 5 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 1174 RT_ELEMENTS(s_aSubTestsLzCnt32), s_aSubTestsLzCnt32 }, 1175 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_EAX_FSxBX_ud2),true, 6 + (ARCH_BITS == 16), 32, X86_EFL_ZF | X86_EFL_CF, 1176 RT_ELEMENTS(s_aSubTestsLzCnt32), s_aSubTestsLzCnt32 }, 1177 #if ARCH_BITS == 64 1178 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_RAX_RBX_ud2), false, 6, 64, X86_EFL_ZF | X86_EFL_CF, 1179 RT_ELEMENTS(s_aSubTestsLzCnt64), s_aSubTestsLzCnt64 }, 1180 { BS3_CMN_NM(bs3CpuInstr2_f2_lzcnt_RAX_FSxBX_ud2),true, 7, 64, X86_EFL_ZF | X86_EFL_CF, 1181 RT_ELEMENTS(s_aSubTestsLzCnt64), s_aSubTestsLzCnt64 }, 
1182 #endif 1183 }; 1184 1185 uint32_t uExtFeatEcx = 0; 1186 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES) 1187 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &uExtFeatEcx, NULL); 1188 if (!(uExtFeatEcx & X86_CPUID_AMD_FEATURE_ECX_ABM)) 1189 { 1190 unsigned i = RT_ELEMENTS(s_aTests); 1191 while (i-- > 0) 1192 if (s_aTests[i].fEflCheck & X86_EFL_CF) 1193 { 1194 s_aTests[i].fEflCheck = X86_EFL_ZF; 1195 switch (s_aTests[i].cOpBits) 1196 { 1197 case 16: 1198 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsr16); 1199 s_aTests[i].paSubTests = s_aSubTestsBsr16; 1200 break; 1201 case 32: 1202 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsr32); 1203 s_aTests[i].paSubTests = s_aSubTestsBsr32; 1204 break; 1205 #if ARCH_BITS == 64 1206 case 64: 1207 s_aTests[i].cSubTests = RT_ELEMENTS(s_aSubTestsBsr64); 1208 s_aTests[i].paSubTests = s_aSubTestsBsr64; 1209 break; 1210 #endif 1211 } 1212 } 1213 Bs3TestPrintf("lzcnt not supported\n"); 1214 } 1215 1216 return bs3CpuInstr2_BitScan(bMode, s_aTests, RT_ELEMENTS(s_aTests)); 1217 } 1218 1219 1220 /** 1221 * RORX 1222 */ 1223 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_rorx)(uint8_t bMode) 1224 { 1225 static const struct 1226 { 1227 FPFNBS3FAR pfnWorker; 1228 bool fMemSrc; 1229 bool fOkay; 1230 RTCCUINTXREG uIn; 1231 RTCCUINTXREG uOut; 1232 } s_aTests[] = 1233 { 1234 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1235 { BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_RDX_2_icebp), false, true, // #0 1236 0, /* -> */ 0 }, 1237 { BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_RDX_2_icebp), false, true, // #1 1238 ~(RTCCUINTXREG)2, /* -> */ ~(RTCCUINTXREG)0 >> 1 }, 1239 { BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_DSxDI_68_icebp), true, true, // #2 1240 0, /* -> */ 0 }, 1241 { BS3_CMN_NM(bs3CpuInstr2_rorx_RBX_DSxDI_68_icebp), true, true, // #3 1242 ~(RTCCUINTXREG)2, /* -> */ (RTCCUINTXREG_MAX >> 4) | (~(RTCCUINTXREG)2 << (sizeof(RTCCUINTXREG) * 8 - 4)) }, 1243 1244 /* 32 bits register width: */ 1245 { 
BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp), false, true, // #4 1246 0, /* -> */ 0 }, 1247 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp), false, true, // #5 1248 ~(RTCCUINTXREG)2, /* -> */ (RTCCUINTXREG)(~(uint32_t)0 >> 1) }, 1249 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_DSxDI_36_icebp), true, true, // #6 1250 0, /* -> */ 0 }, 1251 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_DSxDI_36_icebp), true, true, // #7 1252 ~(RTCCUINTXREG)2, /* -> */ (RTCCUINTXREG)UINT32_C(0xdfffffff) }, 1253 1254 /* encoding tests: */ 1255 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_L1), false, false, // #8 1256 RTCCUINTXREG_MAX, /* -> */ 0 }, 1257 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V1), false, false, // #9 1258 RTCCUINTXREG_MAX, /* -> */ 0 }, 1259 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V15), false, false, // #10 1260 RTCCUINTXREG_MAX, /* -> */ 0 }, 1261 # if ARCH_BITS == 64 /* The VEX.X=0 encoding mean LES instruction in 32-bit and 16-bit mode. */ 1262 { BS3_CMN_NM(bs3CpuInstr2_rorx_EBX_EDX_2_icebp_X1), false, true, // #11 1263 UINT32_C(0xf1e2d3c5), /* -> */ (RTCCUINTXREG)UINT32_C(0x7c78b4f1) }, 1264 # endif 1265 }; 1266 1267 BS3REGCTX Ctx; 1268 BS3TRAPFRAME TrapFrame; 1269 unsigned i, j; 1270 uint32_t uStdExtFeatEbx = 0; 1271 bool fSupportsRorX; 1272 1273 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 1274 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 1275 fSupportsRorX = RT_BOOL(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_BMI2); 1276 1277 /* Ensure the structures are allocated before we sample the stack pointer. */ 1278 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 1279 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 1280 1281 /* 1282 * Create test context. 1283 */ 1284 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 1285 1286 /* 1287 * Do the tests twice, first with all flags set, then once again with 1288 * flags cleared. The flags are not supposed to be touched at all. 
1289 */ 1290 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 1291 for (j = 0; j < 2; j++) 1292 { 1293 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 1294 { 1295 bool const fOkay = !BS3_MODE_IS_RM_OR_V86(bMode) && s_aTests[i].fOkay && fSupportsRorX; 1296 uint8_t const bExpectXcpt = fOkay ? X86_XCPT_DB : X86_XCPT_UD; 1297 uint64_t uExpectRbx, uExpectRip; 1298 RTCCUINTXREG uMemSrc, uMemSrcExpect; 1299 Ctx.rbx.uCcXReg = RTCCUINTXREG_MAX * 1019; 1300 if (!s_aTests[i].fMemSrc) 1301 { 1302 Ctx.rdx.uCcXReg = s_aTests[i].uIn; 1303 uMemSrcExpect = uMemSrc = ~s_aTests[i].uIn; 1304 } 1305 else 1306 { 1307 Ctx.rdx.uCcXReg = ~s_aTests[i].uIn; 1308 uMemSrcExpect = uMemSrc = s_aTests[i].uIn; 1309 Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rdi, &uMemSrc); 1310 } 1311 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aTests[i].pfnWorker); 1312 uExpectRbx = fOkay ? s_aTests[i].uOut : Ctx.rbx.u; 1313 uExpectRip = Ctx.rip.u + (fOkay ? 6 + 1 : 0); 1314 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 1315 1316 if ( TrapFrame.bXcpt != bExpectXcpt 1317 || TrapFrame.Ctx.rip.u != uExpectRip 1318 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 1319 || TrapFrame.Ctx.rbx.u != uExpectRbx 1320 /* check that nothing else really changed: */ 1321 || (TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (Ctx.rflags.u16 & X86_EFL_STATUS_BITS) 1322 || TrapFrame.Ctx.rax.u != Ctx.rax.u 1323 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 1324 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 1325 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 1326 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 1327 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 1328 || uMemSrc != uMemSrcExpect 1329 ) 1330 { 1331 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTXREG_XFMT, i, s_aTests[i].uIn); 1332 if (TrapFrame.bXcpt != bExpectXcpt) 1333 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 1334 if (TrapFrame.Ctx.rip.u != uExpectRip) 1335 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 1336 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 1337 Bs3TestFailedF("Expected 
RDX = %#06RX64, got %#06RX64 (src)", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 1338 if (TrapFrame.Ctx.rbx.u != uExpectRbx) 1339 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", uExpectRbx, TrapFrame.Ctx.rbx.u); 1340 1341 if ((TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (Ctx.rflags.u16 & X86_EFL_STATUS_BITS)) 1342 Bs3TestFailedF("Expected EFLAGS = %#06RX64, got %#06RX64", 1343 Ctx.rflags.u16 & X86_EFL_STATUS_BITS, TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS); 1344 if (TrapFrame.Ctx.rax.u != Ctx.rax.u) 1345 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", Ctx.rax.u, TrapFrame.Ctx.rax.u); 1346 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 1347 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 1348 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 1349 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 1350 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 1351 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 1352 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 1353 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 1354 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 1355 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 1356 if (uMemSrc != uMemSrcExpect) 1357 Bs3TestFailedF("Expected uMemSrc = %#06RX64, got %#06RX64", (uint64_t)uMemSrcExpect, (uint64_t)uMemSrc); 1358 } 1359 } 1360 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 1361 } 1362 1363 return 0; 1364 } 1365 1366 1367 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_andn)(uint8_t bMode) 1368 { 1369 #define ANDN_CHECK_EFLAGS (uint16_t)(X86_EFL_CF | X86_EFL_ZF | X86_EFL_OF | X86_EFL_SF) 1370 #define ANDN_IGNORE_EFLAGS (uint16_t)(X86_EFL_AF | X86_EFL_PF) /* undefined, ignoring for now */ 1371 static const struct 1372 { 1373 FPFNBS3FAR pfnWorker; 1374 bool fMemSrc; 1375 uint8_t cbInstr; 1376 RTCCUINTXREG uSrc1; 1377 RTCCUINTXREG uSrc2; 1378 RTCCUINTXREG uOut; 1379 
uint16_t fEFlags; 1380 } s_aTests[] = 1381 { 1382 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1383 { BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_RBX_icebp), false, 5, // #0 1384 0, 0, /* -> */ 0, X86_EFL_ZF }, 1385 { BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_RBX_icebp), false, 5, // #1 1386 2, ~(RTCCUINTXREG)3, /* -> */ ~(RTCCUINTXREG)3, X86_EFL_SF }, 1387 { BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_FSxBX_icebp), true, 6, // #2 1388 0, 0, /* -> */ 0, X86_EFL_ZF }, 1389 { BS3_CMN_NM(bs3CpuInstr2_andn_RAX_RCX_FSxBX_icebp), true, 6, // #3 1390 2, ~(RTCCUINTXREG)3, /* -> */ ~(RTCCUINTXREG)3, X86_EFL_SF }, 1391 1392 /* 32-bit register width */ 1393 { BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_EBX_icebp), false, 5, // #4 1394 0, 0, /* -> */ 0, X86_EFL_ZF }, 1395 { BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_EBX_icebp), false, 5, // #5 1396 2, ~(RTCCUINTXREG)7, /* -> */ ~(uint32_t)7, X86_EFL_SF }, 1397 { BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_FSxBX_icebp), true, 6, // #6 1398 0, 0, /* -> */ 0, X86_EFL_ZF }, 1399 { BS3_CMN_NM(bs3CpuInstr2_andn_EAX_ECX_FSxBX_icebp), true, 6, // #7 1400 2, ~(RTCCUINTXREG)7, /* -> */ ~(uint32_t)7, X86_EFL_SF }, 1401 1402 }; 1403 1404 BS3REGCTX Ctx; 1405 BS3TRAPFRAME TrapFrame; 1406 unsigned i, j; 1407 uint32_t uStdExtFeatEbx = 0; 1408 bool fSupportsAndN; 1409 1410 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 1411 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 1412 fSupportsAndN = RT_BOOL(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_BMI1); 1413 1414 /* Ensure the structures are allocated before we sample the stack pointer. */ 1415 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 1416 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 1417 1418 /* 1419 * Create test context. 1420 */ 1421 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 1422 1423 /* 1424 * Do the tests twice, first with all flags set, then once again with 1425 * flags cleared. The flags are not supposed to be touched at all. 
1426 */ 1427 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 1428 for (j = 0; j < 2; j++) 1429 { 1430 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 1431 { 1432 bool const fOkay = !BS3_MODE_IS_RM_OR_V86(bMode) && fSupportsAndN; 1433 uint8_t const bExpectXcpt = fOkay ? X86_XCPT_DB : X86_XCPT_UD; 1434 uint64_t uExpectRax, uExpectRip; 1435 RTCCUINTXREG uMemSrc2, uMemSrc2Expect; 1436 1437 Ctx.rax.uCcXReg = RTCCUINTXREG_MAX * 1019; 1438 Ctx.rcx.uCcXReg = s_aTests[i].uSrc1; 1439 if (!s_aTests[i].fMemSrc) 1440 { 1441 Ctx.rbx.uCcXReg = s_aTests[i].uSrc2; 1442 uMemSrc2Expect = uMemSrc2 = ~s_aTests[i].uSrc2; 1443 } 1444 else 1445 { 1446 uMemSrc2Expect = uMemSrc2 = s_aTests[i].uSrc2; 1447 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc2); 1448 } 1449 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aTests[i].pfnWorker); 1450 uExpectRax = fOkay ? s_aTests[i].uOut : Ctx.rax.u; 1451 uExpectRip = Ctx.rip.u + (fOkay ? s_aTests[i].cbInstr + 1 : 0); 1452 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 1453 1454 if ( TrapFrame.bXcpt != bExpectXcpt 1455 || TrapFrame.Ctx.rip.u != uExpectRip 1456 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 1457 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 1458 || TrapFrame.Ctx.rax.u != uExpectRax 1459 /* check that nothing else really changed: */ 1460 || (TrapFrame.Ctx.rflags.u16 & ANDN_CHECK_EFLAGS) 1461 != ((fOkay ? 
s_aTests[i].fEFlags : Ctx.rflags.u16) & ANDN_CHECK_EFLAGS) 1462 || (TrapFrame.Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS) 1463 != (Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS) 1464 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 1465 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 1466 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 1467 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 1468 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 1469 || uMemSrc2 != uMemSrc2Expect 1470 ) 1471 { 1472 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTXREG_XFMT ", %#" RTCCUINTXREG_XFMT, i, s_aTests[i].uSrc1, s_aTests[i].uSrc2); 1473 if (TrapFrame.bXcpt != bExpectXcpt) 1474 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 1475 if (TrapFrame.Ctx.rip.u != uExpectRip) 1476 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 1477 if (TrapFrame.Ctx.rax.u != uExpectRax) 1478 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", uExpectRax, TrapFrame.Ctx.rax.u); 1479 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 1480 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 1481 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 1482 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 1483 if ( (TrapFrame.Ctx.rflags.u16 & ANDN_CHECK_EFLAGS) 1484 != ((fOkay ? s_aTests[i].fEFlags : Ctx.rflags.u16) & ANDN_CHECK_EFLAGS)) 1485 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (output)", 1486 (fOkay ? 
s_aTests[i].fEFlags : Ctx.rflags.u16) & ANDN_CHECK_EFLAGS, TrapFrame.Ctx.rflags.u16 & ANDN_CHECK_EFLAGS); 1487 if ( (TrapFrame.Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS) 1488 != (Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS)) 1489 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (immutable)", 1490 Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS, 1491 TrapFrame.Ctx.rflags.u16 & ~(ANDN_CHECK_EFLAGS | ANDN_IGNORE_EFLAGS) & X86_EFL_STATUS_BITS); 1492 1493 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 1494 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64 (src)", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 1495 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 1496 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 1497 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 1498 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 1499 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 1500 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 1501 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 1502 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 1503 if (uMemSrc2 != uMemSrc2Expect) 1504 Bs3TestFailedF("Expected uMemSrc2 = %#06RX64, got %#06RX64", (uint64_t)uMemSrc2Expect, (uint64_t)uMemSrc2); 1505 } 1506 } 1507 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 1508 } 1509 1510 return 0; 1511 } 1512 1513 /* 1514 * For testing BEXTR, SHLX SARX & SHRX. 
1515 */ 1516 typedef struct BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T 1517 { 1518 RTCCUINTXREG uSrc1; 1519 RTCCUINTXREG uSrc2; 1520 RTCCUINTXREG uOut; 1521 uint16_t fEflOut; 1522 } BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T; 1523 1524 typedef struct BS3CPUINSTR2_TEST_Gy_Ey_By_T 1525 { 1526 FPFNBS3FAR pfnWorker; 1527 bool fMemSrc; 1528 uint8_t cbInstr; 1529 uint8_t cSubTests; 1530 BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const *paSubTests; 1531 } BS3CPUINSTR2_TEST_Gy_Ey_By_T; 1532 1533 static uint8_t bs3CpuInstr2_Common_Gy_Ey_By(uint8_t bMode, BS3CPUINSTR2_TEST_Gy_Ey_By_T const *paTests, unsigned cTests, 1534 uint32_t fStdExtFeatEbx, uint16_t fEflCheck, uint16_t fEflIgnore) 1535 { 1536 BS3REGCTX Ctx; 1537 BS3TRAPFRAME TrapFrame; 1538 unsigned i, j, k; 1539 uint32_t uStdExtFeatEbx = 0; 1540 bool fSupportsInstr; 1541 1542 fEflCheck &= ~fEflIgnore; 1543 1544 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 1545 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 1546 fSupportsInstr = RT_BOOL(uStdExtFeatEbx & fStdExtFeatEbx); 1547 1548 /* Ensure the structures are allocated before we sample the stack pointer. */ 1549 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 1550 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 1551 1552 /* 1553 * Create test context. 1554 */ 1555 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 1556 1557 /* 1558 * Do the tests twice, first with all flags set, then once again with 1559 * flags cleared. The flags are not supposed to be touched at all. 1560 */ 1561 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 1562 for (j = 0; j < 2; j++) 1563 { 1564 for (i = 0; i < cTests; i++) 1565 { 1566 for (k = 0; k < paTests[i].cSubTests; k++) 1567 { 1568 bool const fOkay = !BS3_MODE_IS_RM_OR_V86(bMode) && fSupportsInstr; 1569 uint8_t const bExpectXcpt = fOkay ? 
X86_XCPT_DB : X86_XCPT_UD; 1570 uint64_t uExpectRax, uExpectRip; 1571 RTCCUINTXREG uMemSrc1, uMemSrc1Expect; 1572 1573 Ctx.rax.uCcXReg = RTCCUINTXREG_MAX * 1019; 1574 Ctx.rcx.uCcXReg = paTests[i].paSubTests[k].uSrc2; 1575 if (!paTests[i].fMemSrc) 1576 { 1577 Ctx.rbx.uCcXReg = paTests[i].paSubTests[k].uSrc1; 1578 uMemSrc1Expect = uMemSrc1 = ~paTests[i].paSubTests[k].uSrc1; 1579 } 1580 else 1581 { 1582 uMemSrc1Expect = uMemSrc1 = paTests[i].paSubTests[k].uSrc1; 1583 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc1); 1584 } 1585 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paTests[i].pfnWorker); 1586 uExpectRax = fOkay ? paTests[i].paSubTests[k].uOut : Ctx.rax.u; 1587 uExpectRip = Ctx.rip.u + (fOkay ? paTests[i].cbInstr + 1 : 0); 1588 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 1589 1590 if ( TrapFrame.bXcpt != bExpectXcpt 1591 || TrapFrame.Ctx.rip.u != uExpectRip 1592 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 1593 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 1594 || TrapFrame.Ctx.rax.u != uExpectRax 1595 /* check that nothing else really changed: */ 1596 || (TrapFrame.Ctx.rflags.u16 & fEflCheck) 1597 != ((fOkay ? 
paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck) 1598 || (TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 1599 != (Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 1600 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 1601 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 1602 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 1603 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 1604 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 1605 || uMemSrc1 != uMemSrc1Expect 1606 ) 1607 { 1608 Bs3TestFailedF("test #%i/%i failed: input %#" RTCCUINTXREG_XFMT ", %#" RTCCUINTXREG_XFMT, 1609 i, k, paTests[i].paSubTests[k].uSrc1, paTests[i].paSubTests[k].uSrc2); 1610 if (TrapFrame.bXcpt != bExpectXcpt) 1611 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 1612 if (TrapFrame.Ctx.rip.u != uExpectRip) 1613 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 1614 if (TrapFrame.Ctx.rax.u != uExpectRax) 1615 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", uExpectRax, TrapFrame.Ctx.rax.u); 1616 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 1617 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 1618 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 1619 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 1620 if ( (TrapFrame.Ctx.rflags.u16 & fEflCheck) 1621 != ((fOkay ? paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck)) 1622 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (output)", 1623 (fOkay ? 
paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck, 1624 TrapFrame.Ctx.rflags.u16 & fEflCheck); 1625 if ( (TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 1626 != (Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS)) 1627 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (immutable)", 1628 Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS, 1629 TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS); 1630 1631 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 1632 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 1633 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 1634 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 1635 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 1636 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 1637 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 1638 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 1639 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 1640 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 1641 if (uMemSrc1 != uMemSrc1Expect) 1642 Bs3TestFailedF("Expected uMemSrc1 = %#06RX64, got %#06RX64", (uint64_t)uMemSrc1Expect, (uint64_t)uMemSrc1); 1643 } 1644 } 1645 } 1646 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 1647 } 1648 1649 return 0; 1650 } 1651 1652 1653 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_bextr)(uint8_t bMode) 1654 { 1655 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1656 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1657 { 1658 { 0, RT_MAKE_U16(0, 0), /* -> */ 0, X86_EFL_ZF }, 1659 { 0, RT_MAKE_U16(16, 33), /* -> */ 0, X86_EFL_ZF }, 1660 { ~(RTCCUINTXREG)7, RT_MAKE_U16(2, 4), /* -> */ 0xe, 0}, 1661 { ~(RTCCUINTXREG)7, RT_MAKE_U16(40, 8), /* -> */ ARCH_BITS == 64 ? 0xff : 0x00, ARCH_BITS == 64 ? 
0 : X86_EFL_ZF }, 1662 }; 1663 1664 /* 32-bit register width */ 1665 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1666 { 1667 { 0, RT_MAKE_U16(0, 0), /* -> */ 0, X86_EFL_ZF }, 1668 { 0, RT_MAKE_U16(16, 18), /* -> */ 0, X86_EFL_ZF }, 1669 { ~(RTCCUINTXREG)7, RT_MAKE_U16(2, 4), /* -> */ 0xe, 0 }, 1670 { ~(RTCCUINTXREG)7, RT_MAKE_U16(24, 8), /* -> */ 0xff, 0 }, 1671 { ~(RTCCUINTXREG)7, RT_MAKE_U16(31, 9), /* -> */ 1, 0 }, 1672 { ~(RTCCUINTXREG)7, RT_MAKE_U16(42, 8), /* -> */ 0, X86_EFL_ZF }, 1673 }; 1674 1675 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1676 { 1677 { BS3_CMN_NM(bs3CpuInstr2_bextr_RAX_RBX_RCX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1678 { BS3_CMN_NM(bs3CpuInstr2_bextr_RAX_FSxBX_RCX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1679 { BS3_CMN_NM(bs3CpuInstr2_bextr_EAX_EBX_ECX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1680 { BS3_CMN_NM(bs3CpuInstr2_bextr_EAX_FSxBX_ECX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1681 }; 1682 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 1683 X86_EFL_STATUS_BITS, X86_EFL_AF | X86_EFL_SF | X86_EFL_PF); 1684 } 1685 1686 1687 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_bzhi)(uint8_t bMode) 1688 { 1689 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1690 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1691 { 1692 { 0, 0, /* -> */ 0, X86_EFL_ZF }, 1693 { 0, ~(RTCCUINTXREG)255, /* -> */ 0, X86_EFL_ZF }, 1694 { 0, 64, /* -> */ 0, X86_EFL_ZF | X86_EFL_CF }, 1695 { ~(RTCCUINTXREG)0, 64, /* -> */ ~(RTCCUINTXREG)0, X86_EFL_CF | X86_EFL_SF }, 1696 { ~(RTCCUINTXREG)0, 63, 1697 /* -> */ ARCH_BITS >= 64 ? ~(RTCCUINTXREG)0 >> 1 : ~(RTCCUINTXREG)0, ARCH_BITS >= 64 ? 
0 : X86_EFL_CF | X86_EFL_SF }, 1698 { ~(RTCCUINTXREG)0 << 31 | UINT32_C(0x63849607), 24, /* -> */ UINT32_C(0x00849607), 0 }, 1699 { ~(RTCCUINTXREG)0 << 31 | UINT32_C(0x63849607), 33, 1700 /* -> */ ARCH_BITS >= 64 ? UINT64_C(0x1e3849607) : UINT32_C(0xe3849607), ARCH_BITS >= 64 ? 0 : X86_EFL_CF | X86_EFL_SF }, 1701 }; 1702 1703 /* 32-bit register width */ 1704 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1705 { 1706 { 0, 0, /* -> */ 0, X86_EFL_ZF }, 1707 { 0, ~(RTCCUINTXREG)255, /* -> */ 0, X86_EFL_ZF }, 1708 { 0, 32, /* -> */ 0, X86_EFL_ZF | X86_EFL_CF }, 1709 { ~(RTCCUINTXREG)0, 32, /* -> */ UINT32_MAX, X86_EFL_CF | X86_EFL_SF }, 1710 { ~(RTCCUINTXREG)0, 31, /* -> */ UINT32_MAX >> 1, 0 }, 1711 { UINT32_C(0x1230fd34), 15, /* -> */ UINT32_C(0x00007d34), 0 }, 1712 }; 1713 1714 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1715 { 1716 { BS3_CMN_NM(bs3CpuInstr2_bzhi_RAX_RBX_RCX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1717 { BS3_CMN_NM(bs3CpuInstr2_bzhi_RAX_FSxBX_RCX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1718 { BS3_CMN_NM(bs3CpuInstr2_bzhi_EAX_EBX_ECX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1719 { BS3_CMN_NM(bs3CpuInstr2_bzhi_EAX_FSxBX_ECX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1720 }; 1721 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI2, 1722 X86_EFL_STATUS_BITS, 0); 1723 } 1724 1725 1726 /** @note This is a Gy_By_Ey format instruction, so we're switching the two 1727 * source registers around when calling bs3CpuInstr2_Common_Gy_Ey_By. 1728 * Sorry for the confusion, but it saves some unnecessary code dup. 
*/ 1729 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_pdep)(uint8_t bMode) 1730 { 1731 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1732 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1733 { /* Mask (RBX/[FS:xBX]), source=RCX */ 1734 { 0, 0, /* -> */ 0, 0 }, 1735 { 0, ~(RTCCUINTXREG)0, /* -> */ 0, 0 }, 1736 { ~(RTCCUINTXREG)0, 0, /* -> */ 0, 0 }, 1737 { ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(RTCCUINTXREG)0, 0 }, 1738 #if ARCH_BITS >= 64 1739 { UINT64_C(0x3586049947589201), ~(RTCCUINTXREG)0, /* -> */ UINT64_C(0x3586049947589201), 0 }, 1740 { UINT64_C(0x3586049947589201), ~(RTCCUINTXREG)7, /* -> */ UINT64_C(0x3586049947588000), 0 }, 1741 #endif 1742 { UINT32_C(0x47589201), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x47589201), 0 }, 1743 { UINT32_C(0x47589201), ~(RTCCUINTXREG)7, /* -> */ UINT32_C(0x47588000), 0 }, 1744 }; 1745 1746 /* 32-bit register width */ 1747 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1748 { /* Mask (EBX/[FS:xBX]), source=ECX */ 1749 { 0, 0, /* -> */ 0, 0 }, 1750 { 0, ~(RTCCUINTXREG)0, /* -> */ 0, 0 }, 1751 { ~(RTCCUINTXREG)0, 0, /* -> */ 0, 0 }, 1752 { ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ UINT32_MAX, 0 }, 1753 { UINT32_C(0x01010101), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x01010101), 0 }, 1754 { UINT32_C(0x01010101), ~(RTCCUINTXREG)3, /* -> */ UINT32_C(0x01010000), 0 }, 1755 { UINT32_C(0x47589201), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x47589201), 0 }, 1756 }; 1757 1758 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1759 { 1760 { BS3_CMN_NM(bs3CpuInstr2_pdep_RAX_RCX_RBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1761 { BS3_CMN_NM(bs3CpuInstr2_pdep_RAX_RCX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1762 { BS3_CMN_NM(bs3CpuInstr2_pdep_EAX_ECX_EBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1763 { BS3_CMN_NM(bs3CpuInstr2_pdep_EAX_ECX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1764 }; 1765 
return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI2, 0, 0); 1766 } 1767 1768 1769 /** @note Same note as for bs3CpuInstr2_pdep */ 1770 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_pext)(uint8_t bMode) 1771 { 1772 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1773 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1774 { /* Mask (RBX/[FS:xBX]), source=RCX */ 1775 { 0, 0, /* -> */ 0, 0 }, 1776 { 0, ~(RTCCUINTXREG)0, /* -> */ 0, 0 }, 1777 { ~(RTCCUINTXREG)0, 0, /* -> */ 0, 0 }, 1778 { ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(RTCCUINTXREG)0, 0 }, 1779 #if ARCH_BITS >= 64 1780 { UINT64_C(0x3586049947589201), ~(RTCCUINTXREG)0, /* -> */ UINT64_C(0x00000000007fffff), 0 }, 1781 { UINT64_C(0x3586049947589201), ~(RTCCUINTXREG)7, /* -> */ UINT64_C(0x00000000007ffffe), 0 }, 1782 #endif 1783 { UINT32_C(0x47589201), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x000007ff), 0 }, 1784 { UINT32_C(0x47589201), ~(RTCCUINTXREG)7, /* -> */ UINT32_C(0x000007fe), 0 }, 1785 }; 1786 1787 /* 32-bit register width */ 1788 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1789 { /* Mask (EBX/[FS:xBX]), source=ECX */ 1790 { 0, 0, /* -> */ 0, 0 }, 1791 { 0, ~(RTCCUINTXREG)0, /* -> */ 0, 0 }, 1792 { ~(RTCCUINTXREG)0, 0, /* -> */ 0, 0 }, 1793 { ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ UINT32_MAX, 0 }, 1794 { UINT32_C(0x01010101), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x0000000f), 0 }, 1795 { UINT32_C(0x01010101), ~(RTCCUINTXREG)3, /* -> */ UINT32_C(0x0000000e), 0 }, 1796 { UINT32_C(0x47589201), ~(RTCCUINTXREG)0, /* -> */ UINT32_C(0x000007ff), 0 }, 1797 { UINT32_C(0x47589201), ~(RTCCUINTXREG)7, /* -> */ UINT32_C(0x000007fe), 0 }, 1798 }; 1799 1800 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1801 { 1802 { BS3_CMN_NM(bs3CpuInstr2_pext_RAX_RCX_RBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1803 { BS3_CMN_NM(bs3CpuInstr2_pext_RAX_RCX_FSxBX_icebp), true, 6, 
RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1804 { BS3_CMN_NM(bs3CpuInstr2_pext_EAX_ECX_EBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1805 { BS3_CMN_NM(bs3CpuInstr2_pext_EAX_ECX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1806 }; 1807 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI2, 0, 0); 1808 } 1809 1810 1811 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_shlx)(uint8_t bMode) 1812 { 1813 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1814 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1815 { 1816 { 0, 0, /* -> */ 0, 0 }, 1817 { 0, ~(RTCCUINTXREG)3, /* -> */ 0, 0 }, 1818 { ~(RTCCUINTXREG)7, 8, /* -> */ ~(RTCCUINTXREG)0x7ff, 0}, 1819 { ~(RTCCUINTXREG)7, 40, /* -> */ ~(RTCCUINTXREG)7 << (ARCH_BITS == 64 ? 40 : 8), 0 }, 1820 { ~(RTCCUINTXREG)7, 72, /* -> */ ~(RTCCUINTXREG)7 << 8, 0 }, 1821 }; 1822 1823 /* 32-bit register width */ 1824 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1825 { 1826 { 0, 0, /* -> */ 0, 0 }, 1827 { 0, ~(RTCCUINTXREG)9, /* -> */ 0, 0 }, 1828 { ~(RTCCUINTXREG)7, 8, /* -> */ UINT32_C(0xfffff800), 0 }, 1829 { ~(RTCCUINTXREG)7, 8, /* -> */ UINT32_C(0xfffff800), 0 }, 1830 }; 1831 1832 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1833 { 1834 { BS3_CMN_NM(bs3CpuInstr2_shlx_RAX_RBX_RCX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1835 { BS3_CMN_NM(bs3CpuInstr2_shlx_RAX_FSxBX_RCX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1836 { BS3_CMN_NM(bs3CpuInstr2_shlx_EAX_EBX_ECX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1837 { BS3_CMN_NM(bs3CpuInstr2_shlx_EAX_FSxBX_ECX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1838 }; 1839 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 1840 0, 0); 1841 } 1842 1843 1844 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_sarx)(uint8_t 
bMode) 1845 { 1846 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1847 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1848 { 1849 { 0, 0, /* -> */ 0, 0 }, 1850 { 0, ~(RTCCUINTXREG)3, /* -> */ 0, 0 }, 1851 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 1), RTCCINTXREG_BITS - 1, /* -> */ ~(RTCCUINTXREG)0, 0 }, 1852 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 1), RTCCINTXREG_BITS - 1 + 64, /* -> */ ~(RTCCUINTXREG)0, 0 }, 1853 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 2), RTCCINTXREG_BITS - 3, /* -> */ 2, 0 }, 1854 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 2), RTCCINTXREG_BITS - 3 + 64, /* -> */ 2, 0 }, 1855 }; 1856 1857 /* 32-bit register width */ 1858 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1859 { 1860 { 0, 0, /* -> */ 0, 0 }, 1861 { 0, ~(RTCCUINTXREG)9, /* -> */ 0, 0 }, 1862 { ~(RTCCUINTXREG)UINT32_C(0x7fffffff), 24, /* -> */ UINT32_C(0xffffff80), 0 }, 1863 { ~(RTCCUINTXREG)UINT32_C(0x7fffffff), 24+32, /* -> */ UINT32_C(0xffffff80), 0 }, 1864 { ~(RTCCUINTXREG)UINT32_C(0xbfffffff), 24, /* -> */ UINT32_C(0x40), 0 }, 1865 { ~(RTCCUINTXREG)UINT32_C(0xbfffffff), 24+32, /* -> */ UINT32_C(0x40), 0 }, 1866 }; 1867 1868 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1869 { 1870 { BS3_CMN_NM(bs3CpuInstr2_sarx_RAX_RBX_RCX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1871 { BS3_CMN_NM(bs3CpuInstr2_sarx_RAX_FSxBX_RCX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1872 { BS3_CMN_NM(bs3CpuInstr2_sarx_EAX_EBX_ECX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1873 { BS3_CMN_NM(bs3CpuInstr2_sarx_EAX_FSxBX_ECX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1874 }; 1875 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 1876 0, 0); 1877 } 1878 1879 1880 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_shrx)(uint8_t bMode) 1881 { 1882 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 1883 static 
BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests64[] = 1884 { 1885 { 0, 0, /* -> */ 0, 0 }, 1886 { 0, ~(RTCCUINTXREG)3, /* -> */ 0, 0 }, 1887 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 1), RTCCINTXREG_BITS - 1, /* -> */ 1, 0 }, 1888 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 1), RTCCINTXREG_BITS - 1 + 64, /* -> */ 1, 0 }, 1889 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 2), RTCCINTXREG_BITS - 3, /* -> */ 2, 0 }, 1890 { (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 2), RTCCINTXREG_BITS - 3 + 64, /* -> */ 2, 0 }, 1891 }; 1892 1893 /* 32-bit register width */ 1894 static BS3CPUINSTR2_SUBTEST_Gy_Ey_By_T const s_aSubTests32[] = 1895 { 1896 { 0, 0, /* -> */ 0, 0 }, 1897 { 0, ~(RTCCUINTXREG)9, /* -> */ 0, 0 }, 1898 { ~(RTCCUINTXREG)UINT32_C(0x7fffffff), 24, /* -> */ UINT32_C(0x80), 0 }, 1899 { ~(RTCCUINTXREG)UINT32_C(0x7fffffff), 24+32, /* -> */ UINT32_C(0x80), 0 }, 1900 { ~(RTCCUINTXREG)UINT32_C(0xbfffffff), 24, /* -> */ UINT32_C(0x40), 0 }, 1901 { ~(RTCCUINTXREG)UINT32_C(0xbfffffff), 24+32, /* -> */ UINT32_C(0x40), 0 }, 1902 }; 1903 1904 static BS3CPUINSTR2_TEST_Gy_Ey_By_T const s_aTests[] = 1905 { 1906 { BS3_CMN_NM(bs3CpuInstr2_shrx_RAX_RBX_RCX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1907 { BS3_CMN_NM(bs3CpuInstr2_shrx_RAX_FSxBX_RCX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 1908 { BS3_CMN_NM(bs3CpuInstr2_shrx_EAX_EBX_ECX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1909 { BS3_CMN_NM(bs3CpuInstr2_shrx_EAX_FSxBX_ECX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 1910 }; 1911 return bs3CpuInstr2_Common_Gy_Ey_By(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 1912 0, 0); 1913 } 1914 1915 1916 /* 1917 * For testing BLSR, BLSMSK, and BLSI. 
1918 */ 1919 typedef struct BS3CPUINSTR2_SUBTEST_By_Ey_T 1920 { 1921 RTCCUINTXREG uSrc; 1922 RTCCUINTXREG uDst; 1923 uint16_t fEflOut; 1924 } BS3CPUINSTR2_SUBTEST_By_Ey_T; 1925 1926 typedef struct BS3CPUINSTR2_TEST_By_Ey_T 1927 { 1928 FPFNBS3FAR pfnWorker; 1929 bool fMemSrc; 1930 uint8_t cbInstr; 1931 uint8_t cSubTests; 1932 BS3CPUINSTR2_SUBTEST_By_Ey_T const *paSubTests; 1933 } BS3CPUINSTR2_TEST_By_Ey_T; 1934 1935 static uint8_t bs3CpuInstr2_Common_By_Ey(uint8_t bMode, BS3CPUINSTR2_TEST_By_Ey_T const *paTests, unsigned cTests, 1936 uint32_t fStdExtFeatEbx, uint16_t fEflCheck, uint16_t fEflIgnore) 1937 { 1938 BS3REGCTX Ctx; 1939 BS3TRAPFRAME TrapFrame; 1940 unsigned i, j, k; 1941 uint32_t uStdExtFeatEbx = 0; 1942 bool fSupportsInstr; 1943 1944 fEflCheck &= ~fEflIgnore; 1945 1946 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 1947 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 1948 fSupportsInstr = RT_BOOL(uStdExtFeatEbx & fStdExtFeatEbx); 1949 1950 /* Ensure the structures are allocated before we sample the stack pointer. */ 1951 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 1952 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 1953 1954 /* 1955 * Create test context. 1956 */ 1957 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 1958 1959 /* 1960 * Do the tests twice, first with all flags set, then once again with 1961 * flags cleared. The flags are not supposed to be touched at all. 1962 */ 1963 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 1964 for (j = 0; j < 2; j++) 1965 { 1966 for (i = 0; i < cTests; i++) 1967 { 1968 for (k = 0; k < paTests[i].cSubTests; k++) 1969 { 1970 bool const fOkay = !BS3_MODE_IS_RM_OR_V86(bMode) && fSupportsInstr; 1971 uint8_t const bExpectXcpt = fOkay ? 
X86_XCPT_DB : X86_XCPT_UD; 1972 uint64_t uExpectRax, uExpectRip; 1973 RTCCUINTXREG uMemSrc, uMemSrcExpect; 1974 1975 Ctx.rax.uCcXReg = ~paTests[i].paSubTests[k].uSrc ^ 0x593e7591; 1976 if (!paTests[i].fMemSrc) 1977 { 1978 Ctx.rbx.uCcXReg = paTests[i].paSubTests[k].uSrc; 1979 uMemSrcExpect = uMemSrc = ~paTests[i].paSubTests[k].uSrc; 1980 } 1981 else 1982 { 1983 uMemSrcExpect = uMemSrc = paTests[i].paSubTests[k].uSrc; 1984 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc); 1985 } 1986 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paTests[i].pfnWorker); 1987 uExpectRax = fOkay ? paTests[i].paSubTests[k].uDst : Ctx.rax.u; 1988 uExpectRip = Ctx.rip.u + (fOkay ? paTests[i].cbInstr + 1 : 0); 1989 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 1990 1991 if ( TrapFrame.bXcpt != bExpectXcpt 1992 || TrapFrame.Ctx.rip.u != uExpectRip 1993 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 1994 || TrapFrame.Ctx.rax.u != uExpectRax 1995 /* check that nothing else really changed: */ 1996 || (TrapFrame.Ctx.rflags.u16 & fEflCheck) 1997 != ((fOkay ? 
paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck) 1998 || (TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 1999 != (Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 2000 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 2001 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 2002 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 2003 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 2004 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 2005 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 2006 || uMemSrc != uMemSrcExpect 2007 ) 2008 { 2009 Bs3TestFailedF("test #%i/%i failed: input %#" RTCCUINTXREG_XFMT, 2010 i, k, paTests[i].paSubTests[k].uSrc); 2011 if (TrapFrame.bXcpt != bExpectXcpt) 2012 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 2013 if (TrapFrame.Ctx.rip.u != uExpectRip) 2014 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 2015 if (TrapFrame.Ctx.rax.u != uExpectRax) 2016 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", uExpectRax, TrapFrame.Ctx.rax.u); 2017 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 2018 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 2019 if ( (TrapFrame.Ctx.rflags.u16 & fEflCheck) 2020 != ((fOkay ? paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck)) 2021 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (output)", 2022 (fOkay ? 
paTests[i].paSubTests[k].fEflOut : Ctx.rflags.u16) & fEflCheck, 2023 TrapFrame.Ctx.rflags.u16 & fEflCheck); 2024 if ( (TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS) 2025 != (Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS)) 2026 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (immutable)", 2027 Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS, 2028 TrapFrame.Ctx.rflags.u16 & ~(fEflCheck | fEflIgnore) & X86_EFL_STATUS_BITS); 2029 2030 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 2031 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 2032 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 2033 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 2034 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 2035 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 2036 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 2037 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 2038 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 2039 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 2040 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 2041 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 2042 if (uMemSrc != uMemSrcExpect) 2043 Bs3TestFailedF("Expected uMemSrc = %#06RX64, got %#06RX64", (uint64_t)uMemSrcExpect, (uint64_t)uMemSrc); 2044 } 2045 } 2046 } 2047 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 2048 } 2049 2050 return 0; 2051 } 2052 2053 2054 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_blsr)(uint8_t bMode) 2055 { 2056 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 2057 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests64[] = 2058 { 2059 { 0, /* -> */ 0, X86_EFL_ZF | X86_EFL_CF }, 2060 { 1, /* -> */ 0, X86_EFL_ZF }, 2061 { 2, /* -> */ 0, X86_EFL_ZF }, 2062 { 3, /* -> */ 2, 0 }, 2063 { 5, /* -> */ 4, 0 }, 2064 { 6, /* -> */ 4, 
0 }, 2065 { 7, /* -> */ 6, 0 }, 2066 { 9, /* -> */ 8, 0 }, 2067 { 10, /* -> */ 8, 0 }, 2068 { ~(RTCCUINTXREG)1, /* -> */ ~(RTCCUINTXREG)3, X86_EFL_SF }, 2069 { (RTCCUINTXREG)3 << (RTCCINTXREG_BITS - 2), /* -> */ (RTCCUINTXREG)2 << (RTCCINTXREG_BITS - 2), X86_EFL_SF }, 2070 }; 2071 2072 /* 32-bit register width */ 2073 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests32[] = 2074 { 2075 { 0, /* -> */ 0, X86_EFL_ZF | X86_EFL_CF }, 2076 { 1, /* -> */ 0, X86_EFL_ZF }, 2077 { ~(RTCCUINTXREG)1, /* -> */ UINT32_C(0xfffffffc), X86_EFL_SF }, 2078 { ~(RTCCUINTXREG)0 << 30, /* -> */ UINT32_C(0x80000000), X86_EFL_SF }, 2079 }; 2080 2081 static BS3CPUINSTR2_TEST_By_Ey_T const s_aTests[] = 2082 { 2083 { BS3_CMN_NM(bs3CpuInstr2_blsr_RAX_RBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2084 { BS3_CMN_NM(bs3CpuInstr2_blsr_RAX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2085 { BS3_CMN_NM(bs3CpuInstr2_blsr_EAX_EBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2086 { BS3_CMN_NM(bs3CpuInstr2_blsr_EAX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2087 }; 2088 return bs3CpuInstr2_Common_By_Ey(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 2089 X86_EFL_STATUS_BITS, 0); 2090 } 2091 2092 2093 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_blsmsk)(uint8_t bMode) 2094 { 2095 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 2096 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests64[] = 2097 { 2098 { 0, /* -> */ ~(RTCCUINTXREG)0, X86_EFL_CF | X86_EFL_SF }, 2099 { 1, /* -> */ 1, 0 }, 2100 { ~(RTCCUINTXREG)1, /* -> */ 3, 0 }, 2101 { (RTCCUINTXREG)3 << (RTCCINTXREG_BITS - 2), /* -> */ ~((RTCCUINTXREG)2 << (RTCCINTXREG_BITS - 2)), 0 }, 2102 }; 2103 2104 /* 32-bit register width */ 2105 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests32[] = 2106 { 2107 { 0, /* -> */ UINT32_MAX, X86_EFL_CF | X86_EFL_SF }, 2108 { 1, /* -> */ 1, 0 }, 2109 { ~(RTCCUINTXREG)1, /* -> */ 3, 0 
}, 2110 { ~(RTCCUINTXREG)0 << 30, /* -> */ UINT32_C(0x7fffffff), 0}, 2111 }; 2112 2113 static BS3CPUINSTR2_TEST_By_Ey_T const s_aTests[] = 2114 { 2115 { BS3_CMN_NM(bs3CpuInstr2_blsmsk_RAX_RBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2116 { BS3_CMN_NM(bs3CpuInstr2_blsmsk_RAX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2117 { BS3_CMN_NM(bs3CpuInstr2_blsmsk_EAX_EBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2118 { BS3_CMN_NM(bs3CpuInstr2_blsmsk_EAX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2119 }; 2120 return bs3CpuInstr2_Common_By_Ey(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 2121 X86_EFL_STATUS_BITS, 0); 2122 } 2123 2124 2125 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_blsi)(uint8_t bMode) 2126 { 2127 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 2128 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests64[] = 2129 { 2130 { 0, /* -> */ 0, X86_EFL_ZF }, 2131 { 1, /* -> */ 1, X86_EFL_CF }, 2132 { ~(RTCCUINTXREG)1, /* -> */ 2, X86_EFL_CF }, 2133 { (RTCCUINTXREG)3 << (RTCCINTXREG_BITS - 2), /* -> */ (RTCCUINTXREG)1 << (RTCCINTXREG_BITS - 2), X86_EFL_CF }, 2134 }; 2135 2136 /* 32-bit register width */ 2137 static BS3CPUINSTR2_SUBTEST_By_Ey_T const s_aSubTests32[] = 2138 { 2139 { 0, /* -> */ 0, X86_EFL_ZF }, 2140 { 1, /* -> */ 1, X86_EFL_CF }, 2141 { ~(RTCCUINTXREG)1, /* -> */ 2, X86_EFL_CF }, 2142 { ~(RTCCUINTXREG)0 << 30, /* -> */ UINT32_C(0x40000000), X86_EFL_CF }, 2143 }; 2144 2145 static BS3CPUINSTR2_TEST_By_Ey_T const s_aTests[] = 2146 { 2147 { BS3_CMN_NM(bs3CpuInstr2_blsi_RAX_RBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2148 { BS3_CMN_NM(bs3CpuInstr2_blsi_RAX_FSxBX_icebp), true, 6, RT_ELEMENTS(s_aSubTests64), s_aSubTests64 }, 2149 { BS3_CMN_NM(bs3CpuInstr2_blsi_EAX_EBX_icebp), false, 5, RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2150 { BS3_CMN_NM(bs3CpuInstr2_blsi_EAX_FSxBX_icebp), true, 6, 
RT_ELEMENTS(s_aSubTests32), s_aSubTests32 }, 2151 }; 2152 return bs3CpuInstr2_Common_By_Ey(bMode, s_aTests, RT_ELEMENTS(s_aTests), X86_CPUID_STEXT_FEATURE_EBX_BMI1, 2153 X86_EFL_STATUS_BITS, 0); 2154 } 2155 2156 2157 /* 2158 * MULX (BMI2) - destination registers (/r & vvvv) = r/m * rDX 2159 */ 2160 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_mulx)(uint8_t bMode) 2161 { 2162 static const struct 2163 { 2164 FPFNBS3FAR pfnWorker; 2165 bool fMemSrc; 2166 bool fSameDst; 2167 uint8_t cbInstr; 2168 RTCCUINTXREG uSrc1; 2169 RTCCUINTXREG uSrc2; 2170 RTCCUINTXREG uDst1; 2171 RTCCUINTXREG uDst2; 2172 } s_aTests[] = 2173 { 2174 /* 64 bits register width (32 bits in 32- and 16-bit modes): */ 2175 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp), false, false, 5, // #0 2176 0, 0, /* -> */ 0, 0 }, 2177 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp), false, false, 5, // #1 2178 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(RTCCUINTXREG)1, 1 }, 2179 { BS3_CMN_NM(bs3CpuInstr2_mulx_RCX_RCX_RBX_RDX_icebp), false, true, 5, // #2 2180 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(RTCCUINTXREG)1, ~(RTCCUINTXREG)1 }, 2181 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp), false, false, 5, // #3 2182 2, 2, /* -> */ 0, 4 }, 2183 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp), false, false, 5, // #4 2184 ~(RTCCUINTXREG)0, 42, /* -> */ 0x29, ~(RTCCUINTXREG)41 }, 2185 2186 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp), true, false, 6, // #5 2187 0, 0, /* -> */ 0, 0 }, 2188 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp), true, false, 6, // #6 2189 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(RTCCUINTXREG)1, 1 }, 2190 { BS3_CMN_NM(bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp), true, false, 6, // #7 2191 ~(RTCCUINTXREG)0, 42, /* -> */ 0x29, ~(RTCCUINTXREG)41 }, 2192 2193 /* 32-bit register width */ 2194 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp), false, false, 5, // #8 2195 0, 0, /* -> */ 0, 0 }, 2196 { 
BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp), false, false, 5, // #9 2197 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(uint32_t)1, 1 }, 2198 { BS3_CMN_NM(bs3CpuInstr2_mulx_ECX_ECX_EBX_EDX_icebp), false, true, 5, // #10 2199 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(uint32_t)1, ~(uint32_t)1 }, 2200 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp), false, false, 5, // #11 2201 2, 2, /* -> */ 0, 4 }, 2202 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp), false, false, 5, // #12 2203 ~(RTCCUINTXREG)0, 42, /* -> */ 0x29, ~(uint32_t)41 }, 2204 2205 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp), true, false, 6, // #13 2206 0, 0, /* -> */ 0, 0 }, 2207 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp), true, false, 6, // #14 2208 ~(RTCCUINTXREG)0, ~(RTCCUINTXREG)0, /* -> */ ~(uint32_t)1, 1 }, 2209 { BS3_CMN_NM(bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp), true, false, 6, // #15 2210 ~(RTCCUINTXREG)0, 42, /* -> */ 0x29, ~(uint32_t)41 }, 2211 }; 2212 2213 BS3REGCTX Ctx; 2214 BS3TRAPFRAME TrapFrame; 2215 unsigned i, j; 2216 uint32_t uStdExtFeatEbx = 0; 2217 bool fSupportsAndN; 2218 2219 if (g_uBs3CpuDetected & BS3CPU_F_CPUID) 2220 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &uStdExtFeatEbx, NULL, NULL); 2221 fSupportsAndN = RT_BOOL(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_BMI2); 2222 2223 /* Ensure the structures are allocated before we sample the stack pointer. */ 2224 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 2225 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 2226 2227 /* 2228 * Create test context. 2229 */ 2230 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 2231 2232 /* 2233 * Do the tests twice, first with all flags set, then once again with 2234 * flags cleared. The flags are not supposed to be touched at all. 2235 */ 2236 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 2237 for (j = 0; j < 2; j++) 2238 { 2239 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 2240 { 2241 bool const fOkay = !BS3_MODE_IS_RM_OR_V86(bMode) && fSupportsAndN; 2242 uint8_t const bExpectXcpt = fOkay ? 
X86_XCPT_DB : X86_XCPT_UD; 2243 uint64_t uExpectRax, uExpectRcx, uExpectRip; 2244 RTCCUINTXREG uMemSrc1, uMemSrc1Expect; 2245 2246 Ctx.rax.uCcXReg = RTCCUINTXREG_MAX * 1019; 2247 Ctx.rcx.uCcXReg = RTCCUINTXREG_MAX * 4095; 2248 Ctx.rdx.uCcXReg = s_aTests[i].uSrc2; 2249 if (!s_aTests[i].fMemSrc) 2250 { 2251 Ctx.rbx.uCcXReg = s_aTests[i].uSrc1; 2252 uMemSrc1Expect = uMemSrc1 = ~s_aTests[i].uSrc1; 2253 } 2254 else 2255 { 2256 uMemSrc1Expect = uMemSrc1 = s_aTests[i].uSrc1; 2257 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc1); 2258 } 2259 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aTests[i].pfnWorker); 2260 uExpectRax = fOkay && !s_aTests[i].fSameDst ? s_aTests[i].uDst1 : Ctx.rax.u; 2261 uExpectRcx = fOkay ? s_aTests[i].uDst2 : Ctx.rcx.u; 2262 uExpectRip = Ctx.rip.u + (fOkay ? s_aTests[i].cbInstr + 1 : 0); 2263 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 2264 2265 if ( TrapFrame.bXcpt != bExpectXcpt 2266 || TrapFrame.Ctx.rip.u != uExpectRip 2267 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 2268 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 2269 || TrapFrame.Ctx.rax.u != uExpectRax 2270 || TrapFrame.Ctx.rcx.u != uExpectRcx 2271 /* check that nothing else really changed: */ 2272 || (TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (Ctx.rflags.u16 & X86_EFL_STATUS_BITS) 2273 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 2274 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 2275 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 2276 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 2277 || uMemSrc1 != uMemSrc1Expect 2278 ) 2279 { 2280 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTXREG_XFMT ", %#" RTCCUINTXREG_XFMT, i, s_aTests[i].uSrc1, s_aTests[i].uSrc2); 2281 if (TrapFrame.bXcpt != bExpectXcpt) 2282 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 2283 if (TrapFrame.Ctx.rip.u != uExpectRip) 2284 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 2285 if (TrapFrame.Ctx.rax.u != uExpectRax) 2286 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", 
uExpectRax, TrapFrame.Ctx.rax.u); 2287 if (TrapFrame.Ctx.rcx.u != uExpectRcx) 2288 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", uExpectRcx, TrapFrame.Ctx.rcx.u); 2289 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 2290 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 2291 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 2292 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64 (src)", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 2293 2294 if ( (TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (Ctx.rflags.u16 & X86_EFL_STATUS_BITS)) 2295 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32 (immutable)", 2296 Ctx.rflags.u16 & X86_EFL_STATUS_BITS, TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS); 2297 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 2298 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 2299 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 2300 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 2301 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 2302 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 2303 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 2304 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 2305 if (uMemSrc1 != uMemSrc1Expect) 2306 Bs3TestFailedF("Expected uMemSrc1 = %#06RX64, got %#06RX64", (uint64_t)uMemSrc1Expect, (uint64_t)uMemSrc1); 2307 } 2308 } 2309 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 2310 } 2311 2312 return 0; 2313 } 2314 2315 2316 /* 2317 * POPCNT - Intel: POPCNT; AMD: ABM. 
2318 */ 2319 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_popcnt)(uint8_t bMode) 2320 { 2321 static const struct 2322 { 2323 FPFNBS3FAR pfnWorker; 2324 bool fMemSrc; 2325 uint8_t cWidth; 2326 uint8_t cbInstr; 2327 RTCCUINTXREG uSrc; 2328 RTCCUINTXREG uDst; 2329 uint16_t fEFlags; 2330 } s_aTests[] = 2331 { 2332 /* 16-bit register width */ 2333 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_BX_icebp), false, 16, 4 + (ARCH_BITS != 16), // #0 2334 0, /* -> */ 0, X86_EFL_ZF }, 2335 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_BX_icebp), false, 16, 4 + (ARCH_BITS != 16), // #1 2336 ~(RTCCUINTXREG)0, /* -> */ 16, 0 }, 2337 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_BX_icebp), false, 16, 4 + (ARCH_BITS != 16), // #2 2338 UINT16_C(0xffff), /* -> */ 16, 0 }, 2339 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_BX_icebp), false, 16, 4 + (ARCH_BITS != 16), // #3 2340 UINT16_C(0x0304), /* -> */ 3, 0 }, 2341 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_FSxBX_icebp), true, 16, 5 + (ARCH_BITS != 16), // #4 2342 UINT16_C(0xd569), /* -> */ 9, 0}, 2343 { BS3_CMN_NM(bs3CpuInstr2_popcnt_AX_FSxBX_icebp), true, 16, 5 + (ARCH_BITS != 16), // #5 2344 0, /* -> */ 0, X86_EFL_ZF }, 2345 2346 /* 32-bit register width */ 2347 { BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_EBX_icebp), false, 32, 4 + (ARCH_BITS == 16), // #6 2348 0, /* -> */ 0, X86_EFL_ZF }, 2349 { BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_EBX_icebp), false, 32, 4 + (ARCH_BITS == 16), // #7 2350 ~(RTCCUINTXREG)0, /* -> */ 32, 0}, 2351 { BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_EBX_icebp), false, 32, 4 + (ARCH_BITS == 16), // #8 2352 UINT32_C(0x01020304), /* -> */ 5, 0}, 2353 { BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_FSxBX_icebp), true, 32, 5 + (ARCH_BITS == 16), // #9 2354 0, /* -> */ 0, X86_EFL_ZF }, 2355 { BS3_CMN_NM(bs3CpuInstr2_popcnt_EAX_FSxBX_icebp), true, 32, 5 + (ARCH_BITS == 16), // #10 2356 UINT32_C(0x49760948), /* -> */ 12, 0 }, 2357 2358 #if ARCH_BITS == 64 2359 /* 64-bit register width */ 2360 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_RBX_icebp), false, 64, 5, // #11 2361 0, /* -> */ 0, 
X86_EFL_ZF }, 2362 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_RBX_icebp), false, 64, 5, // #12 2363 ~(RTCCUINTXREG)0, /* -> */ 64, 0 }, 2364 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_RBX_icebp), false, 64, 5, // #13 2365 UINT64_C(0x1234123412341234), /* -> */ 5*4, 0 }, 2366 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_FSxBX_icebp), true, 64, 6, // #14 2367 0, /* -> */ 0, X86_EFL_ZF }, 2368 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_FSxBX_icebp), true, 64, 6, // #15 2369 ~(RTCCUINTXREG)0, /* -> */ 64, 0 }, 2370 { BS3_CMN_NM(bs3CpuInstr2_popcnt_RAX_FSxBX_icebp), true, 64, 6, // #16 2371 UINT64_C(0x5908760293769087), /* -> */ 26, 0 }, 2372 #endif 2373 }; 2374 2375 BS3REGCTX Ctx; 2376 BS3TRAPFRAME TrapFrame; 2377 unsigned i, j; 2378 bool const fSupportsPopCnt = (g_uBs3CpuDetected & BS3CPU_F_CPUID) 2379 && (ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_POPCNT); 2380 2381 /* Ensure the structures are allocated before we sample the stack pointer. */ 2382 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 2383 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 2384 2385 /* 2386 * Create test context. 2387 */ 2388 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 2389 2390 /* 2391 * Do the tests twice, first with all flags set, then once again with 2392 * flags cleared. The flags are not supposed to be touched at all. 2393 */ 2394 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 2395 for (j = 0; j < 2; j++) 2396 { 2397 for (i = 0; i < RT_ELEMENTS(s_aTests); i++) 2398 { 2399 bool const fOkay = fSupportsPopCnt; 2400 uint8_t const bExpectXcpt = fOkay ? 
X86_XCPT_DB : X86_XCPT_UD; 2401 uint64_t uExpectRax, uExpectRip; 2402 RTCCUINTXREG uMemSrc, uMemSrcExpect; 2403 2404 Ctx.rax.uCcXReg = RTCCUINTXREG_MAX * 1019; 2405 if (!s_aTests[i].fMemSrc) 2406 { 2407 Ctx.rbx.uCcXReg = s_aTests[i].uSrc; 2408 uMemSrcExpect = uMemSrc = ~s_aTests[i].uSrc; 2409 } 2410 else 2411 { 2412 uMemSrcExpect = uMemSrc = s_aTests[i].uSrc; 2413 Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, &Ctx.fs, &uMemSrc); 2414 } 2415 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aTests[i].pfnWorker); 2416 uExpectRax = fOkay ? s_aTests[i].uDst : Ctx.rax.u; 2417 if (s_aTests[i].cWidth == 16) 2418 uExpectRax = (uExpectRax & UINT16_MAX) | (Ctx.rax.u & ~(uint64_t)UINT16_MAX); 2419 2420 uExpectRip = Ctx.rip.u + (fOkay ? s_aTests[i].cbInstr + 1 : 0); 2421 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 2422 2423 if ( TrapFrame.bXcpt != bExpectXcpt 2424 || TrapFrame.Ctx.rip.u != uExpectRip 2425 || TrapFrame.Ctx.rbx.u != Ctx.rbx.u 2426 || TrapFrame.Ctx.rax.u != uExpectRax 2427 || (TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (fOkay ? 
s_aTests[i].fEFlags : Ctx.rflags.u16) 2428 /* check that nothing else really changed: */ 2429 || TrapFrame.Ctx.rcx.u != Ctx.rcx.u 2430 || TrapFrame.Ctx.rdx.u != Ctx.rdx.u 2431 || TrapFrame.Ctx.rsp.u != Ctx.rsp.u 2432 || TrapFrame.Ctx.rbp.u != Ctx.rbp.u 2433 || TrapFrame.Ctx.rsi.u != Ctx.rsi.u 2434 || TrapFrame.Ctx.rdi.u != Ctx.rdi.u 2435 || uMemSrc != uMemSrcExpect 2436 ) 2437 { 2438 Bs3TestFailedF("test #%i failed: input %#" RTCCUINTXREG_XFMT, i, s_aTests[i].uSrc); 2439 if (TrapFrame.bXcpt != bExpectXcpt) 2440 Bs3TestFailedF("Expected bXcpt = %#x, got %#x", bExpectXcpt, TrapFrame.bXcpt); 2441 if (TrapFrame.Ctx.rip.u != uExpectRip) 2442 Bs3TestFailedF("Expected RIP = %#06RX64, got %#06RX64", uExpectRip, TrapFrame.Ctx.rip.u); 2443 if (TrapFrame.Ctx.rax.u != uExpectRax) 2444 Bs3TestFailedF("Expected RAX = %#06RX64, got %#06RX64", uExpectRax, TrapFrame.Ctx.rax.u); 2445 if (TrapFrame.Ctx.rbx.u != Ctx.rbx.u) 2446 Bs3TestFailedF("Expected RBX = %#06RX64, got %#06RX64 (dst)", Ctx.rbx.u, TrapFrame.Ctx.rbx.u); 2447 if ((TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS) != (fOkay ? s_aTests[i].fEFlags : Ctx.rflags.u16)) 2448 Bs3TestFailedF("Expected EFLAGS = %#06RX32, got %#06RX32", 2449 fOkay ? 
s_aTests[i].fEFlags : Ctx.rflags.u16, TrapFrame.Ctx.rflags.u16 & X86_EFL_STATUS_BITS); 2450 2451 if (TrapFrame.Ctx.rcx.u != Ctx.rcx.u) 2452 Bs3TestFailedF("Expected RCX = %#06RX64, got %#06RX64", Ctx.rcx.u, TrapFrame.Ctx.rcx.u); 2453 if (TrapFrame.Ctx.rdx.u != Ctx.rdx.u) 2454 Bs3TestFailedF("Expected RDX = %#06RX64, got %#06RX64 (src)", Ctx.rdx.u, TrapFrame.Ctx.rdx.u); 2455 if (TrapFrame.Ctx.rsp.u != Ctx.rsp.u) 2456 Bs3TestFailedF("Expected RSP = %#06RX64, got %#06RX64", Ctx.rsp.u, TrapFrame.Ctx.rsp.u); 2457 if (TrapFrame.Ctx.rbp.u != Ctx.rbp.u) 2458 Bs3TestFailedF("Expected RBP = %#06RX64, got %#06RX64", Ctx.rbp.u, TrapFrame.Ctx.rbp.u); 2459 if (TrapFrame.Ctx.rsi.u != Ctx.rsi.u) 2460 Bs3TestFailedF("Expected RSI = %#06RX64, got %#06RX64", Ctx.rsi.u, TrapFrame.Ctx.rsi.u); 2461 if (TrapFrame.Ctx.rdi.u != Ctx.rdi.u) 2462 Bs3TestFailedF("Expected RDI = %#06RX64, got %#06RX64", Ctx.rdi.u, TrapFrame.Ctx.rdi.u); 2463 if (uMemSrc != uMemSrcExpect) 2464 Bs3TestFailedF("Expected uMemSrc = %#06RX64, got %#06RX64", (uint64_t)uMemSrcExpect, (uint64_t)uMemSrc); 2465 } 2466 } 2467 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 2468 } 2469 2470 return 0; 2471 } 2472 2473 /* 2474 * 2475 */ 2476 # if ARCH_BITS == 64 2477 2478 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b)(uint8_t bMode) 2479 { 2480 BS3REGCTX Ctx; 2481 BS3REGCTX ExpectCtx; 2482 BS3TRAPFRAME TrapFrame; 2483 RTUINT128U au128[3]; 2484 PRTUINT128U pau128 = RT_ALIGN_PT(&au128[0], sizeof(RTUINT128U), PRTUINT128U); 2485 bool const fSupportCX16 = RT_BOOL(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16); 2486 unsigned iFlags; 2487 unsigned offBuf; 2488 unsigned iMatch; 2489 unsigned iWorker; 2490 static struct 2491 { 2492 bool fLocked; 2493 uint8_t offUd2; 2494 FNBS3FAR *pfnWorker; 2495 } const s_aWorkers[] = 2496 { 2497 { false, 4, BS3_CMN_NM(bs3CpuInstr2_cmpxchg16b_rdi_ud2) }, 2498 { false, 5, BS3_CMN_NM(bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2) }, 2499 { false, 5, BS3_CMN_NM(bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2) }, 2500 { 
false, 5, BS3_CMN_NM(bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2) }, 2501 { true, 1+4, BS3_CMN_NM(bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2) }, 2502 { true, 1+5, BS3_CMN_NM(bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2) }, 2503 { true, 1+5, BS3_CMN_NM(bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2) }, 2504 { true, 1+5, BS3_CMN_NM(bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2) }, 2505 }; 2506 2507 /* Ensure the structures are allocated before we sample the stack pointer. */ 2508 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 2509 Bs3MemSet(&ExpectCtx, 0, sizeof(ExpectCtx)); 2510 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 2511 Bs3MemSet(pau128, 0, sizeof(pau128[0]) * 2); 2512 2513 /* 2514 * Create test context. 2515 */ 2516 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 2517 if (!fSupportCX16) 2518 Bs3TestPrintf("Note! CMPXCHG16B is not supported by the CPU!\n"); 2519 2520 /* 2521 * One loop with the normal variant and one with the locked one 2522 */ 2523 g_usBs3TestStep = 0; 2524 for (iWorker = 0; iWorker < RT_ELEMENTS(s_aWorkers); iWorker++) 2525 { 2526 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, s_aWorkers[iWorker].pfnWorker); 2527 2528 /* 2529 * One loop with all status flags set, and one with them clear. 
2530 */ 2531 Ctx.rflags.u16 |= X86_EFL_STATUS_BITS; 2532 for (iFlags = 0; iFlags < 2; iFlags++) 2533 { 2534 Bs3MemCpy(&ExpectCtx, &Ctx, sizeof(ExpectCtx)); 2535 2536 for (offBuf = 0; offBuf < sizeof(RTUINT128U); offBuf++) 2537 { 2538 # define CX16_OLD_LO UINT64_C(0xabb6345dcc9c4bbd) 2539 # define CX16_OLD_HI UINT64_C(0x7b06ea35749549ab) 2540 # define CX16_MISMATCH_LO UINT64_C(0xbace3e3590f18981) 2541 # define CX16_MISMATCH_HI UINT64_C(0x9b385e8bfd5b4000) 2542 # define CX16_STORE_LO UINT64_C(0x5cbd27d251f6559b) 2543 # define CX16_STORE_HI UINT64_C(0x17ff434ed1b54963) 2544 2545 PRTUINT128U pBuf = (PRTUINT128U)&pau128->au8[offBuf]; 2546 2547 ExpectCtx.rax.u = Ctx.rax.u = CX16_MISMATCH_LO; 2548 ExpectCtx.rdx.u = Ctx.rdx.u = CX16_MISMATCH_HI; 2549 for (iMatch = 0; iMatch < 2; iMatch++) 2550 { 2551 uint8_t bExpectXcpt; 2552 pBuf->s.Lo = CX16_OLD_LO; 2553 pBuf->s.Hi = CX16_OLD_HI; 2554 ExpectCtx.rdi.u = Ctx.rdi.u = (uintptr_t)pBuf; 2555 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 2556 g_usBs3TestStep++; 2557 //Bs3TestPrintf("Test: iFlags=%d offBuf=%d iMatch=%u iWorker=%u\n", iFlags, offBuf, iMatch, iWorker); 2558 bExpectXcpt = X86_XCPT_UD; 2559 if (fSupportCX16) 2560 { 2561 if (offBuf & 15) 2562 { 2563 bExpectXcpt = X86_XCPT_GP; 2564 ExpectCtx.rip.u = Ctx.rip.u; 2565 ExpectCtx.rflags.u32 = Ctx.rflags.u32; 2566 } 2567 else 2568 { 2569 ExpectCtx.rax.u = CX16_OLD_LO; 2570 ExpectCtx.rdx.u = CX16_OLD_HI; 2571 if (iMatch & 1) 2572 ExpectCtx.rflags.u32 = Ctx.rflags.u32 | X86_EFL_ZF; 2573 else 2574 ExpectCtx.rflags.u32 = Ctx.rflags.u32 & ~X86_EFL_ZF; 2575 ExpectCtx.rip.u = Ctx.rip.u + s_aWorkers[iWorker].offUd2; 2576 } 2577 ExpectCtx.rflags.u32 |= X86_EFL_RF; 2578 } 2579 if ( !Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &ExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 2580 0 /*fExtraEfl*/, "lm64", 0 /*idTestStep*/) 2581 || TrapFrame.bXcpt != bExpectXcpt) 2582 { 2583 if (TrapFrame.bXcpt != bExpectXcpt) 2584 Bs3TestFailedF("Expected bXcpt=#%x, got %#x (%#x)", bExpectXcpt, TrapFrame.bXcpt, 
TrapFrame.uErrCd); 2585 Bs3TestFailedF("^^^ iWorker=%d iFlags=%d offBuf=%d iMatch=%u\n", iWorker, iFlags, offBuf, iMatch); 2586 ASMHalt(); 2587 } 2588 2589 ExpectCtx.rax.u = Ctx.rax.u = CX16_OLD_LO; 2590 ExpectCtx.rdx.u = Ctx.rdx.u = CX16_OLD_HI; 2591 } 2592 } 2593 Ctx.rflags.u16 &= ~X86_EFL_STATUS_BITS; 2594 } 2595 } 2596 2597 return 0; 2598 } 2599 2600 2601 static void bs3CpuInstr2_fsgsbase_ExpectUD(uint8_t bMode, PBS3REGCTX pCtx, PBS3REGCTX pExpectCtx, PBS3TRAPFRAME pTrapFrame) 2602 { 2603 pCtx->rbx.u = 0; 2604 Bs3MemCpy(pExpectCtx, pCtx, sizeof(*pExpectCtx)); 2605 Bs3TrapSetJmpAndRestore(pCtx, pTrapFrame); 2606 pExpectCtx->rip.u = pCtx->rip.u; 2607 pExpectCtx->rflags.u32 |= X86_EFL_RF; 2608 if ( !Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "lm64", 2609 0 /*idTestStep*/) 2610 || pTrapFrame->bXcpt != X86_XCPT_UD) 2611 { 2612 Bs3TestFailedF("Expected #UD, got %#x (%#x)", pTrapFrame->bXcpt, pTrapFrame->uErrCd); 2613 ASMHalt(); 2614 } 2615 } 2616 2617 2618 static bool bs3CpuInstr2_fsgsbase_VerifyWorker(uint8_t bMode, PBS3REGCTX pCtx, PBS3REGCTX pExpectCtx, PBS3TRAPFRAME pTrapFrame, 2619 BS3CI2FSGSBASE const *pFsGsBaseWorker, unsigned *puIter) 2620 { 2621 bool fPassed = true; 2622 unsigned iValue = 0; 2623 static const struct 2624 { 2625 bool fGP; 2626 uint64_t u64Base; 2627 } s_aValues64[] = 2628 { 2629 { false, UINT64_C(0x0000000000000000) }, 2630 { false, UINT64_C(0x0000000000000001) }, 2631 { false, UINT64_C(0x0000000000000010) }, 2632 { false, UINT64_C(0x0000000000000123) }, 2633 { false, UINT64_C(0x0000000000001234) }, 2634 { false, UINT64_C(0x0000000000012345) }, 2635 { false, UINT64_C(0x0000000000123456) }, 2636 { false, UINT64_C(0x0000000001234567) }, 2637 { false, UINT64_C(0x0000000012345678) }, 2638 { false, UINT64_C(0x0000000123456789) }, 2639 { false, UINT64_C(0x000000123456789a) }, 2640 { false, UINT64_C(0x00000123456789ab) }, 2641 { false, UINT64_C(0x0000123456789abc) }, 2642 { false, 
UINT64_C(0x00007ffffeefefef) }, 2643 { false, UINT64_C(0x00007fffffffffff) }, 2644 { true, UINT64_C(0x0000800000000000) }, 2645 { true, UINT64_C(0x0000800000000000) }, 2646 { true, UINT64_C(0x0000800000000333) }, 2647 { true, UINT64_C(0x0001000000000000) }, 2648 { true, UINT64_C(0x0012000000000000) }, 2649 { true, UINT64_C(0x0123000000000000) }, 2650 { true, UINT64_C(0x1234000000000000) }, 2651 { true, UINT64_C(0xffff300000000000) }, 2652 { true, UINT64_C(0xffff7fffffffffff) }, 2653 { true, UINT64_C(0xffff7fffffffffff) }, 2654 { false, UINT64_C(0xffff800000000000) }, 2655 { false, UINT64_C(0xffffffffffeefefe) }, 2656 { false, UINT64_C(0xffffffffffffffff) }, 2657 { false, UINT64_C(0xffffffffffffffff) }, 2658 { false, UINT64_C(0x00000000efefefef) }, 2659 { false, UINT64_C(0x0000000080204060) }, 2660 { false, UINT64_C(0x00000000ddeeffaa) }, 2661 { false, UINT64_C(0x00000000fdecdbca) }, 2662 { false, UINT64_C(0x000000006098456b) }, 2663 { false, UINT64_C(0x0000000098506099) }, 2664 { false, UINT64_C(0x00000000206950bc) }, 2665 { false, UINT64_C(0x000000009740395d) }, 2666 { false, UINT64_C(0x0000000064a9455e) }, 2667 { false, UINT64_C(0x00000000d20b6eff) }, 2668 { false, UINT64_C(0x0000000085296d46) }, 2669 { false, UINT64_C(0x0000000007000039) }, 2670 { false, UINT64_C(0x000000000007fe00) }, 2671 }; 2672 2673 Bs3RegCtxSetRipCsFromCurPtr(pCtx, pFsGsBaseWorker->pfnVerifyWorker); 2674 if (pFsGsBaseWorker->f64BitOperand) 2675 { 2676 for (iValue = 0; iValue < RT_ELEMENTS(s_aValues64); iValue++) 2677 { 2678 bool const fGP = s_aValues64[iValue].fGP; 2679 2680 pCtx->rbx.u = s_aValues64[iValue].u64Base; 2681 pCtx->rcx.u = 0; 2682 pCtx->cr4.u |= X86_CR4_FSGSBASE; 2683 Bs3MemCpy(pExpectCtx, pCtx, sizeof(*pExpectCtx)); 2684 Bs3TrapSetJmpAndRestore(pCtx, pTrapFrame); 2685 pExpectCtx->rip.u = pCtx->rip.u + (!fGP ? pFsGsBaseWorker->offVerifyWorkerUd2 : 0); 2686 pExpectCtx->rbx.u = !fGP ? 0 : s_aValues64[iValue].u64Base; 2687 pExpectCtx->rcx.u = !fGP ? 
s_aValues64[iValue].u64Base : 0; 2688 pExpectCtx->rflags.u32 |= X86_EFL_RF; 2689 if ( !Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 2690 0 /*fExtraEfl*/, "lm64", 0 /*idTestStep*/) 2691 || (fGP && pTrapFrame->bXcpt != X86_XCPT_GP)) 2692 { 2693 if (fGP && pTrapFrame->bXcpt != X86_XCPT_GP) 2694 Bs3TestFailedF("Expected #GP, got %#x (%#x)", pTrapFrame->bXcpt, pTrapFrame->uErrCd); 2695 else 2696 Bs3TestFailedF("iValue=%u\n", iValue); 2697 fPassed = false; 2698 break; 2699 } 2700 } 2701 } 2702 else 2703 { 2704 for (iValue = 0; iValue < RT_ELEMENTS(s_aValues64); iValue++) 2705 { 2706 pCtx->rbx.u = s_aValues64[iValue].u64Base; 2707 pCtx->rcx.u = ~s_aValues64[iValue].u64Base; 2708 pCtx->cr4.u |= X86_CR4_FSGSBASE; 2709 Bs3MemCpy(pExpectCtx, pCtx, sizeof(*pExpectCtx)); 2710 Bs3TrapSetJmpAndRestore(pCtx, pTrapFrame); 2711 pExpectCtx->rip.u = pCtx->rip.u + pFsGsBaseWorker->offVerifyWorkerUd2; 2712 pExpectCtx->rbx.u = 0; 2713 pExpectCtx->rcx.u = s_aValues64[iValue].u64Base & UINT64_C(0x00000000ffffffff); 2714 pExpectCtx->rflags.u32 |= X86_EFL_RF; 2715 if (!Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 2716 0 /*fExtraEfl*/, "lm64", 0 /*idTestStep*/)) 2717 { 2718 Bs3TestFailedF("iValue=%u\n", iValue); 2719 fPassed = false; 2720 break; 2721 } 2722 } 2723 } 2724 2725 *puIter = iValue; 2726 return fPassed; 2727 } 2728 2729 2730 static void bs3CpuInstr2_rdfsbase_rdgsbase_Common(uint8_t bMode, BS3CI2FSGSBASE const *paFsGsBaseWorkers, 2731 unsigned cFsGsBaseWorkers, uint32_t idxFsGsBaseMsr) 2732 { 2733 BS3REGCTX Ctx; 2734 BS3REGCTX ExpectCtx; 2735 BS3TRAPFRAME TrapFrame; 2736 unsigned iWorker; 2737 unsigned iIter; 2738 uint32_t uDummy; 2739 uint32_t uStdExtFeatEbx; 2740 bool fSupportsFsGsBase; 2741 2742 ASMCpuId_Idx_ECX(7, 0, &uDummy, &uStdExtFeatEbx, &uDummy, &uDummy); 2743 fSupportsFsGsBase = RT_BOOL(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE); 2744 2745 /* Ensure the structures are allocated 
before we sample the stack pointer. */ 2746 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 2747 Bs3MemSet(&ExpectCtx, 0, sizeof(ExpectCtx)); 2748 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 2749 2750 /* 2751 * Create test context. 2752 */ 2753 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 2754 2755 for (iWorker = 0; iWorker < cFsGsBaseWorkers; iWorker++) 2756 { 2757 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paFsGsBaseWorkers[iWorker].pfnWorker); 2758 if (fSupportsFsGsBase) 2759 { 2760 uint64_t const uBaseAddr = ASMRdMsr(idxFsGsBaseMsr); 2761 2762 /* CR4.FSGSBASE disabled -> #UD. */ 2763 Ctx.cr4.u &= ~X86_CR4_FSGSBASE; 2764 bs3CpuInstr2_fsgsbase_ExpectUD(bMode, &Ctx, &ExpectCtx, &TrapFrame); 2765 2766 /* Read and verify existing base address. */ 2767 Ctx.rbx.u = 0; 2768 Ctx.cr4.u |= X86_CR4_FSGSBASE; 2769 Bs3MemCpy(&ExpectCtx, &Ctx, sizeof(ExpectCtx)); 2770 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 2771 ExpectCtx.rip.u = Ctx.rip.u + paFsGsBaseWorkers[iWorker].offWorkerUd2; 2772 ExpectCtx.rbx.u = uBaseAddr; 2773 ExpectCtx.rflags.u32 |= X86_EFL_RF; 2774 if (!Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &ExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "lm64", 2775 0 /*idTestStep*/)) 2776 { 2777 ASMHalt(); 2778 } 2779 2780 /* Write, read and verify series of base addresses. */ 2781 if (!bs3CpuInstr2_fsgsbase_VerifyWorker(bMode, &Ctx, &ExpectCtx, &TrapFrame, &paFsGsBaseWorkers[iWorker], &iIter)) 2782 { 2783 Bs3TestFailedF("^^^ %s: iWorker=%u iIter=%u\n", paFsGsBaseWorkers[iWorker].pszDesc, iWorker, iIter); 2784 ASMHalt(); 2785 } 2786 2787 /* Restore original base address. */ 2788 ASMWrMsr(idxFsGsBaseMsr, uBaseAddr); 2789 2790 /* Clean used GPRs. */ 2791 Ctx.rbx.u = 0; 2792 Ctx.rcx.u = 0; 2793 } 2794 else 2795 { 2796 /* Unsupported by CPUID -> #UD. */ 2797 Bs3TestPrintf("Note! 
FSGSBASE is not supported by the CPU!\n"); 2798 bs3CpuInstr2_fsgsbase_ExpectUD(bMode, &Ctx, &ExpectCtx, &TrapFrame); 2799 } 2800 } 2801 } 2802 2803 2804 static void bs3CpuInstr2_wrfsbase_wrgsbase_Common(uint8_t bMode, BS3CI2FSGSBASE const *paFsGsBaseWorkers, 2805 unsigned cFsGsBaseWorkers, uint32_t idxFsGsBaseMsr) 2806 { 2807 BS3REGCTX Ctx; 2808 BS3REGCTX ExpectCtx; 2809 BS3TRAPFRAME TrapFrame; 2810 unsigned iWorker; 2811 unsigned iIter; 2812 uint32_t uDummy; 2813 uint32_t uStdExtFeatEbx; 2814 bool fSupportsFsGsBase; 2815 2816 ASMCpuId_Idx_ECX(7, 0, &uDummy, &uStdExtFeatEbx, &uDummy, &uDummy); 2817 fSupportsFsGsBase = RT_BOOL(uStdExtFeatEbx & X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE); 2818 2819 /* Ensure the structures are allocated before we sample the stack pointer. */ 2820 Bs3MemSet(&Ctx, 0, sizeof(Ctx)); 2821 Bs3MemSet(&ExpectCtx, 0, sizeof(ExpectCtx)); 2822 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame)); 2823 2824 /* 2825 * Create test context. 2826 */ 2827 Bs3RegCtxSaveEx(&Ctx, bMode, 512); 2828 2829 for (iWorker = 0; iWorker < cFsGsBaseWorkers; iWorker++) 2830 { 2831 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, paFsGsBaseWorkers[iWorker].pfnWorker); 2832 if (fSupportsFsGsBase) 2833 { 2834 uint64_t const uBaseAddr = ASMRdMsr(idxFsGsBaseMsr); 2835 2836 /* CR4.FSGSBASE disabled -> #UD. */ 2837 Ctx.cr4.u &= ~X86_CR4_FSGSBASE; 2838 bs3CpuInstr2_fsgsbase_ExpectUD(bMode, &Ctx, &ExpectCtx, &TrapFrame); 2839 2840 /* Write a base address. */ 2841 Ctx.rbx.u = 0xa0000; 2842 Ctx.cr4.u |= X86_CR4_FSGSBASE; 2843 Bs3MemCpy(&ExpectCtx, &Ctx, sizeof(ExpectCtx)); 2844 Bs3TrapSetJmpAndRestore(&Ctx, &TrapFrame); 2845 ExpectCtx.rip.u = Ctx.rip.u + paFsGsBaseWorkers[iWorker].offWorkerUd2; 2846 ExpectCtx.rflags.u32 |= X86_EFL_RF; 2847 if (!Bs3TestCheckRegCtxEx(&TrapFrame.Ctx, &ExpectCtx, 0 /*cbPcAdjust*/, 0 /*cbSpAdjust*/, 0 /*fExtraEfl*/, "lm64", 2848 0 /*idTestStep*/)) 2849 { 2850 ASMHalt(); 2851 } 2852 2853 /* Write and read back series of base addresses. 
*/ 2854 if (!bs3CpuInstr2_fsgsbase_VerifyWorker(bMode, &Ctx, &ExpectCtx, &TrapFrame, &paFsGsBaseWorkers[iWorker], &iIter)) 2855 { 2856 Bs3TestFailedF("^^^ %s: iWorker=%u iIter=%u\n", paFsGsBaseWorkers[iWorker].pszDesc, iWorker, iIter); 2857 ASMHalt(); 2858 } 2859 2860 /* Restore original base address. */ 2861 ASMWrMsr(idxFsGsBaseMsr, uBaseAddr); 2862 2863 /* Clean used GPRs. */ 2864 Ctx.rbx.u = 0; 2865 Ctx.rcx.u = 0; 2866 } 2867 else 2868 { 2869 /* Unsupported by CPUID -> #UD. */ 2870 Bs3TestPrintf("Note! FSGSBASE is not supported by the CPU!\n"); 2871 bs3CpuInstr2_fsgsbase_ExpectUD(bMode, &Ctx, &ExpectCtx, &TrapFrame); 2872 } 2873 } 2874 } 2875 2876 2877 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_wrfsbase)(uint8_t bMode) 2878 { 2879 bs3CpuInstr2_wrfsbase_wrgsbase_Common(bMode, s_aWrFsBaseWorkers, RT_ELEMENTS(s_aWrFsBaseWorkers), MSR_K8_FS_BASE); 2880 return 0; 2881 } 2882 2883 2884 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_wrgsbase)(uint8_t bMode) 2885 { 2886 bs3CpuInstr2_wrfsbase_wrgsbase_Common(bMode, s_aWrGsBaseWorkers, RT_ELEMENTS(s_aWrGsBaseWorkers), MSR_K8_GS_BASE); 2887 return 0; 2888 } 2889 2890 2891 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_rdfsbase)(uint8_t bMode) 2892 { 2893 bs3CpuInstr2_rdfsbase_rdgsbase_Common(bMode, s_aRdFsBaseWorkers, RT_ELEMENTS(s_aRdFsBaseWorkers), MSR_K8_FS_BASE); 2894 return 0; 2895 } 2896 2897 2898 BS3_DECL_FAR(uint8_t) BS3_CMN_NM(bs3CpuInstr2_rdgsbase)(uint8_t bMode) 2899 { 2900 bs3CpuInstr2_rdfsbase_rdgsbase_Common(bMode, s_aRdGsBaseWorkers, RT_ELEMENTS(s_aRdGsBaseWorkers), MSR_K8_GS_BASE); 2901 return 0; 2902 } 2903 2904 # endif /* ARCH_BITS == 64 */ 659 660 static BS3CPUINSTR3_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR3_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64); 661 unsigned const iTest = BS3CPUINSTR3_TEST1_MODES_INDEX(bMode); 662 return bs3CpuInstr3_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests, 663 g_aXcptConfig4, RT_ELEMENTS(g_aXcptConfig4)); 664 } 665 2905 666 2906 667 
#endif /* BS3_INSTANTIATING_CMN */ -
trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3-template.mac
r95361 r95373 1 1 ; $Id$ 2 2 ;; @file 3 ; BS3Kit - bs3-cpu-instr- 2assembly template.3 ; BS3Kit - bs3-cpu-instr-3, SSE and AVX instructions, assembly template. 4 4 ; 5 5 … … 44 44 %ifdef BS3_INSTANTIATING_CMN 45 45 46 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mul_xBX_ud2, BS3_PBC_NEAR47 mul xBX48 .again:49 ud250 jmp .again51 BS3_PROC_END_CMN bs3CpuInstr2_mul_xBX_ud252 53 54 BS3_PROC_BEGIN_CMN bs3CpuInstr2_imul_xBX_ud2, BS3_PBC_NEAR55 imul xBX56 .again:57 ud258 jmp .again59 BS3_PROC_END_CMN bs3CpuInstr2_imul_xBX_ud260 61 62 BS3_PROC_BEGIN_CMN bs3CpuInstr2_imul_xCX_xBX_ud2, BS3_PBC_NEAR63 imul xCX, xBX64 .again:65 ud266 jmp .again67 BS3_PROC_END_CMN bs3CpuInstr2_imul_xCX_xBX_ud268 69 70 BS3_PROC_BEGIN_CMN bs3CpuInstr2_div_xBX_ud2, BS3_PBC_NEAR71 div xBX72 .again:73 ud274 jmp .again75 BS3_PROC_END_CMN bs3CpuInstr2_div_xBX_ud276 77 78 BS3_PROC_BEGIN_CMN bs3CpuInstr2_idiv_xBX_ud2, BS3_PBC_NEAR79 idiv xBX80 .again:81 ud282 jmp .again83 BS3_PROC_END_CMN bs3CpuInstr2_idiv_xBX_ud284 85 86 46 ; 87 ; BSF / BSR / TZCNT / LZCNT47 ; XORPS (SSE2) & VXORPS (AVX) 88 48 ; 89 %ifndef EMIT_BITSCAN_DEFINED 90 %define EMIT_BITSCAN_DEFINED 91 %macro EMIT_BITSCAN 3 92 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ _AX_BX_ud2, BS3_PBC_NEAR 93 %2 94 %1 ax, bx 95 .again: 96 ud2 97 jmp .again 98 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _AX_BX_ud2 99 100 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ _AX_FSxBX_ud2, BS3_PBC_NEAR 101 %2 102 %1 ax, [fs:xBX] 103 .again: 104 ud2 105 jmp .again 106 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _AX_FSxBX_ud2 107 108 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ _EAX_EBX_ud2, BS3_PBC_NEAR 109 %2 110 %1 eax, ebx 111 .again: 112 ud2 113 jmp .again 114 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _EAX_EBX_ud2 115 116 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ _EAX_FSxBX_ud2, BS3_PBC_NEAR 117 %2 118 %1 eax, [fs:xBX] 119 .again: 120 ud2 121 jmp .again 122 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _EAX_FSxBX_ud2 123 124 %if TMPL_BITS == 64 125 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ 
_RAX_RBX_ud2, BS3_PBC_NEAR 126 %2 127 %1 rax, rbx 128 .again: 129 ud2 130 jmp .again 131 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _RAX_RBX_ud2 132 133 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %3 %+ _RAX_FSxBX_ud2, BS3_PBC_NEAR 134 %2 135 %1 rax, [fs:xBX] 136 .again: 137 ud2 138 jmp .again 139 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %3 %+ _RAX_FSxBX_ud2 140 %endif 141 %endmacro 142 %endif 143 144 EMIT_BITSCAN bsf, .ignored:, bsf 145 EMIT_BITSCAN bsr, .ignored:, bsr 146 EMIT_BITSCAN tzcnt, .ignored:, tzcnt 147 EMIT_BITSCAN lzcnt, .ignored:, lzcnt 148 EMIT_BITSCAN bsf, db 0f2h, f2_bsf 149 EMIT_BITSCAN bsr, db 0f2h, f2_bsr 150 EMIT_BITSCAN tzcnt, db 0f2h, f2_tzcnt 151 EMIT_BITSCAN lzcnt, db 0f2h, f2_lzcnt 152 153 154 ; 155 ; RORX - VEX instruction with a couple of questions about non-standard encodings. 156 ; 157 ;;%define icebp ud2 158 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp, BS3_PBC_NEAR 159 rorx ebx, edx, 2 49 BS3_PROC_BEGIN_CMN bs3CpuInstr3_xorps_XMM1_XMM2_icebp, BS3_PBC_NEAR 50 xorps xmm1, xmm2 160 51 .again: 161 52 icebp 162 53 jmp .again 163 BS3_PROC_END_CMN bs3CpuInstr 2_rorx_EBX_EDX_2_icebp54 BS3_PROC_END_CMN bs3CpuInstr3_xorps_XMM1_XMM2_icebp 164 55 165 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_RBX_RDX_2_icebp, BS3_PBC_NEAR 166 %if TMPL_BITS == 64 167 rorx rbx, rdx, 2 168 %else 169 db 0C4h,0E3h,0FBh,0F0h,0DAh,002h ; 32-bit ignores VEX.W=1 (10980xe) 170 %endif 56 BS3_PROC_BEGIN_CMN bs3CpuInstr3_xorps_XMM1_FSxBX_icebp, BS3_PBC_NEAR 57 xorps xmm1, [fs:xBX] 171 58 .again: 172 59 icebp 173 60 jmp .again 174 BS3_PROC_END_CMN bs3CpuInstr 2_rorx_RBX_RDX_2_icebp61 BS3_PROC_END_CMN bs3CpuInstr3_xorps_XMM1_FSxBX_icebp 175 62 176 BS3_PROC_BEGIN_CMN bs3CpuInstr 2_rorx_EBX_EDX_2_icebp_L1, BS3_PBC_NEAR177 db 0C4h, 0E3h, 07Bh | 4h, 0F0h, 0DAh, 002h ; VEX.L=1 should #UD according to the docs63 BS3_PROC_BEGIN_CMN bs3CpuInstr3_vxorps_XMM1_XMM1_XMM2_icebp, BS3_PBC_NEAR 64 vxorps xmm1, xmm1, xmm2 178 65 .again: 179 66 icebp 180 67 jmp .again 181 BS3_PROC_END_CMN bs3CpuInstr 
2_rorx_EBX_EDX_2_icebp_L168 BS3_PROC_END_CMN bs3CpuInstr3_vxorps_XMM1_XMM1_XMM2_icebp 182 69 183 BS3_PROC_BEGIN_CMN bs3CpuInstr 2_rorx_EBX_EDX_2_icebp_V1, BS3_PBC_NEAR184 db 0C4h, 0E3h, 003h | ~(1 << 3), 0F0h, 0DAh, 002h ; VEX.VVVV=1 - behaviour is undocumented - 10980xe #UD70 BS3_PROC_BEGIN_CMN bs3CpuInstr3_vxorps_XMM1_XMM1_FSxBX_icebp, BS3_PBC_NEAR 71 vxorps xmm1, xmm1, [fs:xBX] 185 72 .again: 186 73 icebp 187 74 jmp .again 188 BS3_PROC_END_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V1 189 190 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V15, BS3_PBC_NEAR 191 db 0C4h, 0E3h, 003h | ~(15 << 3), 0F0h, 0DAh, 002h ; VEX.VVVV=15 - behaviour is not documented - 10980xe #UD 192 .again: 193 icebp 194 jmp .again 195 BS3_PROC_END_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp_V15 196 197 %if TMPL_BITS == 64 198 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp_X1, BS3_PBC_NEAR 199 db 0C4h, 0E3h & ~40h, 07Bh, 0F0h, 0DAh, 002h ; VEX.X=0 - behaviour is not documented - ignored by 10980xe 200 .again: 201 icebp 202 jmp .again 203 BS3_PROC_END_CMN bs3CpuInstr2_rorx_EBX_EDX_2_icebp_X1 204 %endif 205 206 ; A couple of memory variants 207 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_EBX_DSxDI_36_icebp, BS3_PBC_NEAR 208 rorx ebx, [xDI], 36 209 .again: 210 icebp 211 jmp .again 212 BS3_PROC_END_CMN bs3CpuInstr2_rorx_EBX_DSxDI_36_icebp 213 214 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rorx_RBX_DSxDI_68_icebp, BS3_PBC_NEAR 215 %if TMPL_BITS == 64 216 rorx rbx, [xDI], 68 217 %elif TMPL_BITS == 32 218 db 0C4h,0E3h,07Bh,0F0h,01Fh,044h ; 16-bit ignores VEX.W=1 (10980xe) 219 %else 220 db 0C4h,0E3h,0FBh,0F0h,01Dh,044h ; 16-bit ignores VEX.W=1 (10980xe) 221 %endif 222 .again: 223 icebp 224 jmp .again 225 BS3_PROC_END_CMN bs3CpuInstr2_rorx_RBX_DSxDI_68_icebp 226 227 ; 228 ; ANDN (BMI1) 229 ; 230 BS3_PROC_BEGIN_CMN bs3CpuInstr2_andn_RAX_RCX_RBX_icebp, BS3_PBC_NEAR 231 %if TMPL_BITS == 64 232 andn rax, rcx, rbx 233 %else 234 db 0C4h,0E2h,0F0h,0F2h,0C3h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 235 %endif 236 .again: 
237 icebp 238 jmp .again 239 BS3_PROC_END_CMN bs3CpuInstr2_andn_RAX_RCX_RBX_icebp 240 241 BS3_PROC_BEGIN_CMN bs3CpuInstr2_andn_EAX_ECX_EBX_icebp, BS3_PBC_NEAR 242 andn eax, ecx, ebx 243 .again: 244 icebp 245 jmp .again 246 BS3_PROC_END_CMN bs3CpuInstr2_andn_EAX_ECX_EBX_icebp 247 248 249 BS3_PROC_BEGIN_CMN bs3CpuInstr2_andn_RAX_RCX_FSxBX_icebp, BS3_PBC_NEAR 250 %if TMPL_BITS == 64 251 andn rax, rcx, [fs:rbx] 252 %elif TMPL_BITS == 32 253 db 064h,0C4h,0E2h,0F0h,0F2h,003h ; andn rax, rcx, [fs:ebx] 254 %else 255 db 064h,0C4h,0E2h,0F0h,0F2h,007h ; andn rax, rcx, [fs:bx] 256 %endif 257 .again: 258 icebp 259 jmp .again 260 BS3_PROC_END_CMN bs3CpuInstr2_andn_RAX_RCX_FSxBX_icebp 261 262 BS3_PROC_BEGIN_CMN bs3CpuInstr2_andn_EAX_ECX_FSxBX_icebp, BS3_PBC_NEAR 263 andn eax, ecx, [fs:xBX] 264 .again: 265 icebp 266 jmp .again 267 BS3_PROC_END_CMN bs3CpuInstr2_andn_EAX_ECX_FSxBX_icebp 268 269 270 ; 271 ; BEXTR / SHLX / SARX / SHRX - BMI1 (opcode f7h) 272 ; BZHI - BMI2 (opcode f5h) 273 ; 274 ; @param %1 instruction 275 ; @param %2 opcode 276 ; @param %3 prefix 277 ; 278 %ifndef SHLX_SARX_SHRX_DEFINED 279 %define SHLX_SARX_SHRX_DEFINED 280 %macro SHLX_SARX_SHRX 3 281 282 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RBX_RCX_icebp, BS3_PBC_NEAR 283 %if TMPL_BITS == 64 284 %1 rax, rbx, rcx ; SHLX=C4E2F1F7C3 285 %else 286 db 0C4h,0E2h,0F0h|%3,%2,0C3h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 287 %endif 288 .again: 289 icebp 290 jmp .again 291 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RBX_RCX_icebp 292 293 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_EBX_ECX_icebp, BS3_PBC_NEAR 294 %1 eax, ebx, ecx 295 .again: 296 icebp 297 jmp .again 298 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_EBX_ECX_icebp 299 300 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_FSxBX_RCX_icebp, BS3_PBC_NEAR 301 %if TMPL_BITS == 64 302 %1 rax, [fs:rbx], rcx ; SHLX=64C4E2F1F703 303 %elif TMPL_BITS == 32 304 db 064h,0C4h,0E2h,0F0h|%3,%2,003h 305 %else 306 db 064h,0C4h,0E2h,0F0h|%3,%2,007h 307 %endif 308 
.again: 309 icebp 310 jmp .again 311 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_FSxBX_RCX_icebp 312 313 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_FSxBX_ECX_icebp, BS3_PBC_NEAR 314 %1 eax, [fs:xBX], ecx 315 .again: 316 icebp 317 jmp .again 318 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_FSxBX_ECX_icebp 319 320 %endmacro 321 %endif 322 323 SHLX_SARX_SHRX bextr, 0f7h, 0 ; none 324 SHLX_SARX_SHRX shlx, 0f7h, 1 ; 66h 325 SHLX_SARX_SHRX sarx, 0f7h, 2 ; f3h 326 SHLX_SARX_SHRX shrx, 0f7h, 3 ; f2h 327 SHLX_SARX_SHRX bzhi, 0f5h, 0 ; none 328 329 ; 330 ; PPEP / PEXT - BMI2 (opcode f5h) 331 ; 332 ; @param %1 instruction 333 ; @param %2 opcode 334 ; @param %3 prefix 335 ; 336 %ifndef PDEP_PEXT_DEFINED 337 %define PDEP_PEXT_DEFINED 338 %macro PDEP_PEXT_ 3 339 340 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RCX_RBX_icebp, BS3_PBC_NEAR 341 %if TMPL_BITS == 64 342 %1 rax, rcx, rbx 343 %else 344 db 0C4h,0E2h,0F0h|%3,%2,0C3h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 345 %endif 346 .again: 347 icebp 348 jmp .again 349 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RCX_RBX_icebp 350 351 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_ECX_EBX_icebp, BS3_PBC_NEAR 352 %1 eax, ecx, ebx 353 .again: 354 icebp 355 jmp .again 356 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_ECX_EBX_icebp 357 358 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RCX_FSxBX_icebp, BS3_PBC_NEAR 359 %if TMPL_BITS == 64 360 %1 rax, rcx, [fs:rbx] 361 %elif TMPL_BITS == 32 362 db 064h,0C4h,0E2h,0F0h|%3,%2,003h 363 %else 364 db 064h,0C4h,0E2h,0F0h|%3,%2,007h 365 %endif 366 .again: 367 icebp 368 jmp .again 369 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RCX_FSxBX_icebp 370 371 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_ECX_FSxBX_icebp, BS3_PBC_NEAR 372 %1 eax, ecx, [fs:xBX] 373 .again: 374 icebp 375 jmp .again 376 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_ECX_FSxBX_icebp 377 378 %endmacro 379 %endif 380 381 PDEP_PEXT_ pext, 0f5h, 2 ; f3h 382 PDEP_PEXT_ pdep, 0f5h, 3 ; f2h 383 384 ; 385 ; BLSR / BLSMSK / 
BLSI 386 ; These are encoded in the exact same way, only the /r differs (%2). 387 ; 388 %ifndef BLSR_BLSMSK_BLSI_DEFINED 389 %define BLSR_BLSMSK_BLSI_DEFINED 390 %macro BLSR_BLSMSK_BLSI 2 391 392 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RBX_icebp, BS3_PBC_NEAR 393 %if TMPL_BITS == 64 394 %1 rax, rbx ; BLSR=C4E2F8F3CB 395 %else 396 db 0C4h,0E2h,0F8h,0F3h,0C3h | (%2 << 3) ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 397 %endif 398 .again: 399 icebp 400 jmp .again 401 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_RBX_icebp 402 403 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_EBX_icebp, BS3_PBC_NEAR 404 %1 eax, ebx 405 .again: 406 icebp 407 jmp .again 408 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_EBX_icebp 409 410 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_FSxBX_icebp, BS3_PBC_NEAR 411 %if TMPL_BITS == 64 412 %1 rax, [fs:rbx] ; BSLR=64C4E2F8F30B 413 %elif TMPL_BITS == 32 414 db 064h,0C4h,0E2h,0F8h,0F3h,003h | (%2 << 3) 415 %else 416 db 064h,0C4h,0E2h,0F8h,0F3h,007h | (%2 << 3) 417 %endif 418 .again: 419 icebp 420 jmp .again 421 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _RAX_FSxBX_icebp 422 423 BS3_PROC_BEGIN_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_FSxBX_icebp, BS3_PBC_NEAR 424 %1 eax, [fs:xBX] 425 .again: 426 icebp 427 jmp .again 428 BS3_PROC_END_CMN bs3CpuInstr2_ %+ %1 %+ _EAX_FSxBX_icebp 429 430 %endmacro 431 %endif 432 433 BLSR_BLSMSK_BLSI blsr, 1 434 BLSR_BLSMSK_BLSI blsmsk, 2 435 BLSR_BLSMSK_BLSI blsi, 3 436 437 ; 438 ; MULX 439 ; 440 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp, BS3_PBC_NEAR 441 %if TMPL_BITS == 64 442 mulx rax, rcx, rbx ; C4E2F3F6C3 443 %else 444 db 0C4h,0E2h,0F3h,0F6h,0C3h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 445 %endif 446 .again: 447 icebp 448 jmp .again 449 BS3_PROC_END_CMN bs3CpuInstr2_mulx_RAX_RCX_RBX_RDX_icebp 450 451 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_RCX_RCX_RBX_RDX_icebp, BS3_PBC_NEAR 452 %if TMPL_BITS == 64 453 mulx rcx, rcx, rbx ; C4E2F3F6CB 454 %else 455 db 0C4h,0E2h,0F3h,0F6h,0CBh ; 32-bit & 16-bit ignores 
VEX.W=1 (10980xe) 456 %endif 457 .again: 458 icebp 459 jmp .again 460 BS3_PROC_END_CMN bs3CpuInstr2_mulx_RCX_RCX_RBX_RDX_icebp 461 462 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp, BS3_PBC_NEAR 463 %if TMPL_BITS == 64 464 mulx rax, rcx, [fs:rbx] ; 64C4E2F3F603 465 %elif TMPL_BITS == 32 466 db 064h,0C4h,0E2h,0F3h,0F6h,003h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 467 %else 468 db 064h,0C4h,0E2h,0F3h,0F6h,007h ; 32-bit & 16-bit ignores VEX.W=1 (10980xe) 469 %endif 470 .again: 471 icebp 472 jmp .again 473 BS3_PROC_END_CMN bs3CpuInstr2_mulx_RAX_RCX_FSxBX_RDX_icebp 474 475 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp, BS3_PBC_NEAR 476 mulx eax, ecx, ebx 477 .again: 478 icebp 479 jmp .again 480 BS3_PROC_END_CMN bs3CpuInstr2_mulx_EAX_ECX_EBX_EDX_icebp 481 482 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_ECX_ECX_EBX_EDX_icebp, BS3_PBC_NEAR 483 mulx ecx, ecx, ebx 484 .again: 485 icebp 486 jmp .again 487 BS3_PROC_END_CMN bs3CpuInstr2_mulx_ECX_ECX_EBX_EDX_icebp 488 489 BS3_PROC_BEGIN_CMN bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp, BS3_PBC_NEAR 490 mulx eax, ecx, [fs:xBX] 491 .again: 492 icebp 493 jmp .again 494 BS3_PROC_END_CMN bs3CpuInstr2_mulx_EAX_ECX_FSxBX_EDX_icebp 495 496 497 ; 498 ; POPCNT 499 ; 500 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_AX_BX_icebp, BS3_PBC_NEAR 501 popcnt ax, bx 502 .again: 503 icebp 504 jmp .again 505 BS3_PROC_END_CMN bs3CpuInstr2_popcnt_AX_BX_icebp 506 507 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_EAX_EBX_icebp, BS3_PBC_NEAR 508 popcnt eax, ebx 509 .again: 510 icebp 511 jmp .again 512 BS3_PROC_END_CMN bs3CpuInstr2_popcnt_EAX_EBX_icebp 513 514 %if TMPL_BITS == 64 515 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_RAX_RBX_icebp, BS3_PBC_NEAR 516 popcnt rax, rbx 517 .again: 518 icebp 519 jmp .again 520 BS3_PROC_END_CMN bs3CpuInstr2_popcnt_RAX_RBX_icebp 521 %endif 522 523 524 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_AX_FSxBX_icebp, BS3_PBC_NEAR 525 popcnt ax, [fs:xBX] 526 .again: 527 icebp 528 jmp .again 529 BS3_PROC_END_CMN 
bs3CpuInstr2_popcnt_AX_FSxBX_icebp 530 531 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_EAX_FSxBX_icebp, BS3_PBC_NEAR 532 popcnt eax, [fs:xBX] 533 .again: 534 icebp 535 jmp .again 536 BS3_PROC_END_CMN bs3CpuInstr2_popcnt_EAX_FSxBX_icebp 537 538 %if TMPL_BITS == 64 539 BS3_PROC_BEGIN_CMN bs3CpuInstr2_popcnt_RAX_FSxBX_icebp, BS3_PBC_NEAR 540 popcnt rax, [fs:xBX] 541 .again: 542 icebp 543 jmp .again 544 BS3_PROC_END_CMN bs3CpuInstr2_popcnt_RAX_FSxBX_icebp 545 %endif 546 547 548 549 ; 550 ; CMPXCHG16B 551 ; 552 %if TMPL_BITS == 64 553 BS3_PROC_BEGIN_CMN bs3CpuInstr2_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 554 cmpxchg16b [rdi] 555 .again: 556 ud2 557 jmp .again 558 AssertCompile(.again - BS3_LAST_LABEL == 4) 559 BS3_PROC_END_CMN bs3CpuInstr2_cmpxchg16b_rdi_ud2 560 561 562 BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 563 lock cmpxchg16b [rdi] 564 .again: 565 ud2 566 jmp .again 567 AssertCompile(.again - BS3_LAST_LABEL == 5) 568 BS3_PROC_END_CMN bs3CpuInstr2_lock_cmpxchg16b_rdi_ud2 569 570 571 BS3_PROC_BEGIN_CMN bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 572 o16 cmpxchg16b [rdi] 573 .again: 574 ud2 575 jmp .again 576 AssertCompile(.again - BS3_LAST_LABEL == 5) 577 BS3_PROC_END_CMN bs3CpuInstr2_o16_cmpxchg16b_rdi_ud2 578 579 580 BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 581 db 0f0h, 066h 582 cmpxchg16b [rdi] 583 .again: 584 ud2 585 jmp .again 586 AssertCompile(.again - BS3_LAST_LABEL == 6) 587 BS3_PROC_END_CMN bs3CpuInstr2_lock_o16_cmpxchg16b_rdi_ud2 588 589 590 BS3_PROC_BEGIN_CMN bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 591 repz cmpxchg16b [rdi] 592 .again: 593 ud2 594 jmp .again 595 AssertCompile(.again - BS3_LAST_LABEL == 5) 596 BS3_PROC_END_CMN bs3CpuInstr2_repz_cmpxchg16b_rdi_ud2 597 598 599 BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 600 db 0f0h, 0f3h 601 cmpxchg16b [rdi] 602 .again: 603 ud2 604 jmp .again 605 AssertCompile(.again - BS3_LAST_LABEL == 6) 606 BS3_PROC_END_CMN 
bs3CpuInstr2_lock_repz_cmpxchg16b_rdi_ud2 607 608 BS3_PROC_BEGIN_CMN bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 609 repnz cmpxchg16b [rdi] 610 .again: 611 ud2 612 jmp .again 613 AssertCompile(.again - BS3_LAST_LABEL == 5) 614 BS3_PROC_END_CMN bs3CpuInstr2_repnz_cmpxchg16b_rdi_ud2 615 616 617 BS3_PROC_BEGIN_CMN bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2, BS3_PBC_NEAR 618 db 0f0h, 0f2h 619 cmpxchg16b [rdi] 620 .again: 621 ud2 622 jmp .again 623 AssertCompile(.again - BS3_LAST_LABEL == 6) 624 BS3_PROC_END_CMN bs3CpuInstr2_lock_repnz_cmpxchg16b_rdi_ud2 625 626 627 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrfsbase_rbx_ud2, BS3_PBC_NEAR 628 wrfsbase rbx 629 .again: 630 ud2 631 jmp .again 632 AssertCompile(.again - BS3_LAST_LABEL == 5) 633 BS3_PROC_END_CMN bs3CpuInstr2_wrfsbase_rbx_ud2 634 635 636 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrfsbase_ebx_ud2, BS3_PBC_NEAR 637 wrfsbase ebx 638 .again: 639 ud2 640 jmp .again 641 AssertCompile(.again - BS3_LAST_LABEL == 4) 642 BS3_PROC_END_CMN bs3CpuInstr2_wrfsbase_ebx_ud2 643 644 645 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrgsbase_rbx_ud2, BS3_PBC_NEAR 646 wrgsbase rbx 647 .again: 648 ud2 649 jmp .again 650 AssertCompile(.again - BS3_LAST_LABEL == 5) 651 BS3_PROC_END_CMN bs3CpuInstr2_wrgsbase_rbx_ud2 652 653 654 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrgsbase_ebx_ud2, BS3_PBC_NEAR 655 wrgsbase ebx 656 .again: 657 ud2 658 jmp .again 659 AssertCompile(.again - BS3_LAST_LABEL == 4) 660 BS3_PROC_END_CMN bs3CpuInstr2_wrgsbase_ebx_ud2 661 662 663 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrfsbase_rbx_rdfsbase_rcx_ud2, BS3_PBC_NEAR 664 wrfsbase rbx 665 mov ebx, 0 666 rdfsbase rcx 667 .again: 668 ud2 669 jmp .again 670 AssertCompile(.again - BS3_LAST_LABEL == 15) 671 BS3_PROC_END_CMN bs3CpuInstr2_wrfsbase_rbx_rdfsbase_rcx_ud2 672 673 674 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrfsbase_ebx_rdfsbase_ecx_ud2, BS3_PBC_NEAR 675 wrfsbase ebx 676 mov ebx, 0 677 rdfsbase ecx 678 .again: 679 ud2 680 jmp .again 681 AssertCompile(.again - BS3_LAST_LABEL == 13) 682 BS3_PROC_END_CMN 
bs3CpuInstr2_wrfsbase_ebx_rdfsbase_ecx_ud2 683 684 685 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrgsbase_rbx_rdgsbase_rcx_ud2, BS3_PBC_NEAR 686 wrgsbase rbx 687 mov ebx, 0 688 rdgsbase rcx 689 .again: 690 ud2 691 jmp .again 692 AssertCompile(.again - BS3_LAST_LABEL == 15) 693 BS3_PROC_END_CMN bs3CpuInstr2_wrgsbase_rbx_rdgsbase_rcx_ud2 694 695 696 BS3_PROC_BEGIN_CMN bs3CpuInstr2_wrgsbase_ebx_rdgsbase_ecx_ud2, BS3_PBC_NEAR 697 wrgsbase ebx 698 mov ebx, 0 699 rdgsbase ecx 700 .again: 701 ud2 702 jmp .again 703 AssertCompile(.again - BS3_LAST_LABEL == 13) 704 BS3_PROC_END_CMN bs3CpuInstr2_wrfgbase_ebx_rdgsbase_ecx_ud2 705 706 707 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rdfsbase_rbx_ud2, BS3_PBC_NEAR 708 rdfsbase rbx 709 .again: 710 ud2 711 jmp .again 712 AssertCompile(.again - BS3_LAST_LABEL == 5) 713 BS3_PROC_END_CMN bs3CpuInstr2_rdfsbase_rbx_ud2 714 715 716 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rdfsbase_ebx_ud2, BS3_PBC_NEAR 717 rdfsbase ebx 718 .again: 719 ud2 720 jmp .again 721 AssertCompile(.again - BS3_LAST_LABEL == 4) 722 BS3_PROC_END_CMN bs3CpuInstr2_rdfsbase_ebx_ud2 723 724 725 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rdgsbase_rbx_ud2, BS3_PBC_NEAR 726 rdgsbase rbx 727 .again: 728 ud2 729 jmp .again 730 AssertCompile(.again - BS3_LAST_LABEL == 5) 731 BS3_PROC_END_CMN bs3CpuInstr2_rdgsbase_rbx_ud2 732 733 734 BS3_PROC_BEGIN_CMN bs3CpuInstr2_rdgsbase_ebx_ud2, BS3_PBC_NEAR 735 rdgsbase ebx 736 .again: 737 ud2 738 jmp .again 739 AssertCompile(.again - BS3_LAST_LABEL == 4) 740 BS3_PROC_END_CMN bs3CpuInstr2_rdgsbase_ebx_ud2 741 742 743 ;; @todo figure out this fudge. sigh. 744 times (348) db 0cch ; fudge to avoid 'rderr' during boot. 745 746 %endif ; TMPL_BITS == 64 75 BS3_PROC_END_CMN bs3CpuInstr3_vxorps_XMM1_XMM1_FSxBX_icebp 747 76 748 77 -
trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-3.c
r95361 r95373 1 1 /* $Id$ */ 2 2 /** @file 3 * BS3Kit - bs3-cpu-instr- 2, 16-bit C code.3 * BS3Kit - bs3-cpu-instr-3, SSE and AVX instructions, 16-bit C code. 4 4 */ 5 5 … … 35 35 * Internal Functions * 36 36 *********************************************************************************************************************************/ 37 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_mul); 38 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_imul); 39 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_div); 40 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_idiv); 41 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_bsf_tzcnt); 42 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_bsr_lzcnt); 43 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_andn); 44 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_bextr); 45 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_blsr); 46 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_blsmsk); 47 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_blsi); 48 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_bzhi); 49 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_pdep); 50 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_pext); 51 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_rorx); 52 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_shlx); 53 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_sarx); 54 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_shrx); 55 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_mulx); 56 BS3TESTMODE_PROTOTYPES_CMN(bs3CpuInstr2_popcnt); 57 BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_cmpxchg16b); 58 BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_wrfsbase); 59 BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_wrgsbase); 60 BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_rdfsbase); 61 BS3TESTMODE_PROTOTYPES_CMN_64(bs3CpuInstr2_rdgsbase); 37 BS3TESTMODEBYMAX_PROTOTYPES_CMN(bs3CpuInstr3_xorps); 62 38 63 39 … … 65 41 * Global Variables * 66 42 *********************************************************************************************************************************/ 67 static const BS3TESTMODE ENTRY g_aModeTests[] =43 static const BS3TESTMODEBYMAXENTRY g_aTests[] = 68 44 { 69 #if 1 70 
BS3TESTMODEENTRY_CMN("mul", bs3CpuInstr2_mul), 71 BS3TESTMODEENTRY_CMN("imul", bs3CpuInstr2_imul), 72 BS3TESTMODEENTRY_CMN("div", bs3CpuInstr2_div), 73 BS3TESTMODEENTRY_CMN("idiv", bs3CpuInstr2_idiv), 74 #endif 75 #if 1 /* BSF/BSR (386+) & TZCNT/LZCNT (BMI1,ABM) */ 76 BS3TESTMODEENTRY_CMN("bsf/tzcnt", bs3CpuInstr2_bsf_tzcnt), 77 BS3TESTMODEENTRY_CMN("bsr/lzcnt", bs3CpuInstr2_bsr_lzcnt), 78 #endif 79 #if 1 /* BMI1 */ 80 BS3TESTMODEENTRY_CMN("andn", bs3CpuInstr2_andn), 81 BS3TESTMODEENTRY_CMN("bextr", bs3CpuInstr2_bextr), 82 BS3TESTMODEENTRY_CMN("blsr", bs3CpuInstr2_blsr), 83 BS3TESTMODEENTRY_CMN("blsmsk", bs3CpuInstr2_blsmsk), 84 BS3TESTMODEENTRY_CMN("blsi", bs3CpuInstr2_blsi), 85 #endif 86 #if 1 /* BMI2 */ 87 BS3TESTMODEENTRY_CMN("bzhi", bs3CpuInstr2_bzhi), 88 BS3TESTMODEENTRY_CMN("pdep", bs3CpuInstr2_pdep), 89 BS3TESTMODEENTRY_CMN("pext", bs3CpuInstr2_pext), 90 BS3TESTMODEENTRY_CMN("rorx", bs3CpuInstr2_rorx), 91 BS3TESTMODEENTRY_CMN("shlx", bs3CpuInstr2_shlx), 92 BS3TESTMODEENTRY_CMN("sarx", bs3CpuInstr2_sarx), 93 BS3TESTMODEENTRY_CMN("shrx", bs3CpuInstr2_shrx), 94 BS3TESTMODEENTRY_CMN("mulx", bs3CpuInstr2_mulx), 95 #endif 96 BS3TESTMODEENTRY_CMN("popcnt", bs3CpuInstr2_popcnt), /* Intel: POPCNT; AMD: ABM */ 97 #if 1 98 BS3TESTMODEENTRY_CMN_64("cmpxchg16b", bs3CpuInstr2_cmpxchg16b), 99 BS3TESTMODEENTRY_CMN_64("wrfsbase", bs3CpuInstr2_wrfsbase), 100 BS3TESTMODEENTRY_CMN_64("wrgsbase", bs3CpuInstr2_wrgsbase), 101 BS3TESTMODEENTRY_CMN_64("rdfsbase", bs3CpuInstr2_rdfsbase), 102 BS3TESTMODEENTRY_CMN_64("rdgsbase", bs3CpuInstr2_rdgsbase), 103 #endif 45 BS3TESTMODEBYMAXENTRY_CMN("xorps", bs3CpuInstr3_xorps), 104 46 }; 105 47 … … 108 50 { 109 51 Bs3InitAll_rm(); 110 Bs3TestInit("bs3-cpu-instr- 2");52 Bs3TestInit("bs3-cpu-instr-3"); 111 53 112 Bs3TestDoModes _rm(g_aModeTests, RT_ELEMENTS(g_aModeTests));54 Bs3TestDoModesByMax_rm(g_aTests, RT_ELEMENTS(g_aTests)); 113 55 114 56 Bs3TestTerm();
Note:
See TracChangeset
for help on using the changeset viewer.