Changeset 103913 in vbox
- Timestamp: Mar 19, 2024 11:47:09 AM (10 months ago)
- File: trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp (1 edited)
Legend:
- Unmodified: context lines (no prefix)
- Added: lines prefixed with +
- Removed: lines prefixed with -
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r103912 → r103913

 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+# ifdef RT_ARCH_AMD64
+/**
+ * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
+ */
+DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
+{
+    /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    if (idxSimdReg >= 8)
+        pbCodeBuf[off++] = X86_OP_REX_R;
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x38;
+    pbCodeBuf[off++] = 0x29;
+    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
+
+    /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    pbCodeBuf[off++] = X86_OP_REX_W
+                     | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
+                     | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x3a;
+    pbCodeBuf[off++] = 0x16;
+    pbCodeBuf[off++] = 0xeb;
+    pbCodeBuf[off++] = 0x00;
+
+    /* cmp tmp0, 0xffffffffffffffff. */
+    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x83;
+    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
+    pbCodeBuf[off++] = 0xff;
+
+    /* je/jz +1 */
+    pbCodeBuf[off++] = 0x74;
+    pbCodeBuf[off++] = 0x01;
+
+    /* int3 */
+    pbCodeBuf[off++] = 0xcc;
+
+    /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
+    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    pbCodeBuf[off++] = X86_OP_REX_W
+                     | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
+                     | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x3a;
+    pbCodeBuf[off++] = 0x16;
+    pbCodeBuf[off++] = 0xeb;
+    pbCodeBuf[off++] = 0x01;
+
+    /* cmp tmp0, 0xffffffffffffffff. */
+    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
+    pbCodeBuf[off++] = 0x83;
+    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
+    pbCodeBuf[off++] = 0xff;
+
+    /* je/jz +1 */
+    pbCodeBuf[off++] = 0x74;
+    pbCodeBuf[off++] = 0x01;
+
+    /* int3 */
+    pbCodeBuf[off++] = 0xcc;
+
+    return off;
+}
+# endif
+
+
 /**
  * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same

 ...

 # ifdef RT_ARCH_AMD64
-    Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
-
-    /* movdqa vectmp0, idxSimdReg */
-    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
-
-    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
-
-    /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
-    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
-    if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
-        pbCodeBuf[off++] = X86_OP_REX_R;
-    pbCodeBuf[off++] = 0x0f;
-    pbCodeBuf[off++] = 0x38;
-    pbCodeBuf[off++] = 0x29;
-    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
-
-    /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
-    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
-    pbCodeBuf[off++] = X86_OP_REX_W
-                     | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
-                     | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
-    pbCodeBuf[off++] = 0x0f;
-    pbCodeBuf[off++] = 0x3a;
-    pbCodeBuf[off++] = 0x16;
-    pbCodeBuf[off++] = 0xeb;
-    pbCodeBuf[off++] = 0x00;
-
-    /* cmp tmp0, 0xffffffffffffffff. */
-    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
-    pbCodeBuf[off++] = 0x83;
-    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
-    pbCodeBuf[off++] = 0xff;
-
-    /* je/jz +1 */
-    pbCodeBuf[off++] = 0x74;
-    pbCodeBuf[off++] = 0x01;
-
-    /* int3 */
-    pbCodeBuf[off++] = 0xcc;
-
-    /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
-    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
-    pbCodeBuf[off++] = X86_OP_REX_W
-                     | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
-                     | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
-    pbCodeBuf[off++] = 0x0f;
-    pbCodeBuf[off++] = 0x3a;
-    pbCodeBuf[off++] = 0x16;
-    pbCodeBuf[off++] = 0xeb;
-    pbCodeBuf[off++] = 0x01;
-
-    /* cmp tmp0, 0xffffffffffffffff. */
-    pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
-    pbCodeBuf[off++] = 0x83;
-    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
-    pbCodeBuf[off++] = 0xff;
-
-    /* je/jz +1 */
-    pbCodeBuf[off++] = 0x74;
-    pbCodeBuf[off++] = 0x01;
-
-    /* int3 */
-    pbCodeBuf[off++] = 0xcc;
-
+    if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
+    {
+        /* movdqa vectmp0, idxSimdReg */
+        off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
+
+        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
+
+        off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
+                                                          g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
+    }
+
+    if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
+    {
+        /* Due to the fact that CPUMCTX stores the high 128-bit separately we need to do this all over again for the high part. */
+        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
+
+        /* vextracti128 vectmp0, idxSimdReg, 1 */
+        pbCodeBuf[off++] = X86_OP_VEX3;
+        pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
+                         | X86_OP_VEX3_BYTE1_X
+                         | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
+                         | 0x03; /* Opcode map */
+        pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
+        pbCodeBuf[off++] = 0x39;
+        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
+        pbCodeBuf[off++] = 0x01;
+
+        off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
+                                                          g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
+    }
 # elif defined(RT_ARCH_ARM64)
     /* mov vectmp0, [gstreg] */
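For readers decoding the emitted bytes: the new helper generates a pcmpeqq against the guest register copy at the given vCPU offset, then pulls each 64-bit lane out with pextrq, compares it against all-ones, and drops an int3 breakpoint that fires on any mismatch. The following is a minimal standalone sketch of that same SSE4.1 pattern using compiler intrinsics; it is not VBox code, and the function and parameter names are made up for illustration:

    #include <stdint.h>
    #include <smmintrin.h> /* SSE4.1: _mm_cmpeq_epi64, _mm_extract_epi64 */

    /* Returns non-zero when both 64-bit lanes of the shadow register value match
     * the in-memory guest value -- the condition the emitted code asserts with int3. */
    static int simdLow128Matches(__m128i xmmShadow, const void *pvGuestXmm)
    {
        __m128i const xmmGuest = _mm_loadu_si128((const __m128i *)pvGuestXmm);
        __m128i const xmmEq    = _mm_cmpeq_epi64(xmmShadow, xmmGuest);      /* pcmpeqq          */
        return (uint64_t)_mm_extract_epi64(xmmEq, 0) == UINT64_MAX          /* pextrq #0, cmp -1 */
            && (uint64_t)_mm_extract_epi64(xmmEq, 1) == UINT64_MAX;         /* pextrq #1, cmp -1 */
    }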
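The reason the recompiler change runs this check twice for a 256-bit register is stated in the added comment: CPUMCTX keeps the low 128 bits (offXmm) and the high 128 bits (offYmm) of a YMM register at separate offsets, so the emitter extracts the high half with vextracti128 into the fixed SIMD temporary and reruns the same 128-bit check against offYmm. A standalone AVX2 sketch of that split, again with hypothetical names and not taken from VBox:

    #include <immintrin.h> /* AVX2: _mm256_extracti128_si256; SSE4.1: _mm_test_all_ones */

    /* Check a 256-bit shadow value against the guest's low and high 128-bit halves,
     * mirroring the two iemNativeEmitGuestSimdRegValueCheckVCpuU128 calls (offXmm, offYmm). */
    static int simd256Matches(__m256i ymmShadow, const void *pvGuestXmm, const void *pvGuestYmmHi)
    {
        __m128i const xmmLoEq = _mm_cmpeq_epi64(_mm256_castsi256_si128(ymmShadow),
                                                _mm_loadu_si128((const __m128i *)pvGuestXmm));
        __m128i const xmmHiEq = _mm_cmpeq_epi64(_mm256_extracti128_si256(ymmShadow, 1), /* vextracti128 ..., 1 */
                                                _mm_loadu_si128((const __m128i *)pvGuestYmmHi));
        /* Both halves must compare all-ones for the register to be considered in sync. */
        return _mm_test_all_ones(_mm_and_si128(xmmLoEq, xmmHiEq));
    }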