Changeset 104373 in vbox for trunk/src/VBox/VMM/VMMAll/target-x86
- Timestamp:
- Apr 19, 2024 8:31:27 AM (13 months ago)
- svn:sync-xref-src-repo-rev:
- 162862
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h
r104348 r104373 2339 2339 IEMNATIVE_NATIVE_EMIT_PCMP_U128(pcmpeqd, kArmv8VecInstrCmpOp_Eq, kArmv8VecInstrArithSz_32, 0x76); 2340 2340 2341 2342 /** 2343 * Emitter for pmovmskb 2344 */ 2345 DECL_INLINE_THROW(uint32_t) 2346 iemNativeEmit_pmovmskb_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off, 2347 uint8_t const idxGstRegDst, uint8_t const idxSimdGstRegSrc) 2348 { 2349 #ifdef RT_ARCH_AMD64 2350 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegDst), kIemNativeGstRegUse_ForFullWrite); 2351 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc), 2352 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly); 2353 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5); 2354 2355 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 2356 if (idxRegDst >= 8 || idxSimdRegSrc >= 8) 2357 pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0) 2358 | (idxRegDst >= 8 ? X86_OP_REX_R : 0); 2359 pCodeBuf[off++] = 0x0f; 2360 pCodeBuf[off++] = 0xd7; 2361 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegDst & 7, idxSimdRegSrc & 7); 2362 2363 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc); 2364 iemNativeRegFreeTmp(pReNative, idxRegDst); 2365 #elif defined(RT_ARCH_ARM64) 2366 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegDst), kIemNativeGstRegUse_ForFullWrite); 2367 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off); 2368 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc), 2369 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_Calculation); 2370 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7); 2371 2372 /* 2373 * See https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon 2374 * for different approaches as NEON doesn't has an instruction equivalent for pmovmskb, so we have to emulate that. 2375 * 2376 * As there is no way around emulating the exact semantics of pmovmskb we will use the same algorithm as the sse2neon implementation because 2377 * there we can get away with loading any constants and the base algorithm is only 4 NEON instructions (+ 3 for extracting the result to a general register). 2378 * 2379 * The following illustrates the algorithm: 2380 * 2381 * Byte vector Element -> 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 2382 * Instruction 2383 * | 2384 * V 2385 * Axxxxxxx Bxxxxxxx Cxxxxxxx Dxxxxxxx Exxxxxxx Fxxxxxxx Gxxxxxxx Hxxxxxxx Ixxxxxxx Jxxxxxxx Kxxxxxxx Lxxxxxxx Mxxxxxxx Nxxxxxxx Oxxxxxxx Pxxxxxxx 2386 * USHR v.16B, v.16B, #7 0000000A 0000000B 0000000C 0000000D 0000000E 0000000F 0000000G 0000000H 0000000I 0000000J 0000000K 0000000L 0000000M 0000000N 0000000O 0000000P 2387 * USRA v.8H, v.8H, #7 00000000 000000AB 00000000 000000CD 00000000 000000EF 00000000 000000GH 00000000 000000IJ 00000000 000000KL 00000000 000000MN 00000000 000000OP 2388 * USRA v.4S, v.4S, #14 00000000 00000000 00000000 0000ABCD 00000000 00000000 00000000 0000EFGH 00000000 00000000 00000000 0000IJKL 00000000 00000000 00000000 0000MNOP 2389 * USRA v.2D, v.2D, #28 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH 00000000 00000000 00000000 00000000 00000000 00000000 00000000 IJKLMNOP 2390 * 2391 * The extraction process 2392 * UMOV wTMP, v.16B[8] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH 2393 * UMOV wRES, v.16B[0] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 IJKLMNOP 2394 * ORR xRES, xRES, xTMP, LSL #8 00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH IJKLMNOP 2395 */ 2396 pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 7, kArmv8InstrShiftSz_U8); 2397 pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 7, kArmv8InstrShiftSz_U16, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/); 2398 pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 14, kArmv8InstrShiftSz_U32, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/); 2399 pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 28, kArmv8InstrShiftSz_U64, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/); 2400 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(idxRegTmp, idxSimdRegSrc, 8, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/); 2401 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(idxRegDst, idxSimdRegSrc, 0, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/); 2402 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmp, true /*f64Bit*/, 8 /*offShift6*/); 2403 2404 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc); 2405 iemNativeRegFreeTmp(pReNative, idxRegTmp); 2406 iemNativeRegFreeTmp(pReNative, idxRegDst); 2407 #else 2408 # error "Port me" 2409 #endif 2410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 2411 return off; 2412 } 2413 2341 2414 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */ 2342 2415
Note:
See TracChangeset
for help on using the changeset viewer.