VirtualBox

Ignore:
Timestamp:
Apr 19, 2024 8:31:27 AM (13 months ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162862
Message:

VMM/IEM: Implement native emitter for pmovmskb, which is used by at least Linux guests (string/memory search), bugref:10652

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h

    r104348 r104373  
    23392339IEMNATIVE_NATIVE_EMIT_PCMP_U128(pcmpeqd, kArmv8VecInstrCmpOp_Eq, kArmv8VecInstrArithSz_32, 0x76);
    23402340
     2341
     2342/**
     2343 * Emitter for pmovmskb (register form, 128-bit SIMD source): emits host code that gathers the top bit of each source byte into the destination guest GPR and returns the updated code buffer offset.
     2344 */
     2345DECL_INLINE_THROW(uint32_t)
     2346iemNativeEmit_pmovmskb_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     2347                               uint8_t const idxGstRegDst, uint8_t const idxSimdGstRegSrc)
     2348{
     2349#ifdef RT_ARCH_AMD64
     2350    uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegDst), kIemNativeGstRegUse_ForFullWrite);
     2351    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
     2352                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     2353    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5); /* Worst case: prefix + REX + 2 opcode bytes + ModR/M. */
     2354
     2355    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; /* Operand size prefix, emitted ahead of the optional REX byte. */
     2356    if (idxRegDst >= 8 || idxSimdRegSrc >= 8)
     2357        pCodeBuf[off++] =   (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
     2358                          | (idxRegDst     >= 8 ? X86_OP_REX_R : 0); /* REX is only needed when a host register index is 8 or higher. */
     2359    pCodeBuf[off++] = 0x0f; /* Two byte opcode: 0x0f 0xd7. */
     2360    pCodeBuf[off++] = 0xd7;
     2361    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegDst & 7, idxSimdRegSrc & 7); /* Register direct form; only the low 3 bits of each index go here. */
     2362
     2363    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     2364    iemNativeRegFreeTmp(pReNative, idxRegDst);
     2365#elif defined(RT_ARCH_ARM64)
     2366    uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegDst), kIemNativeGstRegUse_ForFullWrite);
     2367    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
     2368    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
     2369                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_Calculation); /* NOTE(review): presumably yields a scratch copy so the in-place shifts below don't clobber the guest value - confirm against the allocator. */
     2370    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7); /* 4 vector shifts + 2 UMOV + 1 ORR. */
     2371
     2372    /*
     2373     * See https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
     2374     * for different approaches as NEON doesn't have an instruction equivalent to pmovmskb, so we have to emulate that.
     2375     *
     2376     * As there is no way around emulating the exact semantics of pmovmskb we will use the same algorithm as the sse2neon implementation because
     2377     * there we can get away without loading any constants and the base algorithm is only 4 NEON instructions (+ 3 for extracting the result to a general register).
     2378     *
     2379     * The following illustrates the algorithm (only the bits that end up in the result are shown; the accumulating shifts leave stale bits in the other bytes, which are never read):
     2380     *
     2381     *     Byte vector Element ->   15       14       13       12       11       10        9        8        7        6       5        4         3        2        1        0
     2382     *     Instruction
     2383     *          |
     2384     *          V
     2385     *                           Axxxxxxx Bxxxxxxx Cxxxxxxx Dxxxxxxx Exxxxxxx Fxxxxxxx Gxxxxxxx Hxxxxxxx Ixxxxxxx Jxxxxxxx Kxxxxxxx Lxxxxxxx Mxxxxxxx Nxxxxxxx Oxxxxxxx Pxxxxxxx
     2386     *     USHR v.16B, v.16B, #7 0000000A 0000000B 0000000C 0000000D 0000000E 0000000F 0000000G 0000000H 0000000I 0000000J 0000000K 0000000L 0000000M 0000000N 0000000O 0000000P
     2387     *     USRA v.8H,  v.8H,  #7 00000000 000000AB 00000000 000000CD 00000000 000000EF 00000000 000000GH 00000000 000000IJ 00000000 000000KL 00000000 000000MN 00000000 000000OP
     2388     *     USRA v.4S,  v.4S, #14 00000000 00000000 00000000 0000ABCD 00000000 00000000 00000000 0000EFGH 00000000 00000000 00000000 0000IJKL 00000000 00000000 00000000 0000MNOP
     2389     *     USRA v.2D,  v.2D, #28 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH 00000000 00000000 00000000 00000000 00000000 00000000 00000000 IJKLMNOP
     2390     *
     2391     * The extraction process
     2392     *     UMOV wTMP,  v.16B[8]             00000000 00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH
     2393     *     UMOV wRES,  v.16B[0]             00000000 00000000 00000000 00000000 00000000 00000000 00000000 IJKLMNOP
     2394     *     ORR  xRES, xRES, xTMP, LSL #8    00000000 00000000 00000000 00000000 00000000 00000000 ABCDEFGH IJKLMNOP
     2395     */
     2396    pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc,  7, kArmv8InstrShiftSz_U8);
     2397    pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc,  7, kArmv8InstrShiftSz_U16, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/);
     2398    pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 14, kArmv8InstrShiftSz_U32, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/);
     2399    pCodeBuf[off++] = Armv8A64MkVecInstrShrImm(idxSimdRegSrc, idxSimdRegSrc, 28, kArmv8InstrShiftSz_U64, true /*fUnsigned*/, false /*fRound*/, true /*fAccum*/);
     2400    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(idxRegTmp, idxSimdRegSrc, 8, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/); /* Byte 8: mask bits of the upper eight source bytes. */
     2401    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(idxRegDst, idxSimdRegSrc, 0, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/); /* Byte 0: mask bits of the lower eight source bytes. */
     2402    pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegDst, idxRegDst, idxRegTmp, true /*f64Bit*/, 8 /*offShift6*/); /* Merge the two halves into the 16-bit mask. */
     2403
     2404    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     2405    iemNativeRegFreeTmp(pReNative, idxRegTmp);
     2406    iemNativeRegFreeTmp(pReNative, idxRegDst);
     2407#else
     2408# error "Port me"
     2409#endif
     2410    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2411    return off;
     2412}
     2413
    23412414#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    23422415
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette