VirtualBox

Changeset 103990 in vbox for trunk/src/VBox/VMM


Timestamp:
Mar 21, 2024 2:23:23 PM
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162375
Message:

VMM/IEM: Fix the SIMD guest register value checking. cnt operates on byte elements, so the generated code only ever checked the low 64 bits instead of the whole 128 bits for matching values. Use uaddlv instead, which sums all byte values and stores the result in the low 16 bits of the vector register; the sum is zero exactly when the values match. bugref:10614
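To make the effect of the fix concrete, here is a minimal C model of the two checking schemes (illustration only, not VBox code; the struct and function names are made up). The old cnt-based sequence extracted only element D[0] of the per-byte popcount vector, so only the low 8 bytes of the eor result were ever inspected; the uaddlv-based sequence folds all 16 byte lanes into one sum, which is zero exactly when the shadow copy and the host register hold the same 128-bit value.

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative model only -- names are hypothetical, not VBox code. */
    typedef struct { uint8_t ab[16]; } U128MODEL;

    /* Old scheme (eor + cnt + umov D[0] + cbz): only bytes 0..7 of the XOR
       result influence the decision, so a mismatch in bytes 8..15 is missed.
       (__builtin_popcount is the GCC/Clang builtin.) */
    static bool modelCheckOld(U128MODEL const *pShadow, U128MODEL const *pHost)
    {
        uint64_t cBitsSetLow = 0;
        for (unsigned i = 0; i < 8; i++)
            cBitsSetLow += __builtin_popcount((unsigned)(pShadow->ab[i] ^ pHost->ab[i]));
        return cBitsSetLow == 0; /* reports "match" even if the high 64 bits differ */
    }

    /* New scheme (eor + uaddlv 16B + umov H[0] + cbz): the sum over all 16 XOR
       bytes fits in 16 bits (16 * 255 = 4080) and is zero iff the values match. */
    static bool modelCheckNew(U128MODEL const *pShadow, U128MODEL const *pHost)
    {
        uint16_t uSum = 0;
        for (unsigned i = 0; i < 16; i++)
            uSum = (uint16_t)(uSum + (uint8_t)(pShadow->ab[i] ^ pHost->ab[i]));
        return uSum == 0;
    }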

Location:
trunk/src/VBox/VMM/VMMAll
Files:
2 edited

Legend:

In the diffs below, lines prefixed with '+' were added, lines prefixed with '-' were removed, and all other lines are unchanged context.
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r103986 → r103990
 iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
 {
-    /* Allocate destination and source register. */
+    /*
+     * Allocate destination and source register.
+     *
+     * @note The order is important here when iXRegSrc == iXRegDst, because if iXRegDst gets allocated first for the full write
+     *       it won't load the actual value from CPUMCTX. When allocating iXRegSrc afterwards it will get duplicated from the already
+     *       allocated host register for iXRegDst containing garbage. This will be caught by the guest register value checking.
+     */
+    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
+                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
-    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
-                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
 
     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
     
 iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
 {
-    /* Allocate destination and source register. */
+    /*
+     * Allocate destination and source register.
+     *
+     * @note The order is important here when iYRegSrc == iYRegDst, because if iYRegDst gets allocated first for the full write
+     *       it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
+     *       allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking.
+     */
+    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
+                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                           kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
-    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
-                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
 
     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
     
 iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
 {
-    /* Allocate destination and source register. */
+    /*
+     * Allocate destination and source register.
+     *
+     * @note The order is important here when iYRegSrc == iYRegDst, because if iYRegDst gets allocated first for the full write
+     *       it won't load the actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get duplicated from the already
+     *       allocated host register for iYRegDst containing garbage. This will be caught by the guest register value checking.
+     */
+    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
+                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                           kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
-    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
-                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
 
     off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
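The reordered allocations above matter when the source and destination are the same guest register (for example a register-to-itself copy). The following is a deliberately tiny, self-contained toy model of that hazard; the allocator behaviour it encodes is an assumption made purely for illustration, and none of these names exist in the VBox sources.

    #include <stdint.h>
    #include <stdio.h>

    /* Toy shadow-register allocator: one guest SIMD register, one host register. */
    static uint32_t g_uGuestValueInCtx = 0x12345678u;  /* the value sitting in CPUMCTX  */
    static uint32_t g_uHostReg         = 0xdeadbeefu;  /* host register, initially junk */
    static int      g_fShadowed        = 0;            /* host reg shadows the guest reg? */

    /* ReadOnly allocation: reuse an existing shadow, otherwise load from the context. */
    static uint32_t allocReadOnly(void)
    {
        if (!g_fShadowed)
        {
            g_uHostReg  = g_uGuestValueInCtx;
            g_fShadowed = 1;
        }
        return g_uHostReg;
    }

    /* ForFullWrite allocation: establish the shadow but skip the load, since the
       caller promises to overwrite the whole register anyway. */
    static uint32_t allocForFullWrite(void)
    {
        g_fShadowed = 1;
        return g_uHostReg;   /* may still contain junk */
    }

    int main(void)
    {
        /* Old order (dst first): the later ReadOnly allocation sees a shadow and
           never loads from the context, so the "source" value is junk. */
        (void)allocForFullWrite();
        printf("dst first: src = %#x\n", (unsigned)allocReadOnly());

        /* Fixed order (src first): the value is loaded before the full-write
           allocation can establish an unloaded shadow. */
        g_uHostReg = 0xdeadbeefu; g_fShadowed = 0;
        uint32_t const uSrc = allocReadOnly();
        (void)allocForFullWrite();
        printf("src first: src = %#x\n", (unsigned)uSrc);
        return 0;
    }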
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103964 → r103990
         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
+
+        pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
         Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     }
     
         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
+
+        pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
         Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
     }
     
     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
     {
-        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
+        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
         /* eor vectmp0, vectmp0, idxSimdReg */
         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
-        /* cnt vectmp0, vectmp0, #0 */
-        pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
-        /* umov tmp0, vectmp0.D[0] */
+        /* uaddlv vectmp0, vectmp0.16B */
+        pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
+        /* umov tmp0, vectmp0.H[0] */
         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
-                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
+                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
         /* cbz tmp0, +1 */
         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
     
     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
     {
-        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
+        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
         /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
-        /* cnt vectmp0 + 1, vectmp0 + 1, #0 */
-        pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
-        /* umov tmp0, (vectmp0 + 1).D[0] */
+        /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
+        pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
+        /* umov tmp0, (vectmp0 + 1).H[0] */
         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
-                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
+                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
         /* cbz tmp0, +1 */
         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
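For reference, the sequence the recompiler now emits per 128-bit half corresponds roughly to the following ARM NEON intrinsics. This is a sketch assuming an AArch64 host; the function name is made up, and the recompiler itself emits the instructions directly rather than going through intrinsics.

    #include <arm_neon.h>
    #include <stdbool.h>

    /* Hypothetical helper mirroring the emitted eor + uaddlv.16b + umov H[0] + cbz
       sequence: returns true when the shadow copy and the host register are equal. */
    static bool simdValuesMatch128(uint8x16_t vShadowCopy, uint8x16_t vHostReg)
    {
        uint8x16_t const vXor = veorq_u8(vShadowCopy, vHostReg);  /* eor vectmp0, vectmp0, idxSimdReg */
        uint16_t   const uSum = vaddlvq_u8(vXor);                 /* uaddlv + umov vectmp0.H[0]       */
        return uSum == 0;                                         /* cbz tmp0, ...                    */
    }

The widening accumulation is what makes a single 16-bit lane sufficient: 16 byte lanes of at most 255 sum to at most 4080, so no part of the XOR result is dropped, which was exactly the problem with the previous cnt/D[0] pair.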