VirtualBox

Changeset 101088 in vbox for trunk/src/VBox/VMM/include


Ignore:
Timestamp:
Sep 12, 2023 10:22:20 AM (18 months ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
159057
Message:

VMM/IEM,VMM/TM: Basic TB management and allocation rewrite. bugref:10369

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r100966 r101088  
    712712 *       For the same reasons, we skip all of IEM_F_X86_CTX_MASK, with the
    713713 *       exception of SMM (which we don't implement). */
    714 #define IEMTB_F_KEY_MASK                ((UINT32_C(0xffffffff) & ~(IEM_F_X86_CTX_MASK | IEM_F_X86_CPL_MASK)) | IEM_F_X86_CTX_SMM)
     714#define IEMTB_F_KEY_MASK                (  (UINT32_MAX & ~(IEM_F_X86_CTX_MASK | IEM_F_X86_CPL_MASK | IEMTB_F_TYPE_MASK)) \
     715                                         | IEM_F_X86_CTX_SMM)
    715716/** @} */
    716717
     
    781782/**
    782783 * Translation block.
     784 *
     785 * The current plan is to just keep TBs and associated lookup hash table private
     786 * to each VCpu as that simplifies TB removal greatly (no races) and generally
     787 * avoids using expensive atomic primitives for updating lists and stuff.
    783788 */
    784789#pragma pack(2) /* to prevent the Thrd structure from being padded unnecessarily */
     
    786791{
    787792    /** Next block with the same hash table entry. */
    788     struct IEMTB * volatile pNext;
    789     /** List on the local VCPU for blocks. */
    790     RTLISTNODE          LocalList;
     793    struct IEMTB       *pNext;
     794    /** Usage counter. */
     795    uint32_t            cUsed;
     796    /** The IEMCPU::msRecompilerPollNow last time it was used. */
     797    uint32_t            msLastUsed;
     798    /** The allocation chunk this TB belongs to. */
     799    uint8_t             idxAllocChunk;
     800
     801    uint8_t             abUnused[3];
     802    uint32_t            uUnused;
     803
    791804
    792805    /** @name What uniquely identifies the block.
     
    822835            uint16_t            cAllocated;
    823836        } Thrd;
     837        struct
     838        {
     839            uint8_t            *pbCode;
     840            /** Amount of code that pbCode points to. */
     841            uint32_t            cbAllocated;
     842        } Native;
     843        /** Generic view for zeroing when freeing. */
     844        struct
     845        {
     846            uintptr_t           uPtr;
     847            uint32_t            uData;
     848        } Gen;
    824849    };
    825850
     
    872897AssertCompileMemberOffset(IEMTB, cbOpcodes, 52);
    873898AssertCompileMemberSize(IEMTB, aRanges[0], 6);
     899#if 1
    874900AssertCompileSize(IEMTB, 128);
     901# define IEMTB_SIZE_IS_POWER_OF_TWO /**< The IEMTB size is a power of two. */
     902#else
     903AssertCompileSize(IEMTB, 168);
     904# undef  IEMTB_SIZE_IS_POWER_OF_TWO
     905#endif
     906
    875907/** Pointer to a translation block. */
    876908typedef IEMTB *PIEMTB;
    877909/** Pointer to a const translation block. */
    878910typedef IEMTB const *PCIEMTB;
     911
     912/**
     913 * A chunk of memory in the TB allocator.
     914 */
     915typedef struct IEMTBCHUNK
     916{
     917    /** Pointer to the translation blocks in this chunk. */
     918    PIEMTB          paTbs;
     919#ifdef IN_RING0
     920    /** Allocation handle. */
     921    RTR0MEMOBJ      hMemObj;
     922#endif
     923} IEMTBCHUNK;
     924
     925/**
     926 * A per-CPU translation block allocator.
     927 *
     928 * Because of how the IEMTBCACHE uses the lower 6 bits of the TB address to keep
     929 * the length of the collision list, and of course also for cache line alignment
     930 * reasons, the TBs must be allocated with at least 64-byte alignment.
      932 * Memory is therefore allocated using one of the page aligned allocators.
     932 *
     933 *
     934 * To avoid wasting too much memory, it is allocated piecemeal as needed,
     935 * in chunks (IEMTBCHUNK) of 2 MiB or more.  The TB has an 8-bit chunk index
     936 * that enables us to quickly calculate the allocation bitmap position when
     937 * freeing the translation block.
     938 */
     939typedef struct IEMTBALLOCATOR
     940{
     941    /** Magic value (IEMTBALLOCATOR_MAGIC). */
     942    uint32_t        uMagic;
     943
     944#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
     945    /** Mask corresponding to cTbsPerChunk - 1. */
     946    uint32_t        fChunkMask;
     947    /** Shift count corresponding to cTbsPerChunk. */
     948    uint8_t         cChunkShift;
     949#else
     950    uint32_t        uUnused;
     951    uint8_t         bUnused;
     952#endif
     953    /** Number of chunks we're allowed to allocate. */
     954    uint8_t         cMaxChunks;
     955    /** Number of chunks currently populated. */
     956    uint16_t        cAllocatedChunks;
     957    /** Number of translation blocks per chunk. */
     958    uint32_t        cTbsPerChunk;
     959    /** Chunk size. */
     960    uint32_t        cbPerChunk;
     961
     962    /** The maximum number of TBs. */
     963    uint32_t        cMaxTbs;
     964    /** Total number of TBs in the populated chunks.
     965     * (cAllocatedChunks * cTbsPerChunk) */
     966    uint32_t        cTotalTbs;
     967    /** The current number of TBs in use.
      968     * The number of free TBs: cTotalTbs - cInUseTbs; */
     969    uint32_t        cInUseTbs;
     970    /** Statistics: Number of the cInUseTbs that are native ones. */
     971    uint32_t        cNativeTbs;
     972    /** Statistics: Number of the cInUseTbs that are threaded ones. */
     973    uint32_t        cThreadedTbs;
     974
     975    /** Where to start pruning TBs from when we're out.
     976     *  See iemTbAllocatorAllocSlow for details. */
     977    uint32_t        iPruneFrom;
     978    /** Hint about which bit to start scanning the bitmap from. */
     979    uint32_t        iStartHint;
     980
     981    /** Statistics: Number of TB allocation calls. */
     982    STAMCOUNTER     StatAllocs;
     983    /** Statistics: Number of TB free calls. */
     984    STAMCOUNTER     StatFrees;
     985    /** Statistics: Time spend pruning. */
     986    STAMPROFILE     StatPrune;
     987
     988    /** Allocation chunks. */
     989    IEMTBCHUNK      aChunks[256];
     990
      991    /** Allocation bitmap for all possible chunks. */
     992    RT_FLEXIBLE_ARRAY_EXTENSION
     993    uint64_t        bmAllocated[RT_FLEXIBLE_ARRAY];
     994} IEMTBALLOCATOR;
     995/** Pointer to a TB allocator. */
     996typedef struct IEMTBALLOCATOR *PIEMTBALLOCATOR;
     997
     998/** Magic value for the TB allocator (Emmet Harley Cohen). */
     999#define IEMTBALLOCATOR_MAGIC        UINT32_C(0x19900525)
     1000
     1001
     1002/**
     1003 * A per-CPU translation block cache (hash table).
     1004 *
      1005 * The hash table is allocated once during IEM initialization and sized to
      1006 * double the max TB count, rounded up to the nearest power of two (so we can
      1007 * use an AND mask rather than a remainder operation when hashing).
     1008 */
     1009typedef struct IEMTBCACHE
     1010{
     1011    /** Magic value (IEMTBCACHE_MAGIC). */
     1012    uint32_t        uMagic;
     1013    /** Size of the hash table.  This is a power of two. */
     1014    uint32_t        cHash;
     1015    /** The mask corresponding to cHash. */
     1016    uint32_t        uHashMask;
     1017    uint32_t        uPadding;
     1018
     1019    /** @name Statistics
     1020     * @{ */
     1021    /** Number of collisions ever. */
     1022    STAMCOUNTER     cCollisions;
     1023
     1024    /** Statistics: Number of TB lookup misses. */
     1025    STAMCOUNTER     cLookupMisses;
     1026    /** Statistics: Number of TB lookup hits (debug only). */
     1027    STAMCOUNTER     cLookupHits;
     1028    STAMCOUNTER     auPadding2[3];
     1029    /** Statistics: Collision list length pruning. */
     1030    STAMPROFILE     StatPrune;
     1031    /** @} */
     1032
     1033    /** The hash table itself.
     1034     * @note The lower 6 bits of the pointer is used for keeping the collision
     1035     *       list length, so we can take action when it grows too long.
     1036     *       This works because TBs are allocated using a 64 byte (or
     1037     *       higher) alignment from page aligned chunks of memory, so the lower
     1038     *       6 bits of the address will always be zero.
     1039     *       See IEMTBCACHE_PTR_COUNT_MASK, IEMTBCACHE_PTR_MAKE and friends.
     1040     */
     1041    RT_FLEXIBLE_ARRAY_EXTENSION
     1042    PIEMTB          apHash[RT_FLEXIBLE_ARRAY];
     1043} IEMTBCACHE;
      1044/** Pointer to a per-CPU translation block cache. */
     1045typedef IEMTBCACHE *PIEMTBCACHE;
     1046
     1047/** Magic value for IEMTBCACHE (Johnny O'Neal). */
     1048#define IEMTBCACHE_MAGIC            UINT32_C(0x19561010)
     1049
     1050/** The collision count mask for IEMTBCACHE::apHash entries. */
     1051#define IEMTBCACHE_PTR_COUNT_MASK               ((uintptr_t)0x3f)
     1052/** The max collision count for IEMTBCACHE::apHash entries before pruning. */
     1053#define IEMTBCACHE_PTR_MAX_COUNT                ((uintptr_t)0x30)
     1054/** Combine a TB pointer and a collision list length into a value for an
     1055 *  IEMTBCACHE::apHash entry. */
     1056#define IEMTBCACHE_PTR_MAKE(a_pTb, a_cCount)    (PIEMTB)((uintptr_t)(a_pTb) | (a_cCount))
      1057/** Extract the TB pointer from an IEMTBCACHE::apHash entry, masking off the
      1058 *  collision list length. */
     1059#define IEMTBCACHE_PTR_GET_TB(a_pHashEntry)     (PIEMTB)((uintptr_t)(a_pHashEntry) & ~IEMTBCACHE_PTR_COUNT_MASK)
      1060/** Extract the collision list length from an IEMTBCACHE::apHash entry,
      1061 *  masking off the TB pointer. */
     1062#define IEMTBCACHE_PTR_GET_COUNT(a_pHashEntry)  ((uintptr_t)(a_pHashEntry) & IEMTBCACHE_PTR_COUNT_MASK)
     1063
     1064/**
     1065 * Calculates the hash table slot for a TB from physical PC address and TB flags.
     1066 */
     1067#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
     1068    IEMTBCACHE_HASH_NO_KEY_MASK(a_paCache, (a_fTbFlags) & IEMTB_F_KEY_MASK, a_GCPhysPc)
     1069
     1070/**
     1071 * Calculates the hash table slot for a TB from physical PC address and TB
     1072 * flags, ASSUMING the caller has applied IEMTB_F_KEY_MASK to @a a_fTbFlags.
     1073 */
     1074#define IEMTBCACHE_HASH_NO_KEY_MASK(a_paCache, a_fTbFlags, a_GCPhysPc) \
     1075    (((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
     1076
    8791077
    8801078/** @name IEMBRANCHED_F_XXX - Branched indicator (IEMCPU::fTbBranched).
     
    11851383     * components as needed. */
    11861384    R3PTRTYPE(PIEMTB)       pNativeCompileTbR3;
     1385    /** Pointer to the ring-3 TB cache for this EMT. */
     1386    R3PTRTYPE(PIEMTBCACHE)  pTbCacheR3;
    11871387    /** The PC (RIP) at the start of pCurTbR3/pCurTbR0.
    11881388     * The TBs are based on physical addresses, so this is needed to correlate
    11891389     * RIP to opcode bytes stored in the TB (AMD-V / VT-x). */
    11901390    uint64_t                uCurTbStartPc;
    1191     /** Statistics: Number of TB lookup misses. */
    1192     uint64_t                cTbLookupMisses;
    1193     /** Statistics: Number of TB lookup hits (debug only). */
    1194     uint64_t                cTbLookupHits;
    11951391    /** Number of TBs executed. */
    11961392    uint64_t                cTbExec;
     
    12151411    bool                    fTbCurInstrIsSti;
    12161412    /** Space reserved for recompiler data / alignment. */
    1217     bool                    afRecompilerStuff1[2];
     1413    bool                    afRecompilerStuff1[2+4];
     1414    /** The virtual sync time at the last timer poll call. */
     1415    uint32_t                msRecompilerPollNow;
    12181416    /** Previous GCPhysInstrBuf value - only valid if fTbCrossedPage is set.   */
    12191417    RTGCPHYS                GCPhysInstrBufPrev;
     
    12251423    /** Copy of IEMCPU::uInstrBufPc after decoding a branch instruction.  */
    12261424    uint64_t                GCVirtTbBranchSrcBuf;
     1425    /** Pointer to the ring-3 TB allocator for this EMT. */
     1426    R3PTRTYPE(PIEMTBALLOCATOR) pTbAllocatorR3;
    12271427    /* Alignment. */
    1228     uint64_t                auAlignment10[6];
    1229     /** Statistics: Number of TB allocation calls. */
    1230     uint64_t                cTbAllocs;
    1231     /** Statistics: Number of TB free calls. */
    1232     uint64_t                cTbFrees;
     1428    uint64_t                auAlignment10[7];
    12331429    /** Statistics: Times TB execution was broken off before reaching the end. */
    12341430    STAMCOUNTER             StatTbExecBreaks;
     
    51055301extern const PFNIEMOP g_apfnIemThreadedRecompilerVecMap3[1024];
    51065302
     5303DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs);
    51075304void            iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb);
    51085305
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette