VirtualBox

Changeset 87315 in vbox for trunk/src


Ignore:
Timestamp:
Jan 20, 2021 9:34:35 AM (4 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
142288
Message:

AMD IOMMU: bugref:9654 IOTLB cache bits.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp

    r87293 r87315  
    2727
    2828#include <iprt/x86.h>
    29 #include <iprt/alloc.h>
    3029#include <iprt/string.h>
    3130
     
    4140/** The current saved state version. */
    4241#define IOMMU_SAVED_STATE_VERSION                   1
    43 /** The IOTLB entry magic. */
    44 #define IOMMU_IOTLBE_MAGIC                          0x10acce55
    45 
     42/** The IOMMU device instance magic (stored in IOMMU::u32Magic by the constructor). */
     43#define IOMMU_MAGIC                                 0x10acce55
     44/** The maximum number of IOTLB entries in our cache implementation (the size of the pre-allocated IOTLBE array). */
     45#define IOMMU_IOTLBE_MAX                            64
     46/** Enable the IOTLBE cache (compiles in the device ID to domain ID cache and the IOTLB entry cache). */
     47#define IOMMU_WITH_IOTLBE_CACHE
     48#ifdef IOMMU_WITH_IOTLBE_CACHE
     49/** The mask of bits for the domain ID of the IOTLBE key (bits 63:40 of the key). */
     50# define IOMMU_IOTLB_DOMAIN_ID_MASK                 UINT64_C(0xffffff0000000000)
     51/** The number of bits to shift for the domain ID of the IOTLBE key (position of the lowest domain ID bit in the key). */
     52# define IOMMU_IOTLB_DOMAIN_ID_SHIFT                40
     53#endif
    4654
    4755/*********************************************************************************************************************************
     
    116124 * I/O page walk result.
    117125 */
    118 typedef struct
     126typedef struct IOWALKRESULT
    119127{
    120128    /** The translated system physical address. */
     
    133141
    134142/**
     143 * IOMMU I/O Device ID mapping.
     144 */
     145#pragma pack(1)
     146typedef struct IODOMAIN
     147{
     148    /** The domain ID assigned by software. */
     149    uint16_t        uDomainId;
     150    /** Whether the domain ID is valid (since all bits of domain ID are usable). */
     151    bool            fValid;
     152    bool            afAlignment[1];
     153} IODOMAIN;
     154#pragma pack()
     155/** Pointer to an I/O domain struct. */
     156typedef IODOMAIN *PIODOMAIN;
     157/** Pointer to a const I/O domain struct. */
     158typedef IODOMAIN *PCIODOMAIN;
     159AssertCompileSize(IODOMAIN, 4);
     160
     161/**
    135162 * IOMMU I/O TLB Entry.
    136163 * Keep this as small and aligned as possible.
    137164 */
    138 typedef struct
    139 {
    140     /** The translated system physical address (SPA) of the page. */
    141     RTGCPHYS        GCPhysSpa;
    142     /** The index of the 4K page within a large page. */
    143     uint32_t        idxSubPage;
    144     /** The I/O access permissions (IOMMU_IO_PERM_XXX). */
    145     uint8_t         fIoPerm;
    146     /** The number of offset bits in the translation indicating page size. */
    147     uint8_t         cShift;
    148     /** Alignment padding. */
    149     uint8_t         afPadding[2];
    150 } IOTLBE_T;
    151 AssertCompileSize(IOTLBE_T, 16);
     165typedef struct IOTLBE
     166{
     167    /** The AVL tree core (Core.Key is the 64-bit key the entry is stored under in the IOTLB cache tree). */
     168    AVLU64NODECORE  Core;
     169    /** List node for the LRU (Least Recently Used) list used for eviction. */
     170    RTLISTNODE      NdLru;
     171    /** The cached I/O page walk result of the translation (what a cache lookup hands back on a hit). */
     172    IOWALKRESULT    WalkResult;
     173} IOTLBE;
     174AssertCompileSizeAlignment(IOTLBE, 8);
    152175/** Pointer to an IOMMU I/O TLB entry struct. */
    153 typedef IOTLBE_T *PIOTLBE_T;
     176typedef IOTLBE *PIOTLBE;
    154177/** Pointer to a const IOMMU I/O TLB entry struct. */
    155 typedef IOTLBE_T const *PCIOTLBE_T;
     178typedef IOTLBE const *PCIOTLBE;
    156179
    157180/**
     
    162185    /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
    163186    uint32_t                    idxIommu;
    164     /** Alignment padding. */
    165     uint32_t                    uPadding0;
     187    /** IOMMU magic. */
     188    uint32_t                    u32Magic;
    166189
    167190    /** Whether the command thread is sleeping. */
     
    178201    /** The MMIO handle. */
    179202    IOMMMIOHANDLE               hMmio;
     203
     204#ifdef IOMMU_WITH_IOTLBE_CACHE
     205    /** L1 Cache - Maps [DeviceId] to [DomainId]. */
     206    PIODOMAIN                   paDomainIds;
     207    /** Pointer to array of allocated IOTLBEs. */
     208    PIOTLBE                     paIotlbes;
     209    /** L2 Cache - Maps [DomainId,Iova] to [IOTLBE]. */
     210    AVLU64TREE                  TreeIotlbe;
     211    /** LRU list anchor for IOTLB entries. */
     212    RTLISTANCHOR                LstLruIotlbe;
     213    /** Number of cached IOTLBEs. */
     214    uint32_t                    cCachedIotlbes;
     215    /** Padding. */
     216    uint32_t                    uPadding1;
     217#endif
    180218
    181219    /** @name PCI: Base capability block registers.
     
    300338    /** @name IOMMU: Stat counters.
    301339     * @{ */
    302     STAMCOUNTER             StatMmioReadR3;             /**< Number of MMIO reads in R3. */
    303     STAMCOUNTER             StatMmioReadRZ;             /**< Number of MMIO reads in RZ. */
    304     STAMCOUNTER             StatMmioWriteR3;            /**< Number of MMIO writes in R3. */
    305     STAMCOUNTER             StatMmioWriteRZ;            /**< Number of MMIO writes in RZ. */
    306 
    307     STAMCOUNTER             StatMsiRemapR3;             /**< Number of MSI remap requests in R3. */
    308     STAMCOUNTER             StatMsiRemapRZ;             /**< Number of MSI remap requests in RZ. */
    309 
    310     STAMCOUNTER             StatMemReadR3;              /**< Number of memory read translation requests in R3. */
    311     STAMCOUNTER             StatMemReadRZ;              /**< Number of memory read translation requests in RZ. */
    312     STAMCOUNTER             StatMemWriteR3;             /**< Number of memory write translation requests in R3. */
    313     STAMCOUNTER             StatMemWriteRZ;             /**< Number of memory write translation requests in RZ. */
    314 
    315     STAMCOUNTER             StatMemBulkReadR3;          /**< Number of memory read bulk translation requests in R3. */
    316     STAMCOUNTER             StatMemBulkReadRZ;          /**< Number of memory read bulk translation requests in RZ. */
    317     STAMCOUNTER             StatMemBulkWriteR3;         /**< Number of memory write bulk translation requests in R3. */
    318     STAMCOUNTER             StatMemBulkWriteRZ;         /**< Number of memory write bulk translation requests in RZ. */
    319 
    320     STAMCOUNTER             StatCmd;                    /**< Number of commands processed in total. */
    321     STAMCOUNTER             StatCmdCompWait;            /**< Number of Completion Wait commands processed. */
    322     STAMCOUNTER             StatCmdInvDte;              /**< Number of Invalidate DTE commands processed. */
    323     STAMCOUNTER             StatCmdInvIommuPages;       /**< Number of Invalidate IOMMU pages commands processed. */
    324     STAMCOUNTER             StatCmdInvIotlbPages;       /**< Number of Invalidate IOTLB pages commands processed. */
    325     STAMCOUNTER             StatCmdInvIntrTable;        /**< Number of Invalidate Interrupt Table commands processed. */
    326     STAMCOUNTER             StatCmdPrefIommuPages;      /**< Number of Prefetch IOMMU Pages commands processed. */
    327     STAMCOUNTER             StatCmdCompletePprReq;      /**< Number of Complete PPR Requests commands processed. */
    328     STAMCOUNTER             StatCmdInvIommuAll;         /**< Number of Invalidate IOMMU All commands processed. */
    329 
    330     STAMPROFILEADV          StatDteLookup;              /**< Profiling of device table entry lookup (uncached). */
     340    STAMCOUNTER                 StatMmioReadR3;             /**< Number of MMIO reads in R3. */
     341    STAMCOUNTER                 StatMmioReadRZ;             /**< Number of MMIO reads in RZ. */
     342    STAMCOUNTER                 StatMmioWriteR3;            /**< Number of MMIO writes in R3. */
     343    STAMCOUNTER                 StatMmioWriteRZ;            /**< Number of MMIO writes in RZ. */
     344
     345    STAMCOUNTER                 StatMsiRemapR3;             /**< Number of MSI remap requests in R3. */
     346    STAMCOUNTER                 StatMsiRemapRZ;             /**< Number of MSI remap requests in RZ. */
     347
     348    STAMCOUNTER                 StatMemReadR3;              /**< Number of memory read translation requests in R3. */
     349    STAMCOUNTER                 StatMemReadRZ;              /**< Number of memory read translation requests in RZ. */
     350    STAMCOUNTER                 StatMemWriteR3;             /**< Number of memory write translation requests in R3. */
     351    STAMCOUNTER                 StatMemWriteRZ;             /**< Number of memory write translation requests in RZ. */
     352
     353    STAMCOUNTER                 StatMemBulkReadR3;          /**< Number of memory read bulk translation requests in R3. */
     354    STAMCOUNTER                 StatMemBulkReadRZ;          /**< Number of memory read bulk translation requests in RZ. */
     355    STAMCOUNTER                 StatMemBulkWriteR3;         /**< Number of memory write bulk translation requests in R3. */
     356    STAMCOUNTER                 StatMemBulkWriteRZ;         /**< Number of memory write bulk translation requests in RZ. */
     357
     358    STAMCOUNTER                 StatCmd;                    /**< Number of commands processed in total. */
     359    STAMCOUNTER                 StatCmdCompWait;            /**< Number of Completion Wait commands processed. */
     360    STAMCOUNTER                 StatCmdInvDte;              /**< Number of Invalidate DTE commands processed. */
     361    STAMCOUNTER                 StatCmdInvIommuPages;       /**< Number of Invalidate IOMMU pages commands processed. */
     362    STAMCOUNTER                 StatCmdInvIotlbPages;       /**< Number of Invalidate IOTLB pages commands processed. */
     363    STAMCOUNTER                 StatCmdInvIntrTable;        /**< Number of Invalidate Interrupt Table commands processed. */
     364    STAMCOUNTER                 StatCmdPrefIommuPages;      /**< Number of Prefetch IOMMU Pages commands processed. */
     365    STAMCOUNTER                 StatCmdCompletePprReq;      /**< Number of Complete PPR Requests commands processed. */
     366    STAMCOUNTER                 StatCmdInvIommuAll;         /**< Number of Invalidate IOMMU All commands processed. */
     367
     368    STAMPROFILEADV              StatDteLookup;              /**< Profiling of device table entry lookup (uncached). */
    331369    /** @} */
    332370#endif
     
    340378AssertCompileMemberAlignment(IOMMU, hEvtCmdThread, 8);
    341379AssertCompileMemberAlignment(IOMMU, hMmio, 8);
     380#ifdef IOMMU_WITH_IOTLBE_CACHE
     381AssertCompileMemberAlignment(IOMMU, paDomainIds, 8);
     382AssertCompileMemberAlignment(IOMMU, paIotlbes, 8);
     383AssertCompileMemberAlignment(IOMMU, TreeIotlbe, 8);
     384AssertCompileMemberAlignment(IOMMU, LstLruIotlbe, 8);
     385#endif
    342386AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
    343387AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
     
    496540}
    497541#endif
     542
     543
     544#ifdef IOMMU_WITH_IOTLBE_CACHE
     545/**
     546 * Constructs the key for an IOTLB entry suitable for using as part of the IOTLB
     547 * cache.
     548 *
     549 * @returns The key for an IOTLB entry.
     550 * @param   uDomainId   The domain ID.
     551 * @param   uIova       The I/O virtual address.
     552 */
     553DECL_FORCE_INLINE(uint64_t) iommuAmdIotlbConstructKey(uint16_t uDomainId, uint64_t uIova)
     554{
     555    /*
     556     * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free.
     557     * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K,
     558     * so bottom 12 bits are free.
     559     *
     560     * Thus we use the top 24 bits of key to hold bits 15:0 of the domain ID.
     561     * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA.
     562     */
     563    uIova &= IOMMU_IOTLB_DOMAIN_ID_MASK;
     564    uIova >>= X86_PAGE_4K_SHIFT;
     565    return ((uint64_t)uDomainId << IOMMU_IOTLB_DOMAIN_ID_SHIFT) | uIova;
     566}
     567
     568
     569/**
     570 * Deconstructs the key of an IOTLB entry into the domain ID and IOVA.
     571 *
     572 * @param   uKey            The key for the IOTLB entry.
     573 * @param   puDomainId      Where to store the domain ID.
     574 * @param   puIova          Where to store the I/O virtual address.
     575 */
     576DECL_FORCE_INLINE(void) iommuAmdIotlbDeconstructKey(uint64_t uKey, uint16_t *puDomainId, uint64_t *puIova)
     577{
     578    *puDomainId = (uKey &  IOMMU_IOTLB_DOMAIN_ID_MASK) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT;
     579    *puIova     = (uKey & ~IOMMU_IOTLB_DOMAIN_ID_MASK) << X86_PAGE_4K_SHIFT;
     580}
     581
     582
     583/**
     584 * Looks up an IOTLB entry from the IOTLB cache.
     585 *
     586 * @returns Pointer to the I/O walk result or NULL if the entry is not found.
     587 * @param   pThis       The IOMMU device state.
     588 * @param   uDomainId   The domain ID.
     589 * @param   uIova       The I/O virtual address.
     590 */
     591static PIOWALKRESULT iommuAmdIotlbLookup(PIOMMU pThis, uint64_t uDomainId, uint64_t uIova)
     592{
     593    uint64_t const uKey = iommuAmdIotlbConstructKey(uDomainId, uIova);
     594    PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, uKey);
     595    if (pIotlbe)
     596        return &pIotlbe->WalkResult;
     597    return NULL;
     598}
     599
     600
     601/**
     602 * Adds an IOTLB entry corresponding to the given I/O page walk result.
     603 *
     604 * @param   pThis           The IOMMU device state.
     605 * @param   pWalkResult     The I/O page walk result to cache.
     606 */
     607static void iommuAmdIotlbAdd(PIOMMU pThis, PCIOWALKRESULT pWalkResult)
     608{
     609    /*
     610     * If the cache is full, evict the last recently used entry.
     611     * Otherwise, get a new IOTLB entry from the pre-allocated list.
     612     */
     613    PIOTLBE pIotlbe;
     614    if (pThis->cCachedIotlbes == IOMMU_IOTLBE_MAX)
     615    {
     616        pIotlbe = RTListRemoveFirst(&pThis->LstLruIotlbe, IOTLBE, NdLru);
     617        Assert(pIotlbe);
     618        RTAvlU64Remove(&pThis->TreeIotlbe, pIotlbe->Core.Key);
     619        --pThis->cCachedIotlbes;
     620        /* Zero out IOTLB entry before reuse. */
     621        RT_BZERO(pIotlbe, sizeof(IOTLBE));
     622    }
     623    else
     624    {
     625        pIotlbe = &pThis->paIotlbes[pThis->cCachedIotlbes];
     626        ++pThis->cCachedIotlbes;
     627        /* IOTLB entries have alredy been zero'ed during allocation. */
     628    }
     629
     630    /* Update the entry with the results of the page walk. */
     631    Assert(pIotlbe);
     632    pIotlbe->WalkResult = *pWalkResult;
     633
     634    /* Add the entry to the IOTLB cache. */
     635    RTAvlU64Insert(&pThis->TreeIotlbe, &pIotlbe->Core);
     636
     637    /*
     638     * Add the entry to the -end- of last recently used list signifying that
     639     * is the most recently used entry.
     640     */
     641    RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
     642}
     643#endif  /* IOMMU_WITH_IOTLBE_CACHE */
    498644
    499645
     
    44014547#endif
    44024548
     4549
    44034550/**
    44044551 * @callback_method_impl{FNSSMDEVSAVEEXEC}
     
    44234570    return VERR_NOT_IMPLEMENTED;
    44244571}
     4572
     4573
     4574#ifdef IOMMU_WITH_IOTLBE_CACHE
     4575/**
     4576 * @callback_method_impl{AVLU64CALLBACK}
     4577 */
     4578static DECLCALLBACK(int) iommuAmdR3DestroyIotlbe(PAVLU64NODECORE pCore, void *pvUser)
     4579{
     4580    RT_NOREF2(pCore, pvUser);
     4581    /* Nothing to do as we will destroy IOTLB entries wholesale later. */
     4582    return VINF_SUCCESS;
     4583}
     4584#endif
    44254585
    44264586
     
    45324692        pThis->hEvtCmdThread = NIL_SUPSEMEVENT;
    45334693    }
     4694
     4695#ifdef IOMMU_WITH_IOTLBE_CACHE
     4696    /* Destroy level 1 cache. */
     4697    if (pThis->paDomainIds)
     4698    {
     4699        PDMDevHlpMMHeapFree(pDevIns, pThis->paDomainIds);
     4700        pThis->paDomainIds = NULL;
     4701    }
     4702
     4703    /* Destroy level 2 cache. */
     4704    if (pThis->paIotlbes)
     4705    {
     4706        RTAvlU64Destroy(&pThis->TreeIotlbe, iommuAmdR3DestroyIotlbe, NULL /* pvParam */);
     4707        RTListInit(&pThis->LstLruIotlbe);
     4708        PDMDevHlpMMHeapFree(pDevIns, pThis->paIotlbes);
     4709        pThis->paIotlbes = NULL;
     4710    }
     4711#endif
     4712
    45344713    return VINF_SUCCESS;
    45354714}
     
    45464725    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    45474726    PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
     4727    pThis->u32Magic = IOMMU_MAGIC;
    45484728    pThisCC->pDevInsR3 = pDevIns;
    45494729
     
    47574937    rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread);
    47584938    AssertLogRelRCReturn(rc, rc);
     4939
     4940#ifdef IOMMU_WITH_IOTLBE_CACHE
     4941    /*
     4942     * Allocate the level 1 cache (device ID to domain ID mapping).
     4943     * PCI devices are hotpluggable, plus we don't have a way of querying the bus for all
     4944     * assigned PCI BDF slots. So while this wastes some memory, it should work regardless
     4945     * of how the code, features and devices around the IOMMU change.
     4946     */
     4947    size_t const cbDomains = sizeof(IODOMAIN) * UINT16_MAX;
     4948    pThis->paDomainIds = (PIODOMAIN)PDMDevHlpMMHeapAllocZ(pDevIns, cbDomains);
     4949    if (!pThis->paDomainIds)
     4950    {
     4951        return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
     4952                                   N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 1 cache."),
     4953                                   cbDomains);
     4954    }
     4955
     4956    /*
     4957     * Allocate the level 2 cache (IOTLB entries).
     4958     * This is allocated upfront since we expect a relatively small number of entries;
     4959     * it is also more cache-line efficient and makes it easier to track the least
     4960     * recently used entries for eviction when the cache is full. This also prevents
     4961     * unpredictable behavior during the lifetime of the VM if the hyperheap gets full,
     4962     * as allocation would either fail upfront or not at all.
     4963     */
     4964    size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
     4965    pThis->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
     4966    if (!pThis->paIotlbes)
     4967    {
     4968        return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
     4969                                   N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 2 cache."),
     4970                                   cbIotlbes);
     4971    }
     4972    RTListInit(&pThis->LstLruIotlbe);
     4973
     4974    LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbDomains + cbIotlbes));
     4975#endif
    47594976
    47604977    /*
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette