VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@93732

Last change on this file since 93732 was 93716, checked in by vboxsync, 3 years ago

VMM/PGM: Moved the physical handler allocation off the hyper heap and into its own slab, changing it to the 'hardened' AVL tree code. bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 51.5 KB
1/* $Id: PGMR0.cpp 93716 2022-02-14 10:36:21Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/rawpci.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/gmm.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/pdmdev.h>
29#include <VBox/vmm/vmcc.h>
30#include <VBox/vmm/gvm.h>
31#include "PGMInline.h"
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <iprt/assert.h>
35#include <iprt/mem.h>
36#include <iprt/memobj.h>
37#include <iprt/process.h>
38#include <iprt/rand.h>
39#include <iprt/string.h>
40#include <iprt/time.h>
41
42
43/*
44 * Instantiate the ring-0 header/code templates.
45 */
46/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
47#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
48#include "PGMR0Bth.h"
49#undef PGM_BTH_NAME
50
51#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
52#include "PGMR0Bth.h"
53#undef PGM_BTH_NAME
54
55#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
56#include "PGMR0Bth.h"
57#undef PGM_BTH_NAME
58
59#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
60#include "PGMR0Bth.h"
61#undef PGM_BTH_NAME
62
63
64/**
65 * Initializes the per-VM data for the PGM.
66 *
67 * This is called from under the GVMM lock, so it should only initialize the
68 * data so PGMR0CleanupVM and others will work smoothly.
69 *
70 * @returns VBox status code.
71 * @param pGVM Pointer to the global VM structure.
72 * @param hMemObj Handle to the memory object backing pGVM.
73 */
74VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM, RTR0MEMOBJ hMemObj)
75{
76 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
77 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
78
79 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
80 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
81 {
82 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
83 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
84 }
85 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
86 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
87
88 /*
89 * Initialize the handler type table with return to ring-3 callbacks so we
90 * don't have to do anything special for ring-3 only registrations.
91 *
92 * Note! The random bits of the hType value are mainly to prevent trouble
93 * with zero-initialized handles w/o needing to sacrifice handle zero.
94 */
95 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes); i++)
96 {
97 pGVM->pgmr0.s.aPhysHandlerTypes[i].hType = i | (RTRandU64() & ~(uint64_t)PGMPHYSHANDLERTYPE_IDX_MASK);
98 pGVM->pgmr0.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
99 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
100 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
101
102 pGVM->pgm.s.aPhysHandlerTypes[i].hType = pGVM->pgmr0.s.aPhysHandlerTypes[i].hType;
103 pGVM->pgm.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
104 }
105
106 /*
107 * Get the physical address of the ZERO and MMIO-dummy pages.
108 */
109 AssertReturn(((uintptr_t)&pGVM->pgm.s.abZeroPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
110 pGVM->pgm.s.HCPhysZeroPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abZeroPg) >> HOST_PAGE_SHIFT);
111 AssertReturn(pGVM->pgm.s.HCPhysZeroPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
112
113 AssertReturn(((uintptr_t)&pGVM->pgm.s.abMmioPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
114 pGVM->pgm.s.HCPhysMmioPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abMmioPg) >> HOST_PAGE_SHIFT);
115 AssertReturn(pGVM->pgm.s.HCPhysMmioPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
116
117 pGVM->pgm.s.HCPhysInvMmioPg = pGVM->pgm.s.HCPhysMmioPg;
118
119 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
120}
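
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: the handler type
 * handles built above keep the table index in the bits covered by
 * PGMPHYSHANDLERTYPE_IDX_MASK and fill the remaining bits with random data, so
 * a zero-initialized handle is practically never valid.  Resolving a handle
 * therefore only takes a mask and a full-value compare, roughly as below (the
 * helper name is hypothetical; the same pattern is used by
 * PGMR0HandlerPhysicalTypeSetUpContext further down).
 */
#if 0 /* sketch only */
static PPGMPHYSHANDLERTYPEINTR0 pgmR0SketchResolveHandlerType(PGVM pGVM, PGMPHYSHANDLERTYPE hType)
{
    uintptr_t const idxType = (uintptr_t)(hType & PGMPHYSHANDLERTYPE_IDX_MASK); /* low bits: table index */
    PPGMPHYSHANDLERTYPEINTR0 const pType = &pGVM->pgmr0.s.aPhysHandlerTypes[idxType];
    return pType->hType == hType ? pType : NULL;                                /* random high bits must match too */
}
#endif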
121
122
123/**
124 * Initialize the per-VM PGM for ring-0.
125 *
126 * @returns VBox status code.
127 * @param pGVM Pointer to the global VM structure.
128 */
129VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
130{
131 /*
132 * Set up the ring-0 context for our access handlers.
133 */
134 int rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, 0 /*fFlags*/,
135 pgmPhysRomWriteHandler, pgmPhysRomWritePfHandler,
136 "ROM write protection", pGVM->pgm.s.hRomPhysHandlerType);
137 AssertLogRelRCReturn(rc, rc);
138
139 /*
140 * Register the physical access handler doing dirty MMIO2 tracing.
141 */
142 rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, PGMPHYSHANDLER_F_KEEP_PGM_LOCK,
143 pgmPhysMmio2WriteHandler, pgmPhysMmio2WritePfHandler,
144 "MMIO2 dirty page tracing", pGVM->pgm.s.hMmio2DirtyPhysHandlerType);
145 AssertLogRelRCReturn(rc, rc);
146
147 /*
148 * The page pool.
149 */
150 return pgmR0PoolInitVM(pGVM);
151}
152
153
154/**
155 * Called at the end of the ring-0 initialization to seal access handler types.
156 *
157 * @returns VBox status code.
158 * @param pGVM Pointer to the global VM structure.
159 */
160VMMR0_INT_DECL(void) PGMR0DoneInitVM(PGVM pGVM)
161{
162 /*
163 * Seal all the access handler types. Does both ring-3 and ring-0.
164 *
165 * Note! Since this is a void function and we don't have any ring-0 state
166 * machinery for marking the VM as bogus, this code will just
167 * override corrupted values as best it can.
168 */
169 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
170 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes); i++)
171 {
172 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[i];
173 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[i];
174 PGMPHYSHANDLERKIND const enmKindR3 = pTypeR3->enmKind;
175 PGMPHYSHANDLERKIND const enmKindR0 = pTypeR0->enmKind;
176 AssertLogRelMsgStmt(pTypeR0->hType == pTypeR3->hType,
177 ("i=%u %#RX64 vs %#RX64 %s\n", i, pTypeR0->hType, pTypeR3->hType, pTypeR0->pszDesc),
178 pTypeR3->hType = pTypeR0->hType);
179 switch (enmKindR3)
180 {
181 case PGMPHYSHANDLERKIND_ALL:
182 case PGMPHYSHANDLERKIND_MMIO:
183 if ( enmKindR0 == enmKindR3
184 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
185 {
186 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
187 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
188 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
189 continue;
190 }
191 break;
192
193 case PGMPHYSHANDLERKIND_WRITE:
194 if ( enmKindR0 == enmKindR3
195 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
196 {
197 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
198 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
199 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
200 continue;
201 }
202 break;
203
204 default:
205 AssertLogRelMsgFailed(("i=%u enmKindR3=%d\n", i, enmKindR3));
206 RT_FALL_THROUGH();
207 case PGMPHYSHANDLERKIND_INVALID:
208 AssertLogRelMsg(enmKindR0 == PGMPHYSHANDLERKIND_INVALID,
209 ("i=%u enmKind=%d %s\n", i, enmKindR0, pTypeR0->pszDesc));
210 AssertLogRelMsg(pTypeR0->pfnHandler == pgmR0HandlerPhysicalHandlerToRing3,
211 ("i=%u pfnHandler=%p %s\n", i, pTypeR0->pfnHandler, pTypeR0->pszDesc));
212 AssertLogRelMsg(pTypeR0->pfnPfHandler == pgmR0HandlerPhysicalPfHandlerToRing3,
213 ("i=%u pfnPfHandler=%p %s\n", i, pTypeR0->pfnPfHandler, pTypeR0->pszDesc));
214
215 /* Unused or bad ring-3 entry, make it and the ring-0 one harmless. */
216 pTypeR3->enmKind = PGMPHYSHANDLERKIND_END;
217 pTypeR3->fRing0DevInsIdx = false;
218 pTypeR3->fKeepPgmLock = false;
219 pTypeR3->uState = 0;
220 break;
221 }
222 pTypeR3->fRing0Enabled = false;
223
224 /* Make sure the entry is harmless and goes to ring-3. */
225 pTypeR0->enmKind = PGMPHYSHANDLERKIND_END;
226 pTypeR0->pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
227 pTypeR0->pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
228 pTypeR0->fRing0DevInsIdx = false;
229 pTypeR0->fKeepPgmLock = false;
230 pTypeR0->uState = 0;
231 pTypeR0->pszDesc = "invalid";
232 }
233}
234
235
236/**
237 * Cleans up any loose ends before the GVM structure is destroyed.
238 */
239VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
240{
241 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
242 {
243 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
244 {
245 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
246 AssertRC(rc);
247 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
248 }
249
250 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
251 {
252 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
253 AssertRC(rc);
254 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
255 }
256 }
257
258 if (pGVM->pgmr0.s.hPhysHandlerMapObj != NIL_RTR0MEMOBJ)
259 {
260 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMapObj, true /*fFreeMappings*/);
261 AssertRC(rc);
262 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
263 }
264
265 if (pGVM->pgmr0.s.hPhysHandlerMemObj != NIL_RTR0MEMOBJ)
266 {
267 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMemObj, true /*fFreeMappings*/);
268 AssertRC(rc);
269 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
270 }
271
272 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
273 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
274}
275
276
277/**
278 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
279 *
280 * @returns The following VBox status codes.
281 * @retval VINF_SUCCESS on success. FF cleared.
282 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
283 *
284 * @param pGVM The global (ring-0) VM structure.
285 * @param idCpu The ID of the calling EMT.
286 * @param fRing3 Set if the caller is ring-3. Determines whether to
287 * return VINF_EM_NO_MEMORY or not.
288 *
289 * @thread EMT(idCpu)
290 *
291 * @remarks Must be called from within the PGM critical section. The caller
292 * must clear the new pages.
293 */
294int pgmR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu, bool fRing3)
295{
296 /*
297 * Validate inputs.
298 */
299 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
300 Assert(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf());
301 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
302
303 /*
304 * Check for error injection.
305 */
306 if (RT_LIKELY(!pGVM->pgm.s.fErrInjHandyPages))
307 { /* likely */ }
308 else
309 return VERR_NO_MEMORY;
310
311 /*
312 * Try to allocate a full set of handy pages.
313 */
314 uint32_t const iFirst = pGVM->pgm.s.cHandyPages;
315 AssertMsgReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), ("%#x\n", iFirst), VERR_PGM_HANDY_PAGE_IPE);
316
317 uint32_t const cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
318 if (!cPages)
319 return VINF_SUCCESS;
320
321 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
322 if (RT_SUCCESS(rc))
323 {
324 uint32_t const cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages); /** @todo allow allocating less... */
325 pGVM->pgm.s.cHandyPages = cHandyPages;
326 VM_FF_CLEAR(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
327 VM_FF_CLEAR(pGVM, VM_FF_PGM_NO_MEMORY);
328
329#ifdef VBOX_STRICT
330 for (uint32_t i = 0; i < cHandyPages; i++)
331 {
332 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
333 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
334 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
335 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
336 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
337 }
338#endif
339
340 /*
341 * Clear the pages.
342 */
343 for (uint32_t iPage = iFirst; iPage < cHandyPages; iPage++)
344 {
345 PGMMPAGEDESC pPage = &pGVM->pgm.s.aHandyPages[iPage];
346 if (!pPage->fZeroed)
347 {
348 void *pv = NULL;
349#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
350 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
351#else
352 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
353#endif
354 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
355
356 RT_BZERO(pv, GUEST_PAGE_SIZE);
357 pPage->fZeroed = true;
358 }
359#ifdef VBOX_STRICT
360 else
361 {
362 void *pv = NULL;
363# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
364 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
365# else
366 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
367# endif
368 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
369 AssertReturn(ASMMemIsZero(pv, GUEST_PAGE_SIZE), VERR_PGM_HANDY_PAGE_IPE);
370 }
371#endif
372 Log3(("PGMR0PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
373 }
374 }
375 else
376 {
377 /*
378 * We should never get here unless there is a genuine shortage of
379 * memory (or some internal error). Flag the error so the VM can be
380 * suspended ASAP and the user informed. If we're totally out of
381 * handy pages we will return failure.
382 */
383 /* Report the failure. */
384 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
385 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
386 rc, pGVM->pgm.s.cHandyPages,
387 pGVM->pgm.s.cAllPages, pGVM->pgm.s.cPrivatePages, pGVM->pgm.s.cSharedPages, pGVM->pgm.s.cZeroPages));
388
389 GMMMEMSTATSREQ Stats = { { SUPVMMR0REQHDR_MAGIC, sizeof(Stats) }, 0, 0, 0, 0, 0 };
390 if (RT_SUCCESS(GMMR0QueryMemoryStatsReq(pGVM, idCpu, &Stats)))
391 LogRel(("GMM: Statistics:\n"
392 " Allocated pages: %RX64\n"
393 " Free pages: %RX64\n"
394 " Shared pages: %RX64\n"
395 " Maximum pages: %RX64\n"
396 " Ballooned pages: %RX64\n",
397 Stats.cAllocPages, Stats.cFreePages, Stats.cSharedPages, Stats.cMaxPages, Stats.cBalloonedPages));
398
399 if ( rc != VERR_NO_MEMORY
400 && rc != VERR_NO_PHYS_MEMORY
401 && rc != VERR_LOCK_FAILED)
402 for (uint32_t iPage = 0; iPage < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); iPage++)
403 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
404 iPage, pGVM->pgm.s.aHandyPages[iPage].HCPhysGCPhys, pGVM->pgm.s.aHandyPages[iPage].idPage,
405 pGVM->pgm.s.aHandyPages[iPage].idSharedPage));
406
407 /* Set the FFs and adjust rc. */
408 VM_FF_SET(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
409 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
410 if (!fRing3)
411 if ( rc == VERR_NO_MEMORY
412 || rc == VERR_NO_PHYS_MEMORY
413 || rc == VERR_LOCK_FAILED
414 || rc == VERR_MAP_FAILED)
415 rc = VINF_EM_NO_MEMORY;
416 }
417
418 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
419 return rc;
420}
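
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: the worker above
 * only refills the unused tail of pgm.s.aHandyPages, i.e. the entries from
 * cHandyPages up to the end of the array.  A ring-0 caller that already owns
 * the PGM lock would use it roughly like this (the wrapper name is
 * hypothetical; pgmPhysEnsureHandyPage is the real in-tree consumer).
 */
#if 0 /* sketch only */
static int pgmR0SketchEnsureHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
    if (pGVM->pgm.s.cHandyPages >= RT_ELEMENTS(pGVM->pgm.s.aHandyPages))
        return VINF_SUCCESS;                /* nothing to top up */
    /* fRing3=false: genuine out-of-memory conditions come back as VINF_EM_NO_MEMORY
       so the caller can force a trip to ring-3 rather than fail outright. */
    return pgmR0PhysAllocateHandyPages(pGVM, idCpu, false /*fRing3*/);
}
#endif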
421
422
423/**
424 * Worker function for PGMR3PhysAllocateHandyPages / VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES.
425 *
426 * @returns The following VBox status codes.
427 * @retval VINF_SUCCESS on success. FF cleared.
428 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
429 *
430 * @param pGVM The global (ring-0) VM structure.
431 * @param idCpu The ID of the calling EMT.
432 *
433 * @thread EMT(idCpu)
434 *
435 * @remarks Must be called from within the PGM critical section. The caller
436 * must clear the new pages.
437 */
438VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
439{
440 /*
441 * Validate inputs.
442 */
443 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
444 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
445
446 /*
447 * Enter the PGM lock and call the worker.
448 */
449 int rc = PGM_LOCK(pGVM);
450 if (RT_SUCCESS(rc))
451 {
452 rc = pgmR0PhysAllocateHandyPages(pGVM, idCpu, true /*fRing3*/);
453 PGM_UNLOCK(pGVM);
454 }
455 return rc;
456}
457
458
459/**
460 * Flushes any changes pending in the handy page array.
461 *
462 * It is very important that this gets done when page sharing is enabled.
463 *
464 * @returns The following VBox status codes.
465 * @retval VINF_SUCCESS on success. FF cleared.
466 *
467 * @param pGVM The global (ring-0) VM structure.
468 * @param idCpu The ID of the calling EMT.
469 *
470 * @thread EMT(idCpu)
471 *
472 * @remarks Must be called from within the PGM critical section.
473 */
474VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
475{
476 /*
477 * Validate inputs.
478 */
479 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
480 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
481 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
482
483 /*
484 * Try to allocate a full set of handy pages.
485 */
486 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
487 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
488 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
489 if (!cPages)
490 return VINF_SUCCESS;
491 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
492
493 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
494 return rc;
495}
496
497
498/**
499 * Allocate a large page at @a GCPhys.
500 *
501 * @returns The following VBox status codes.
502 * @retval VINF_SUCCESS on success.
503 * @retval VINF_EM_NO_MEMORY if we're out of memory.
504 *
505 * @param pGVM The global (ring-0) VM structure.
506 * @param idCpu The ID of the calling EMT.
507 * @param GCPhys The guest physical address of the page.
508 *
509 * @thread EMT(idCpu)
510 *
511 * @remarks Must be called from within the PGM critical section. The caller
512 * must clear the new pages.
513 */
514int pgmR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
515{
516 STAM_PROFILE_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, a);
517 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
518
519 /*
520 * Allocate a large page.
521 */
522 RTHCPHYS HCPhys = NIL_GMMPAGEDESC_PHYS;
523 uint32_t idPage = NIL_GMM_PAGEID;
524
525 if (true) /** @todo pre-allocate 2-3 pages on the allocation thread. */
526 {
527 uint64_t const nsAllocStart = RTTimeNanoTS();
528 if (nsAllocStart < pGVM->pgm.s.nsLargePageRetry)
529 {
530 LogFlowFunc(("returns VERR_TRY_AGAIN - %RU64 ns left of hold off period\n", pGVM->pgm.s.nsLargePageRetry - nsAllocStart));
531 return VERR_TRY_AGAIN;
532 }
533
534 int const rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M, &idPage, &HCPhys);
535
536 uint64_t const nsAllocEnd = RTTimeNanoTS();
537 uint64_t const cNsElapsed = nsAllocEnd - nsAllocStart;
538 STAM_REL_PROFILE_ADD_PERIOD(&pGVM->pgm.s.StatLargePageAlloc, cNsElapsed);
539 if (cNsElapsed < RT_NS_100MS)
540 pGVM->pgm.s.cLargePageLongAllocRepeats = 0;
541 else
542 {
543 /* If a large page allocation takes more than 100ms, back off for a
544 while so the host OS can reshuffle memory and make some more large
545 pages available. However, if it took over a second, just disable it. */
546 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageOverflow);
547 pGVM->pgm.s.cLargePageLongAllocRepeats++;
548 if (cNsElapsed > RT_NS_1SEC)
549 {
550 LogRel(("PGMR0PhysAllocateLargePage: Disabling large pages after %'RU64 ns allocation time.\n", cNsElapsed));
551 PGMSetLargePageUsage(pGVM, false);
552 }
553 else
554 {
555 Log(("PGMR0PhysAllocateLargePage: Suspending large page allocations for %u sec after %'RU64 ns allocation time.\n",
556 30 * pGVM->pgm.s.cLargePageLongAllocRepeats, cNsElapsed));
557 pGVM->pgm.s.nsLargePageRetry = nsAllocEnd + RT_NS_30SEC * pGVM->pgm.s.cLargePageLongAllocRepeats;
558 }
559 }
560
561 if (RT_FAILURE(rc))
562 {
563 Log(("PGMR0PhysAllocateLargePage: Failed: %Rrc\n", rc));
564 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageAllocFailed);
565 if (rc == VERR_NOT_SUPPORTED)
566 {
567 LogRel(("PGM: Disabling large pages because of VERR_NOT_SUPPORTED status.\n"));
568 PGMSetLargePageUsage(pGVM, false);
569 }
570 return rc;
571 }
572 }
573
574 STAM_PROFILE_STOP_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, &pGVM->pgm.s.Stats.StatLargePageSetup, a);
575
576 /*
577 * Enter the pages into PGM.
578 */
579 bool fFlushTLBs = false;
580 VBOXSTRICTRC rc = VINF_SUCCESS;
581 unsigned cLeft = _2M / GUEST_PAGE_SIZE;
582 while (cLeft-- > 0)
583 {
584 PPGMPAGE const pPage = pgmPhysGetPage(pGVM, GCPhys);
585 AssertReturn(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM && PGM_PAGE_IS_ZERO(pPage), VERR_PGM_UNEXPECTED_PAGE_STATE);
586
587 /* Make sure there are no zero mappings. */
588 uint16_t const u16Tracking = PGM_PAGE_GET_TRACKING(pPage);
589 if (u16Tracking == 0)
590 Assert(PGM_PAGE_GET_PTE_INDEX(pPage) == 0);
591 else
592 {
593 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageZeroEvict);
594 VBOXSTRICTRC rc3 = pgmPoolTrackUpdateGCPhys(pGVM, GCPhys, pPage, true /*fFlushPTEs*/, &fFlushTLBs);
595 Log(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: tracking=%#x rc3=%Rrc\n", GCPhys, u16Tracking, VBOXSTRICTRC_VAL(rc3)));
596 if (rc3 != VINF_SUCCESS && rc == VINF_SUCCESS)
597 rc = rc3; /** @todo not perfect... */
598 PGM_PAGE_SET_PTE_INDEX(pGVM, pPage, 0);
599 PGM_PAGE_SET_TRACKING(pGVM, pPage, 0);
600 }
601
602 /* Setup the new page. */
603 PGM_PAGE_SET_HCPHYS(pGVM, pPage, HCPhys);
604 PGM_PAGE_SET_STATE(pGVM, pPage, PGM_PAGE_STATE_ALLOCATED);
605 PGM_PAGE_SET_PDE_TYPE(pGVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
606 PGM_PAGE_SET_PAGEID(pGVM, pPage, idPage);
607 Log3(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: idPage=%#x HCPhys=%RGp (old tracking=%#x)\n",
608 GCPhys, idPage, HCPhys, u16Tracking));
609
610 /* advance */
611 idPage++;
612 HCPhys += GUEST_PAGE_SIZE;
613 GCPhys += GUEST_PAGE_SIZE;
614 }
615
616 STAM_COUNTER_ADD(&pGVM->pgm.s.Stats.StatRZPageReplaceZero, _2M / GUEST_PAGE_SIZE);
617 pGVM->pgm.s.cZeroPages -= _2M / GUEST_PAGE_SIZE;
618 pGVM->pgm.s.cPrivatePages += _2M / GUEST_PAGE_SIZE;
619
620 /*
621 * Flush all TLBs.
622 */
623 if (!fFlushTLBs)
624 { /* likely as we shouldn't normally map zero pages */ }
625 else
626 {
627 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageTlbFlush);
628 PGM_INVL_ALL_VCPU_TLBS(pGVM);
629 }
630 /** @todo this is a little expensive (~3000 ticks) since we'll have to
631 * invalidate everything. Add a version to the TLB? */
632 pgmPhysInvalidatePageMapTLB(pGVM);
633
634 STAM_PROFILE_STOP(&pGVM->pgm.s.Stats.StatLargePageSetup, a);
635#if 0 /** @todo returning info statuses here might not be a great idea... */
636 LogFlow(("PGMR0PhysAllocateLargePage: returns %Rrc\n", VBOXSTRICTRC_VAL(rc) ));
637 return VBOXSTRICTRC_TODO(rc);
638#else
639 LogFlow(("PGMR0PhysAllocateLargePage: returns VINF_SUCCESS (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rc) ));
640 return VINF_SUCCESS;
641#endif
642}
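
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: the allocation
 * back-off above boils down to a small policy function of the allocation time
 * and the number of consecutive slow allocations.  The struct and helper name
 * are hypothetical; the 100 ms / 1 s / 30 s-per-repeat thresholds are the ones
 * used by the real code.
 */
#if 0 /* sketch only */
typedef struct PGMSKETCHLARGEPAGEPOLICY
{
    bool     fDisableLargePages;    /* allocation took over a second: stop using large pages */
    uint64_t nsNextRetry;           /* earliest time for the next attempt, 0 = no hold-off */
} PGMSKETCHLARGEPAGEPOLICY;

static PGMSKETCHLARGEPAGEPOLICY pgmR0SketchLargePagePolicy(uint64_t cNsElapsed, uint64_t nsNow, uint32_t cSlowRepeats)
{
    PGMSKETCHLARGEPAGEPOLICY Policy = { false, 0 };
    if (cNsElapsed >= RT_NS_100MS)
    {
        if (cNsElapsed > RT_NS_1SEC)
            Policy.fDisableLargePages = true;                        /* way too slow: give up on large pages */
        else
            Policy.nsNextRetry = nsNow + RT_NS_30SEC * cSlowRepeats; /* back off 30s per consecutive slow alloc */
    }
    return Policy;
}
#endif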
643
644
645/**
646 * Allocate a large page at @a GCPhys.
647 *
648 * @returns The following VBox status codes.
649 * @retval VINF_SUCCESS on success.
650 * @retval VINF_EM_NO_MEMORY if we're out of memory.
651 *
652 * @param pGVM The global (ring-0) VM structure.
653 * @param idCpu The ID of the calling EMT.
654 * @param GCPhys The guest physical address of the page.
655 *
656 * @thread EMT(idCpu)
657 *
658 * @remarks Must be called from within the PGM critical section. The caller
659 * must clear the new pages.
660 */
661VMMR0_INT_DECL(int) PGMR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
662{
663 /*
664 * Validate inputs.
665 */
666 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
667 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
668
669 int rc = PGM_LOCK(pGVM);
670 AssertRCReturn(rc, rc);
671
672 /* The caller might have done this already, but since we're ring-3 callable we
673 need to make sure everything is fine before starting the allocation here. */
674 for (unsigned i = 0; i < _2M / GUEST_PAGE_SIZE; i++)
675 {
676 PPGMPAGE pPage;
677 rc = pgmPhysGetPageEx(pGVM, GCPhys + i * GUEST_PAGE_SIZE, &pPage);
678 AssertRCReturnStmt(rc, PGM_UNLOCK(pGVM), rc);
679 AssertReturnStmt(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, PGM_UNLOCK(pGVM), VERR_PGM_PHYS_NOT_RAM);
680 AssertReturnStmt(PGM_PAGE_IS_ZERO(pPage), PGM_UNLOCK(pGVM), VERR_PGM_UNEXPECTED_PAGE_STATE);
681 }
682
683 /*
684 * Call common code.
685 */
686 rc = pgmR0PhysAllocateLargePage(pGVM, idCpu, GCPhys);
687
688 PGM_UNLOCK(pGVM);
689 return rc;
690}
691
692
693/**
694 * Locate an MMIO2 range.
695 *
696 * @returns Pointer to the MMIO2 range.
697 * @param pGVM The global (ring-0) VM structure.
698 * @param pDevIns The device instance owning the region.
699 * @param hMmio2 Handle to look up.
700 */
701DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMmio2Find(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
702{
703 /*
704 * We use the lookup table here as list walking is tedious in ring-0 when using
705 * ring-3 pointers and this probably will require some kind of refactoring anyway.
706 */
707 if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
708 {
709 PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
710 if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
711 {
712 Assert(pCur->idMmio2 == hMmio2);
713 return pCur;
714 }
715 Assert(!pCur);
716 }
717 return NULL;
718}
719
720
721/**
722 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
723 *
724 * @returns VBox status code.
725 * @param pGVM The global (ring-0) VM structure.
726 * @param pDevIns The device instance.
727 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
728 * @param offSub The offset into the region.
729 * @param cbSub The size of the mapping, zero meaning all the rest.
730 * @param ppvMapping Where to return the ring-0 mapping address.
731 */
732VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
733 size_t offSub, size_t cbSub, void **ppvMapping)
734{
735 AssertReturn(!(offSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
736 AssertReturn(!(cbSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
737
738 /*
739 * Translate hRegion into a range pointer.
740 */
741 PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMmio2Find(pGVM, pDevIns, hMmio2);
742 AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
743#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
744 uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
745#else
746 RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
747#endif
748 RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
749 pFirstRegMmio = NULL;
750 ASMCompilerBarrier();
751
752 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
753 if (cbSub == 0)
754 cbSub = cbReal - offSub;
755 else
756 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
757
758 /*
759 * Do the mapping.
760 */
761#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
762 AssertPtr(pvR0);
763 *ppvMapping = pvR0 + offSub;
764 return VINF_SUCCESS;
765#else
766 return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
767#endif
768}
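
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: device code
 * normally reaches the worker above through the PDM helper named in the
 * comment (PDMDEVHLPR0::pfnMmio2SetUpContext) from its ring-0 init path.  The
 * PDMDevHlpMmio2SetUpContext wrapper used below, and the helper function
 * itself, are assumptions of this sketch.
 */
#if 0 /* sketch only */
static int mySketchMapMmio2IntoRing0(PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, void **ppvMapping)
{
    /* offSub/cbSub must be host-page aligned; cbSub == 0 means "map everything from offSub". */
    int rc = PDMDevHlpMmio2SetUpContext(pDevIns, hMmio2, 0 /*offSub*/, 0 /*cbSub*/, ppvMapping);
    AssertLogRelRC(rc);
    return rc;
}
#endif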
769
770
771/**
772 * This is called during PGMR3Init to init the physical access handler allocator
773 * and tree.
774 *
775 * @returns VBox status code.
776 * @param pGVM Pointer to the global VM structure.
777 * @param cEntries Desired number of physical access handlers to reserve
778 * space for (will be adjusted).
779 * @thread EMT(0)
780 */
781VMMR0_INT_DECL(int) PGMR0PhysHandlerInitReqHandler(PGVM pGVM, uint32_t cEntries)
782{
783 /*
784 * Validate the input and state.
785 */
786 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
787 AssertRCReturn(rc, rc);
788 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
789
790 AssertReturn(pGVM->pgmr0.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
791 AssertReturn(pGVM->pgm.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
792
793 AssertLogRelMsgReturn(cEntries <= _64K, ("%#x\n", cEntries), VERR_OUT_OF_RANGE);
794
795 /*
796 * Calculate the table size and allocate it.
797 */
798 uint32_t cbTreeAndBitmap = 0;
799 uint32_t const cbTotalAligned = pgmHandlerPhysicalCalcTableSizes(&cEntries, &cbTreeAndBitmap);
800 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
801 rc = RTR0MemObjAllocPage(&hMemObj, cbTotalAligned, false);
802 if (RT_SUCCESS(rc))
803 {
804 RTR0MEMOBJ hMapObj = NIL_RTR0MEMOBJ;
805 rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
806 if (RT_SUCCESS(rc))
807 {
808 uint8_t *pb = (uint8_t *)RTR0MemObjAddress(hMemObj);
809 if (!RTR0MemObjWasZeroInitialized(hMemObj))
810 RT_BZERO(pb, cbTotalAligned);
811
812 pGVM->pgmr0.s.PhysHandlerAllocator.initSlabAllocator(cEntries, (PPGMPHYSHANDLER)&pb[cbTreeAndBitmap],
813 (uint64_t *)&pb[sizeof(PGMPHYSHANDLERTREE)]);
814 pGVM->pgmr0.s.pPhysHandlerTree = (PPGMPHYSHANDLERTREE)pb;
815 pGVM->pgmr0.s.pPhysHandlerTree->initWithAllocator(&pGVM->pgmr0.s.PhysHandlerAllocator);
816 pGVM->pgmr0.s.hPhysHandlerMemObj = hMemObj;
817 pGVM->pgmr0.s.hPhysHandlerMapObj = hMapObj;
818
819 AssertCompile(sizeof(pGVM->pgm.s.PhysHandlerAllocator) == sizeof(pGVM->pgmr0.s.PhysHandlerAllocator));
820 RTR3PTR R3Ptr = RTR0MemObjAddressR3(hMapObj);
821 pGVM->pgm.s.pPhysHandlerTree = R3Ptr;
822 pGVM->pgm.s.PhysHandlerAllocator.m_paNodes = R3Ptr + cbTreeAndBitmap;
823 pGVM->pgm.s.PhysHandlerAllocator.m_pbmAlloc = R3Ptr + sizeof(PGMPHYSHANDLERTREE);
824 pGVM->pgm.s.PhysHandlerAllocator.m_cNodes = cEntries;
825 pGVM->pgm.s.PhysHandlerAllocator.m_cErrors = 0;
826 pGVM->pgm.s.PhysHandlerAllocator.m_idxAllocHint = 0;
827 pGVM->pgm.s.PhysHandlerAllocator.m_uPadding = 0;
828 return VINF_SUCCESS;
829 }
830
831 RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
832 }
833 return rc;
834}
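
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: the single
 * allocation made above is carved up as
 *     [PGMPHYSHANDLERTREE] [allocation bitmap] [cEntries x PGMPHYSHANDLER nodes]
 * and the very same offsets are republished to ring-3 through the user
 * mapping.  The hypothetical helper below merely names the three sub-ranges
 * that the real code hands to initSlabAllocator()/initWithAllocator() and
 * mirrors into pGVM->pgm.s.PhysHandlerAllocator.
 */
#if 0 /* sketch only */
static void pgmR0SketchPhysHandlerLayout(uint8_t *pb, uint32_t cbTreeAndBitmap, uint32_t cEntries)
{
    PPGMPHYSHANDLERTREE const pTree    = (PPGMPHYSHANDLERTREE)pb;                     /* offset 0: the tree */
    uint64_t * const          pbmAlloc = (uint64_t *)&pb[sizeof(PGMPHYSHANDLERTREE)]; /* then the slab bitmap */
    PPGMPHYSHANDLER const     paNodes  = (PPGMPHYSHANDLER)&pb[cbTreeAndBitmap];       /* then cEntries nodes */
    NOREF(pTree); NOREF(pbmAlloc); NOREF(paNodes); NOREF(cEntries);
}
#endif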
835
836
837/**
838 * Updates a physical access handler type with ring-0 callback functions.
839 *
840 * The handler type must first have been registered in ring-3.
841 *
842 * @returns VBox status code.
843 * @param pGVM The global (ring-0) VM structure.
844 * @param enmKind The kind of access handler.
845 * @param fFlags PGMPHYSHANDLER_F_XXX
846 * @param pfnHandler Pointer to the ring-0 handler callback.
847 * @param pfnPfHandler Pointer to the ring-0 \#PF handler
848 * callback.
849 * @param pszDesc The type description.
850 * @param hType The handle to do ring-0 callback registrations for.
851 * @thread EMT(0)
852 */
853VMMR0_INT_DECL(int) PGMR0HandlerPhysicalTypeSetUpContext(PGVM pGVM, PGMPHYSHANDLERKIND enmKind, uint32_t fFlags,
854 PFNPGMPHYSHANDLER pfnHandler, PFNPGMRZPHYSPFHANDLER pfnPfHandler,
855 const char *pszDesc, PGMPHYSHANDLERTYPE hType)
856{
857 /*
858 * Validate input.
859 */
860 AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER);
861 AssertPtrReturn(pfnPfHandler, VERR_INVALID_POINTER);
862 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
863 AssertReturn( enmKind == PGMPHYSHANDLERKIND_WRITE
864 || enmKind == PGMPHYSHANDLERKIND_ALL
865 || enmKind == PGMPHYSHANDLERKIND_MMIO,
866 VERR_INVALID_PARAMETER);
867 AssertMsgReturn(!(fFlags & ~PGMPHYSHANDLER_F_VALID_MASK), ("%#x\n", fFlags), VERR_INVALID_FLAGS);
868
869 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
870 AssertMsgReturn(hType == pTypeR0->hType, ("%#RX64, expected=%#RX64\n", hType, pTypeR0->hType), VERR_INVALID_HANDLE);
871 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
872 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == PGMPHYSHANDLERTYPE_IDX_MASK + 1);
873 AssertReturn(pTypeR0->enmKind == PGMPHYSHANDLERKIND_INVALID, VERR_ALREADY_INITIALIZED);
874
875 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
876 AssertRCReturn(rc, rc);
877 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
878
879 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
880 AssertMsgReturn(pTypeR3->enmKind == enmKind,
881 ("%#x: %d, expected %d\n", hType, pTypeR3->enmKind, enmKind),
882 VERR_INVALID_HANDLE);
883 AssertMsgReturn(pTypeR3->fKeepPgmLock == RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK),
884 ("%#x: %d, fFlags=%d\n", hType, pTypeR3->fKeepPgmLock, fFlags),
885 VERR_INVALID_HANDLE);
886 AssertMsgReturn(pTypeR3->fRing0DevInsIdx == RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX),
887 ("%#x: %d, fFlags=%d\n", hType, pTypeR3->fRing0DevInsIdx, fFlags),
888 VERR_INVALID_HANDLE);
889
890 /*
891 * Update the entry.
892 */
893 pTypeR0->enmKind = enmKind;
894 pTypeR0->uState = enmKind == PGMPHYSHANDLERKIND_WRITE
895 ? PGM_PAGE_HNDL_PHYS_STATE_WRITE : PGM_PAGE_HNDL_PHYS_STATE_ALL;
896 pTypeR0->fKeepPgmLock = RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK);
897 pTypeR0->fRing0DevInsIdx = RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX);
898 pTypeR0->pfnHandler = pfnHandler;
899 pTypeR0->pfnPfHandler = pfnPfHandler;
900 pTypeR0->pszDesc = pszDesc;
901
902 pTypeR3->fRing0Enabled = true;
903
904 LogFlow(("PGMR0HandlerPhysicalTypeRegister: hType=%#x: enmKind=%d fFlags=%#x pfnHandler=%p pfnPfHandler=%p pszDesc=%s\n",
905 hType, enmKind, fFlags, pfnHandler, pfnPfHandler, pszDesc));
906 return VINF_SUCCESS;
907}
908
909
910#ifdef VBOX_WITH_PCI_PASSTHROUGH
911/* Interface sketch. The interface belongs to a global PCI pass-through
912 manager. It shall use the global VM handle, not the user VM handle to
913 store the per-VM info (domain) since that is all ring-0 stuff, thus
914 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
915 we can discuss the PciRaw code re-organization when I'm back from
916 vacation.
917
918 I've implemented the initial IOMMU set up below. For things to work
919 reliably, we will probably need to add a whole bunch of checks and
920 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
921 assuming nested paging (enforced) and prealloc (enforced), no
922 ballooning (check missing), page sharing (check missing) or live
923 migration (check missing), it might work fine. At least if some
924 VM power-off hook is present and can tear down the IOMMU page tables. */
925
926/**
927 * Tells the global PCI pass-through manager that we are about to set up the
928 * guest page to host page mappings for the specified VM.
929 *
930 * @returns VBox status code.
931 *
932 * @param pGVM The ring-0 VM structure.
933 */
934VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
935{
936 NOREF(pGVM);
937 return VINF_SUCCESS;
938}
939
940
941/**
942 * Assigns a host page mapping for a guest page.
943 *
944 * This is only used when setting up the mappings, i.e. between
945 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
946 *
947 * @returns VBox status code.
948 * @param pGVM The ring-0 VM structure.
949 * @param GCPhys The address of the guest page (page aligned).
950 * @param HCPhys The address of the host page (page aligned).
951 */
952VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
953{
954 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
955 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
956
957 if (pGVM->rawpci.s.pfnContigMemInfo)
958 /** @todo what do we do on failure? */
959 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_MAP);
960
961 return VINF_SUCCESS;
962}
963
964
965/**
966 * Indicates that the specified guest page doesn't exist or doesn't have a host
967 * page mapping we trust PCI pass-through with.
968 *
969 * This is only used when setting up the mappings, i.e. between
970 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
971 *
972 * @returns VBox status code.
973 * @param pGVM The ring-0 VM structure.
974 * @param GCPhys The address of the guest page (page aligned).
975 *
976 */
977VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
978{
979 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
980
981 if (pGVM->rawpci.s.pfnContigMemInfo)
982 /** @todo what do we do on failure? */
983 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
984
985 return VINF_SUCCESS;
986}
987
988
989/**
990 * Tells the global PCI pass-through manager that we have completed setting up
991 * the guest page to host page mappings for the specified VM.
992 *
993 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
994 * if some page assignment failed.
995 *
996 * @returns VBox status code.
997 *
998 * @param pGVM The ring-0 VM structure.
999 */
1000VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
1001{
1002 NOREF(pGVM);
1003 return VINF_SUCCESS;
1004}
1005
1006
1007/**
1008 * Tells the global PCI pass-through manager that a guest page mapping has
1009 * changed after the initial setup.
1010 *
1011 * @returns VBox status code.
1012 * @param pGVM The ring-0 VM structure.
1013 * @param GCPhys The address of the guest page (page aligned).
1014 * @param HCPhys The new host page address or NIL_RTHCPHYS if
1015 * now unassigned.
1016 */
1017VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1018{
1019 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
1020 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
1021 NOREF(pGVM);
1022 return VINF_SUCCESS;
1023}
1024
1025#endif /* VBOX_WITH_PCI_PASSTHROUGH */
1026
1027
1028/**
1029 * Sets up the IOMMU when raw PCI device is enabled.
1030 *
1031 * @note This is a hack that will probably be remodelled and refined later!
1032 *
1033 * @returns VBox status code.
1034 *
1035 * @param pGVM The global (ring-0) VM structure.
1036 */
1037VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
1038{
1039 int rc = GVMMR0ValidateGVM(pGVM);
1040 if (RT_FAILURE(rc))
1041 return rc;
1042
1043#ifdef VBOX_WITH_PCI_PASSTHROUGH
1044 if (pGVM->pgm.s.fPciPassthrough)
1045 {
1046 /*
1047 * The Simplistic Approach - Enumerate all the pages and tell the
1048 * IOMMU about each of them.
1049 */
1050 PGM_LOCK_VOID(pGVM);
1051 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
1052 if (RT_SUCCESS(rc))
1053 {
1054 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
1055 {
1056 PPGMPAGE pPage = &pRam->aPages[0];
1057 RTGCPHYS GCPhys = pRam->GCPhys;
1058 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
1059 while (cLeft-- > 0)
1060 {
1061 /* Only expose pages that are 100% safe for now. */
1062 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1063 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
1064 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
1065 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
1066 else
1067 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
1068
1069 /* next */
1070 pPage++;
1071 GCPhys += HOST_PAGE_SIZE;
1072 }
1073 }
1074
1075 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
1076 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
1077 rc = rc2;
1078 }
1079 PGM_UNLOCK(pGVM);
1080 }
1081 else
1082#endif
1083 rc = VERR_NOT_SUPPORTED;
1084 return rc;
1085}
1086
1087
1088/**
1089 * \#PF Handler for nested paging.
1090 *
1091 * @returns VBox status code (appropriate for trap handling and GC return).
1092 * @param pGVM The global (ring-0) VM structure.
1093 * @param pGVCpu The global (ring-0) CPU structure of the calling
1094 * EMT.
1095 * @param enmShwPagingMode Paging mode for the nested page tables.
1096 * @param uErr The trap error code.
1097 * @param pRegFrame Trap register frame.
1098 * @param GCPhysFault The fault address.
1099 */
1100VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1101 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
1102{
1103 int rc;
1104
1105 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
1106 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
1107 STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );
1108
1109 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
1110 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
1111 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
1112 ("enmShwPagingMode=%d\n", enmShwPagingMode));
1113
1114 /* Reserved shouldn't end up here. */
1115 Assert(!(uErr & X86_TRAP_PF_RSVD));
1116
1117#ifdef VBOX_WITH_STATISTICS
1118 /*
1119 * Error code stats.
1120 */
1121 if (uErr & X86_TRAP_PF_US)
1122 {
1123 if (!(uErr & X86_TRAP_PF_P))
1124 {
1125 if (uErr & X86_TRAP_PF_RW)
1126 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
1127 else
1128 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
1129 }
1130 else if (uErr & X86_TRAP_PF_RW)
1131 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
1132 else if (uErr & X86_TRAP_PF_RSVD)
1133 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
1134 else if (uErr & X86_TRAP_PF_ID)
1135 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
1136 else
1137 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
1138 }
1139 else
1140 { /* Supervisor */
1141 if (!(uErr & X86_TRAP_PF_P))
1142 {
1143 if (uErr & X86_TRAP_PF_RW)
1144 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
1145 else
1146 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
1147 }
1148 else if (uErr & X86_TRAP_PF_RW)
1149 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
1150 else if (uErr & X86_TRAP_PF_ID)
1151 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
1152 else if (uErr & X86_TRAP_PF_RSVD)
1153 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
1154 }
1155#endif
1156
1157 /*
1158 * Call the worker.
1159 *
1160 * Note! We pretend the guest is in protected mode without paging, so we
1161 * can use existing code to build the nested page tables.
1162 */
1163/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
1164 bool fLockTaken = false;
1165 switch (enmShwPagingMode)
1166 {
1167 case PGMMODE_32_BIT:
1168 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1169 break;
1170 case PGMMODE_PAE:
1171 case PGMMODE_PAE_NX:
1172 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1173 break;
1174 case PGMMODE_AMD64:
1175 case PGMMODE_AMD64_NX:
1176 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1177 break;
1178 case PGMMODE_EPT:
1179 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1180 break;
1181 default:
1182 AssertFailed();
1183 rc = VERR_INVALID_PARAMETER;
1184 break;
1185 }
1186 if (fLockTaken)
1187 {
1188 PGM_LOCK_ASSERT_OWNER(pGVM);
1189 PGM_UNLOCK(pGVM);
1190 }
1191
1192 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
1193 rc = VINF_SUCCESS;
1194 /*
1195 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
1196 * via its page tables, see @bugref{6043}.
1197 */
1198 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
1199 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
1200 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
1201 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
1202 {
1203 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
1204 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
1205 single VCPU VMs though. */
1206 rc = VINF_SUCCESS;
1207 }
1208
1209 STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
1210 pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
1211 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
1212 return rc;
1213}
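
/*
 * Illustrative sketch, not part of the original PGMR0.cpp: the statistics
 * block above simply decodes the x86 \#PF error code bits.  A compact decoder
 * looks roughly like this (helper name hypothetical); the X86_TRAP_PF_* flags
 * are the same definitions used by the function above.
 */
#if 0 /* sketch only */
static void pgmR0SketchDecodePfErrorCode(RTGCUINT uErr)
{
    bool const fPresent    = RT_BOOL(uErr & X86_TRAP_PF_P);    /* 0 = not-present fault, 1 = protection fault */
    bool const fWrite      = RT_BOOL(uErr & X86_TRAP_PF_RW);   /* write access (else read) */
    bool const fUser       = RT_BOOL(uErr & X86_TRAP_PF_US);   /* user-mode access (else supervisor) */
    bool const fRsvd       = RT_BOOL(uErr & X86_TRAP_PF_RSVD); /* reserved bit set in a paging structure */
    bool const fInstrFetch = RT_BOOL(uErr & X86_TRAP_PF_ID);   /* instruction fetch (NX/SMEP related) */
    Log(("#PF: P=%d RW=%d US=%d RSVD=%d ID=%d\n", fPresent, fWrite, fUser, fRsvd, fInstrFetch));
}
#endif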
1214
1215
1216/**
1217 * \#PF Handler for deliberate nested paging misconfiguration (reserved bit)
1218 * employed for MMIO pages.
1219 *
1220 * @returns VBox status code (appropriate for trap handling and GC return).
1221 * @param pGVM The global (ring-0) VM structure.
1222 * @param pGVCpu The global (ring-0) CPU structure of the calling
1223 * EMT.
1224 * @param enmShwPagingMode Paging mode for the nested page tables.
1225 * @param pRegFrame Trap register frame.
1226 * @param GCPhysFault The fault address.
1227 * @param uErr The error code, UINT32_MAX if not available
1228 * (VT-x).
1229 */
1230VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
1231 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
1232{
1233#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1234 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
1235 VBOXSTRICTRC rc;
1236
1237 /*
1238 * Try to look up the all-access physical handler for the address.
1239 */
1240 PGM_LOCK_VOID(pGVM);
1241 PPGMPHYSHANDLER pHandler;
1242 rc = pgmHandlerPhysicalLookup(pGVM, GCPhysFault, &pHandler);
1243 if (RT_SUCCESS(rc))
1244 {
1245 PCPGMPHYSHANDLERTYPEINT pHandlerType = PGMPHYSHANDLER_GET_TYPE_NO_NULL(pGVM, pHandler);
1246 if (RT_LIKELY(pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
1247 {
1248 /*
1249 * If the handler has aliased pages or pages that have been temporarily
1250 * disabled, we'll have to take a detour to make sure we resync them
1251 * to avoid lots of unnecessary exits.
1252 */
1253 PPGMPAGE pPage;
1254 if ( ( pHandler->cAliasedPages
1255 || pHandler->cTmpOffPages)
1256 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
1257 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
1258 )
1259 {
1260 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
1261 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1262 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1263 PGM_UNLOCK(pGVM);
1264 }
1265 else
1266 {
1267 if (pHandlerType->pfnPfHandler)
1268 {
1269 uint64_t const uUser = !pHandlerType->fRing0DevInsIdx ? pHandler->uUser
1270 : (uintptr_t)PDMDeviceRing0IdxToInstance(pGVM, pHandler->uUser);
1271 STAM_PROFILE_START(&pHandler->Stat, h);
1272 PGM_UNLOCK(pGVM);
1273
1274 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->pfnPfHandler, uErr, GCPhysFault, uUser));
1275 rc = pHandlerType->pfnPfHandler(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
1276 GCPhysFault, GCPhysFault, uUser);
1277
1278 STAM_PROFILE_STOP(&pHandler->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
1279 }
1280 else
1281 {
1282 PGM_UNLOCK(pGVM);
1283 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
1284 rc = VINF_EM_RAW_EMULATE_INSTR;
1285 }
1286 }
1287 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1288 return rc;
1289 }
1290 }
1291 else
1292 AssertMsgReturn(rc == VERR_NOT_FOUND, ("%Rrc GCPhysFault=%RGp\n", VBOXSTRICTRC_VAL(rc), GCPhysFault), rc);
1293
1294 /*
1295 * Must be out of sync, so do a SyncPage and restart the instruction.
1296 *
1297 * ASSUMES that ALL handlers are page aligned and cover whole pages
1298 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
1299 */
1300 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
1301 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1302 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1303 PGM_UNLOCK(pGVM);
1304
1305 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1306 return rc;
1307
1308#else
1309 AssertLogRelFailed();
1310 return VERR_PGM_NOT_USED_IN_MODE;
1311#endif
1312}
1313