VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@ 93857

Last change on this file since 93857 was 93735, checked in by vboxsync, 3 years ago

VMM/PGM: Allow pfnPfHandler to be NULL in PGMR0HandlerPhysicalTypeSetUpContext (IEMR0InitVM does this for some reason). bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 51.6 KB
1/* $Id: PGMR0.cpp 93735 2022-02-14 17:06:27Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/rawpci.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/gmm.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/pdmdev.h>
29#include <VBox/vmm/vmcc.h>
30#include <VBox/vmm/gvm.h>
31#include "PGMInline.h"
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <iprt/assert.h>
35#include <iprt/mem.h>
36#include <iprt/memobj.h>
37#include <iprt/process.h>
38#include <iprt/rand.h>
39#include <iprt/string.h>
40#include <iprt/time.h>
41
42
43/*
44 * Instantiate the ring-0 header/code templates.
45 */
46/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
47#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
48#include "PGMR0Bth.h"
49#undef PGM_BTH_NAME
50
51#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
52#include "PGMR0Bth.h"
53#undef PGM_BTH_NAME
54
55#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
56#include "PGMR0Bth.h"
57#undef PGM_BTH_NAME
58
59#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
60#include "PGMR0Bth.h"
61#undef PGM_BTH_NAME
62
63
64/**
65 * Initializes the per-VM data for the PGM.
66 *
67 * This is called from under the GVMM lock, so it should only initialize the
68 * data such that PGMR0CleanupVM and others will work smoothly.
69 *
70 * @returns VBox status code.
71 * @param pGVM Pointer to the global VM structure.
72 * @param hMemObj Handle to the memory object backing pGVM.
73 */
74VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM, RTR0MEMOBJ hMemObj)
75{
76 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
77 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
78
79 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
80 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
81 {
82 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
83 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
84 }
85 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
86 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
87
88 /*
89 * Initialize the handler type table with return to ring-3 callbacks so we
90 * don't have to do anything special for ring-3 only registrations.
91 *
92 * Note! The random bits of the hType value are mainly there to prevent trouble
93 * with zero-initialized handles w/o needing to sacrifice handle zero.
94 */
95 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes); i++)
96 {
97 pGVM->pgmr0.s.aPhysHandlerTypes[i].hType = i | (RTRandU64() & ~(uint64_t)PGMPHYSHANDLERTYPE_IDX_MASK);
98 pGVM->pgmr0.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
99 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
100 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
101
102 pGVM->pgm.s.aPhysHandlerTypes[i].hType = pGVM->pgmr0.s.aPhysHandlerTypes[i].hType;
103 pGVM->pgm.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
104 }
105
106 /*
107 * Get the physical address of the ZERO and MMIO-dummy pages.
108 */
109 AssertReturn(((uintptr_t)&pGVM->pgm.s.abZeroPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
110 pGVM->pgm.s.HCPhysZeroPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abZeroPg) >> HOST_PAGE_SHIFT);
111 AssertReturn(pGVM->pgm.s.HCPhysZeroPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
112
113 AssertReturn(((uintptr_t)&pGVM->pgm.s.abMmioPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
114 pGVM->pgm.s.HCPhysMmioPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abMmioPg) >> HOST_PAGE_SHIFT);
115 AssertReturn(pGVM->pgm.s.HCPhysMmioPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
116
117 pGVM->pgm.s.HCPhysInvMmioPg = pGVM->pgm.s.HCPhysMmioPg;
118
119 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
120}
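
The handler-type table initialized above uses a handle scheme worth noting: the table index lives in the low bits (masked by PGMPHYSHANDLERTYPE_IDX_MASK) while random bits fill the rest, so a zero-initialized handle practically never matches a valid entry. A minimal standalone sketch of that encode/lookup pattern follows; the mask width, names and rand() source are illustrative assumptions, not the real PGM definitions.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define DEMO_TYPE_IDX_MASK  UINT64_C(0x3f)   /* assumed width; the real mask is PGMPHYSHANDLERTYPE_IDX_MASK */

/* Build a handle: index in the low bits, random salt in the rest. */
static uint64_t demoMakeHandle(unsigned idx)
{
    uint64_t uSalt = ((uint64_t)rand() << 32) | (uint64_t)rand();
    return (uint64_t)idx | (uSalt & ~DEMO_TYPE_IDX_MASK);
}

/* Look up: recover the index and verify the salt matches the stored handle. */
static int demoLookup(uint64_t const *pauTable, size_t cEntries, uint64_t hType, unsigned *pidx)
{
    unsigned idx = (unsigned)(hType & DEMO_TYPE_IDX_MASK);
    if (idx < cEntries && pauTable[idx] == hType)
    {
        *pidx = idx;
        return 0;
    }
    return -1;  /* stale or zero-initialized handle */
}

int main(void)
{
    uint64_t auTable[8];
    for (unsigned i = 0; i < 8; i++)
        auTable[i] = demoMakeHandle(i);

    unsigned idx;
    printf("valid:  %d\n", demoLookup(auTable, 8, auTable[3], &idx)); /* 0 */
    printf("zeroed: %d\n", demoLookup(auTable, 8, 0, &idx));          /* -1: a zeroed handle almost certainly fails the salt check */
    return 0;
}
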
121
122
123/**
124 * Initialize the per-VM PGM for ring-0.
125 *
126 * @returns VBox status code.
127 * @param pGVM Pointer to the global VM structure.
128 */
129VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
130{
131 /*
132 * Set up the ring-0 context for our access handlers.
133 */
134 int rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, 0 /*fFlags*/,
135 pgmPhysRomWriteHandler, pgmPhysRomWritePfHandler,
136 "ROM write protection", pGVM->pgm.s.hRomPhysHandlerType);
137 AssertLogRelRCReturn(rc, rc);
138
139 /*
140 * Register the physical access handler doing dirty MMIO2 tracing.
141 */
142 rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, PGMPHYSHANDLER_F_KEEP_PGM_LOCK,
143 pgmPhysMmio2WriteHandler, pgmPhysMmio2WritePfHandler,
144 "MMIO2 dirty page tracing", pGVM->pgm.s.hMmio2DirtyPhysHandlerType);
145 AssertLogRelRCReturn(rc, rc);
146
147 /*
148 * The page pool.
149 */
150 return pgmR0PoolInitVM(pGVM);
151}
152
153
154/**
155 * Called at the end of the ring-0 initialization to seal access handler types.
156 *
157 * @returns VBox status code.
158 * @param pGVM Pointer to the global VM structure.
159 */
160VMMR0_INT_DECL(void) PGMR0DoneInitVM(PGVM pGVM)
161{
162 /*
163 * Seal all the access handler types. Does both ring-3 and ring-0.
164 *
165 * Note! Since this is a void function and we don't have any ring-0 state
166 * machinery for marking the VM as bogus, this code will just
167 * override corrupted values as best as it can.
168 */
169 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
170 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes); i++)
171 {
172 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[i];
173 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[i];
174 PGMPHYSHANDLERKIND const enmKindR3 = pTypeR3->enmKind;
175 PGMPHYSHANDLERKIND const enmKindR0 = pTypeR0->enmKind;
176 AssertLogRelMsgStmt(pTypeR0->hType == pTypeR3->hType,
177 ("i=%u %#RX64 vs %#RX64 %s\n", i, pTypeR0->hType, pTypeR3->hType, pTypeR0->pszDesc),
178 pTypeR3->hType = pTypeR0->hType);
179 switch (enmKindR3)
180 {
181 case PGMPHYSHANDLERKIND_ALL:
182 case PGMPHYSHANDLERKIND_MMIO:
183 if ( enmKindR0 == enmKindR3
184 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
185 {
186 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
187 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
188 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
189 continue;
190 }
191 break;
192
193 case PGMPHYSHANDLERKIND_WRITE:
194 if ( enmKindR0 == enmKindR3
195 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
196 {
197 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
198 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
199 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
200 continue;
201 }
202 break;
203
204 default:
205 AssertLogRelMsgFailed(("i=%u enmKindR3=%d\n", i, enmKindR3));
206 RT_FALL_THROUGH();
207 case PGMPHYSHANDLERKIND_INVALID:
208 AssertLogRelMsg(enmKindR0 == PGMPHYSHANDLERKIND_INVALID,
209 ("i=%u enmKind=%d %s\n", i, enmKindR0, pTypeR0->pszDesc));
210 AssertLogRelMsg(pTypeR0->pfnHandler == pgmR0HandlerPhysicalHandlerToRing3,
211 ("i=%u pfnHandler=%p %s\n", i, pTypeR0->pfnHandler, pTypeR0->pszDesc));
212 AssertLogRelMsg(pTypeR0->pfnPfHandler == pgmR0HandlerPhysicalPfHandlerToRing3,
213 ("i=%u pfnPfHandler=%p %s\n", i, pTypeR0->pfnPfHandler, pTypeR0->pszDesc));
214
215 /* Unused or bad ring-3 entry, make it and the ring-0 one harmless. */
216 pTypeR3->enmKind = PGMPHYSHANDLERKIND_END;
217 pTypeR3->fRing0DevInsIdx = false;
218 pTypeR3->fKeepPgmLock = false;
219 pTypeR3->uState = 0;
220 break;
221 }
222 pTypeR3->fRing0Enabled = false;
223
224 /* Make sure the entry is harmless and goes to ring-3. */
225 pTypeR0->enmKind = PGMPHYSHANDLERKIND_END;
226 pTypeR0->pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
227 pTypeR0->pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
228 pTypeR0->fRing0DevInsIdx = false;
229 pTypeR0->fKeepPgmLock = false;
230 pTypeR0->uState = 0;
231 pTypeR0->pszDesc = "invalid";
232 }
233}
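
The sealing loop reduces each handler kind to a per-page interception state: WRITE-kind handlers only intercept writes, ALL and MMIO kinds intercept every access, and anything invalid is left harmless. A hedged standalone sketch of that mapping, using made-up enum and state names rather than the real PGMPHYSHANDLERKIND_* and PGM_PAGE_HNDL_PHYS_STATE_* constants:

#include <stdio.h>

typedef enum { DEMO_KIND_INVALID, DEMO_KIND_WRITE, DEMO_KIND_ALL, DEMO_KIND_MMIO, DEMO_KIND_END } DEMOKIND;
typedef enum { DEMO_STATE_NONE, DEMO_STATE_WRITE, DEMO_STATE_ALL } DEMOSTATE;

/* Map a handler kind onto the page-level interception state, mirroring the switch above. */
static DEMOSTATE demoKindToState(DEMOKIND enmKind)
{
    switch (enmKind)
    {
        case DEMO_KIND_WRITE:   return DEMO_STATE_WRITE; /* intercept writes only */
        case DEMO_KIND_ALL:
        case DEMO_KIND_MMIO:    return DEMO_STATE_ALL;   /* intercept reads and writes */
        default:                return DEMO_STATE_NONE;  /* invalid/unused entries stay harmless */
    }
}

int main(void)
{
    printf("WRITE -> %d, MMIO -> %d, INVALID -> %d\n",
           demoKindToState(DEMO_KIND_WRITE), demoKindToState(DEMO_KIND_MMIO), demoKindToState(DEMO_KIND_INVALID));
    return 0;
}
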
234
235
236/**
237 * Cleans up any loose ends before the GVM structure is destroyed.
238 */
239VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
240{
241 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
242 {
243 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
244 {
245 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
246 AssertRC(rc);
247 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
248 }
249
250 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
251 {
252 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
253 AssertRC(rc);
254 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
255 }
256 }
257
258 if (pGVM->pgmr0.s.hPhysHandlerMapObj != NIL_RTR0MEMOBJ)
259 {
260 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMapObj, true /*fFreeMappings*/);
261 AssertRC(rc);
262 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
263 }
264
265 if (pGVM->pgmr0.s.hPhysHandlerMemObj != NIL_RTR0MEMOBJ)
266 {
267 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMemObj, true /*fFreeMappings*/);
268 AssertRC(rc);
269 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
270 }
271
272 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
273 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
274}
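
The cleanup above follows a single idiom throughout: free a resource only if its handle is not NIL, then write NIL back so a repeated cleanup pass is harmless. A minimal sketch of that idempotent-teardown pattern with a hypothetical handle type (plain malloc/free, not the RTR0MemObj API):

#include <stdio.h>
#include <stdlib.h>

typedef void *DEMOHANDLE;
#define DEMO_NIL_HANDLE NULL

/* Free-and-reset: safe to call any number of times. */
static void demoSafeFree(DEMOHANDLE *phObj)
{
    if (*phObj != DEMO_NIL_HANDLE)
    {
        free(*phObj);
        *phObj = DEMO_NIL_HANDLE;  /* mark as released so a second cleanup pass is a no-op */
    }
}

int main(void)
{
    DEMOHANDLE hObj = malloc(32);
    demoSafeFree(&hObj);
    demoSafeFree(&hObj);           /* second call does nothing */
    printf("handle now %p\n", (void *)hObj);
    return 0;
}
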
275
276
277/**
278 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
279 *
280 * @returns The following VBox status codes.
281 * @retval VINF_SUCCESS on success. FF cleared.
282 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
283 *
284 * @param pGVM The global (ring-0) VM structure.
285 * @param idCpu The ID of the calling EMT.
286 * @param fRing3 Set if the caller is ring-3. Determines whether to
287 * return VINF_EM_NO_MEMORY or not.
288 *
289 * @thread EMT(idCpu)
290 *
291 * @remarks Must be called from within the PGM critical section. The caller
292 * must clear the new pages.
293 */
294int pgmR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu, bool fRing3)
295{
296 /*
297 * Validate inputs.
298 */
299 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
300 Assert(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf());
301 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
302
303 /*
304 * Check for error injection.
305 */
306 if (RT_LIKELY(!pGVM->pgm.s.fErrInjHandyPages))
307 { /* likely */ }
308 else
309 return VERR_NO_MEMORY;
310
311 /*
312 * Try to allocate a full set of handy pages.
313 */
314 uint32_t const iFirst = pGVM->pgm.s.cHandyPages;
315 AssertMsgReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), ("%#x\n", iFirst), VERR_PGM_HANDY_PAGE_IPE);
316
317 uint32_t const cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
318 if (!cPages)
319 return VINF_SUCCESS;
320
321 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
322 if (RT_SUCCESS(rc))
323 {
324 uint32_t const cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages); /** @todo allow allocating less... */
325 pGVM->pgm.s.cHandyPages = cHandyPages;
326 VM_FF_CLEAR(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
327 VM_FF_CLEAR(pGVM, VM_FF_PGM_NO_MEMORY);
328
329#ifdef VBOX_STRICT
330 for (uint32_t i = 0; i < cHandyPages; i++)
331 {
332 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
333 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
334 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
335 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
336 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
337 }
338#endif
339
340 /*
341 * Clear the pages.
342 */
343 for (uint32_t iPage = iFirst; iPage < cHandyPages; iPage++)
344 {
345 PGMMPAGEDESC pPage = &pGVM->pgm.s.aHandyPages[iPage];
346 if (!pPage->fZeroed)
347 {
348 void *pv = NULL;
349#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
350 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
351#else
352 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
353#endif
354 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
355
356 RT_BZERO(pv, GUEST_PAGE_SIZE);
357 pPage->fZeroed = true;
358 }
359#ifdef VBOX_STRICT
360 else
361 {
362 void *pv = NULL;
363# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
364 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
365# else
366 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
367# endif
368 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
369 AssertReturn(ASMMemIsZero(pv, GUEST_PAGE_SIZE), VERR_PGM_HANDY_PAGE_IPE);
370 }
371#endif
372 Log3(("PGMR0PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
373 }
374 }
375 else
376 {
377 /*
378 * We should never get here unless there is a genuine shortage of
379 * memory (or some internal error). Flag the error so the VM can be
380 * suspended ASAP and the user informed. If we're totally out of
381 * handy pages we will return failure.
382 */
383 /* Report the failure. */
384 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
385 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
386 rc, pGVM->pgm.s.cHandyPages,
387 pGVM->pgm.s.cAllPages, pGVM->pgm.s.cPrivatePages, pGVM->pgm.s.cSharedPages, pGVM->pgm.s.cZeroPages));
388
389 GMMMEMSTATSREQ Stats = { { SUPVMMR0REQHDR_MAGIC, sizeof(Stats) }, 0, 0, 0, 0, 0 };
390 if (RT_SUCCESS(GMMR0QueryMemoryStatsReq(pGVM, idCpu, &Stats)))
391 LogRel(("GMM: Statistics:\n"
392 " Allocated pages: %RX64\n"
393 " Free pages: %RX64\n"
394 " Shared pages: %RX64\n"
395 " Maximum pages: %RX64\n"
396 " Ballooned pages: %RX64\n",
397 Stats.cAllocPages, Stats.cFreePages, Stats.cSharedPages, Stats.cMaxPages, Stats.cBalloonedPages));
398
399 if ( rc != VERR_NO_MEMORY
400 && rc != VERR_NO_PHYS_MEMORY
401 && rc != VERR_LOCK_FAILED)
402 for (uint32_t iPage = 0; iPage < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); iPage++)
403 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
404 iPage, pGVM->pgm.s.aHandyPages[iPage].HCPhysGCPhys, pGVM->pgm.s.aHandyPages[iPage].idPage,
405 pGVM->pgm.s.aHandyPages[iPage].idSharedPage));
406
407 /* Set the FFs and adjust rc. */
408 VM_FF_SET(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
409 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
410 if (!fRing3)
411 if ( rc == VERR_NO_MEMORY
412 || rc == VERR_NO_PHYS_MEMORY
413 || rc == VERR_LOCK_FAILED
414 || rc == VERR_MAP_FAILED)
415 rc = VINF_EM_NO_MEMORY;
416 }
417
418 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
419 return rc;
420}
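
The worker tops up a fixed-size descriptor array: cHandyPages says how many entries are currently ready, the batch request covers the remaining slots, and every freshly obtained page is zeroed before it may be handed out. A simplified, self-contained sketch of that top-up-and-zero loop; the descriptor layout and the allocator are stand-ins, not the GMM API:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE   4096u
#define DEMO_MAX_HANDY   32u

typedef struct { void *pv; int fZeroed; } DEMOPAGEDESC;

/* Stand-in allocator: hand out cReq page-sized buffers. */
static int demoAllocPages(DEMOPAGEDESC *paDescs, uint32_t cReq)
{
    for (uint32_t i = 0; i < cReq; i++)
    {
        paDescs[i].pv = malloc(DEMO_PAGE_SIZE);
        if (!paDescs[i].pv)
            return -1;
        paDescs[i].fZeroed = 0;
    }
    return 0;
}

/* Top up the array to full capacity and zero the newly obtained entries. */
static int demoTopUpHandyPages(DEMOPAGEDESC *paHandy, uint32_t *pcHandy)
{
    uint32_t const iFirst = *pcHandy;
    uint32_t const cReq   = DEMO_MAX_HANDY - iFirst;
    if (!cReq)
        return 0;                         /* already full */
    if (demoAllocPages(&paHandy[iFirst], cReq) != 0)
        return -1;                        /* out of memory: the caller would set its "no memory" flag */
    for (uint32_t i = iFirst; i < DEMO_MAX_HANDY; i++)
        if (!paHandy[i].fZeroed)
        {
            memset(paHandy[i].pv, 0, DEMO_PAGE_SIZE);
            paHandy[i].fZeroed = 1;
        }
    *pcHandy = DEMO_MAX_HANDY;
    return 0;
}

int main(void)
{
    DEMOPAGEDESC aHandy[DEMO_MAX_HANDY] = {{0}};
    uint32_t cHandy = 5;                  /* pretend 5 pages are still ready */
    demoAllocPages(aHandy, cHandy);       /* give the first 5 some backing too */
    printf("top-up: %d, now %u pages\n", demoTopUpHandyPages(aHandy, &cHandy), cHandy);
    return 0;
}
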
421
422
423/**
424 * Worker function for PGMR3PhysAllocateHandyPages / VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES.
425 *
426 * @returns The following VBox status codes.
427 * @retval VINF_SUCCESS on success. FF cleared.
428 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
429 *
430 * @param pGVM The global (ring-0) VM structure.
431 * @param idCpu The ID of the calling EMT.
432 *
433 * @thread EMT(idCpu)
434 *
435 * @remarks Must be called from within the PGM critical section. The caller
436 * must clear the new pages.
437 */
438VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
439{
440 /*
441 * Validate inputs.
442 */
443 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
444 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
445
446 /*
447 * Enter the PGM lock and call the worker.
448 */
449 int rc = PGM_LOCK(pGVM);
450 if (RT_SUCCESS(rc))
451 {
452 rc = pgmR0PhysAllocateHandyPages(pGVM, idCpu, true /*fRing3*/);
453 PGM_UNLOCK(pGVM);
454 }
455 return rc;
456}
457
458
459/**
460 * Flushes any changes pending in the handy page array.
461 *
462 * It is very important that this gets done when page sharing is enabled.
463 *
464 * @returns The following VBox status codes.
465 * @retval VINF_SUCCESS on success. FF cleared.
466 *
467 * @param pGVM The global (ring-0) VM structure.
468 * @param idCpu The ID of the calling EMT.
469 *
470 * @thread EMT(idCpu)
471 *
472 * @remarks Must be called from within the PGM critical section.
473 */
474VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
475{
476 /*
477 * Validate inputs.
478 */
479 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
480 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
481 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
482
483 /*
484 * Flush any changes pending in the handy page array.
485 */
486 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
487 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
488 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
489 if (!cPages)
490 return VINF_SUCCESS;
491 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
492
493 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
494 return rc;
495}
496
497
498/**
499 * Allocate a large page at @a GCPhys.
500 *
501 * @returns The following VBox status codes.
502 * @retval VINF_SUCCESS on success.
503 * @retval VINF_EM_NO_MEMORY if we're out of memory.
504 *
505 * @param pGVM The global (ring-0) VM structure.
506 * @param idCpu The ID of the calling EMT.
507 * @param GCPhys The guest physical address of the page.
508 *
509 * @thread EMT(idCpu)
510 *
511 * @remarks Must be called from within the PGM critical section. The caller
512 * must clear the new pages.
513 */
514int pgmR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
515{
516 STAM_PROFILE_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, a);
517 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
518
519 /*
520 * Allocate a large page.
521 */
522 RTHCPHYS HCPhys = NIL_GMMPAGEDESC_PHYS;
523 uint32_t idPage = NIL_GMM_PAGEID;
524
525 if (true) /** @todo pre-allocate 2-3 pages on the allocation thread. */
526 {
527 uint64_t const nsAllocStart = RTTimeNanoTS();
528 if (nsAllocStart < pGVM->pgm.s.nsLargePageRetry)
529 {
530 LogFlowFunc(("returns VERR_TRY_AGAIN - %RU64 ns left of hold off period\n", pGVM->pgm.s.nsLargePageRetry - nsAllocStart));
531 return VERR_TRY_AGAIN;
532 }
533
534 int const rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M, &idPage, &HCPhys);
535
536 uint64_t const nsAllocEnd = RTTimeNanoTS();
537 uint64_t const cNsElapsed = nsAllocEnd - nsAllocStart;
538 STAM_REL_PROFILE_ADD_PERIOD(&pGVM->pgm.s.StatLargePageAlloc, cNsElapsed);
539 if (cNsElapsed < RT_NS_100MS)
540 pGVM->pgm.s.cLargePageLongAllocRepeats = 0;
541 else
542 {
543 /* If a large page allocation takes more than 100ms, back off for a
544 while so the host OS can reshuffle memory and make some more large
545 pages available. However, if it took over a second, just disable it. */
546 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageOverflow);
547 pGVM->pgm.s.cLargePageLongAllocRepeats++;
548 if (cNsElapsed > RT_NS_1SEC)
549 {
550 LogRel(("PGMR0PhysAllocateLargePage: Disabling large pages after %'RU64 ns allocation time.\n", cNsElapsed));
551 PGMSetLargePageUsage(pGVM, false);
552 }
553 else
554 {
555 Log(("PGMR0PhysAllocateLargePage: Suspending large page allocations for %u sec after %'RU64 ns allocation time.\n",
556 30 * pGVM->pgm.s.cLargePageLongAllocRepeats, cNsElapsed));
557 pGVM->pgm.s.nsLargePageRetry = nsAllocEnd + RT_NS_30SEC * pGVM->pgm.s.cLargePageLongAllocRepeats;
558 }
559 }
560
561 if (RT_FAILURE(rc))
562 {
563 Log(("PGMR0PhysAllocateLargePage: Failed: %Rrc\n", rc));
564 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageAllocFailed);
565 if (rc == VERR_NOT_SUPPORTED)
566 {
567 LogRel(("PGM: Disabling large pages because of VERR_NOT_SUPPORTED status.\n"));
568 PGMSetLargePageUsage(pGVM, false);
569 }
570 return rc;
571 }
572 }
573
574 STAM_PROFILE_STOP_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, &pGVM->pgm.s.Stats.StatLargePageSetup, a);
575
576 /*
577 * Enter the pages into PGM.
578 */
579 bool fFlushTLBs = false;
580 VBOXSTRICTRC rc = VINF_SUCCESS;
581 unsigned cLeft = _2M / GUEST_PAGE_SIZE;
582 while (cLeft-- > 0)
583 {
584 PPGMPAGE const pPage = pgmPhysGetPage(pGVM, GCPhys);
585 AssertReturn(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM && PGM_PAGE_IS_ZERO(pPage), VERR_PGM_UNEXPECTED_PAGE_STATE);
586
587 /* Make sure there are no zero mappings. */
588 uint16_t const u16Tracking = PGM_PAGE_GET_TRACKING(pPage);
589 if (u16Tracking == 0)
590 Assert(PGM_PAGE_GET_PTE_INDEX(pPage) == 0);
591 else
592 {
593 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageZeroEvict);
594 VBOXSTRICTRC rc3 = pgmPoolTrackUpdateGCPhys(pGVM, GCPhys, pPage, true /*fFlushPTEs*/, &fFlushTLBs);
595 Log(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: tracking=%#x rc3=%Rrc\n", GCPhys, u16Tracking, VBOXSTRICTRC_VAL(rc3)));
596 if (rc3 != VINF_SUCCESS && rc == VINF_SUCCESS)
597 rc = rc3; /** @todo not perfect... */
598 PGM_PAGE_SET_PTE_INDEX(pGVM, pPage, 0);
599 PGM_PAGE_SET_TRACKING(pGVM, pPage, 0);
600 }
601
602 /* Setup the new page. */
603 PGM_PAGE_SET_HCPHYS(pGVM, pPage, HCPhys);
604 PGM_PAGE_SET_STATE(pGVM, pPage, PGM_PAGE_STATE_ALLOCATED);
605 PGM_PAGE_SET_PDE_TYPE(pGVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
606 PGM_PAGE_SET_PAGEID(pGVM, pPage, idPage);
607 Log3(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: idPage=%#x HCPhys=%RGp (old tracking=%#x)\n",
608 GCPhys, idPage, HCPhys, u16Tracking));
609
610 /* advance */
611 idPage++;
612 HCPhys += GUEST_PAGE_SIZE;
613 GCPhys += GUEST_PAGE_SIZE;
614 }
615
616 STAM_COUNTER_ADD(&pGVM->pgm.s.Stats.StatRZPageReplaceZero, _2M / GUEST_PAGE_SIZE);
617 pGVM->pgm.s.cZeroPages -= _2M / GUEST_PAGE_SIZE;
618 pGVM->pgm.s.cPrivatePages += _2M / GUEST_PAGE_SIZE;
619
620 /*
621 * Flush all TLBs.
622 */
623 if (!fFlushTLBs)
624 { /* likely as we shouldn't normally map zero pages */ }
625 else
626 {
627 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageTlbFlush);
628 PGM_INVL_ALL_VCPU_TLBS(pGVM);
629 }
630 /** @todo this is a little expensive (~3000 ticks) since we'll have to
631 * invalidate everything. Add a version to the TLB? */
632 pgmPhysInvalidatePageMapTLB(pGVM);
633
634 STAM_PROFILE_STOP(&pGVM->pgm.s.Stats.StatLargePageSetup, a);
635#if 0 /** @todo returning info statuses here might not be a great idea... */
636 LogFlow(("PGMR0PhysAllocateLargePage: returns %Rrc\n", VBOXSTRICTRC_VAL(rc) ));
637 return VBOXSTRICTRC_TODO(rc);
638#else
639 LogFlow(("PGMR0PhysAllocateLargePage: returns VINF_SUCCESS (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rc) ));
640 return VINF_SUCCESS;
641#endif
642}
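
The timing code above amounts to a simple back-off policy: an allocation slower than 100 ms suspends further attempts for 30 seconds times the number of consecutive slow attempts, and anything over a second disables large pages altogether. A standalone sketch of that policy under assumed constants; timestamps are passed in explicitly instead of using RTTimeNanoTS:

#include <stdint.h>
#include <stdio.h>

#define NS_100MS  UINT64_C(100000000)
#define NS_1SEC   UINT64_C(1000000000)
#define NS_30SEC  (30 * NS_1SEC)

typedef struct
{
    uint64_t nsRetry;        /* don't try again before this timestamp */
    uint32_t cSlowRepeats;   /* consecutive slow allocations */
    int      fEnabled;       /* large pages still enabled? */
} DEMOLARGEPAGEPOLICY;

/* Feed the duration of one allocation attempt into the policy. */
static void demoRecordAllocTime(DEMOLARGEPAGEPOLICY *pPolicy, uint64_t nsStart, uint64_t nsEnd)
{
    uint64_t const cNsElapsed = nsEnd - nsStart;
    if (cNsElapsed < NS_100MS)
        pPolicy->cSlowRepeats = 0;                                    /* fast again, clear the back-off */
    else if (cNsElapsed > NS_1SEC)
        pPolicy->fEnabled = 0;                                        /* hopeless, stop using large pages */
    else
    {
        pPolicy->cSlowRepeats++;
        pPolicy->nsRetry = nsEnd + NS_30SEC * pPolicy->cSlowRepeats;  /* hold off 30s, 60s, 90s, ... */
    }
}

/* Should an allocation be attempted at timestamp nsNow? */
static int demoMayAllocate(DEMOLARGEPAGEPOLICY const *pPolicy, uint64_t nsNow)
{
    return pPolicy->fEnabled && nsNow >= pPolicy->nsRetry;
}

int main(void)
{
    DEMOLARGEPAGEPOLICY Policy = { 0, 0, 1 };
    uint64_t nsNow = NS_1SEC;                                   /* arbitrary starting timestamp */

    demoRecordAllocTime(&Policy, nsNow, nsNow + 2 * NS_100MS);  /* a 200 ms allocation: back off 30 s */
    printf("right after: %d\n", demoMayAllocate(&Policy, nsNow + 2 * NS_100MS));  /* 0 */
    printf("35 s later:  %d\n", demoMayAllocate(&Policy, nsNow + 35 * NS_1SEC));  /* 1 */
    return 0;
}
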
643
644
645/**
646 * Allocate a large page at @a GCPhys.
647 *
648 * @returns The following VBox status codes.
649 * @retval VINF_SUCCESS on success.
650 * @retval VINF_EM_NO_MEMORY if we're out of memory.
651 *
652 * @param pGVM The global (ring-0) VM structure.
653 * @param idCpu The ID of the calling EMT.
654 * @param GCPhys The guest physical address of the page.
655 *
656 * @thread EMT(idCpu)
657 *
658 * @remarks Must be called from within the PGM critical section. The caller
659 * must clear the new pages.
660 */
661VMMR0_INT_DECL(int) PGMR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
662{
663 /*
664 * Validate inputs.
665 */
666 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
667 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
668
669 int rc = PGM_LOCK(pGVM);
670 AssertRCReturn(rc, rc);
671
672 /* The caller might have done this already, but since we're ring-3 callable we
673 need to make sure everything is fine before starting the allocation here. */
674 for (unsigned i = 0; i < _2M / GUEST_PAGE_SIZE; i++)
675 {
676 PPGMPAGE pPage;
677 rc = pgmPhysGetPageEx(pGVM, GCPhys + i * GUEST_PAGE_SIZE, &pPage);
678 AssertRCReturnStmt(rc, PGM_UNLOCK(pGVM), rc);
679 AssertReturnStmt(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, PGM_UNLOCK(pGVM), VERR_PGM_PHYS_NOT_RAM);
680 AssertReturnStmt(PGM_PAGE_IS_ZERO(pPage), PGM_UNLOCK(pGVM), VERR_PGM_UNEXPECTED_PAGE_STATE);
681 }
682
683 /*
684 * Call common code.
685 */
686 rc = pgmR0PhysAllocateLargePage(pGVM, idCpu, GCPhys);
687
688 PGM_UNLOCK(pGVM);
689 return rc;
690}
691
692
693/**
694 * Locate a MMIO2 range.
695 *
696 * @returns Pointer to the MMIO2 range.
697 * @param pGVM The global (ring-0) VM structure.
698 * @param pDevIns The device instance owning the region.
699 * @param hMmio2 Handle to look up.
700 */
701DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMmio2Find(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
702{
703 /*
704 * We use the lookup table here as list walking is tedious in ring-0 when using
705 * ring-3 pointers, and this will probably require some kind of refactoring anyway.
706 */
707 if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
708 {
709 PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
710 if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
711 {
712 Assert(pCur->idMmio2 == hMmio2);
713 return pCur;
714 }
715 Assert(!pCur);
716 }
717 return NULL;
718}
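
The lookup treats the MMIO2 handle as a 1-based index into a fixed array (0 meaning "no handle") and then checks that the stored range really belongs to the calling device. A small sketch of that pattern with hypothetical types:

#include <stdio.h>
#include <stddef.h>

typedef struct { void *pvOwner; unsigned idSelf; } DEMORANGE;

/* hHandle is 1-based; 0 means "no handle". The owner must match. */
static DEMORANGE *demoFindRange(DEMORANGE * const *papRanges, size_t cRanges, unsigned hHandle, void *pvOwner)
{
    if (hHandle >= 1 && hHandle <= cRanges)
    {
        DEMORANGE *pCur = papRanges[hHandle - 1];
        if (pCur && pCur->pvOwner == pvOwner)
            return pCur;                /* found and owned by the caller */
    }
    return NULL;                        /* unknown handle or foreign owner */
}

int main(void)
{
    int owner1, owner2;
    DEMORANGE Range = { &owner1, 1 };
    DEMORANGE *apRanges[4] = { &Range, NULL, NULL, NULL };
    printf("own:     %p\n", (void *)demoFindRange(apRanges, 4, 1, &owner1));
    printf("foreign: %p\n", (void *)demoFindRange(apRanges, 4, 1, &owner2));
    return 0;
}
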
719
720
721/**
722 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
723 *
724 * @returns VBox status code.
725 * @param pGVM The global (ring-0) VM structure.
726 * @param pDevIns The device instance.
727 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
728 * @param offSub The offset into the region.
729 * @param cbSub The size of the mapping, zero meaning all the rest.
730 * @param ppvMapping Where to return the ring-0 mapping address.
731 */
732VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
733 size_t offSub, size_t cbSub, void **ppvMapping)
734{
735 AssertReturn(!(offSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
736 AssertReturn(!(cbSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
737
738 /*
739 * Translate hRegion into a range pointer.
740 */
741 PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMmio2Find(pGVM, pDevIns, hMmio2);
742 AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
743#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
744 uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
745#else
746 RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
747#endif
748 RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
749 pFirstRegMmio = NULL;
750 ASMCompilerBarrier();
751
752 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
753 if (cbSub == 0)
754 cbSub = cbReal - offSub;
755 else
756 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
757
758 /*
759 * Do the mapping.
760 */
761#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
762 AssertPtr(pvR0);
763 *ppvMapping = pvR0 + offSub;
764 return VINF_SUCCESS;
765#else
766 return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
767#endif
768}
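
The offSub/cbSub handling is a common subrange convention: both values must be page aligned, cbSub == 0 means "to the end of the region", and the resolved window must stay inside the real size. A standalone sketch of that validation and clamping arithmetic (the page size and error values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_OFFSET_MASK 0xfffu

/* Validate and resolve an (offSub, cbSub) request against a region of cbReal bytes.
   Returns 0 on success and writes the effective size to *pcbEff. */
static int demoResolveSubRange(uint64_t cbReal, uint64_t offSub, uint64_t cbSub, uint64_t *pcbEff)
{
    if ((offSub & DEMO_PAGE_OFFSET_MASK) || (cbSub & DEMO_PAGE_OFFSET_MASK))
        return -1;                      /* unsupported alignment */
    if (offSub >= cbReal)
        return -2;                      /* offset out of range */
    if (cbSub == 0)
        cbSub = cbReal - offSub;        /* zero means "the rest of the region" */
    else if (cbSub > cbReal || offSub + cbSub > cbReal)
        return -2;                      /* size out of range */
    *pcbEff = cbSub;
    return 0;
}

int main(void)
{
    uint64_t cbEff = 0;
    printf("%d cbEff=%llu\n", demoResolveSubRange(0x10000, 0x2000, 0, &cbEff), (unsigned long long)cbEff);
    printf("%d\n", demoResolveSubRange(0x10000, 0x2001, 0, &cbEff));   /* misaligned offset */
    return 0;
}
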
769
770
771/**
772 * This is called during PGMR3Init to init the physical access handler allocator
773 * and tree.
774 *
775 * @returns VBox status code.
776 * @param pGVM Pointer to the global VM structure.
777 * @param cEntries Desired number of physical access handlers to reserve
778 * space for (will be adjusted).
779 * @thread EMT(0)
780 */
781VMMR0_INT_DECL(int) PGMR0PhysHandlerInitReqHandler(PGVM pGVM, uint32_t cEntries)
782{
783 /*
784 * Validate the input and state.
785 */
786 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
787 AssertRCReturn(rc, rc);
788 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
789
790 AssertReturn(pGVM->pgmr0.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
791 AssertReturn(pGVM->pgm.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
792
793 AssertLogRelMsgReturn(cEntries <= _64K, ("%#x\n", cEntries), VERR_OUT_OF_RANGE);
794
795 /*
796 * Calculate the table size and allocate it.
797 */
798 uint32_t cbTreeAndBitmap = 0;
799 uint32_t const cbTotalAligned = pgmHandlerPhysicalCalcTableSizes(&cEntries, &cbTreeAndBitmap);
800 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
801 rc = RTR0MemObjAllocPage(&hMemObj, cbTotalAligned, false);
802 if (RT_SUCCESS(rc))
803 {
804 RTR0MEMOBJ hMapObj = NIL_RTR0MEMOBJ;
805 rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
806 if (RT_SUCCESS(rc))
807 {
808 uint8_t *pb = (uint8_t *)RTR0MemObjAddress(hMemObj);
809 if (!RTR0MemObjWasZeroInitialized(hMemObj))
810 RT_BZERO(pb, cbTotalAligned);
811
812 pGVM->pgmr0.s.PhysHandlerAllocator.initSlabAllocator(cEntries, (PPGMPHYSHANDLER)&pb[cbTreeAndBitmap],
813 (uint64_t *)&pb[sizeof(PGMPHYSHANDLERTREE)]);
814 pGVM->pgmr0.s.pPhysHandlerTree = (PPGMPHYSHANDLERTREE)pb;
815 pGVM->pgmr0.s.pPhysHandlerTree->initWithAllocator(&pGVM->pgmr0.s.PhysHandlerAllocator);
816 pGVM->pgmr0.s.hPhysHandlerMemObj = hMemObj;
817 pGVM->pgmr0.s.hPhysHandlerMapObj = hMapObj;
818
819 AssertCompile(sizeof(pGVM->pgm.s.PhysHandlerAllocator) == sizeof(pGVM->pgmr0.s.PhysHandlerAllocator));
820 RTR3PTR R3Ptr = RTR0MemObjAddressR3(hMapObj);
821 pGVM->pgm.s.pPhysHandlerTree = R3Ptr;
822 pGVM->pgm.s.PhysHandlerAllocator.m_paNodes = R3Ptr + cbTreeAndBitmap;
823 pGVM->pgm.s.PhysHandlerAllocator.m_pbmAlloc = R3Ptr + sizeof(PGMPHYSHANDLERTREE);
824 pGVM->pgm.s.PhysHandlerAllocator.m_cNodes = cEntries;
825 pGVM->pgm.s.PhysHandlerAllocator.m_cErrors = 0;
826 pGVM->pgm.s.PhysHandlerAllocator.m_idxAllocHint = 0;
827 pGVM->pgm.s.PhysHandlerAllocator.m_uPadding = 0;
828 return VINF_SUCCESS;
829 }
830
831 RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
832 }
833 return rc;
834}
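
The allocator above carves one page-aligned allocation into three parts: the tree structure at the start, the allocation bitmap directly after it, and the node array at offset cbTreeAndBitmap; the same offsets are then applied to the ring-3 mapping address so both contexts see an identical layout. A simplified sketch of that carving, using plain calloc and made-up structure sizes instead of RTR0MemObj and the real PGM types:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

typedef struct { uint64_t uMagic; } DEMOTREE;       /* stand-in for the tree header */
typedef struct { uint64_t aFields[4]; } DEMONODE;   /* stand-in for one handler node */

int main(void)
{
    uint32_t const cEntries        = 256;
    uint32_t const cbBitmap        = (cEntries + 63) / 64 * 8;   /* one bit per node, stored in 64-bit words */
    uint32_t const cbTreeAndBitmap = (uint32_t)((sizeof(DEMOTREE) + cbBitmap + 63) & ~(size_t)63); /* keep nodes aligned */
    size_t   const cbTotal         = cbTreeAndBitmap + cEntries * sizeof(DEMONODE);

    uint8_t *pb = calloc(1, cbTotal);
    if (!pb)
        return 1;

    /* Carve the single block: tree header, then bitmap, then the node array. */
    DEMOTREE *pTree    = (DEMOTREE *)pb;
    uint64_t *pbmAlloc = (uint64_t *)&pb[sizeof(DEMOTREE)];
    DEMONODE *paNodes  = (DEMONODE *)&pb[cbTreeAndBitmap];

    pTree->uMagic = 0x19700101;
    pbmAlloc[0]  |= 1;                                   /* mark node 0 as allocated */
    paNodes[0].aFields[0] = 42;

    printf("tree=%p bitmap=%p nodes=%p (total %zu bytes)\n",
           (void *)pTree, (void *)pbmAlloc, (void *)paNodes, cbTotal);
    free(pb);
    return 0;
}
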
835
836
837/**
838 * Updates a physical access handler type with ring-0 callback functions.
839 *
840 * The handler type must first have been registered in ring-3.
841 *
842 * @returns VBox status code.
843 * @param pGVM The global (ring-0) VM structure.
844 * @param enmKind The kind of access handler.
845 * @param fFlags PGMPHYSHANDLER_F_XXX
846 * @param pfnHandler Pointer to the ring-0 handler callback.
847 * @param pfnPfHandler Pointer to the ring-0 \#PF handler callback.
848 * Can be NULL (not recommended, though).
849 * @param pszDesc The type description.
850 * @param hType The handle to do ring-0 callback registrations for.
851 * @thread EMT(0)
852 */
853VMMR0_INT_DECL(int) PGMR0HandlerPhysicalTypeSetUpContext(PGVM pGVM, PGMPHYSHANDLERKIND enmKind, uint32_t fFlags,
854 PFNPGMPHYSHANDLER pfnHandler, PFNPGMRZPHYSPFHANDLER pfnPfHandler,
855 const char *pszDesc, PGMPHYSHANDLERTYPE hType)
856{
857 /*
858 * Validate input.
859 */
860 AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER);
861 AssertPtrNullReturn(pfnPfHandler, VERR_INVALID_POINTER);
862
863 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
864 AssertReturn( enmKind == PGMPHYSHANDLERKIND_WRITE
865 || enmKind == PGMPHYSHANDLERKIND_ALL
866 || enmKind == PGMPHYSHANDLERKIND_MMIO,
867 VERR_INVALID_PARAMETER);
868 AssertMsgReturn(!(fFlags & ~PGMPHYSHANDLER_F_VALID_MASK), ("%#x\n", fFlags), VERR_INVALID_FLAGS);
869
870 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
871 AssertMsgReturn(hType == pTypeR0->hType, ("%#RX64, expected=%#RX64\n", hType, pTypeR0->hType), VERR_INVALID_HANDLE);
872 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
873 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == PGMPHYSHANDLERTYPE_IDX_MASK + 1);
874 AssertReturn(pTypeR0->enmKind == PGMPHYSHANDLERKIND_INVALID, VERR_ALREADY_INITIALIZED);
875
876 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
877 AssertRCReturn(rc, rc);
878 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
879
880 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
881 AssertMsgReturn(pTypeR3->enmKind == enmKind,
882 ("%#x: %d, expected %d\n", hType, pTypeR3->enmKind, enmKind),
883 VERR_INVALID_HANDLE);
884 AssertMsgReturn(pTypeR3->fKeepPgmLock == RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK),
885 ("%#x: %d, fFlags=%d\n", hType, pTypeR3->fKeepPgmLock, fFlags),
886 VERR_INVALID_HANDLE);
887 AssertMsgReturn(pTypeR3->fRing0DevInsIdx == RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX),
888 ("%#x: %d, fFlags=%d\n", hType, pTypeR3->fRing0DevInsIdx, fFlags),
889 VERR_INVALID_HANDLE);
890
891 /*
892 * Update the entry.
893 */
894 pTypeR0->enmKind = enmKind;
895 pTypeR0->uState = enmKind == PGMPHYSHANDLERKIND_WRITE
896 ? PGM_PAGE_HNDL_PHYS_STATE_WRITE : PGM_PAGE_HNDL_PHYS_STATE_ALL;
897 pTypeR0->fKeepPgmLock = RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK);
898 pTypeR0->fRing0DevInsIdx = RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX);
899 pTypeR0->pfnHandler = pfnHandler;
900 pTypeR0->pfnPfHandler = pfnPfHandler;
901 pTypeR0->pszDesc = pszDesc;
902
903 pTypeR3->fRing0Enabled = true;
904
905 LogFlow(("PGMR0HandlerPhysicalTypeRegister: hType=%#x: enmKind=%d fFlags=%#x pfnHandler=%p pfnPfHandler=%p pszDesc=%s\n",
906 hType, enmKind, fFlags, pfnHandler, pfnPfHandler, pszDesc));
907 return VINF_SUCCESS;
908}
909
910
911#ifdef VBOX_WITH_PCI_PASSTHROUGH
912/* Interface sketch. The interface belongs to a global PCI pass-through
913 manager. It shall use the global VM handle, not the user VM handle to
914 store the per-VM info (domain) since that is all ring-0 stuff, thus
915 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0';
916 we can discuss the PciRaw code re-organization when I'm back from
917 vacation.
918
919 I've implemented the initial IOMMU setup below. For things to work
920 reliably, we will probably need to add a whole bunch of checks and
921 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
922 assuming nested paging (enforced) and prealloc (enforced), no
923 ballooning (check missing), page sharing (check missing) or live
924 migration (check missing), it might work fine. At least if some
925 VM power-off hook is present and can tear down the IOMMU page tables. */
926
927/**
928 * Tells the global PCI pass-through manager that we are about to set up the
929 * guest page to host page mappings for the specified VM.
930 *
931 * @returns VBox status code.
932 *
933 * @param pGVM The ring-0 VM structure.
934 */
935VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
936{
937 NOREF(pGVM);
938 return VINF_SUCCESS;
939}
940
941
942/**
943 * Assigns a host page mapping for a guest page.
944 *
945 * This is only used when setting up the mappings, i.e. between
946 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
947 *
948 * @returns VBox status code.
949 * @param pGVM The ring-0 VM structure.
950 * @param GCPhys The address of the guest page (page aligned).
951 * @param HCPhys The address of the host page (page aligned).
952 */
953VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
954{
955 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
956 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
957
958 if (pGVM->rawpci.s.pfnContigMemInfo)
959 /** @todo what do we do on failure? */
960 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_MAP);
961
962 return VINF_SUCCESS;
963}
964
965
966/**
967 * Indicates that the specified guest page doesn't exists but doesn't have host
968 * page mapping we trust PCI pass-through with.
969 *
970 * This is only used when setting up the mappings, i.e. between
971 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
972 *
973 * @returns VBox status code.
974 * @param pGVM The ring-0 VM structure.
975 * @param GCPhys The address of the guest page (page aligned).
977 */
978VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
979{
980 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
981
982 if (pGVM->rawpci.s.pfnContigMemInfo)
983 /** @todo what do we do on failure? */
984 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
985
986 return VINF_SUCCESS;
987}
988
989
990/**
991 * Tells the global PCI pass-through manager that we have completed setting up
992 * the guest page to host page mappings for the specified VM.
993 *
994 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
995 * if some page assignment failed.
996 *
997 * @returns VBox status code.
998 *
999 * @param pGVM The ring-0 VM structure.
1000 */
1001VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
1002{
1003 NOREF(pGVM);
1004 return VINF_SUCCESS;
1005}
1006
1007
1008/**
1009 * Tells the global PCI pass-through manager that a guest page mapping has
1010 * changed after the initial setup.
1011 *
1012 * @returns VBox status code.
1013 * @param pGVM The ring-0 VM structure.
1014 * @param GCPhys The address of the guest page (page aligned).
1015 * @param HCPhys The new host page address or NIL_RTHCPHYS if
1016 * now unassigned.
1017 */
1018VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1019{
1020 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
1021 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
1022 NOREF(pGVM);
1023 return VINF_SUCCESS;
1024}
1025
1026#endif /* VBOX_WITH_PCI_PASSTHROUGH */
1027
1028
1029/**
1030 * Sets up the IOMMU when raw PCI device is enabled.
1031 *
1032 * @note This is a hack that will probably be remodelled and refined later!
1033 *
1034 * @returns VBox status code.
1035 *
1036 * @param pGVM The global (ring-0) VM structure.
1037 */
1038VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
1039{
1040 int rc = GVMMR0ValidateGVM(pGVM);
1041 if (RT_FAILURE(rc))
1042 return rc;
1043
1044#ifdef VBOX_WITH_PCI_PASSTHROUGH
1045 if (pGVM->pgm.s.fPciPassthrough)
1046 {
1047 /*
1048 * The Simplistic Approach - Enumerate all the pages and tell the
1049 * IOMMU about each of them.
1050 */
1051 PGM_LOCK_VOID(pGVM);
1052 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
1053 if (RT_SUCCESS(rc))
1054 {
1055 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
1056 {
1057 PPGMPAGE pPage = &pRam->aPages[0];
1058 RTGCPHYS GCPhys = pRam->GCPhys;
1059 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
1060 while (cLeft-- > 0)
1061 {
1062 /* Only expose pages that are 100% safe for now. */
1063 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1064 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
1065 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
1066 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
1067 else
1068 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
1069
1070 /* next */
1071 pPage++;
1072 GCPhys += HOST_PAGE_SIZE;
1073 }
1074 }
1075
1076 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
1077 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
1078 rc = rc2;
1079 }
1080 PGM_UNLOCK(pGVM);
1081 }
1082 else
1083#endif
1084 rc = VERR_NOT_SUPPORTED;
1085 return rc;
1086}
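
The enumeration only hands a page to the IOMMU when three conditions hold at once: it is ordinary RAM, it is fully allocated (not a zero, shared or ballooned page), and it has no access handlers that could move the host mapping underneath the device. A hedged sketch of that predicate over a simplified page descriptor; the field names stand in for the PGM_PAGE_* accessors:

#include <stdbool.h>
#include <stdio.h>

typedef enum { DEMO_PGTYPE_INVALID, DEMO_PGTYPE_RAM, DEMO_PGTYPE_MMIO } DEMOPAGETYPE;
typedef enum { DEMO_PGSTATE_ZERO, DEMO_PGSTATE_ALLOCATED, DEMO_PGSTATE_SHARED } DEMOPAGESTATE;

typedef struct
{
    DEMOPAGETYPE  enmType;
    DEMOPAGESTATE enmState;
    bool          fHasHandlers;
} DEMOPAGE;

/* Only 100% safe pages get a DMA mapping; everything else is left unassigned. */
static bool demoIsSafeForPassthrough(DEMOPAGE const *pPage)
{
    return pPage->enmType == DEMO_PGTYPE_RAM
        && pPage->enmState == DEMO_PGSTATE_ALLOCATED
        && !pPage->fHasHandlers;
}

int main(void)
{
    DEMOPAGE Ram  = { DEMO_PGTYPE_RAM, DEMO_PGSTATE_ALLOCATED, false };
    DEMOPAGE Zero = { DEMO_PGTYPE_RAM, DEMO_PGSTATE_ZERO,      false };
    printf("ram=%d zero=%d\n", demoIsSafeForPassthrough(&Ram), demoIsSafeForPassthrough(&Zero));
    return 0;
}
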
1087
1088
1089/**
1090 * \#PF Handler for nested paging.
1091 *
1092 * @returns VBox status code (appropriate for trap handling and GC return).
1093 * @param pGVM The global (ring-0) VM structure.
1094 * @param pGVCpu The global (ring-0) CPU structure of the calling
1095 * EMT.
1096 * @param enmShwPagingMode Paging mode for the nested page tables.
1097 * @param uErr The trap error code.
1098 * @param pRegFrame Trap register frame.
1099 * @param GCPhysFault The fault address.
1100 */
1101VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1102 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
1103{
1104 int rc;
1105
1106 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
1107 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
1108 STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );
1109
1110 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
1111 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
1112 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
1113 ("enmShwPagingMode=%d\n", enmShwPagingMode));
1114
1115 /* Reserved shouldn't end up here. */
1116 Assert(!(uErr & X86_TRAP_PF_RSVD));
1117
1118#ifdef VBOX_WITH_STATISTICS
1119 /*
1120 * Error code stats.
1121 */
1122 if (uErr & X86_TRAP_PF_US)
1123 {
1124 if (!(uErr & X86_TRAP_PF_P))
1125 {
1126 if (uErr & X86_TRAP_PF_RW)
1127 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
1128 else
1129 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
1130 }
1131 else if (uErr & X86_TRAP_PF_RW)
1132 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
1133 else if (uErr & X86_TRAP_PF_RSVD)
1134 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
1135 else if (uErr & X86_TRAP_PF_ID)
1136 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
1137 else
1138 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
1139 }
1140 else
1141 { /* Supervisor */
1142 if (!(uErr & X86_TRAP_PF_P))
1143 {
1144 if (uErr & X86_TRAP_PF_RW)
1145 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
1146 else
1147 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
1148 }
1149 else if (uErr & X86_TRAP_PF_RW)
1150 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
1151 else if (uErr & X86_TRAP_PF_ID)
1152 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
1153 else if (uErr & X86_TRAP_PF_RSVD)
1154 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
1155 }
1156#endif
1157
1158 /*
1159 * Call the worker.
1160 *
1161 * Note! We pretend the guest is in protected mode without paging, so we
1162 * can use existing code to build the nested page tables.
1163 */
1164/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
1165 bool fLockTaken = false;
1166 switch (enmShwPagingMode)
1167 {
1168 case PGMMODE_32_BIT:
1169 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1170 break;
1171 case PGMMODE_PAE:
1172 case PGMMODE_PAE_NX:
1173 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1174 break;
1175 case PGMMODE_AMD64:
1176 case PGMMODE_AMD64_NX:
1177 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1178 break;
1179 case PGMMODE_EPT:
1180 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
1181 break;
1182 default:
1183 AssertFailed();
1184 rc = VERR_INVALID_PARAMETER;
1185 break;
1186 }
1187 if (fLockTaken)
1188 {
1189 PGM_LOCK_ASSERT_OWNER(pGVM);
1190 PGM_UNLOCK(pGVM);
1191 }
1192
1193 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
1194 rc = VINF_SUCCESS;
1195 /*
1196 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
1197 * via its page tables, see @bugref{6043}.
1198 */
1199 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
1200 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
1201 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
1202 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
1203 {
1204 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
1205 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
1206 single VCPU VMs though. */
1207 rc = VINF_SUCCESS;
1208 }
1209
1210 STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
1211 pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
1212 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
1213 return rc;
1214}
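
The statistics block classifies faults by the architectural x86 #PF error-code bits: P (bit 0) protection vs. not-present, RW (bit 1) write, US (bit 2) user mode, RSVD (bit 3) reserved-bit violation, and ID (bit 4) instruction fetch. A small standalone decoder for those bits; the bit positions are the standard x86 definitions, while the macro names only mirror, and are not, the VBox X86_TRAP_PF_* ones:

#include <stdint.h>
#include <stdio.h>

#define DEMO_PF_P    UINT32_C(0x01)   /* 0 = not-present fault, 1 = protection fault */
#define DEMO_PF_RW   UINT32_C(0x02)   /* 0 = read, 1 = write */
#define DEMO_PF_US   UINT32_C(0x04)   /* 0 = supervisor, 1 = user mode */
#define DEMO_PF_RSVD UINT32_C(0x08)   /* reserved bit set in a paging structure */
#define DEMO_PF_ID   UINT32_C(0x10)   /* instruction fetch (NX) */

static void demoDecodePfErrorCode(uint32_t uErr)
{
    printf("#PF err=%#x: %s %s in %s mode%s%s\n",
           uErr,
           uErr & DEMO_PF_P    ? "protection violation" : "not-present page",
           uErr & DEMO_PF_RW   ? "on write"             : "on read",
           uErr & DEMO_PF_US   ? "user"                 : "supervisor",
           uErr & DEMO_PF_RSVD ? ", reserved bits set"  : "",
           uErr & DEMO_PF_ID   ? ", instruction fetch"  : "");
}

int main(void)
{
    demoDecodePfErrorCode(0x07);  /* present + write + user: a user-mode write protection fault */
    demoDecodePfErrorCode(0x10);  /* supervisor instruction fetch from a not-present page (NX) */
    return 0;
}
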
1215
1216
1217/**
1218 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
1219 * employed for MMIO pages.
1220 *
1221 * @returns VBox status code (appropriate for trap handling and GC return).
1222 * @param pGVM The global (ring-0) VM structure.
1223 * @param pGVCpu The global (ring-0) CPU structure of the calling
1224 * EMT.
1225 * @param enmShwPagingMode Paging mode for the nested page tables.
1226 * @param pRegFrame Trap register frame.
1227 * @param GCPhysFault The fault address.
1228 * @param uErr The error code, UINT32_MAX if not available
1229 * (VT-x).
1230 */
1231VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
1232 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
1233{
1234#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1235 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
1236 VBOXSTRICTRC rc;
1237
1238 /*
1239 * Try lookup the all access physical handler for the address.
1240 */
1241 PGM_LOCK_VOID(pGVM);
1242 PPGMPHYSHANDLER pHandler;
1243 rc = pgmHandlerPhysicalLookup(pGVM, GCPhysFault, &pHandler);
1244 if (RT_SUCCESS(rc))
1245 {
1246 PCPGMPHYSHANDLERTYPEINT pHandlerType = PGMPHYSHANDLER_GET_TYPE_NO_NULL(pGVM, pHandler);
1247 if (RT_LIKELY(pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
1248 {
1249 /*
1250 * If the handler has aliased pages or pages that have been temporarily
1251 * disabled, we'll have to take a detour to make sure we resync them
1252 * to avoid lots of unnecessary exits.
1253 */
1254 PPGMPAGE pPage;
1255 if ( ( pHandler->cAliasedPages
1256 || pHandler->cTmpOffPages)
1257 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
1258 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
1259 )
1260 {
1261 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
1262 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1263 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1264 PGM_UNLOCK(pGVM);
1265 }
1266 else
1267 {
1268 if (pHandlerType->pfnPfHandler)
1269 {
1270 uint64_t const uUser = !pHandlerType->fRing0DevInsIdx ? pHandler->uUser
1271 : (uintptr_t)PDMDeviceRing0IdxToInstance(pGVM, pHandler->uUser);
1272 STAM_PROFILE_START(&pHandler->Stat, h);
1273 PGM_UNLOCK(pGVM);
1274
1275 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->pfnPfHandler, uErr, GCPhysFault, uUser));
1276 rc = pHandlerType->pfnPfHandler(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
1277 GCPhysFault, GCPhysFault, uUser);
1278
1279 STAM_PROFILE_STOP(&pHandler->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
1280 }
1281 else
1282 {
1283 PGM_UNLOCK(pGVM);
1284 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
1285 rc = VINF_EM_RAW_EMULATE_INSTR;
1286 }
1287 }
1288 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1289 return rc;
1290 }
1291 }
1292 else
1293 AssertMsgReturn(rc == VERR_NOT_FOUND, ("%Rrc GCPhysFault=%RGp\n", VBOXSTRICTRC_VAL(rc), GCPhysFault), rc);
1294
1295 /*
1296 * Must be out of sync, so do a SyncPage and restart the instruction.
1297 *
1298 * ASSUMES that ALL handlers are page aligned and covers whole pages
1299 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
1300 */
1301 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
1302 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1303 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1304 PGM_UNLOCK(pGVM);
1305
1306 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1307 return rc;
1308
1309#else
1310 AssertLogRelFailed();
1311 return VERR_PGM_NOT_USED_IN_MODE;
1312#endif
1313}
1314