VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@82567

Last change on this file since 82567 was 82555, checked in by vboxsync, 5 years ago

PGMPool,MM: Use ring-0 mapping while in ring-0, so let the page pool do its own allocations rather than going through MMPage*. The MMPage* code is mostly code, but we still need it for a dummy page allocation. I'll address this tomorrow. bugref:9528

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.1 KB
/* $Id: PGMR0.cpp 82555 2019-12-11 23:56:54Z vboxsync $ */
/** @file
 * PGM - Page Manager and Monitor, Ring-0.
 */

/*
 * Copyright (C) 2007-2019 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM
#include <VBox/rawpci.h>
#include <VBox/vmm/pgm.h>
#include <VBox/vmm/gmm.h>
#include "PGMInternal.h"
#include <VBox/vmm/pdmdev.h>
#include <VBox/vmm/vmcc.h>
#include <VBox/vmm/gvm.h>
#include "PGMInline.h"
#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/assert.h>
#include <iprt/mem.h>
#include <iprt/memobj.h>


/*
 * Instantiate the ring-0 header/code templates.
 */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME


/**
 * Initializes the per-VM data for the PGM.
 *
 * This is called from under the GVMM lock, so it should only initialize the
 * data so PGMR0CleanupVM and others will work smoothly.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
{
    AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
    AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));

    AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
    }
    return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
}
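/* Note: Pre-setting the pool handles to NIL and creating the grow critical
   section here is what lets PGMR0CleanupVM (below) run safely even if later
   ring-0 initialization fails part-way; the cleanup code simply skips NIL
   handles and only deletes the critical section if it was initialized. */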


/**
 * Initializes the per-VM PGM for ring-0.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
{
    int rc = VINF_SUCCESS;
#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
    rc = PGMR0DynMapInitVM(pGVM);
#endif
    RT_NOREF(pGVM);
    return rc;
}


/**
 * Cleans up any loose ends before the GVM structure is destroyed.
 */
VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
{
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
        }

        if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        }
    }

    if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
        RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
}
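/* Note: For each pool chunk the mapping object is freed before the memory
   object that backs it, so no ring-0 mapping is left referring to memory
   that has already been released. */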


/**
 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Check for error injection.
     */
    if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
        return VERR_NO_MEMORY;

    /*
     * Try to allocate a full set of handy pages.
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
    if (RT_SUCCESS(rc))
    {
#ifdef VBOX_STRICT
        for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
        {
            Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
            Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
            Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
        }
#endif

        pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
    }
    else if (rc != VERR_GMM_SEED_ME)
    {
        if (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
            && iFirst < PGM_HANDY_PAGES_MIN)
        {

#ifdef VBOX_STRICT
            /* We're ASSUMING that GMM has updated all the entries before failing us. */
            uint32_t i;
            for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
            {
                Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
            }
#endif

            /*
             * Reduce the number of pages until we hit the minimum limit.
             */
            do
            {
                cPages >>= 1;
                if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
                    cPages = PGM_HANDY_PAGES_MIN - iFirst;
                rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
            } while (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                         || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
                     && cPages + iFirst > PGM_HANDY_PAGES_MIN);
            if (RT_SUCCESS(rc))
            {
#ifdef VBOX_STRICT
                i = iFirst + cPages;
                while (i-- > 0)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
                    Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
                }

                for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
                }
#endif

                pGVM->pgm.s.cHandyPages = iFirst + cPages;
            }
        }

        if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
        {
            LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
            VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
        }
    }


    LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}
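/* Worked example of the back-off loop above (the array size and minimum used
   here are illustrative, not taken from the headers): with 128 handy page
   entries, iFirst = 16 and PGM_HANDY_PAGES_MIN = 32, the initial request asks
   for 112 pages.  If GMM keeps returning VERR_GMM_HIT_VM_ACCOUNT_LIMIT, the
   loop retries with 56, then 28, and finally 16 (14 clamped up so that
   iFirst + cPages lands exactly on the 32 page minimum) before giving up. */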


/**
 * Flushes any changes pending in the handy page array.
 *
 * It is very important that this gets done when page sharing is enabled.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section.
 */
VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Push the pending handy page updates back to GMM (no new allocations).
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);

    LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}


/**
 * Worker function for PGMR3PhysAllocateLargeHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
    Assert(!pGVM->pgm.s.cLargeHandyPages);

    /*
     * Do the job.
     */
    int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
                                    &pGVM->pgm.s.aLargeHandyPage[0].idPage,
                                    &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
    if (RT_SUCCESS(rc))
        pGVM->pgm.s.cLargeHandyPages = 1;

    return rc;
}
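/* Note: _2M is the 2 MiB large-page size (0x00200000, from iprt/cdefs.h).  On
   success exactly one large handy page descriptor is published by setting
   cLargeHandyPages to 1; per the remarks above, the caller must clear it. */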


/**
 * Locate an MMIO2 range.
 *
 * @returns Pointer to the MMIO2 range.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance owning the region.
 * @param   hMmio2      Handle to look up.
 */
DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMMIOExFind(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
{
    /*
     * We use the lookup table here as list walking is tedious in ring-0 when using
     * ring-3 pointers and this probably will require some kind of refactoring anyway.
     */
    if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
    {
        PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
        if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
        {
            Assert(pCur->idMmio2 == hMmio2);
            AssertReturn(pCur->fFlags & PGMREGMMIO2RANGE_F_MMIO2, NULL);
            return pCur;
        }
        Assert(!pCur);
    }
    return NULL;
}


/**
 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
 *
 * @returns VBox status code.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance.
 * @param   hMmio2      The MMIO2 region to map into ring-0 address space.
 * @param   offSub      The offset into the region.
 * @param   cbSub       The size of the mapping, zero meaning all the rest.
 * @param   ppvMapping  Where to return the ring-0 mapping address.
 */
VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
                                            size_t offSub, size_t cbSub, void **ppvMapping)
{
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);

    /*
     * Translate hMmio2 into a range pointer.
     */
    PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMMIOExFind(pGVM, pDevIns, hMmio2);
    AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
    RTR3PTR const  pvR3   = pFirstRegMmio->pvR3;
    RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
    pFirstRegMmio = NULL;
    ASMCompilerBarrier();

    AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
    if (cbSub == 0)
        cbSub = cbReal - offSub;
    else
        AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);

    /*
     * Do the mapping.
     */
    return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
}
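/* Illustration of the offSub/cbSub handling above (hypothetical region size):
   for a 128 KiB MMIO2 region (cbReal = 0x20000), offSub = 0x1000 with
   cbSub = 0 maps the remaining 0x1F000 bytes, while offSub = 0x1000 with
   cbSub = 0x2000 maps just those two pages; both the offset and the size
   must be page aligned or the function asserts. */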


#ifdef VBOX_WITH_PCI_PASSTHROUGH
/* Interface sketch.  The interface belongs to a global PCI pass-through
   manager.  It shall use the global VM handle, not the user VM handle to
   store the per-VM info (domain) since that is all ring-0 stuff, thus
   passing pGVM here.  I've tentatively prefixed the functions 'GPciRawR0',
   we can discuss the PciRaw code re-organization when I'm back from
   vacation.

   I've implemented the initial IOMMU set up below.  For things to work
   reliably, we will probably need to add a whole bunch of checks and
   GPciRawR0GuestPageUpdate calls to the PGM code.  For the present,
   assuming nested paging (enforced) and prealloc (enforced), no
   ballooning (check missing), page sharing (check missing) or live
   migration (check missing), it might work fine.  At least if some
   VM power-off hook is present and can tear down the IOMMU page tables. */

/**
 * Tells the global PCI pass-through manager that we are about to set up the
 * guest page to host page mappings for the specified VM.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Assigns a host page mapping for a guest page.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The address of the host page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);

    return VINF_SUCCESS;
}


/**
 * Indicates that the specified guest page doesn't exist or doesn't have a host
 * page mapping that we trust PCI pass-through with.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);

    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that we have completed setting up
 * the guest page to host page mappings for the specified VM.
 *
 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
 * if some page assignment failed.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that a guest page mapping has
 * changed after the initial setup.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The new host page address or NIL_RTHCPHYS if
 *                  now unassigned.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
    NOREF(pGVM);
    return VINF_SUCCESS;
}

#endif /* VBOX_WITH_PCI_PASSTHROUGH */


/**
 * Sets up the IOMMU when raw PCI device is enabled.
 *
 * @note    This is a hack that will probably be remodelled and refined later!
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 */
VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
{
    int rc = GVMMR0ValidateGVM(pGVM);
    if (RT_FAILURE(rc))
        return rc;

#ifdef VBOX_WITH_PCI_PASSTHROUGH
    if (pGVM->pgm.s.fPciPassthrough)
    {
        /*
         * The Simplistic Approach - Enumerate all the pages and tell the
         * IOMMU about each of them.
         */
        pgmLock(pGVM);
        rc = GPciRawR0GuestPageBeginAssignments(pGVM);
        if (RT_SUCCESS(rc))
        {
            for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
            {
                PPGMPAGE pPage  = &pRam->aPages[0];
                RTGCPHYS GCPhys = pRam->GCPhys;
                uint32_t cLeft  = pRam->cb >> PAGE_SHIFT;
                while (cLeft-- > 0)
                {
                    /* Only expose pages that are 100% safe for now. */
                    if (   PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
                        && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
                        && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
                        rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
                    else
                        rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);

                    /* next */
                    pPage++;
                    GCPhys += PAGE_SIZE;
                }
            }

            int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
            if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                rc = rc2;
        }
        pgmUnlock(pGVM);
    }
    else
#endif
        rc = VERR_NOT_SUPPORTED;
    return rc;
}
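/* Note on the enumeration above: cLeft = pRam->cb >> PAGE_SHIFT walks each RAM
   range one 4 KiB page at a time, e.g. a 64 MiB range yields 16384 iterations.
   Only pages that are plain RAM, fully allocated and without access handlers
   are handed to the IOMMU; everything else is explicitly unassigned. */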


/**
 * \#PF Handler for nested paging.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   uErr                The trap error code.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 */
VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
                                              PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
{
    int rc;

    LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
    STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
    STAM_STATS({ pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );

    /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
    AssertMsg(   enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
              || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
              ("enmShwPagingMode=%d\n", enmShwPagingMode));

    /* Reserved shouldn't end up here. */
    Assert(!(uErr & X86_TRAP_PF_RSVD));

#ifdef VBOX_WITH_STATISTICS
    /*
     * Error code stats.
     */
    if (uErr & X86_TRAP_PF_US)
    {
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE);
        else
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead);
    }
    else
    {   /* Supervisor */
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved);
    }
#endif

    /*
     * Call the worker.
     *
     * Note! We pretend the guest is in protected mode without paging, so we
     *       can use existing code to build the nested page tables.
     */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
    bool fLockTaken = false;
    switch (enmShwPagingMode)
    {
        case PGMMODE_32_BIT:
            rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_PAE:
        case PGMMODE_PAE_NX:
            rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_AMD64:
        case PGMMODE_AMD64_NX:
            rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_EPT:
            rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        default:
            AssertFailed();
            rc = VERR_INVALID_PARAMETER;
            break;
    }
    if (fLockTaken)
    {
        PGM_LOCK_ASSERT_OWNER(pGVM);
        pgmUnlock(pGVM);
    }

    if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
        rc = VINF_SUCCESS;
    /*
     * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
     * via its page tables, see @bugref{6043}.
     */
    else if (   rc == VERR_PAGE_NOT_PRESENT                 /* SMP only ; disassembly might fail. */
             || rc == VERR_PAGE_TABLE_NOT_PRESENT           /* seen with UNI & SMP */
             || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT   /* seen with SMP */
             || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT)     /* precaution */
    {
        Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
        /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
           single VCPU VMs though. */
        rc = VINF_SUCCESS;
    }

    STAM_STATS({ if (!pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
                    pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
    STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
    return rc;
}


/**
 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
 * employed for MMIO pages.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 * @param   uErr                The error code, UINT32_MAX if not available
 *                              (VT-x).
 */
VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
                                                      PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
{
#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
    STAM_PROFILE_START(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
    VBOXSTRICTRC rc;

    /*
     * Try to look up the all-access physical handler for the address.
     */
    pgmLock(pGVM);
    PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
    PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
    if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
    {
        /*
         * If the handler has aliased pages or pages that have been temporarily
         * disabled, we'll have to take a detour to make sure we resync them
         * to avoid lots of unnecessary exits.
         */
        PPGMPAGE pPage;
        if (   (   pHandler->cAliasedPages
                || pHandler->cTmpOffPages)
            && (   (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
                || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
           )
        {
            Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
            STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
            rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
            pgmUnlock(pGVM);
        }
        else
        {
            if (pHandlerType->CTX_SUFF(pfnPfHandler))
            {
                void *pvUser = pHandler->CTX_SUFF(pvUser);
                STAM_PROFILE_START(&pHandler->Stat, h);
                pgmUnlock(pGVM);

                Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
                rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
                                                          GCPhysFault, GCPhysFault, pvUser);

#ifdef VBOX_WITH_STATISTICS
                pgmLock(pGVM);
                pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
                if (pHandler)
                    STAM_PROFILE_STOP(&pHandler->Stat, h);
                pgmUnlock(pGVM);
#endif
            }
            else
            {
                pgmUnlock(pGVM);
                Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
                rc = VINF_EM_RAW_EMULATE_INSTR;
            }
        }
    }
    else
    {
        /*
         * Must be out of sync, so do a SyncPage and restart the instruction.
         *
         * ASSUMES that ALL handlers are page aligned and cover whole pages
         * (assumption asserted in PGMHandlerPhysicalRegisterEx).
         */
        Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
        STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
        rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
        pgmUnlock(pGVM);
    }

    STAM_PROFILE_STOP(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
    return rc;

#else
    AssertLogRelFailed();
    return VERR_PGM_NOT_USED_IN_MODE;
#endif
}