VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@92340

Last change on this file since 92340 was 92326, checked in by vboxsync, 3 years ago

VMM/GMM,PGM: Optimize zeroing of RAM allocations by not doing it again if the OS already zeroed an allocation. bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.3 KB
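
The change described in the commit message above boils down to tracking whether the host OS already returned an allocation zeroed, so callers can skip a redundant memset. The sketch below only illustrates that pattern: the fZeroed flag mirrors the field used in the code further down, while the descriptor type, helper name and sizes are hypothetical and not taken from the VirtualBox sources.

    /* Minimal illustration of the "skip zeroing if already zeroed" idea (hypothetical types/names). */
    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    typedef struct EXAMPLEPAGEDESC
    {
        void *pvPage;   /* ring-0 mapping of a freshly allocated page */
        bool  fZeroed;  /* true when the allocator knows the OS handed the page out zeroed */
    } EXAMPLEPAGEDESC;

    static void examplePreparePage(EXAMPLEPAGEDESC *pDesc, size_t cbPage)
    {
        if (!pDesc->fZeroed)            /* only pay for the memset when it is actually needed */
        {
            memset(pDesc->pvPage, 0, cbPage);
            pDesc->fZeroed = true;
        }
    }
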
/* $Id: PGMR0.cpp 92326 2021-11-10 15:14:52Z vboxsync $ */
/** @file
 * PGM - Page Manager and Monitor, Ring-0.
 */

/*
 * Copyright (C) 2007-2020 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM
#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
#include <VBox/rawpci.h>
#include <VBox/vmm/pgm.h>
#include <VBox/vmm/gmm.h>
#include "PGMInternal.h"
#include <VBox/vmm/pdmdev.h>
#include <VBox/vmm/vmcc.h>
#include <VBox/vmm/gvm.h>
#include "PGMInline.h"
#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/assert.h>
#include <iprt/mem.h>
#include <iprt/memobj.h>


/*
 * Instantiate the ring-0 header/code templates.
 */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME


/**
 * Initializes the per-VM data for the PGM.
 *
 * This is called from under the GVMM lock, so it should only initialize the
 * data so PGMR0CleanupVM and others will work smoothly.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
{
    AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
    AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));

    AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
    }
    return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
}


/**
 * Initialize the per-VM PGM for ring-0.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
{
    RT_NOREF(pGVM);
    /* Was used for DynMap init */
    return VINF_SUCCESS;
}


/**
 * Cleans up any loose ends before the GVM structure is destroyed.
 */
VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
{
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
        }

        if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        }
    }

    if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
        RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
}


/**
 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Check for error injection.
     */
    if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
        return VERR_NO_MEMORY;

    /*
     * Try to allocate a full set of handy pages.
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
    if (RT_SUCCESS(rc))
    {
#ifdef VBOX_STRICT
        for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
        {
            Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
            Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
            Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
        }
#endif

        pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
    }
    else
    {
        if (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
            && iFirst < PGM_HANDY_PAGES_MIN)
        {

#ifdef VBOX_STRICT
            /* We're ASSUMING that GMM has updated all the entries before failing us. */
            uint32_t i;
            for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
            {
                Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS);
                Assert(pGVM->pgm.s.aHandyPages[i].fZeroed == false);
            }
#endif

            /*
             * Reduce the number of pages until we hit the minimum limit.
             */
            do
            {
                cPages >>= 1;
                if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
                    cPages = PGM_HANDY_PAGES_MIN - iFirst;
                rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
            } while (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                         || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
                     && cPages + iFirst > PGM_HANDY_PAGES_MIN);
            if (RT_SUCCESS(rc))
            {
#ifdef VBOX_STRICT
                i = iFirst + cPages;
                while (i-- > 0)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
                    Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
                }

                for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS);
                    Assert(pGVM->pgm.s.aHandyPages[i].fZeroed == false);
                }
#endif

                pGVM->pgm.s.cHandyPages = iFirst + cPages;
            }
        }

        if (RT_FAILURE(rc))
        {
            LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
            VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
        }
    }

    LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}
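
/*
 * Editor's note -- illustrative sketch, not part of PGMR0.cpp: how a ring-0
 * caller might honour the contract documented above (call on EMT(idCpu) only,
 * PGM critical section entered, caller clears the new pages).  The wrapper
 * name is hypothetical.
 */
#if 0 /* example only */
static int pgmR0ExampleTopUpHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /* Must be called on EMT(idCpu); enter the PGM critical section first. */
    PGM_LOCK_VOID(pGVM);
    int rc = PGMR0PhysAllocateHandyPages(pGVM, idCpu);  /* tops up pGVM->pgm.s.aHandyPages */
    /* On allocation failure the worker normally raises VM_FF_PGM_NO_MEMORY so ring-3 can react. */
    PGM_UNLOCK(pGVM);
    return rc;
}
#endif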


/**
 * Flushes any changes pending in the handy page array.
 *
 * It is very important that this gets done when page sharing is enabled.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section.
 */
VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Try to allocate a full set of handy pages.
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);

    LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}


/**
 * Worker function for PGMR3PhysAllocateLargeHandyPage
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
    Assert(!pGVM->pgm.s.cLargeHandyPages);

    /*
     * Do the job.
     */
    RTHCPHYS HCPhys = NIL_GMMPAGEDESC_PHYS;
    int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M, &pGVM->pgm.s.aLargeHandyPage[0].idPage, &HCPhys);
    if (RT_SUCCESS(rc))
        pGVM->pgm.s.cLargeHandyPages = 1;
    pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys = HCPhys;
    pGVM->pgm.s.aLargeHandyPage[0].fZeroed = true;

    return rc;
}


/**
 * Locate an MMIO2 range.
 *
 * @returns Pointer to the MMIO2 range.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance owning the region.
 * @param   hMmio2      Handle to look up.
 */
DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMmio2Find(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
{
    /*
     * We use the lookup table here as list walking is tedious in ring-0 when using
     * ring-3 pointers and this probably will require some kind of refactoring anyway.
     */
    if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
    {
        PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
        if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
        {
            Assert(pCur->idMmio2 == hMmio2);
            return pCur;
        }
        Assert(!pCur);
    }
    return NULL;
}


/**
 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
 *
 * @returns VBox status code.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance.
 * @param   hMmio2      The MMIO2 region to map into ring-0 address space.
 * @param   offSub      The offset into the region.
 * @param   cbSub       The size of the mapping, zero meaning all the rest.
 * @param   ppvMapping  Where to return the ring-0 mapping address.
 */
VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
                                            size_t offSub, size_t cbSub, void **ppvMapping)
{
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);

    /*
     * Translate hMmio2 into a range pointer.
     */
    PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMmio2Find(pGVM, pDevIns, hMmio2);
    AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
    uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
#else
    RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
#endif
    RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
    pFirstRegMmio = NULL;
    ASMCompilerBarrier();

    AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
    if (cbSub == 0)
        cbSub = cbReal - offSub;
    else
        AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);

    /*
     * Do the mapping.
     */
#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
    AssertPtr(pvR0);
    *ppvMapping = pvR0 + offSub;
    return VINF_SUCCESS;
#else
    return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
#endif
}
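
/*
 * Editor's note -- illustrative sketch, not part of PGMR0.cpp: mapping the
 * whole of a device's MMIO2 region into ring-0, relying on cbSub == 0 meaning
 * "everything from offSub to the end of the region".  The wrapper name is
 * hypothetical; hMmio2 must be a handle previously registered for pDevIns.
 */
#if 0 /* example only */
static int pgmR0ExampleMapWholeMmio2(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, void **ppvR0)
{
    /* Page-aligned zero offset, zero size: map from the start to the end of the region. */
    return PGMR0PhysMMIO2MapKernel(pGVM, pDevIns, hMmio2, 0 /*offSub*/, 0 /*cbSub*/, ppvR0);
}
#endif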


#ifdef VBOX_WITH_PCI_PASSTHROUGH
/* Interface sketch. The interface belongs to a global PCI pass-through
   manager. It shall use the global VM handle, not the user VM handle to
   store the per-VM info (domain) since that is all ring-0 stuff, thus
   passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
   we can discuss the PciRaw code re-organization when I'm back from
   vacation.

   I've implemented the initial IOMMU set up below. For things to work
   reliably, we will probably need to add a whole bunch of checks and
   GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
   assuming nested paging (enforced) and prealloc (enforced), no
   ballooning (check missing), page sharing (check missing) or live
   migration (check missing), it might work fine. At least if some
   VM power-off hook is present and can tear down the IOMMU page tables. */

/**
 * Tells the global PCI pass-through manager that we are about to set up the
 * guest page to host page mappings for the specified VM.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Assigns a host page mapping for a guest page.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The address of the host page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);

    return VINF_SUCCESS;
}


/**
 * Indicates that the specified guest page either doesn't exist or doesn't have
 * a host page mapping we trust PCI pass-through with.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);

    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that we have completed setting up
 * the guest page to host page mappings for the specified VM.
 *
 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
 * if some page assignment failed.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that a guest page mapping has
 * changed after the initial setup.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The new host page address or NIL_RTHCPHYS if
 *                  now unassigned.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
    NOREF(pGVM);
    return VINF_SUCCESS;
}

#endif /* VBOX_WITH_PCI_PASSTHROUGH */


/**
 * Sets up the IOMMU when raw PCI device is enabled.
 *
 * @note    This is a hack that will probably be remodelled and refined later!
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 */
VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
{
    int rc = GVMMR0ValidateGVM(pGVM);
    if (RT_FAILURE(rc))
        return rc;

#ifdef VBOX_WITH_PCI_PASSTHROUGH
    if (pGVM->pgm.s.fPciPassthrough)
    {
        /*
         * The Simplistic Approach - Enumerate all the pages and tell the
         * IOMMU about each of them.
         */
        PGM_LOCK_VOID(pGVM);
        rc = GPciRawR0GuestPageBeginAssignments(pGVM);
        if (RT_SUCCESS(rc))
        {
            for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
            {
                PPGMPAGE pPage  = &pRam->aPages[0];
                RTGCPHYS GCPhys = pRam->GCPhys;
                uint32_t cLeft  = pRam->cb >> PAGE_SHIFT;
                while (cLeft-- > 0)
                {
                    /* Only expose pages that are 100% safe for now. */
                    if (   PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
                        && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
                        && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
                        rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
                    else
                        rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);

                    /* next */
                    pPage++;
                    GCPhys += PAGE_SIZE;
                }
            }

            int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
            if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                rc = rc2;
        }
        PGM_UNLOCK(pGVM);
    }
    else
#endif
        rc = VERR_NOT_SUPPORTED;
    return rc;
}


/**
 * \#PF Handler for nested paging.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   uErr                The trap error code.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 */
VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
                                              PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
{
    int rc;

    LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
    STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
    STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );

    /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
    AssertMsg(   enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
              || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
              ("enmShwPagingMode=%d\n", enmShwPagingMode));

    /* Reserved shouldn't end up here. */
    Assert(!(uErr & X86_TRAP_PF_RSVD));

#ifdef VBOX_WITH_STATISTICS
    /*
     * Error code stats.
     */
    if (uErr & X86_TRAP_PF_US)
    {
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
        else
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
    }
    else
    {   /* Supervisor */
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
    }
#endif

    /*
     * Call the worker.
     *
     * Note! We pretend the guest is in protected mode without paging, so we
     *       can use existing code to build the nested page tables.
     */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
    bool fLockTaken = false;
    switch (enmShwPagingMode)
    {
        case PGMMODE_32_BIT:
            rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_PAE:
        case PGMMODE_PAE_NX:
            rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_AMD64:
        case PGMMODE_AMD64_NX:
            rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_EPT:
            rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        default:
            AssertFailed();
            rc = VERR_INVALID_PARAMETER;
            break;
    }
    if (fLockTaken)
    {
        PGM_LOCK_ASSERT_OWNER(pGVM);
        PGM_UNLOCK(pGVM);
    }

    if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
        rc = VINF_SUCCESS;
    /*
     * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
     * via its page tables, see @bugref{6043}.
     */
    else if (   rc == VERR_PAGE_NOT_PRESENT                 /* SMP only ; disassembly might fail. */
             || rc == VERR_PAGE_TABLE_NOT_PRESENT           /* seen with UNI & SMP */
             || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT   /* seen with SMP */
             || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT)     /* precaution */
    {
        Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
        /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
           single VCPU VMs though. */
        rc = VINF_SUCCESS;
    }

    STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
                     pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
    STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
    return rc;
}


/**
 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
 * employed for MMIO pages.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 * @param   uErr                The error code, UINT32_MAX if not available
 *                              (VT-x).
 */
VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
                                                      PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
{
#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
    STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
    VBOXSTRICTRC rc;

    /*
     * Try to look up the all-access physical handler for the address.
     */
    PGM_LOCK_VOID(pGVM);
    PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
    PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
    if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
    {
        /*
         * If the handler has aliased pages or pages that have been temporarily
         * disabled, we'll have to take a detour to make sure we resync them
         * to avoid lots of unnecessary exits.
         */
        PPGMPAGE pPage;
        if (   (   pHandler->cAliasedPages
                || pHandler->cTmpOffPages)
            && (   (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
                || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
           )
        {
            Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
            rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
            PGM_UNLOCK(pGVM);
        }
        else
        {
            if (pHandlerType->CTX_SUFF(pfnPfHandler))
            {
                void *pvUser = pHandler->CTX_SUFF(pvUser);
                STAM_PROFILE_START(&pHandler->Stat, h);
                PGM_UNLOCK(pGVM);

                Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
                rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
                                                          GCPhysFault, GCPhysFault, pvUser);

#ifdef VBOX_WITH_STATISTICS
                PGM_LOCK_VOID(pGVM);
                pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
                if (pHandler)
                    STAM_PROFILE_STOP(&pHandler->Stat, h);
                PGM_UNLOCK(pGVM);
#endif
            }
            else
            {
                PGM_UNLOCK(pGVM);
                Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
                rc = VINF_EM_RAW_EMULATE_INSTR;
            }
        }
    }
    else
    {
        /*
         * Must be out of sync, so do a SyncPage and restart the instruction.
         *
         * ASSUMES that ALL handlers are page aligned and cover whole pages
         * (assumption asserted in PGMHandlerPhysicalRegisterEx).
         */
        Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
        STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
        rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
        PGM_UNLOCK(pGVM);
    }

    STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
    return rc;

#else
    AssertLogRelFailed();
    return VERR_PGM_NOT_USED_IN_MODE;
#endif
}
