VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@ 92296

Last change on this file since 92296 was 92248, checked in by vboxsync, 3 years ago

VMM/GMM: Removed all the legacy mode code (disabled everywhere since r146982). bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.0 KB
/* $Id: PGMR0.cpp 92248 2021-11-06 15:21:57Z vboxsync $ */
/** @file
 * PGM - Page Manager and Monitor, Ring-0.
 */

/*
 * Copyright (C) 2007-2020 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM
#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
#include <VBox/rawpci.h>
#include <VBox/vmm/pgm.h>
#include <VBox/vmm/gmm.h>
#include "PGMInternal.h"
#include <VBox/vmm/pdmdev.h>
#include <VBox/vmm/vmcc.h>
#include <VBox/vmm/gvm.h>
#include "PGMInline.h"
#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/assert.h>
#include <iprt/mem.h>
#include <iprt/memobj.h>


/*
 * Instantiate the ring-0 header/code templates.
 */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME


/**
 * Initializes the per-VM data for the PGM.
 *
 * This is called from under the GVMM lock, so it should only initialize the
 * data so PGMR0CleanupVM and others will work smoothly.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
{
    AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
    AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));

    AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
    }
    return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
}


/**
 * Initializes the per-VM PGM for ring-0.
 *
 * @returns VBox status code.
 * @param   pGVM    Pointer to the global VM structure.
 */
VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
{
    RT_NOREF(pGVM);
    /* Was used for DynMap init. */
    return VINF_SUCCESS;
}


/**
 * Cleans up any loose ends before the GVM structure is destroyed.
 */
VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
{
    for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
    {
        if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
        }

        if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
            AssertRC(rc);
            pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
        }
    }

    if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
        RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
}
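
/*
 * Illustrative sketch (example only, hypothetical names, no VMM types): the
 * init/cleanup pattern used by PGMR0InitPerVMData and PGMR0CleanupVM above.
 * Handles are set to a NIL sentinel up front so that cleanup is safe and
 * idempotent even if initialization only got half way.
 */
typedef void *EXAMPLEHANDLE;
#define EXAMPLE_NIL_HANDLE  ((EXAMPLEHANDLE)0)

typedef struct EXAMPLESTATE
{
    EXAMPLEHANDLE ahObjs[8];
} EXAMPLESTATE;

static void exampleInit(EXAMPLESTATE *pState)
{
    for (unsigned i = 0; i < sizeof(pState->ahObjs) / sizeof(pState->ahObjs[0]); i++)
        pState->ahObjs[i] = EXAMPLE_NIL_HANDLE;         /* mark every slot as "nothing to free" */
}

static void exampleCleanup(EXAMPLESTATE *pState, void (*pfnFree)(EXAMPLEHANDLE hObj))
{
    for (unsigned i = 0; i < sizeof(pState->ahObjs) / sizeof(pState->ahObjs[0]); i++)
        if (pState->ahObjs[i] != EXAMPLE_NIL_HANDLE)
        {
            pfnFree(pState->ahObjs[i]);                 /* release the resource... */
            pState->ahObjs[i] = EXAMPLE_NIL_HANDLE;     /* ...and make a second cleanup pass harmless. */
        }
}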


/**
 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Check for error injection.
     */
    if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
        return VERR_NO_MEMORY;

    /*
     * Try to allocate a full set of handy pages.
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
    if (RT_SUCCESS(rc))
    {
#ifdef VBOX_STRICT
        for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
        {
            Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
            Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
            Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
            Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
        }
#endif

        pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
    }
    else
    {
        if (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
            && iFirst < PGM_HANDY_PAGES_MIN)
        {

#ifdef VBOX_STRICT
            /* We're ASSUMING that GMM has updated all the entries before failing us. */
            uint32_t i;
            for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
            {
                Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
            }
#endif

            /*
             * Reduce the number of pages until we hit the minimum limit.
             */
            do
            {
                cPages >>= 1;
                if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
                    cPages = PGM_HANDY_PAGES_MIN - iFirst;
                rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
            } while (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                         || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
                     && cPages + iFirst > PGM_HANDY_PAGES_MIN);
            if (RT_SUCCESS(rc))
            {
#ifdef VBOX_STRICT
                i = iFirst + cPages;
                while (i-- > 0)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
                    Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
                }

                for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
                {
                    Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
                }
#endif

                pGVM->pgm.s.cHandyPages = iFirst + cPages;
            }
        }

        if (RT_FAILURE(rc))
        {
            LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
            VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
        }
    }

    LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}
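
/*
 * Illustrative sketch (example only, hypothetical names, no VMM types): the
 * shrink-and-retry strategy above, in isolation.  When the full request runs
 * into a global or per-VM account limit, the request is repeatedly halved,
 * but never below the minimum number of handy pages the caller needs.
 */
typedef int (*PFNEXAMPLEALLOC)(unsigned cPagesToAlloc);     /* returns 0 on success, non-zero on limit hit */

static int exampleAllocWithShrink(PFNEXAMPLEALLOC pfnAlloc, unsigned iFirst, unsigned cTotal, unsigned cMin)
{
    unsigned cPages = cTotal - iFirst;      /* free slots in the array */
    int      rc     = pfnAlloc(cPages);     /* first try: fill the whole array */
    while (rc != 0 && cPages + iFirst > cMin)
    {
        cPages >>= 1;                       /* halve the request... */
        if (cPages + iFirst < cMin)
            cPages = cMin - iFirst;         /* ...but never drop below the minimum */
        rc = pfnAlloc(cPages);
    }
    return rc;
}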


/**
 * Flushes any changes pending in the handy page array.
 *
 * It is very important that this gets done when page sharing is enabled.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section.
 */
VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);

    /*
     * Try to allocate a full set of handy pages.
     */
    uint32_t iFirst = pGVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
    uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);

    LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}


/**
 * Worker function for PGMR3PhysAllocateLargeHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The ID of the calling EMT.
 *
 * @thread  EMT(idCpu)
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
{
    /*
     * Validate inputs.
     */
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
    AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
    PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
    Assert(!pGVM->pgm.s.cLargeHandyPages);

    /*
     * Do the job.
     */
    int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
                                    &pGVM->pgm.s.aLargeHandyPage[0].idPage,
                                    &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
    if (RT_SUCCESS(rc))
        pGVM->pgm.s.cLargeHandyPages = 1;

    return rc;
}


/**
 * Locates an MMIO2 range.
 *
 * @returns Pointer to the MMIO2 range.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance owning the region.
 * @param   hMmio2      Handle to look up.
 */
DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMmio2Find(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
{
    /*
     * We use the lookup table here as list walking is tedious in ring-0 when using
     * ring-3 pointers and this probably will require some kind of refactoring anyway.
     */
    if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
    {
        PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
        if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
        {
            Assert(pCur->idMmio2 == hMmio2);
            return pCur;
        }
        Assert(!pCur);
    }
    return NULL;
}
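
/*
 * Illustrative sketch (example only, hypothetical names): the handle-to-range
 * translation above in plain C.  The handle is a 1-based index into a per-VM
 * lookup table, and an entry is only returned when it is owned by the
 * requesting device instance.
 */
typedef struct EXAMPLERANGE
{
    void    *pvOwner;   /* device instance owning the region */
    unsigned idSelf;    /* 1-based handle, kept for sanity checking */
} EXAMPLERANGE;

static EXAMPLERANGE *exampleLookup(EXAMPLERANGE **papRanges, unsigned cRanges, unsigned hHandle, void *pvOwner)
{
    if (hHandle != 0 && hHandle <= cRanges)             /* 1-based: slot 0 holds handle 1 */
    {
        EXAMPLERANGE *pCur = papRanges[hHandle - 1];
        if (pCur && pCur->pvOwner == pvOwner && pCur->idSelf == hHandle)
            return pCur;
    }
    return (EXAMPLERANGE *)0;                           /* unknown handle or wrong owner */
}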


/**
 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
 *
 * @returns VBox status code.
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pDevIns     The device instance.
 * @param   hMmio2      The MMIO2 region to map into ring-0 address space.
 * @param   offSub      The offset into the region.
 * @param   cbSub       The size of the mapping, zero meaning all the rest.
 * @param   ppvMapping  Where to return the ring-0 mapping address.
 */
VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
                                            size_t offSub, size_t cbSub, void **ppvMapping)
{
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);

    /*
     * Translate hMmio2 into a range pointer.
     */
    PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMmio2Find(pGVM, pDevIns, hMmio2);
    AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
    uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
#else
    RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
#endif
    RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
    pFirstRegMmio = NULL;
    ASMCompilerBarrier();

    AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
    if (cbSub == 0)
        cbSub = cbReal - offSub;
    else
        AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);

    /*
     * Do the mapping.
     */
#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
    AssertPtr(pvR0);
    *ppvMapping = pvR0 + offSub;
    return VINF_SUCCESS;
#else
    return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
#endif
}
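
/*
 * Illustrative sketch (example only, hypothetical names, slightly simplified):
 * the sub-range validation above.  offSub and cbSub must be page aligned,
 * offSub must fall inside the region, and cbSub == 0 means "map everything
 * from offSub to the end of the region".
 */
#define EXAMPLE_PAGE_OFFSET_MASK 0xfffu

static int exampleClampSubRange(unsigned long cbReal, unsigned long offSub, unsigned long *pcbSub)
{
    if ((offSub & EXAMPLE_PAGE_OFFSET_MASK) || (*pcbSub & EXAMPLE_PAGE_OFFSET_MASK))
        return -1;                              /* alignment error */
    if (offSub >= cbReal)
        return -2;                              /* offset out of range */
    if (*pcbSub == 0)
        *pcbSub = cbReal - offSub;              /* zero means the rest of the region */
    else if (*pcbSub + offSub > cbReal)
        return -2;                              /* explicit size exceeds the region */
    return 0;
}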


#ifdef VBOX_WITH_PCI_PASSTHROUGH
/* Interface sketch.  The interface belongs to a global PCI pass-through
   manager.  It shall use the global VM handle, not the user VM handle to
   store the per-VM info (domain) since that is all ring-0 stuff, thus
   passing pGVM here.  I've tentatively prefixed the functions 'GPciRawR0',
   we can discuss the PciRaw code re-organization when I'm back from
   vacation.

   I've implemented the initial IOMMU set up below.  For things to work
   reliably, we will probably need to add a whole bunch of checks and
   GPciRawR0GuestPageUpdate calls to the PGM code.  For the present,
   assuming nested paging (enforced) and prealloc (enforced), no
   ballooning (check missing), page sharing (check missing) or live
   migration (check missing), it might work fine.  At least if some
   VM power-off hook is present and can tear down the IOMMU page tables. */

/**
 * Tells the global PCI pass-through manager that we are about to set up the
 * guest page to host page mappings for the specified VM.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Assigns a host page mapping for a guest page.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The address of the host page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);

    return VINF_SUCCESS;
}


/**
 * Indicates that the specified guest page doesn't exist or doesn't have a
 * host page mapping we trust PCI pass-through with.
 *
 * This is only used when setting up the mappings, i.e. between
 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);

    if (pGVM->rawpci.s.pfnContigMemInfo)
        /** @todo what do we do on failure? */
        pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);

    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that we have completed setting up
 * the guest page to host page mappings for the specified VM.
 *
 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
 * if some page assignment failed.
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The ring-0 VM structure.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
{
    NOREF(pGVM);
    return VINF_SUCCESS;
}


/**
 * Tells the global PCI pass-through manager that a guest page mapping has
 * changed after the initial setup.
 *
 * @returns VBox status code.
 * @param   pGVM    The ring-0 VM structure.
 * @param   GCPhys  The address of the guest page (page aligned).
 * @param   HCPhys  The new host page address or NIL_RTHCPHYS if
 *                  now unassigned.
 */
VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
{
    AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
    AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
    NOREF(pGVM);
    return VINF_SUCCESS;
}

#endif /* VBOX_WITH_PCI_PASSTHROUGH */


/**
 * Sets up the IOMMU when a raw PCI device is enabled.
 *
 * @note    This is a hack that will probably be remodelled and refined later!
 *
 * @returns VBox status code.
 *
 * @param   pGVM    The global (ring-0) VM structure.
 */
VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
{
    int rc = GVMMR0ValidateGVM(pGVM);
    if (RT_FAILURE(rc))
        return rc;

#ifdef VBOX_WITH_PCI_PASSTHROUGH
    if (pGVM->pgm.s.fPciPassthrough)
    {
        /*
         * The Simplistic Approach - Enumerate all the pages and tell the
         * IOMMU about each of them.
         */
        PGM_LOCK_VOID(pGVM);
        rc = GPciRawR0GuestPageBeginAssignments(pGVM);
        if (RT_SUCCESS(rc))
        {
            for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
            {
                PPGMPAGE pPage  = &pRam->aPages[0];
                RTGCPHYS GCPhys = pRam->GCPhys;
                uint32_t cLeft  = pRam->cb >> PAGE_SHIFT;
                while (cLeft-- > 0)
                {
                    /* Only expose pages that are 100% safe for now. */
                    if (   PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
                        && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
                        && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
                        rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
                    else
                        rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);

                    /* next */
                    pPage++;
                    GCPhys += PAGE_SIZE;
                }
            }

            int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
            if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                rc = rc2;
        }
        PGM_UNLOCK(pGVM);
    }
    else
#endif
        rc = VERR_NOT_SUPPORTED;
    return rc;
}
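
/*
 * Illustrative sketch (example only, hypothetical names, no VMM types): the
 * shape of the "simplistic approach" above - walk every page of every RAM
 * range and either assign or unassign it in the IOMMU.  Unlike the loop
 * above, which only re-checks the status between ranges, this sketch stops
 * at the first error for clarity.
 */
typedef struct EXAMPLERAMRANGE
{
    struct EXAMPLERAMRANGE *pNext;
    unsigned long           uFirstPageFrame;    /* first guest page frame number of the range */
    unsigned long           cPages;             /* number of pages in the range */
} EXAMPLERAMRANGE;

static int exampleSetupIoMmu(const EXAMPLERAMRANGE *pHead,
                             int (*pfnIsSafe)(unsigned long uPfn),
                             int (*pfnAssign)(unsigned long uPfn),
                             int (*pfnUnassign)(unsigned long uPfn))
{
    int rc = 0;
    for (const EXAMPLERAMRANGE *pRam = pHead; rc == 0 && pRam; pRam = pRam->pNext)
        for (unsigned long i = 0; rc == 0 && i < pRam->cPages; i++)
        {
            unsigned long uPfn = pRam->uFirstPageFrame + i;
            rc = pfnIsSafe(uPfn) ? pfnAssign(uPfn) : pfnUnassign(uPfn); /* only expose "safe" pages */
        }
    return rc;
}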


/**
 * \#PF Handler for nested paging.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   uErr                The trap error code.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 */
VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
                                              PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
{
    int rc;

    LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
    STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
    STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );

    /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
    AssertMsg(   enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
              || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
              ("enmShwPagingMode=%d\n", enmShwPagingMode));

    /* Reserved shouldn't end up here. */
    Assert(!(uErr & X86_TRAP_PF_RSVD));

#ifdef VBOX_WITH_STATISTICS
    /*
     * Error code stats.
     */
    if (uErr & X86_TRAP_PF_US)
    {
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
        else
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
    }
    else
    {   /* Supervisor */
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
            else
                STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
    }
#endif

    /*
     * Call the worker.
     *
     * Note! We pretend the guest is in protected mode without paging, so we
     *       can use existing code to build the nested page tables.
     */
/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
    bool fLockTaken = false;
    switch (enmShwPagingMode)
    {
        case PGMMODE_32_BIT:
            rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_PAE:
        case PGMMODE_PAE_NX:
            rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_AMD64:
        case PGMMODE_AMD64_NX:
            rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        case PGMMODE_EPT:
            rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
            break;
        default:
            AssertFailed();
            rc = VERR_INVALID_PARAMETER;
            break;
    }
    if (fLockTaken)
    {
        PGM_LOCK_ASSERT_OWNER(pGVM);
        PGM_UNLOCK(pGVM);
    }

    if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
        rc = VINF_SUCCESS;
    /*
     * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
     * via its page tables, see @bugref{6043}.
     */
    else if (   rc == VERR_PAGE_NOT_PRESENT                 /* SMP only ; disassembly might fail. */
             || rc == VERR_PAGE_TABLE_NOT_PRESENT           /* seen with UNI & SMP */
             || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT   /* seen with SMP */
             || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT)     /* precaution */
    {
        Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
        /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
           single VCPU VMs though. */
        rc = VINF_SUCCESS;
    }

    STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
                     pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
    STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
    return rc;
}
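
/*
 * Illustrative sketch (example only): the x86 #PF error code bits that the
 * statistics block above switches on.  The values follow the architectural
 * encoding (bit 0 = P, bit 1 = W/R, bit 2 = U/S, bit 3 = RSVD, bit 4 = I/D);
 * the EX_PF_* names are local stand-ins for the X86_TRAP_PF_* constants.
 */
#define EX_PF_P    0x01u    /* 0 = not-present fault, 1 = protection fault */
#define EX_PF_RW   0x02u    /* 1 = write access */
#define EX_PF_US   0x04u    /* 1 = user-mode access */
#define EX_PF_RSVD 0x08u    /* 1 = reserved bit set in a paging structure */
#define EX_PF_ID   0x10u    /* 1 = instruction fetch */

static const char *exampleClassifyPageFault(unsigned uErr)
{
    if (!(uErr & EX_PF_P))
        return (uErr & EX_PF_RW) ? "not-present write" : "not-present read";
    if (uErr & EX_PF_RSVD)
        return "reserved bit violation";
    if (uErr & EX_PF_ID)
        return "instruction fetch (NX)";
    return (uErr & EX_PF_RW) ? "write protection fault" : "read protection fault";
}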


/**
 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
 * employed for MMIO pages.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   pRegFrame           Trap register frame.
 * @param   GCPhysFault         The fault address.
 * @param   uErr                The error code, UINT32_MAX if not available
 *                              (VT-x).
 */
VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
                                                      PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
{
#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
    STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
    VBOXSTRICTRC rc;

    /*
     * Try to look up the all-access physical handler for the address.
     */
    PGM_LOCK_VOID(pGVM);
    PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
    PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
    if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
    {
        /*
         * If the handler has aliased pages or pages that have been temporarily
         * disabled, we'll have to take a detour to make sure we resync them
         * to avoid lots of unnecessary exits.
         */
        PPGMPAGE pPage;
        if (   (   pHandler->cAliasedPages
                || pHandler->cTmpOffPages)
            && (   (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
                || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
           )
        {
            Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
            STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
            rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
            PGM_UNLOCK(pGVM);
        }
        else
        {
            if (pHandlerType->CTX_SUFF(pfnPfHandler))
            {
                void *pvUser = pHandler->CTX_SUFF(pvUser);
                STAM_PROFILE_START(&pHandler->Stat, h);
                PGM_UNLOCK(pGVM);

                Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
                rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
                                                          GCPhysFault, GCPhysFault, pvUser);

#ifdef VBOX_WITH_STATISTICS
                PGM_LOCK_VOID(pGVM);
                pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
                if (pHandler)
                    STAM_PROFILE_STOP(&pHandler->Stat, h);
                PGM_UNLOCK(pGVM);
#endif
            }
            else
            {
                PGM_UNLOCK(pGVM);
                Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
                rc = VINF_EM_RAW_EMULATE_INSTR;
            }
        }
    }
    else
    {
        /*
         * Must be out of sync, so do a SyncPage and restart the instruction.
         *
         * ASSUMES that ALL handlers are page aligned and cover whole pages
         * (assumption asserted in PGMHandlerPhysicalRegisterEx).
         */
        Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
        STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
        rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
        PGM_UNLOCK(pGVM);
    }

    STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
    return rc;

#else
    AssertLogRelFailed();
    return VERR_PGM_NOT_USED_IN_MODE;
#endif
}
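
/*
 * Illustrative sketch (example only, hypothetical names, no VMM types): the
 * decision flow of PGMR0Trap0eHandlerNPMisconfig above.  A fault on a
 * deliberately misconfigured (MMIO) page either gets dispatched to the
 * registered physical access handler, or - when no suitable handler covers
 * the address - the page is simply resynced and the instruction restarted.
 */
typedef int (*PFNEXAMPLEPFHANDLER)(unsigned long GCPhysFault, void *pvUser);

typedef struct EXAMPLEHANDLER
{
    PFNEXAMPLEPFHANDLER pfnPfHandler;   /* may be NULL -> defer to ring-3 emulation */
    void               *pvUser;
} EXAMPLEHANDLER;

static int exampleDispatchMisconfigFault(const EXAMPLEHANDLER *pHandler, unsigned long GCPhysFault,
                                         int (*pfnSyncPage)(unsigned long GCPhys))
{
    if (!pHandler)
        return pfnSyncPage(GCPhysFault);    /* out of sync: resync the page and restart the instruction */
    if (!pHandler->pfnPfHandler)
        return 1;                           /* no ring-0 handler: signal the caller to emulate elsewhere */
    return pHandler->pfnPfHandler(GCPhysFault, pHandler->pvUser);
}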