VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@92216

Last change on this file since 92216 was 92157, checked in by vboxsync, 3 years ago

VMM/PGMPhysMmio2: Removed obsolete PGMREGMMIO2RANGE_F_MMIO2 flag (it's always MMIO2 now for a good while). bugref:10122

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.1 KB
1/* $Id: PGMR0.cpp 92157 2021-10-29 22:03:51Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/rawpci.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/gmm.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/pdmdev.h>
29#include <VBox/vmm/vmcc.h>
30#include <VBox/vmm/gvm.h>
31#include "PGMInline.h"
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <iprt/assert.h>
35#include <iprt/mem.h>
36#include <iprt/memobj.h>
37
38
39/*
40 * Instantiate the ring-0 header/code templates.
41 */
42/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
43#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
44#include "PGMR0Bth.h"
45#undef PGM_BTH_NAME
46
47#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
48#include "PGMR0Bth.h"
49#undef PGM_BTH_NAME
50
51#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
52#include "PGMR0Bth.h"
53#undef PGM_BTH_NAME
54
55#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
56#include "PGMR0Bth.h"
57#undef PGM_BTH_NAME
58
59
60/**
61 * Initializes the per-VM data for the PGM.
62 *
63 * This is called from under the GVMM lock, so it should only initialize the
64 * data so PGMR0CleanupVM and others will work smoothly.
65 *
66 * @returns VBox status code.
67 * @param pGVM Pointer to the global VM structure.
68 */
69VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
70{
71 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
72 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
73
74 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
75 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
76 {
77 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
78 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
79 }
80 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
81}
82
83
84/**
85 * Initialize the per-VM PGM for ring-0.
86 *
87 * @returns VBox status code.
88 * @param pGVM Pointer to the global VM structure.
89 */
90VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
91{
92 RT_NOREF(pGVM);
93 /* Was used for DynMap init */
94 return VINF_SUCCESS;
95}
96
97
98/**
99 * Cleans up any loose ends before the GVM structure is destroyed.
100 */
101VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
102{
103 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
104 {
105 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
106 {
107 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
108 AssertRC(rc);
109 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
110 }
111
112 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
113 {
114 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
115 AssertRC(rc);
116 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
117 }
118 }
119
120 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
121 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
122}
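/*
 * Illustration (not part of the VirtualBox sources): a minimal standalone C
 * sketch of the init/cleanup idiom used by PGMR0InitPerVMData and
 * PGMR0CleanupVM above -- every handle is set to a NIL sentinel up front so a
 * later cleanup pass can safely free only what was actually allocated.  The
 * DEMOSTATE type and demo* names are invented for this sketch.
 */
#include <stdlib.h>

#define DEMO_NIL_HANDLE   NULL
#define DEMO_HANDLE_COUNT 4

typedef struct DEMOSTATE
{
    void *ahHandles[DEMO_HANDLE_COUNT];
} DEMOSTATE;

/* Analogue of PGMR0InitPerVMData: only plant sentinels, defer real allocation. */
static void demoInitState(DEMOSTATE *pState)
{
    for (unsigned i = 0; i < DEMO_HANDLE_COUNT; i++)
        pState->ahHandles[i] = DEMO_NIL_HANDLE;
}

/* Analogue of PGMR0CleanupVM: free-if-not-NIL, then reset the sentinel. */
static void demoCleanupState(DEMOSTATE *pState)
{
    for (unsigned i = 0; i < DEMO_HANDLE_COUNT; i++)
        if (pState->ahHandles[i] != DEMO_NIL_HANDLE)
        {
            free(pState->ahHandles[i]);
            pState->ahHandles[i] = DEMO_NIL_HANDLE;
        }
}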
123
124
125/**
126 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
127 *
128 * @returns The following VBox status codes.
129 * @retval VINF_SUCCESS on success. FF cleared.
130 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
131 *
132 * @param pGVM The global (ring-0) VM structure.
133 * @param idCpu The ID of the calling EMT.
134 *
135 * @thread EMT(idCpu)
136 *
137 * @remarks Must be called from within the PGM critical section. The caller
138 * must clear the new pages.
139 */
140VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
141{
142 /*
143 * Validate inputs.
144 */
145 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
146 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
147 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
148
149 /*
150 * Check for error injection.
151 */
152 if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
153 return VERR_NO_MEMORY;
154
155 /*
156 * Try to allocate a full set of handy pages.
157 */
158 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
159 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
160 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
161 if (!cPages)
162 return VINF_SUCCESS;
163 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
164 if (RT_SUCCESS(rc))
165 {
166#ifdef VBOX_STRICT
167 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
168 {
169 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
170 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
171 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
172 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
173 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
174 }
175#endif
176
177 pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
178 }
179 else if (rc != VERR_GMM_SEED_ME)
180 {
181 if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
182 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
183 && iFirst < PGM_HANDY_PAGES_MIN)
184 {
185
186#ifdef VBOX_STRICT
187 /* We're ASSUMING that GMM has updated all the entries before failing us. */
188 uint32_t i;
189 for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
190 {
191 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
192 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
193 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
194 }
195#endif
196
197 /*
198 * Reduce the number of pages until we hit the minimum limit.
199 */
200 do
201 {
202 cPages >>= 1;
203 if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
204 cPages = PGM_HANDY_PAGES_MIN - iFirst;
205 rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
206 } while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
207 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
208 && cPages + iFirst > PGM_HANDY_PAGES_MIN);
209 if (RT_SUCCESS(rc))
210 {
211#ifdef VBOX_STRICT
212 i = iFirst + cPages;
213 while (i-- > 0)
214 {
215 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
216 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
217 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
218 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
219 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
220 }
221
222 for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
223 {
224 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
225 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
226 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
227 }
228#endif
229
230 pGVM->pgm.s.cHandyPages = iFirst + cPages;
231 }
232 }
233
234 if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
235 {
236 LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
237 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
238 }
239 }
240
241
242 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
243 return rc;
244}
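/*
 * Illustration (not part of the VirtualBox sources): a standalone sketch of
 * the back-off strategy used above when GMM reports an allocation limit --
 * halve the requested count, clamp it so the total never drops below a
 * minimum, and retry.  demoTryAllocate, DEMO_REQ_MIN and the status values
 * are invented stand-ins, not VirtualBox APIs.
 */
#include <stdint.h>

#define DEMO_OK         0
#define DEMO_ERR_LIMIT  (-1)   /* plays the role of VERR_GMM_HIT_*_LIMIT */
#define DEMO_REQ_MIN    32u    /* plays the role of PGM_HANDY_PAGES_MIN  */

static uint32_t g_cDemoQuota = 48; /* pretend quota so the stub can fail */

/* Hypothetical allocator: fails with DEMO_ERR_LIMIT when over the quota. */
static int demoTryAllocate(uint32_t cHave, uint32_t cReq)
{
    return cHave + cReq <= g_cDemoQuota ? DEMO_OK : DEMO_ERR_LIMIT;
}

static int demoAllocateWithBackOff(uint32_t cHave, uint32_t cReq)
{
    int rc = demoTryAllocate(cHave, cReq);
    if (rc == DEMO_ERR_LIMIT && cHave < DEMO_REQ_MIN)
        do
        {
            /* Halve the request, but never let the total fall below the floor. */
            cReq >>= 1;
            if (cHave + cReq < DEMO_REQ_MIN)
                cReq = DEMO_REQ_MIN - cHave;
            rc = demoTryAllocate(cHave, cReq);
        } while (rc == DEMO_ERR_LIMIT && cHave + cReq > DEMO_REQ_MIN);
    return rc;
}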
245
246
247/**
248 * Flushes any changes pending in the handy page array.
249 *
250 * It is very important that this gets done when page sharing is enabled.
251 *
252 * @returns The following VBox status codes.
253 * @retval VINF_SUCCESS on success. FF cleared.
254 *
255 * @param pGVM The global (ring-0) VM structure.
256 * @param idCpu The ID of the calling EMT.
257 *
258 * @thread EMT(idCpu)
259 *
260 * @remarks Must be called from within the PGM critical section.
261 */
262VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
263{
264 /*
265 * Validate inputs.
266 */
267 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
268 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
269 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
270
271 /*
272 * Flush any pending changes in the handy page array (updates only, no allocation).
273 */
274 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
275 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
276 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
277 if (!cPages)
278 return VINF_SUCCESS;
279 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
280
281 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
282 return rc;
283}
284
285
286/**
287 * Worker function for PGMR3PhysAllocateLargeHandyPage.
288 *
289 * @returns The following VBox status codes.
290 * @retval VINF_SUCCESS on success.
291 * @retval VINF_EM_NO_MEMORY if we're out of memory.
292 *
293 * @param pGVM The global (ring-0) VM structure.
294 * @param idCpu The ID of the calling EMT.
295 *
296 * @thread EMT(idCpu)
297 *
298 * @remarks Must be called from within the PGM critical section. The caller
299 * must clear the new pages.
300 */
301VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
302{
303 /*
304 * Validate inputs.
305 */
306 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
307 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
308 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
309 Assert(!pGVM->pgm.s.cLargeHandyPages);
310
311 /*
312 * Do the job.
313 */
314 int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
315 &pGVM->pgm.s.aLargeHandyPage[0].idPage,
316 &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
317 if (RT_SUCCESS(rc))
318 pGVM->pgm.s.cLargeHandyPages = 1;
319
320 return rc;
321}
322
323
324/**
325 * Locate an MMIO2 range.
326 *
327 * @returns Pointer to the MMIO2 range.
328 * @param pGVM The global (ring-0) VM structure.
329 * @param pDevIns The device instance owning the region.
330 * @param hMmio2 Handle to look up.
331 */
332DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMmio2Find(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
333{
334 /*
335 * We use the lookup table here as list walking is tedious in ring-0 when using
336 * ring-3 pointers and this probably will require some kind of refactoring anyway.
337 */
338 if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
339 {
340 PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
341 if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
342 {
343 Assert(pCur->idMmio2 == hMmio2);
344 return pCur;
345 }
346 Assert(!pCur);
347 }
348 return NULL;
349}
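/*
 * Illustration (not part of the VirtualBox sources): a tiny standalone
 * version of the lookup pattern used by pgmR0PhysMmio2Find above -- a 1-based
 * handle indexes a fixed pointer table, and the cached entry is only trusted
 * if its recorded owner matches the caller.  DEMORANGE, g_apDemoRanges and
 * demoFindRange are invented for this sketch.
 */
#include <stddef.h>
#include <stdint.h>

#define DEMO_MAX_RANGES 8

typedef struct DEMORANGE
{
    const void *pvOwner;    /* who registered the range (cf. pDevInsR3)         */
    uint32_t    idSelf;     /* the handle it was registered under (cf. idMmio2) */
} DEMORANGE;

static DEMORANGE *g_apDemoRanges[DEMO_MAX_RANGES]; /* cf. apMmio2RangesR0 */

static DEMORANGE *demoFindRange(const void *pvOwner, uint32_t hRange)
{
    /* Handles are 1-based so that 0 can serve as "no handle". */
    if (hRange >= 1 && hRange <= DEMO_MAX_RANGES)
    {
        DEMORANGE *pCur = g_apDemoRanges[hRange - 1];
        if (pCur && pCur->pvOwner == pvOwner)
            return pCur;
    }
    return NULL;
}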
350
351
352/**
353 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
354 *
355 * @returns VBox status code.
356 * @param pGVM The global (ring-0) VM structure.
357 * @param pDevIns The device instance.
358 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
359 * @param offSub The offset into the region.
360 * @param cbSub The size of the mapping, zero meaning all the rest.
361 * @param ppvMapping Where to return the ring-0 mapping address.
362 */
363VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
364 size_t offSub, size_t cbSub, void **ppvMapping)
365{
366 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
367 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
368
369 /*
370 * Translate hRegion into a range pointer.
371 */
372 PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMmio2Find(pGVM, pDevIns, hMmio2);
373 AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
374#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
375 uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
376#else
377 RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
378#endif
379 RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
380 pFirstRegMmio = NULL;
381 ASMCompilerBarrier();
382
383 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
384 if (cbSub == 0)
385 cbSub = cbReal - offSub;
386 else
387 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
388
389 /*
390 * Do the mapping.
391 */
392#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
393 AssertPtr(pvR0);
394 *ppvMapping = pvR0 + offSub;
395 return VINF_SUCCESS;
396#else
397 return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
398#endif
399}
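/*
 * Illustration (not part of the VirtualBox sources): a standalone sketch of
 * the sub-range validation performed above -- offset and size must be page
 * aligned, the offset must fall inside the region, and a zero size means
 * "everything from the offset to the end".  demoValidateSubRange and the
 * 4 KiB page-size assumption belong to this sketch, not the real API.
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_PAGE_OFFSET_MASK UINT64_C(0xfff)   /* assumes 4 KiB pages */

static bool demoValidateSubRange(uint64_t cbRegion, uint64_t offSub, uint64_t *pcbSub)
{
    if ((offSub | *pcbSub) & DEMO_PAGE_OFFSET_MASK)
        return false;                           /* misaligned offset or size */
    if (offSub >= cbRegion)
        return false;                           /* offset outside the region */
    if (*pcbSub == 0)
        *pcbSub = cbRegion - offSub;            /* zero means "the rest" */
    else if (*pcbSub >= cbRegion || *pcbSub + offSub > cbRegion)
        return false;                           /* sub-range spills past the end */
    return true;
}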
400
401
402#ifdef VBOX_WITH_PCI_PASSTHROUGH
403/* Interface sketch. The interface belongs to a global PCI pass-through
404 manager. It shall use the global VM handle, not the user VM handle to
405 store the per-VM info (domain) since that is all ring-0 stuff, thus
406 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0';
407 we can discuss the PciRaw code reorganization when I'm back from
408 vacation.
409
410 I've implemented the initial IOMMU set up below. For things to work
411 reliably, we will probably need to add a whole bunch of checks and
412 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
413 assuming nested paging (enforced) and prealloc (enforced), no
414 ballooning (check missing), page sharing (check missing) or live
415 migration (check missing), it might work fine. At least if some
416 VM power-off hook is present and can tear down the IOMMU page tables. */
417
418/**
419 * Tells the global PCI pass-through manager that we are about to set up the
420 * guest page to host page mappings for the specified VM.
421 *
422 * @returns VBox status code.
423 *
424 * @param pGVM The ring-0 VM structure.
425 */
426VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
427{
428 NOREF(pGVM);
429 return VINF_SUCCESS;
430}
431
432
433/**
434 * Assigns a host page mapping for a guest page.
435 *
436 * This is only used when setting up the mappings, i.e. between
437 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
438 *
439 * @returns VBox status code.
440 * @param pGVM The ring-0 VM structure.
441 * @param GCPhys The address of the guest page (page aligned).
442 * @param HCPhys The address of the host page (page aligned).
443 */
444VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
445{
446 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
447 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
448
449 if (pGVM->rawpci.s.pfnContigMemInfo)
450 /** @todo what do we do on failure? */
451 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);
452
453 return VINF_SUCCESS;
454}
455
456
457/**
458 * Indicates that the specified guest page doesn't exist or doesn't have a
459 * host page mapping that we trust PCI pass-through with.
460 *
461 * This is only used when setting up the mappings, i.e. between
462 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
463 *
464 * @returns VBox status code.
465 * @param pGVM The ring-0 VM structure.
466 * @param GCPhys The address of the guest page (page aligned).
468 */
469VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
470{
471 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
472
473 if (pGVM->rawpci.s.pfnContigMemInfo)
474 /** @todo what do we do on failure? */
475 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
476
477 return VINF_SUCCESS;
478}
479
480
481/**
482 * Tells the global PCI pass-through manager that we have completed setting up
483 * the guest page to host page mappings for the specified VM.
484 *
485 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
486 * if some page assignment failed.
487 *
488 * @returns VBox status code.
489 *
490 * @param pGVM The ring-0 VM structure.
491 */
492VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
493{
494 NOREF(pGVM);
495 return VINF_SUCCESS;
496}
497
498
499/**
500 * Tells the global PCI pass-through manager that a guest page mapping has
501 * changed after the initial setup.
502 *
503 * @returns VBox status code.
504 * @param pGVM The ring-0 VM structure.
505 * @param GCPhys The address of the guest page (page aligned).
506 * @param HCPhys The new host page address or NIL_RTHCPHYS if
507 * now unassigned.
508 */
509VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
510{
511 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
512 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
513 NOREF(pGVM);
514 return VINF_SUCCESS;
515}
516
517#endif /* VBOX_WITH_PCI_PASSTHROUGH */
518
519
520/**
521 * Sets up the IOMMU when raw PCI device is enabled.
522 *
523 * @note This is a hack that will probably be remodelled and refined later!
524 *
525 * @returns VBox status code.
526 *
527 * @param pGVM The global (ring-0) VM structure.
528 */
529VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
530{
531 int rc = GVMMR0ValidateGVM(pGVM);
532 if (RT_FAILURE(rc))
533 return rc;
534
535#ifdef VBOX_WITH_PCI_PASSTHROUGH
536 if (pGVM->pgm.s.fPciPassthrough)
537 {
538 /*
539 * The Simplistic Approach - Enumerate all the pages and tell the
540 * IOMMU about each of them.
541 */
542 PGM_LOCK_VOID(pGVM);
543 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
544 if (RT_SUCCESS(rc))
545 {
546 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
547 {
548 PPGMPAGE pPage = &pRam->aPages[0];
549 RTGCPHYS GCPhys = pRam->GCPhys;
550 uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
551 while (cLeft-- > 0)
552 {
553 /* Only expose pages that are 100% safe for now. */
554 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
555 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
556 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
557 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
558 else
559 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
560
561 /* next */
562 pPage++;
563 GCPhys += PAGE_SIZE;
564 }
565 }
566
567 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
568 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
569 rc = rc2;
570 }
571 PGM_UNLOCK(pGVM);
572 }
573 else
574#endif
575 rc = VERR_NOT_SUPPORTED;
576 return rc;
577}
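/*
 * Illustration (not part of the VirtualBox sources): PGMR0PhysSetupIoMmu
 * above uses a small error-aggregation idiom -- the End call always runs to
 * balance Begin, and its status only replaces rc when no earlier error has
 * been recorded.  The demo* functions below are invented stand-ins; negative
 * values play the role of failing VBox status codes.
 */
static int demoBegin(void) { return 0; }
static int demoWork(void)  { return -1; /* pretend the middle step failed */ }
static int demoEnd(void)   { return 0; }

static int demoRunBracketed(void)
{
    int rc = demoBegin();
    if (rc >= 0)
    {
        rc = demoWork();            /* may fail; demoEnd() must still run   */

        int rc2 = demoEnd();        /* always balance the Begin call        */
        if (rc2 < 0 && rc >= 0)     /* keep the first error, don't mask it  */
            rc = rc2;
    }
    return rc;
}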
578
579
580/**
581 * \#PF Handler for nested paging.
582 *
583 * @returns VBox status code (appropriate for trap handling and GC return).
584 * @param pGVM The global (ring-0) VM structure.
585 * @param pGVCpu The global (ring-0) CPU structure of the calling
586 * EMT.
587 * @param enmShwPagingMode Paging mode for the nested page tables.
588 * @param uErr The trap error code.
589 * @param pRegFrame Trap register frame.
590 * @param GCPhysFault The fault address.
591 */
592VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
593 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
594{
595 int rc;
596
597 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
598 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
599 STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );
600
601 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
602 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
603 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
604 ("enmShwPagingMode=%d\n", enmShwPagingMode));
605
606 /* Reserved shouldn't end up here. */
607 Assert(!(uErr & X86_TRAP_PF_RSVD));
608
609#ifdef VBOX_WITH_STATISTICS
610 /*
611 * Error code stats.
612 */
613 if (uErr & X86_TRAP_PF_US)
614 {
615 if (!(uErr & X86_TRAP_PF_P))
616 {
617 if (uErr & X86_TRAP_PF_RW)
618 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
619 else
620 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
621 }
622 else if (uErr & X86_TRAP_PF_RW)
623 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
624 else if (uErr & X86_TRAP_PF_RSVD)
625 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
626 else if (uErr & X86_TRAP_PF_ID)
627 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
628 else
629 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
630 }
631 else
632 { /* Supervisor */
633 if (!(uErr & X86_TRAP_PF_P))
634 {
635 if (uErr & X86_TRAP_PF_RW)
636 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
637 else
638 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
639 }
640 else if (uErr & X86_TRAP_PF_RW)
641 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
642 else if (uErr & X86_TRAP_PF_ID)
643 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
644 else if (uErr & X86_TRAP_PF_RSVD)
645 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
646 }
647#endif
648
649 /*
650 * Call the worker.
651 *
652 * Note! We pretend the guest is in protected mode without paging, so we
653 * can use existing code to build the nested page tables.
654 */
655/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
656 bool fLockTaken = false;
657 switch (enmShwPagingMode)
658 {
659 case PGMMODE_32_BIT:
660 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
661 break;
662 case PGMMODE_PAE:
663 case PGMMODE_PAE_NX:
664 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
665 break;
666 case PGMMODE_AMD64:
667 case PGMMODE_AMD64_NX:
668 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
669 break;
670 case PGMMODE_EPT:
671 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
672 break;
673 default:
674 AssertFailed();
675 rc = VERR_INVALID_PARAMETER;
676 break;
677 }
678 if (fLockTaken)
679 {
680 PGM_LOCK_ASSERT_OWNER(pGVM);
681 PGM_UNLOCK(pGVM);
682 }
683
684 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
685 rc = VINF_SUCCESS;
686 /*
687 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
688 * via its page tables, see @bugref{6043}.
689 */
690 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
691 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
692 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
693 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
694 {
695 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
696 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
697 single VCPU VMs though. */
698 rc = VINF_SUCCESS;
699 }
700
701 STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
702 pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
703 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
704 return rc;
705}
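/*
 * Illustration (not part of the VirtualBox sources): the statistics block in
 * PGMR0Trap0eHandlerNestedPaging keys off the architectural x86 page-fault
 * error-code bits.  For reference, here is a small standalone decoder for
 * those bits; demoDescribePageFault and the DEMO_PF_* names are invented,
 * but the bit values are the architectural ones (cf. X86_TRAP_PF_*).
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PF_P    UINT32_C(0x01)  /* 0 = page not present, 1 = protection violation */
#define DEMO_PF_RW   UINT32_C(0x02)  /* 0 = read access, 1 = write access              */
#define DEMO_PF_US   UINT32_C(0x04)  /* 0 = supervisor mode, 1 = user mode             */
#define DEMO_PF_RSVD UINT32_C(0x08)  /* reserved bit set in a paging-structure entry   */
#define DEMO_PF_ID   UINT32_C(0x10)  /* fault on an instruction fetch                  */

static void demoDescribePageFault(uint32_t uErr)
{
    printf("%s fault on a %s access from %s mode%s%s\n",
           uErr & DEMO_PF_P    ? "protection"  : "not-present",
           uErr & DEMO_PF_RW   ? "write"       : "read",
           uErr & DEMO_PF_US   ? "user"        : "supervisor",
           uErr & DEMO_PF_RSVD ? ", reserved bit set" : "",
           uErr & DEMO_PF_ID   ? ", instruction fetch" : "");
}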
706
707
708/**
709 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
710 * employed for MMIO pages.
711 *
712 * @returns VBox status code (appropriate for trap handling and GC return).
713 * @param pGVM The global (ring-0) VM structure.
714 * @param pGVCpu The global (ring-0) CPU structure of the calling
715 * EMT.
716 * @param enmShwPagingMode Paging mode for the nested page tables.
717 * @param pRegFrame Trap register frame.
718 * @param GCPhysFault The fault address.
719 * @param uErr The error code, UINT32_MAX if not available
720 * (VT-x).
721 */
722VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
723 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
724{
725#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
726 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
727 VBOXSTRICTRC rc;
728
729 /*
730 * Try to look up the all-access physical handler for the address.
731 */
732 PGM_LOCK_VOID(pGVM);
733 PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
734 PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
735 if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
736 {
737 /*
738 * If the handler has aliased pages or pages that have been temporarily
739 * disabled, we'll have to take a detour to make sure we resync them
740 * to avoid lots of unnecessary exits.
741 */
742 PPGMPAGE pPage;
743 if ( ( pHandler->cAliasedPages
744 || pHandler->cTmpOffPages)
745 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
746 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
747 )
748 {
749 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
750 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
751 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
752 PGM_UNLOCK(pGVM);
753 }
754 else
755 {
756 if (pHandlerType->CTX_SUFF(pfnPfHandler))
757 {
758 void *pvUser = pHandler->CTX_SUFF(pvUser);
759 STAM_PROFILE_START(&pHandler->Stat, h);
760 PGM_UNLOCK(pGVM);
761
762 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
763 rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
764 GCPhysFault, GCPhysFault, pvUser);
765
766#ifdef VBOX_WITH_STATISTICS
767 PGM_LOCK_VOID(pGVM);
768 pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
769 if (pHandler)
770 STAM_PROFILE_STOP(&pHandler->Stat, h);
771 PGM_UNLOCK(pGVM);
772#endif
773 }
774 else
775 {
776 PGM_UNLOCK(pGVM);
777 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
778 rc = VINF_EM_RAW_EMULATE_INSTR;
779 }
780 }
781 }
782 else
783 {
784 /*
785 * Must be out of sync, so do a SyncPage and restart the instruction.
786 *
787 * ASSUMES that ALL handlers are page aligned and covers whole pages
788 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
789 */
790 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
791 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
792 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
793 PGM_UNLOCK(pGVM);
794 }
795
796 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
797 return rc;
798
799#else
800 AssertLogRelFailed();
801 return VERR_PGM_NOT_USED_IN_MODE;
802#endif
803}
804