VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@ 86611

Last change on this file since 86611 was 86473, checked in by vboxsync, 4 years ago

VMM/PGM: Working on eliminating page table bitfield use. bugref:9841 bugref:9746

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.5 KB
Line 
1/* $Id: PGMR0.cpp 86473 2020-10-07 17:30:25Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/rawpci.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/gmm.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/pdmdev.h>
29#include <VBox/vmm/vmcc.h>
30#include <VBox/vmm/gvm.h>
31#include "PGMInline.h"
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <iprt/assert.h>
35#include <iprt/mem.h>
36#include <iprt/memobj.h>
37
38
39/*
40 * Instantiate the ring-0 header/code templates.
41 */
42/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
43#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
44#include "PGMR0Bth.h"
45#undef PGM_BTH_NAME
46
47#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
48#include "PGMR0Bth.h"
49#undef PGM_BTH_NAME
50
51#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
52#include "PGMR0Bth.h"
53#undef PGM_BTH_NAME
54
55#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
56#include "PGMR0Bth.h"
57#undef PGM_BTH_NAME
58
59
60/**
61 * Initializes the per-VM data for the PGM.
62 *
63 * This is called from under the GVMM lock, so it should only initialize the
64 * data so PGMR0CleanupVM and others will work smoothly.
65 *
66 * @returns VBox status code.
67 * @param pGVM Pointer to the global VM structure.
68 */
69VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
70{
71 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
72 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
73
74 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
75 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
76 {
77 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
78 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
79 }
80 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
81}
82
83
84/**
85 * Initalize the per-VM PGM for ring-0.
86 *
87 * @returns VBox status code.
88 * @param pGVM Pointer to the global VM structure.
89 */
90VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
91{
92 int rc = VINF_SUCCESS;
93#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
94 rc = PGMR0DynMapInitVM(pGVM);
95#endif
96 RT_NOREF(pGVM);
97 return rc;
98}
99
100
101/**
102 * Cleans up any loose ends before the GVM structure is destroyed.
103 */
104VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
105{
106 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
107 {
108 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
109 {
110 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
111 AssertRC(rc);
112 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
113 }
114
115 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
116 {
117 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
118 AssertRC(rc);
119 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
120 }
121 }
122
123 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
124 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
125}
126
127
128/**
129 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
130 *
131 * @returns The following VBox status codes.
132 * @retval VINF_SUCCESS on success. FF cleared.
133 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
134 *
135 * @param pGVM The global (ring-0) VM structure.
136 * @param idCpu The ID of the calling EMT.
137 *
138 * @thread EMT(idCpu)
139 *
140 * @remarks Must be called from within the PGM critical section. The caller
141 * must clear the new pages.
142 */
143VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
144{
145 /*
146 * Validate inputs.
147 */
148 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
149 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
150 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
151
152 /*
153 * Check for error injection.
154 */
155 if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
156 return VERR_NO_MEMORY;
157
158 /*
159 * Try allocate a full set of handy pages.
160 */
161 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
162 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
163 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
164 if (!cPages)
165 return VINF_SUCCESS;
166 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
167 if (RT_SUCCESS(rc))
168 {
169#ifdef VBOX_STRICT
170 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
171 {
172 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
173 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
174 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
175 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
176 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
177 }
178#endif
179
180 pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
181 }
182 else if (rc != VERR_GMM_SEED_ME)
183 {
184 if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
185 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
186 && iFirst < PGM_HANDY_PAGES_MIN)
187 {
188
189#ifdef VBOX_STRICT
190 /* We're ASSUMING that GMM has updated all the entires before failing us. */
191 uint32_t i;
192 for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
193 {
194 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
195 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
196 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
197 }
198#endif
199
200 /*
201 * Reduce the number of pages until we hit the minimum limit.
202 */
203 do
204 {
205 cPages >>= 1;
206 if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
207 cPages = PGM_HANDY_PAGES_MIN - iFirst;
208 rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
209 } while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
210 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
211 && cPages + iFirst > PGM_HANDY_PAGES_MIN);
212 if (RT_SUCCESS(rc))
213 {
214#ifdef VBOX_STRICT
215 i = iFirst + cPages;
216 while (i-- > 0)
217 {
218 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
219 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
220 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
221 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
222 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
223 }
224
225 for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
226 {
227 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
228 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
229 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
230 }
231#endif
232
233 pGVM->pgm.s.cHandyPages = iFirst + cPages;
234 }
235 }
236
237 if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
238 {
239 LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
240 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
241 }
242 }
243
244
245 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
246 return rc;
247}
248
249
250/**
251 * Flushes any changes pending in the handy page array.
252 *
253 * It is very important that this gets done when page sharing is enabled.
254 *
255 * @returns The following VBox status codes.
256 * @retval VINF_SUCCESS on success. FF cleared.
257 *
258 * @param pGVM The global (ring-0) VM structure.
259 * @param idCpu The ID of the calling EMT.
260 *
261 * @thread EMT(idCpu)
262 *
263 * @remarks Must be called from within the PGM critical section.
264 */
265VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
266{
267 /*
268 * Validate inputs.
269 */
270 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
271 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
272 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
273
274 /*
275 * Try allocate a full set of handy pages.
276 */
277 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
278 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
279 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
280 if (!cPages)
281 return VINF_SUCCESS;
282 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
283
284 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
285 return rc;
286}
287
288
289/**
290 * Worker function for PGMR3PhysAllocateLargeHandyPage
291 *
292 * @returns The following VBox status codes.
293 * @retval VINF_SUCCESS on success.
294 * @retval VINF_EM_NO_MEMORY if we're out of memory.
295 *
296 * @param pGVM The global (ring-0) VM structure.
297 * @param idCpu The ID of the calling EMT.
298 *
299 * @thread EMT(idCpu)
300 *
301 * @remarks Must be called from within the PGM critical section. The caller
302 * must clear the new pages.
303 */
304VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
305{
306 /*
307 * Validate inputs.
308 */
309 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
310 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
311 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
312 Assert(!pGVM->pgm.s.cLargeHandyPages);
313
314 /*
315 * Do the job.
316 */
317 int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
318 &pGVM->pgm.s.aLargeHandyPage[0].idPage,
319 &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
320 if (RT_SUCCESS(rc))
321 pGVM->pgm.s.cLargeHandyPages = 1;
322
323 return rc;
324}
325
326
327/**
328 * Locate a MMIO2 range.
329 *
330 * @returns Pointer to the MMIO2 range.
331 * @param pGVM The global (ring-0) VM structure.
332 * @param pDevIns The device instance owning the region.
333 * @param hMmio2 Handle to look up.
334 */
335DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMMIOExFind(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
336{
337 /*
338 * We use the lookup table here as list walking is tedious in ring-0 when using
339 * ring-3 pointers and this probably will require some kind of refactoring anyway.
340 */
341 if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
342 {
343 PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
344 if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
345 {
346 Assert(pCur->idMmio2 == hMmio2);
347 AssertReturn(pCur->fFlags & PGMREGMMIO2RANGE_F_MMIO2, NULL);
348 return pCur;
349 }
350 Assert(!pCur);
351 }
352 return NULL;
353}
354
355
356/**
357 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
358 *
359 * @returns VBox status code.
360 * @param pGVM The global (ring-0) VM structure.
361 * @param pDevIns The device instance.
362 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
363 * @param offSub The offset into the region.
364 * @param cbSub The size of the mapping, zero meaning all the rest.
365 * @param ppvMapping Where to return the ring-0 mapping address.
366 */
367VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
368 size_t offSub, size_t cbSub, void **ppvMapping)
369{
370 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
371 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
372
373 /*
374 * Translate hRegion into a range pointer.
375 */
376 PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMMIOExFind(pGVM, pDevIns, hMmio2);
377 AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
378#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
379 uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
380#else
381 RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
382#endif
383 RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
384 pFirstRegMmio = NULL;
385 ASMCompilerBarrier();
386
387 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
388 if (cbSub == 0)
389 cbSub = cbReal - offSub;
390 else
391 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
392
393 /*
394 * Do the mapping.
395 */
396#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
397 AssertPtr(pvR0);
398 *ppvMapping = pvR0 + offSub;
399 return VINF_SUCCESS;
400#else
401 return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
402#endif
403}
404
405
406#ifdef VBOX_WITH_PCI_PASSTHROUGH
407/* Interface sketch. The interface belongs to a global PCI pass-through
408 manager. It shall use the global VM handle, not the user VM handle to
409 store the per-VM info (domain) since that is all ring-0 stuff, thus
410 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
411 we can discuss the PciRaw code re-organization when I'm back from
412 vacation.
413
414 I've implemented the initial IOMMU set up below. For things to work
415 reliably, we will probably need to add a whole bunch of checks and
416 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
417 assuming nested paging (enforced) and prealloc (enforced), no
418 ballooning (check missing), no page sharing (check missing) and no live
419 migration (check missing), it might work fine. At least if some
420 VM power-off hook is present and can tear down the IOMMU page tables. */
421
422/**
423 * Tells the global PCI pass-through manager that we are about to set up the
424 * guest page to host page mappings for the specfied VM.
425 *
426 * @returns VBox status code.
427 *
428 * @param pGVM The ring-0 VM structure.
429 */
430VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
431{
432 NOREF(pGVM);
433 return VINF_SUCCESS;
434}
435
436
437/**
438 * Assigns a host page mapping for a guest page.
439 *
440 * This is only used when setting up the mappings, i.e. between
441 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
442 *
443 * @returns VBox status code.
444 * @param pGVM The ring-0 VM structure.
445 * @param GCPhys The address of the guest page (page aligned).
446 * @param HCPhys The address of the host page (page aligned).
447 */
448VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
449{
450 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
451 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
452
453 if (pGVM->rawpci.s.pfnContigMemInfo)
454 /** @todo what do we do on failure? */
455 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);
456
457 return VINF_SUCCESS;
458}
459
460
461/**
462 * Indicates that the specified guest page doesn't exists but doesn't have host
463 * page mapping we trust PCI pass-through with.
464 *
465 * This is only used when setting up the mappings, i.e. between
466 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
467 *
468 * @returns VBox status code.
469 * @param pGVM The ring-0 VM structure.
470 * @param GCPhys The address of the guest page (page aligned).
471 * @param HCPhys The address of the host page (page aligned).
472 */
473VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
474{
475 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
476
477 if (pGVM->rawpci.s.pfnContigMemInfo)
478 /** @todo what do we do on failure? */
479 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
480
481 return VINF_SUCCESS;
482}
483
484
485/**
486 * Tells the global PCI pass-through manager that we have completed setting up
487 * the guest page to host page mappings for the specfied VM.
488 *
489 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
490 * if some page assignment failed.
491 *
492 * @returns VBox status code.
493 *
494 * @param pGVM The ring-0 VM structure.
495 */
496VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
497{
498 NOREF(pGVM);
499 return VINF_SUCCESS;
500}
501
502
503/**
504 * Tells the global PCI pass-through manager that a guest page mapping has
505 * changed after the initial setup.
506 *
507 * @returns VBox status code.
508 * @param pGVM The ring-0 VM structure.
509 * @param GCPhys The address of the guest page (page aligned).
510 * @param HCPhys The new host page address or NIL_RTHCPHYS if
511 * now unassigned.
512 */
513VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
514{
515 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
516 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
517 NOREF(pGVM);
518 return VINF_SUCCESS;
519}
520
521#endif /* VBOX_WITH_PCI_PASSTHROUGH */
522
523
524/**
525 * Sets up the IOMMU when raw PCI device is enabled.
526 *
527 * @note This is a hack that will probably be remodelled and refined later!
528 *
529 * @returns VBox status code.
530 *
531 * @param pGVM The global (ring-0) VM structure.
532 */
533VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
534{
535 int rc = GVMMR0ValidateGVM(pGVM);
536 if (RT_FAILURE(rc))
537 return rc;
538
539#ifdef VBOX_WITH_PCI_PASSTHROUGH
540 if (pGVM->pgm.s.fPciPassthrough)
541 {
542 /*
543 * The Simplistic Approach - Enumerate all the pages and call tell the
544 * IOMMU about each of them.
545 */
546 pgmLock(pGVM);
547 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
548 if (RT_SUCCESS(rc))
549 {
550 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
551 {
552 PPGMPAGE pPage = &pRam->aPages[0];
553 RTGCPHYS GCPhys = pRam->GCPhys;
554 uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
555 while (cLeft-- > 0)
556 {
557 /* Only expose pages that are 100% safe for now. */
558 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
559 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
560 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
561 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
562 else
563 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
564
565 /* next */
566 pPage++;
567 GCPhys += PAGE_SIZE;
568 }
569 }
570
571 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
572 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
573 rc = rc2;
574 }
575 pgmUnlock(pGVM);
576 }
577 else
578#endif
579 rc = VERR_NOT_SUPPORTED;
580 return rc;
581}
582
583
584/**
585 * \#PF Handler for nested paging.
586 *
587 * @returns VBox status code (appropriate for trap handling and GC return).
588 * @param pGVM The global (ring-0) VM structure.
589 * @param pGVCpu The global (ring-0) CPU structure of the calling
590 * EMT.
591 * @param enmShwPagingMode Paging mode for the nested page tables.
592 * @param uErr The trap error code.
593 * @param pRegFrame Trap register frame.
594 * @param GCPhysFault The fault address.
595 */
596VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
597 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
598{
599 int rc;
600
601 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
602 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
603 STAM_STATS({ pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );
604
605 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
606 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
607 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
608 ("enmShwPagingMode=%d\n", enmShwPagingMode));
609
610 /* Reserved shouldn't end up here. */
611 Assert(!(uErr & X86_TRAP_PF_RSVD));
612
613#ifdef VBOX_WITH_STATISTICS
614 /*
615 * Error code stats.
616 */
617 if (uErr & X86_TRAP_PF_US)
618 {
619 if (!(uErr & X86_TRAP_PF_P))
620 {
621 if (uErr & X86_TRAP_PF_RW)
622 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite);
623 else
624 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead);
625 }
626 else if (uErr & X86_TRAP_PF_RW)
627 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite);
628 else if (uErr & X86_TRAP_PF_RSVD)
629 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved);
630 else if (uErr & X86_TRAP_PF_ID)
631 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE);
632 else
633 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead);
634 }
635 else
636 { /* Supervisor */
637 if (!(uErr & X86_TRAP_PF_P))
638 {
639 if (uErr & X86_TRAP_PF_RW)
640 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite);
641 else
642 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead);
643 }
644 else if (uErr & X86_TRAP_PF_RW)
645 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite);
646 else if (uErr & X86_TRAP_PF_ID)
647 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE);
648 else if (uErr & X86_TRAP_PF_RSVD)
649 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved);
650 }
651#endif
652
653 /*
654 * Call the worker.
655 *
656 * Note! We pretend the guest is in protected mode without paging, so we
657 * can use existing code to build the nested page tables.
658 */
659/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
660 bool fLockTaken = false;
661 switch (enmShwPagingMode)
662 {
663 case PGMMODE_32_BIT:
664 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
665 break;
666 case PGMMODE_PAE:
667 case PGMMODE_PAE_NX:
668 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
669 break;
670 case PGMMODE_AMD64:
671 case PGMMODE_AMD64_NX:
672 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
673 break;
674 case PGMMODE_EPT:
675 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
676 break;
677 default:
678 AssertFailed();
679 rc = VERR_INVALID_PARAMETER;
680 break;
681 }
682 if (fLockTaken)
683 {
684 PGM_LOCK_ASSERT_OWNER(pGVM);
685 pgmUnlock(pGVM);
686 }
687
688 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
689 rc = VINF_SUCCESS;
690 /*
691 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
692 * via its page tables, see @bugref{6043}.
693 */
694 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
695 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
696 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
697 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
698 {
699 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
700 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
701 single VCPU VMs though. */
702 rc = VINF_SUCCESS;
703 }
704
705 STAM_STATS({ if (!pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
706 pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
707 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
708 return rc;
709}
710
711
712/**
713 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
714 * employed for MMIO pages.
715 *
716 * @returns VBox status code (appropriate for trap handling and GC return).
717 * @param pGVM The global (ring-0) VM structure.
718 * @param pGVCpu The global (ring-0) CPU structure of the calling
719 * EMT.
720 * @param enmShwPagingMode Paging mode for the nested page tables.
721 * @param pRegFrame Trap register frame.
722 * @param GCPhysFault The fault address.
723 * @param uErr The error code, UINT32_MAX if not available
724 * (VT-x).
725 */
726VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
727 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
728{
729#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
730 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
731 VBOXSTRICTRC rc;
732
733 /*
734 * Try lookup the all access physical handler for the address.
735 */
736 pgmLock(pGVM);
737 PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
738 PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
739 if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
740 {
741 /*
742 * If the handle has aliases page or pages that have been temporarily
743 * disabled, we'll have to take a detour to make sure we resync them
744 * to avoid lots of unnecessary exits.
745 */
746 PPGMPAGE pPage;
747 if ( ( pHandler->cAliasedPages
748 || pHandler->cTmpOffPages)
749 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
750 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
751 )
752 {
753 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
754 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
755 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
756 pgmUnlock(pGVM);
757 }
758 else
759 {
760 if (pHandlerType->CTX_SUFF(pfnPfHandler))
761 {
762 void *pvUser = pHandler->CTX_SUFF(pvUser);
763 STAM_PROFILE_START(&pHandler->Stat, h);
764 pgmUnlock(pGVM);
765
766 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
767 rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
768 GCPhysFault, GCPhysFault, pvUser);
769
770#ifdef VBOX_WITH_STATISTICS
771 pgmLock(pGVM);
772 pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
773 if (pHandler)
774 STAM_PROFILE_STOP(&pHandler->Stat, h);
775 pgmUnlock(pGVM);
776#endif
777 }
778 else
779 {
780 pgmUnlock(pGVM);
781 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
782 rc = VINF_EM_RAW_EMULATE_INSTR;
783 }
784 }
785 }
786 else
787 {
788 /*
789 * Must be out of sync, so do a SyncPage and restart the instruction.
790 *
791 * ASSUMES that ALL handlers are page aligned and covers whole pages
792 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
793 */
794 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
795 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
796 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
797 pgmUnlock(pGVM);
798 }
799
800 STAM_PROFILE_STOP(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
801 return rc;
802
803#else
804 AssertLogRelFailed();
805 return VERR_PGM_NOT_USED_IN_MODE;
806#endif
807}
808
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette