VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp@ 41395

Last change on this file since 41395 was 41395, checked in by vboxsync, 13 years ago

PGMR3PhysSetA20: Signal CR3 sync and pool clearing.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 163.8 KB
Line 
1/* $Id: PGMPhys.cpp 41395 2012-05-22 14:29:15Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Physical Memory Addressing.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_PHYS
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/iom.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/stam.h>
27#ifdef VBOX_WITH_REM
28# include <VBox/vmm/rem.h>
29#endif
30#include <VBox/vmm/pdmdev.h>
31#include "PGMInternal.h"
32#include <VBox/vmm/vm.h>
33#include "PGMInline.h"
34#include <VBox/sup.h>
35#include <VBox/param.h>
36#include <VBox/err.h>
37#include <VBox/log.h>
38#include <iprt/assert.h>
39#include <iprt/alloc.h>
40#include <iprt/asm.h>
41#include <iprt/thread.h>
42#include <iprt/string.h>
43#include <iprt/system.h>
44
45
46/*******************************************************************************
47* Defined Constants And Macros *
48*******************************************************************************/
49/** The number of pages to free in one batch. */
50#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128
51
52
53/*******************************************************************************
54* Internal Functions *
55*******************************************************************************/
56static DECLCALLBACK(int) pgmR3PhysRomWriteHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser);
57
58
59/*
60 * PGMR3PhysReadU8-64
61 * PGMR3PhysWriteU8-64
62 */
63#define PGMPHYSFN_READNAME PGMR3PhysReadU8
64#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8
65#define PGMPHYS_DATASIZE 1
66#define PGMPHYS_DATATYPE uint8_t
67#include "PGMPhysRWTmpl.h"
68
69#define PGMPHYSFN_READNAME PGMR3PhysReadU16
70#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16
71#define PGMPHYS_DATASIZE 2
72#define PGMPHYS_DATATYPE uint16_t
73#include "PGMPhysRWTmpl.h"
74
75#define PGMPHYSFN_READNAME PGMR3PhysReadU32
76#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32
77#define PGMPHYS_DATASIZE 4
78#define PGMPHYS_DATATYPE uint32_t
79#include "PGMPhysRWTmpl.h"
80
81#define PGMPHYSFN_READNAME PGMR3PhysReadU64
82#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64
83#define PGMPHYS_DATASIZE 8
84#define PGMPHYS_DATATYPE uint64_t
85#include "PGMPhysRWTmpl.h"
86
87
88/**
89 * EMT worker for PGMR3PhysReadExternal.
90 */
91static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead)
92{
93 PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead);
94 return VINF_SUCCESS;
95}
96
97
98/**
99 * Read from physical memory, external users.
100 *
101 * @returns VBox status code.
102 * @retval VINF_SUCCESS.
103 *
104 * @param pVM The VM handle.
105 * @param GCPhys Physical address to read from.
106 * @param pvBuf Where to read into.
107 * @param cbRead How many bytes to read.
108 *
109 * @thread Any but EMTs.
110 */
111VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead)
112{
113 VM_ASSERT_OTHER_THREAD(pVM);
114
115 AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS);
116 LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead));
117
118 pgmLock(pVM);
119
120 /*
121 * Copy loop on ram ranges.
122 */
123 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
124 for (;;)
125 {
126 /* Inside range or not? */
127 if (pRam && GCPhys >= pRam->GCPhys)
128 {
129 /*
130 * Must work our way thru this page by page.
131 */
132 RTGCPHYS off = GCPhys - pRam->GCPhys;
133 while (off < pRam->cb)
134 {
135 unsigned iPage = off >> PAGE_SHIFT;
136 PPGMPAGE pPage = &pRam->aPages[iPage];
137
138 /*
139 * If the page has an ALL access handler, we'll have to
140 * delegate the job to EMT.
141 */
142 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
143 {
144 pgmUnlock(pVM);
145
146 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 4,
147 pVM, &GCPhys, pvBuf, cbRead);
148 }
149 Assert(!PGM_PAGE_IS_MMIO(pPage));
150
151 /*
152 * Simple stuff, go ahead.
153 */
154 size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK);
155 if (cb > cbRead)
156 cb = cbRead;
157 PGMPAGEMAPLOCK PgMpLck;
158 const void *pvSrc;
159 int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc, &PgMpLck);
160 if (RT_SUCCESS(rc))
161 {
162 memcpy(pvBuf, pvSrc, cb);
163 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
164 }
165 else
166 {
167 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n",
168 pRam->GCPhys + off, pPage, rc));
169 memset(pvBuf, 0xff, cb);
170 }
171
172 /* next page */
173 if (cb >= cbRead)
174 {
175 pgmUnlock(pVM);
176 return VINF_SUCCESS;
177 }
178 cbRead -= cb;
179 off += cb;
180 GCPhys += cb;
181 pvBuf = (char *)pvBuf + cb;
182 } /* walk pages in ram range. */
183 }
184 else
185 {
186 LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead));
187
188 /*
189 * Unassigned address space.
190 */
191 size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0;
192 if (cb >= cbRead)
193 {
194 memset(pvBuf, 0xff, cbRead);
195 break;
196 }
197 memset(pvBuf, 0xff, cb);
198
199 cbRead -= cb;
200 pvBuf = (char *)pvBuf + cb;
201 GCPhys += cb;
202 }
203
204 /* Advance range if necessary. */
205 while (pRam && GCPhys > pRam->GCPhysLast)
206 pRam = pRam->CTX_SUFF(pNext);
207 } /* Ram range walk */
208
209 pgmUnlock(pVM);
210
211 return VINF_SUCCESS;
212}
213
214
215/**
216 * EMT worker for PGMR3PhysWriteExternal.
217 */
218static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite)
219{
220 /** @todo VERR_EM_NO_MEMORY */
221 PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite);
222 return VINF_SUCCESS;
223}
224
225
226/**
227 * Write to physical memory, external users.
228 *
229 * @returns VBox status code.
230 * @retval VINF_SUCCESS.
231 * @retval VERR_EM_NO_MEMORY.
232 *
233 * @param pVM The VM handle.
234 * @param GCPhys Physical address to write to.
235 * @param pvBuf What to write.
236 * @param cbWrite How many bytes to write.
237 * @param pszWho Who is writing. For tracking down who is writing
238 * after we've saved the state.
239 *
240 * @thread Any but EMTs.
241 */
242VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, const char *pszWho)
243{
244 VM_ASSERT_OTHER_THREAD(pVM);
245
246 AssertMsg(!pVM->pgm.s.fNoMorePhysWrites,
247 ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x pszWho=%s\n",
248 GCPhys, cbWrite, pszWho));
249 AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS);
250 LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite));
251
252 pgmLock(pVM);
253
254 /*
255 * Copy loop on ram ranges, stop when we hit something difficult.
256 */
257 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
258 for (;;)
259 {
260 /* Inside range or not? */
261 if (pRam && GCPhys >= pRam->GCPhys)
262 {
263 /*
264 * Must work our way thru this page by page.
265 */
266 RTGCPTR off = GCPhys - pRam->GCPhys;
267 while (off < pRam->cb)
268 {
269 RTGCPTR iPage = off >> PAGE_SHIFT;
270 PPGMPAGE pPage = &pRam->aPages[iPage];
271
272 /*
273 * Is the page problematic, we have to do the work on the EMT.
274 *
275 * Allocating writable pages and access handlers are
276 * problematic, write monitored pages are simple and can be
277 * dealt with here.
278 */
279 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
280 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
281 {
282 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
283 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
284 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage);
285 else
286 {
287 pgmUnlock(pVM);
288
289 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 4,
290 pVM, &GCPhys, pvBuf, cbWrite);
291 }
292 }
293 Assert(!PGM_PAGE_IS_MMIO(pPage));
294
295 /*
296 * Simple stuff, go ahead.
297 */
298 size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK);
299 if (cb > cbWrite)
300 cb = cbWrite;
301 PGMPAGEMAPLOCK PgMpLck;
302 void *pvDst;
303 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst, &PgMpLck);
304 if (RT_SUCCESS(rc))
305 {
306 memcpy(pvDst, pvBuf, cb);
307 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
308 }
309 else
310 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n",
311 pRam->GCPhys + off, pPage, rc));
312
313 /* next page */
314 if (cb >= cbWrite)
315 {
316 pgmUnlock(pVM);
317 return VINF_SUCCESS;
318 }
319
320 cbWrite -= cb;
321 off += cb;
322 GCPhys += cb;
323 pvBuf = (const char *)pvBuf + cb;
324 } /* walk pages in ram range */
325 }
326 else
327 {
328 /*
329 * Unassigned address space, skip it.
330 */
331 if (!pRam)
332 break;
333 size_t cb = pRam->GCPhys - GCPhys;
334 if (cb >= cbWrite)
335 break;
336 cbWrite -= cb;
337 pvBuf = (const char *)pvBuf + cb;
338 GCPhys += cb;
339 }
340
341 /* Advance range if necessary. */
342 while (pRam && GCPhys > pRam->GCPhysLast)
343 pRam = pRam->CTX_SUFF(pNext);
344 } /* Ram range walk */
345
346 pgmUnlock(pVM);
347 return VINF_SUCCESS;
348}
349
350
351/**
352 * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable.
353 *
354 * @returns see PGMR3PhysGCPhys2CCPtrExternal
355 * @param pVM The VM handle.
356 * @param pGCPhys Pointer to the guest physical address.
357 * @param ppv Where to store the mapping address.
358 * @param pLock Where to store the lock.
359 */
360static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
361{
362 /*
363 * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with
364 * an access handler after it succeeds.
365 */
366 int rc = pgmLock(pVM);
367 AssertRCReturn(rc, rc);
368
369 rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock);
370 if (RT_SUCCESS(rc))
371 {
372 PPGMPAGEMAPTLBE pTlbe;
373 int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe);
374 AssertFatalRC(rc2);
375 PPGMPAGE pPage = pTlbe->pPage;
376 if (PGM_PAGE_IS_MMIO(pPage))
377 {
378 PGMPhysReleasePageMappingLock(pVM, pLock);
379 rc = VERR_PGM_PHYS_PAGE_RESERVED;
380 }
381 else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
382#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
383 || pgmPoolIsDirtyPage(pVM, *pGCPhys)
384#endif
385 )
386 {
387 /* We *must* flush any corresponding pgm pool page here, otherwise we'll
388 * not be informed about writes and keep bogus gst->shw mappings around.
389 */
390 pgmPoolFlushPageByGCPhys(pVM, *pGCPhys);
391 Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage));
392 /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has
393 * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */
394 }
395 }
396
397 pgmUnlock(pVM);
398 return rc;
399}
400
401
402/**
403 * Requests the mapping of a guest page into ring-3, external threads.
404 *
405 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
406 * release it.
407 *
408 * This API will assume your intention is to write to the page, and will
409 * therefore replace shared and zero pages. If you do not intend to modify the
410 * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API.
411 *
412 * @returns VBox status code.
413 * @retval VINF_SUCCESS on success.
414 * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid page but has no physical
415 * backing or if the page has any active access handlers. The caller
416 * must fall back on using PGMR3PhysWriteExternal.
417 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
418 *
419 * @param pVM The VM handle.
420 * @param GCPhys The guest physical address of the page that should be mapped.
421 * @param ppv Where to store the address corresponding to GCPhys.
422 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
423 *
424 * @remark Avoid calling this API from within critical sections (other than the
425 * PGM one) because of the deadlock risk when we have to delegating the
426 * task to an EMT.
427 * @thread Any.
428 */
429VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
430{
431 AssertPtr(ppv);
432 AssertPtr(pLock);
433
434 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
435
436 int rc = pgmLock(pVM);
437 AssertRCReturn(rc, rc);
438
439 /*
440 * Query the Physical TLB entry for the page (may fail).
441 */
442 PPGMPAGEMAPTLBE pTlbe;
443 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
444 if (RT_SUCCESS(rc))
445 {
446 PPGMPAGE pPage = pTlbe->pPage;
447 if (PGM_PAGE_IS_MMIO(pPage))
448 rc = VERR_PGM_PHYS_PAGE_RESERVED;
449 else
450 {
451 /*
452 * If the page is shared, the zero page, or being write monitored
453 * it must be converted to an page that's writable if possible.
454 * We can only deal with write monitored pages here, the rest have
455 * to be on an EMT.
456 */
457 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
458 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
459#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
460 || pgmPoolIsDirtyPage(pVM, GCPhys)
461#endif
462 )
463 {
464 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
465 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
466#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
467 && !pgmPoolIsDirtyPage(pVM, GCPhys)
468#endif
469 )
470 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage);
471 else
472 {
473 pgmUnlock(pVM);
474
475 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
476 pVM, &GCPhys, ppv, pLock);
477 }
478 }
479
480 /*
481 * Now, just perform the locking and calculate the return address.
482 */
483 PPGMPAGEMAP pMap = pTlbe->pMap;
484 if (pMap)
485 pMap->cRefs++;
486
487 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
488 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
489 {
490 if (cLocks == 0)
491 pVM->pgm.s.cWriteLockedPages++;
492 PGM_PAGE_INC_WRITE_LOCKS(pPage);
493 }
494 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
495 {
496 PGM_PAGE_INC_WRITE_LOCKS(pPage);
497 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
498 if (pMap)
499 pMap->cRefs++; /* Extra ref to prevent it from going away. */
500 }
501
502 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
503 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
504 pLock->pvMap = pMap;
505 }
506 }
507
508 pgmUnlock(pVM);
509 return rc;
510}
511
512
513/**
514 * Requests the mapping of a guest page into ring-3, external threads.
515 *
516 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
517 * release it.
518 *
519 * @returns VBox status code.
520 * @retval VINF_SUCCESS on success.
521 * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid page but has no physical
522 * backing or if the page as an active ALL access handler. The caller
523 * must fall back on using PGMPhysRead.
524 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
525 *
526 * @param pVM The VM handle.
527 * @param GCPhys The guest physical address of the page that should be mapped.
528 * @param ppv Where to store the address corresponding to GCPhys.
529 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
530 *
531 * @remark Avoid calling this API from within critical sections (other than
532 * the PGM one) because of the deadlock risk.
533 * @thread Any.
534 */
535VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock)
536{
537 int rc = pgmLock(pVM);
538 AssertRCReturn(rc, rc);
539
540 /*
541 * Query the Physical TLB entry for the page (may fail).
542 */
543 PPGMPAGEMAPTLBE pTlbe;
544 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
545 if (RT_SUCCESS(rc))
546 {
547 PPGMPAGE pPage = pTlbe->pPage;
548#if 1
549 /* MMIO pages doesn't have any readable backing. */
550 if (PGM_PAGE_IS_MMIO(pPage))
551 rc = VERR_PGM_PHYS_PAGE_RESERVED;
552#else
553 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
554 rc = VERR_PGM_PHYS_PAGE_RESERVED;
555#endif
556 else
557 {
558 /*
559 * Now, just perform the locking and calculate the return address.
560 */
561 PPGMPAGEMAP pMap = pTlbe->pMap;
562 if (pMap)
563 pMap->cRefs++;
564
565 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
566 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
567 {
568 if (cLocks == 0)
569 pVM->pgm.s.cReadLockedPages++;
570 PGM_PAGE_INC_READ_LOCKS(pPage);
571 }
572 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
573 {
574 PGM_PAGE_INC_READ_LOCKS(pPage);
575 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage));
576 if (pMap)
577 pMap->cRefs++; /* Extra ref to prevent it from going away. */
578 }
579
580 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
581 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
582 pLock->pvMap = pMap;
583 }
584 }
585
586 pgmUnlock(pVM);
587 return rc;
588}
589
590
/** Clears the left and right child pointers of a node in all three contexts
 *  (R3, R0 and RC), turning it into a leaf. */
#define MAKE_LEAF(a_pNode) \
    do { \
        (a_pNode)->pLeftR3  = NIL_RTR3PTR; \
        (a_pNode)->pLeftR0  = NIL_RTR0PTR; \
        (a_pNode)->pLeftRC  = NIL_RTRCPTR; \
        (a_pNode)->pRightR3 = NIL_RTR3PTR; \
        (a_pNode)->pRightR0 = NIL_RTR0PTR; \
        (a_pNode)->pRightRC = NIL_RTRCPTR; \
    } while (0)

/** Makes a_pNode the left child of a_pParent in all three contexts, using the
 *  node's pSelfR0 and pSelfRC for the R0 and RC links.  a_pNode is evaluated
 *  more than once. */
#define INSERT_LEFT(a_pParent, a_pNode) \
    do { \
        (a_pParent)->pLeftR3 = (a_pNode); \
        (a_pParent)->pLeftR0 = (a_pNode)->pSelfR0; \
        (a_pParent)->pLeftRC = (a_pNode)->pSelfRC; \
    } while (0)

/** Makes a_pNode the right child of a_pParent in all three contexts, using the
 *  node's pSelfR0 and pSelfRC for the R0 and RC links.  a_pNode is evaluated
 *  more than once. */
#define INSERT_RIGHT(a_pParent, a_pNode) \
    do { \
        (a_pParent)->pRightR3 = (a_pNode); \
        (a_pParent)->pRightR0 = (a_pNode)->pSelfR0; \
        (a_pParent)->pRightRC = (a_pNode)->pSelfRC; \
    } while (0)
613
614
615/**
616 * Recursive tree builder.
617 *
618 * @param ppRam Pointer to the iterator variable.
619 * @param iHeight The hight about normal leaf nodes. Inserts a leaf
620 * node if 0.
621 */
622static PPGMRAMRANGE pgmR3PhysRebuildRamRangeSearchTreesRecursively(PPGMRAMRANGE *ppRam, int iDepth)
623{
624 PPGMRAMRANGE pRam;
625 if (iDepth <= 0)
626 {
627 /*
628 * Leaf node.
629 */
630 pRam = *ppRam;
631 if (pRam)
632 {
633 *ppRam = pRam->pNextR3;
634 MAKE_LEAF(pRam);
635 }
636 }
637 else
638 {
639
640 /*
641 * Intermediate node.
642 */
643 PPGMRAMRANGE pLeft = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1);
644
645 pRam = *ppRam;
646 if (!pRam)
647 return pLeft;
648 *ppRam = pRam->pNextR3;
649 MAKE_LEAF(pRam);
650 INSERT_LEFT(pRam, pLeft);
651
652 PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1);
653 if (pRight)
654 INSERT_RIGHT(pRam, pRight);
655 }
656 return pRam;
657}
658
659
660/**
661 * Rebuilds the RAM range search trees.
662 *
663 * @param pVM The VM handle.
664 */
665static void pgmR3PhysRebuildRamRangeSearchTrees(PVM pVM)
666{
667
668 /*
669 * Create the reasonably balanced tree in a sequential fashion.
670 * For simplicity (laziness) we use standard recursion here.
671 */
672 int iDepth = 0;
673 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
674 PPGMRAMRANGE pRoot = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, 0);
675 while (pRam)
676 {
677 PPGMRAMRANGE pLeft = pRoot;
678
679 pRoot = pRam;
680 pRam = pRam->pNextR3;
681 MAKE_LEAF(pRoot);
682 INSERT_LEFT(pRoot, pLeft);
683
684 PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, iDepth);
685 if (pRight)
686 INSERT_RIGHT(pRoot, pRight);
687 /** @todo else: rotate the tree. */
688
689 iDepth++;
690 }
691
692 pVM->pgm.s.pRamRangeTreeR3 = pRoot;
693 pVM->pgm.s.pRamRangeTreeR0 = pRoot ? pRoot->pSelfR0 : NIL_RTR0PTR;
694 pVM->pgm.s.pRamRangeTreeRC = pRoot ? pRoot->pSelfRC : NIL_RTRCPTR;
695
696#ifdef VBOX_STRICT
697 /*
698 * Verify that the above code works.
699 */
700 unsigned cRanges = 0;
701 for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
702 cRanges++;
703 Assert(cRanges > 0);
704
705 unsigned cMaxDepth = ASMBitLastSetU32(cRanges);
706 if ((1U << cMaxDepth) < cRanges)
707 cMaxDepth++;
708
709 for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
710 {
711 unsigned cDepth = 0;
712 PPGMRAMRANGE pRam2 = pVM->pgm.s.pRamRangeTreeR3;
713 for (;;)
714 {
715 if (pRam == pRam2)
716 break;
717 Assert(pRam2);
718 if (pRam->GCPhys < pRam2->GCPhys)
719 pRam2 = pRam2->pLeftR3;
720 else
721 pRam2 = pRam2->pRightR3;
722 }
723 AssertMsg(cDepth <= cMaxDepth, ("cDepth=%d cMaxDepth=%d\n", cDepth, cMaxDepth));
724 }
725#endif /* VBOX_STRICT */
726}
727
728#undef MAKE_LEAF
729#undef INSERT_LEFT
730#undef INSERT_RIGHT
731
732/**
733 * Relinks the RAM ranges using the pSelfRC and pSelfR0 pointers.
734 *
735 * Called when anything was relocated.
736 *
737 * @param pVM The VM handle.
738 */
739void pgmR3PhysRelinkRamRanges(PVM pVM)
740{
741 PPGMRAMRANGE pCur;
742
743#ifdef VBOX_STRICT
744 for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3)
745 {
746 Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfR0 == MMHyperCCToR0(pVM, pCur));
747 Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfRC == MMHyperCCToRC(pVM, pCur));
748 Assert((pCur->GCPhys & PAGE_OFFSET_MASK) == 0);
749 Assert((pCur->GCPhysLast & PAGE_OFFSET_MASK) == PAGE_OFFSET_MASK);
750 Assert((pCur->cb & PAGE_OFFSET_MASK) == 0);
751 Assert(pCur->cb == pCur->GCPhysLast - pCur->GCPhys + 1);
752 for (PPGMRAMRANGE pCur2 = pVM->pgm.s.pRamRangesXR3; pCur2; pCur2 = pCur2->pNextR3)
753 Assert( pCur2 == pCur
754 || strcmp(pCur2->pszDesc, pCur->pszDesc)); /** @todo fix MMIO ranges!! */
755 }
756#endif
757
758 pCur = pVM->pgm.s.pRamRangesXR3;
759 if (pCur)
760 {
761 pVM->pgm.s.pRamRangesXR0 = pCur->pSelfR0;
762 pVM->pgm.s.pRamRangesXRC = pCur->pSelfRC;
763
764 for (; pCur->pNextR3; pCur = pCur->pNextR3)
765 {
766 pCur->pNextR0 = pCur->pNextR3->pSelfR0;
767 pCur->pNextRC = pCur->pNextR3->pSelfRC;
768 }
769
770 Assert(pCur->pNextR0 == NIL_RTR0PTR);
771 Assert(pCur->pNextRC == NIL_RTRCPTR);
772 }
773 else
774 {
775 Assert(pVM->pgm.s.pRamRangesXR0 == NIL_RTR0PTR);
776 Assert(pVM->pgm.s.pRamRangesXRC == NIL_RTRCPTR);
777 }
778 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
779
780 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
781}
782
783
784/**
785 * Links a new RAM range into the list.
786 *
787 * @param pVM The VM handle.
788 * @param pNew Pointer to the new list entry.
789 * @param pPrev Pointer to the previous list entry. If NULL, insert as head.
790 */
791static void pgmR3PhysLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, PPGMRAMRANGE pPrev)
792{
793 AssertMsg(pNew->pszDesc, ("%RGp-%RGp\n", pNew->GCPhys, pNew->GCPhysLast));
794 Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfR0 == MMHyperCCToR0(pVM, pNew));
795 Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfRC == MMHyperCCToRC(pVM, pNew));
796
797 pgmLock(pVM);
798
799 PPGMRAMRANGE pRam = pPrev ? pPrev->pNextR3 : pVM->pgm.s.pRamRangesXR3;
800 pNew->pNextR3 = pRam;
801 pNew->pNextR0 = pRam ? pRam->pSelfR0 : NIL_RTR0PTR;
802 pNew->pNextRC = pRam ? pRam->pSelfRC : NIL_RTRCPTR;
803
804 if (pPrev)
805 {
806 pPrev->pNextR3 = pNew;
807 pPrev->pNextR0 = pNew->pSelfR0;
808 pPrev->pNextRC = pNew->pSelfRC;
809 }
810 else
811 {
812 pVM->pgm.s.pRamRangesXR3 = pNew;
813 pVM->pgm.s.pRamRangesXR0 = pNew->pSelfR0;
814 pVM->pgm.s.pRamRangesXRC = pNew->pSelfRC;
815 }
816 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
817
818 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
819 pgmUnlock(pVM);
820}
821
822
823/**
824 * Unlink an existing RAM range from the list.
825 *
826 * @param pVM The VM handle.
827 * @param pRam Pointer to the new list entry.
828 * @param pPrev Pointer to the previous list entry. If NULL, insert as head.
829 */
830static void pgmR3PhysUnlinkRamRange2(PVM pVM, PPGMRAMRANGE pRam, PPGMRAMRANGE pPrev)
831{
832 Assert(pPrev ? pPrev->pNextR3 == pRam : pVM->pgm.s.pRamRangesXR3 == pRam);
833 Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfR0 == MMHyperCCToR0(pVM, pRam));
834 Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfRC == MMHyperCCToRC(pVM, pRam));
835
836 pgmLock(pVM);
837
838 PPGMRAMRANGE pNext = pRam->pNextR3;
839 if (pPrev)
840 {
841 pPrev->pNextR3 = pNext;
842 pPrev->pNextR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR;
843 pPrev->pNextRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR;
844 }
845 else
846 {
847 Assert(pVM->pgm.s.pRamRangesXR3 == pRam);
848 pVM->pgm.s.pRamRangesXR3 = pNext;
849 pVM->pgm.s.pRamRangesXR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR;
850 pVM->pgm.s.pRamRangesXRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR;
851 }
852 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
853
854 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
855 pgmUnlock(pVM);
856}
857
858
859/**
860 * Unlink an existing RAM range from the list.
861 *
862 * @param pVM The VM handle.
863 * @param pRam Pointer to the new list entry.
864 */
865static void pgmR3PhysUnlinkRamRange(PVM pVM, PPGMRAMRANGE pRam)
866{
867 pgmLock(pVM);
868
869 /* find prev. */
870 PPGMRAMRANGE pPrev = NULL;
871 PPGMRAMRANGE pCur = pVM->pgm.s.pRamRangesXR3;
872 while (pCur != pRam)
873 {
874 pPrev = pCur;
875 pCur = pCur->pNextR3;
876 }
877 AssertFatal(pCur);
878
879 pgmR3PhysUnlinkRamRange2(pVM, pRam, pPrev);
880 pgmUnlock(pVM);
881}
882
883
884/**
885 * Frees a range of pages, replacing them with ZERO pages of the specified type.
886 *
887 * @returns VBox status code.
888 * @param pVM The VM handle.
889 * @param pRam The RAM range in which the pages resides.
890 * @param GCPhys The address of the first page.
891 * @param GCPhysLast The address of the last page.
892 * @param uType The page type to replace then with.
893 */
894static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, uint8_t uType)
895{
896 PGM_LOCK_ASSERT_OWNER(pVM);
897 uint32_t cPendingPages = 0;
898 PGMMFREEPAGESREQ pReq;
899 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
900 AssertLogRelRCReturn(rc, rc);
901
902 /* Iterate the pages. */
903 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
904 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> PAGE_SHIFT) + 1;
905 while (cPagesLeft-- > 0)
906 {
907 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys);
908 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
909
910 PGM_PAGE_SET_TYPE(pVM, pPageDst, uType);
911
912 GCPhys += PAGE_SIZE;
913 pPageDst++;
914 }
915
916 if (cPendingPages)
917 {
918 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
919 AssertLogRelRCReturn(rc, rc);
920 }
921 GMMR3FreePagesCleanup(pReq);
922
923 return rc;
924}
925
926#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
927
928/**
929 * Rendezvous callback used by PGMR3ChangeMemBalloon that changes the memory balloon size
930 *
931 * This is only called on one of the EMTs while the other ones are waiting for
932 * it to complete this function.
933 *
934 * @returns VINF_SUCCESS (VBox strict status code).
935 * @param pVM The VM handle.
936 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
937 * @param pvUser User parameter
938 */
939static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
940{
941 uintptr_t *paUser = (uintptr_t *)pvUser;
942 bool fInflate = !!paUser[0];
943 unsigned cPages = paUser[1];
944 RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2];
945 uint32_t cPendingPages = 0;
946 PGMMFREEPAGESREQ pReq;
947 int rc;
948
949 Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages));
950 pgmLock(pVM);
951
952 if (fInflate)
953 {
954 /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */
955 pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL);
956
957 /* Replace pages with ZERO pages. */
958 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
959 if (RT_FAILURE(rc))
960 {
961 pgmUnlock(pVM);
962 AssertLogRelRC(rc);
963 return rc;
964 }
965
966 /* Iterate the pages. */
967 for (unsigned i = 0; i < cPages; i++)
968 {
969 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
970 if ( pPage == NULL
971 || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM)
972 {
973 Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0));
974 break;
975 }
976
977 LogFlow(("balloon page: %RGp\n", paPhysPage[i]));
978
979 /* Flush the shadow PT if this page was previously used as a guest page table. */
980 pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]);
981
982 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i]);
983 if (RT_FAILURE(rc))
984 {
985 pgmUnlock(pVM);
986 AssertLogRelRC(rc);
987 return rc;
988 }
989 Assert(PGM_PAGE_IS_ZERO(pPage));
990 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED);
991 }
992
993 if (cPendingPages)
994 {
995 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
996 if (RT_FAILURE(rc))
997 {
998 pgmUnlock(pVM);
999 AssertLogRelRC(rc);
1000 return rc;
1001 }
1002 }
1003 GMMR3FreePagesCleanup(pReq);
1004 }
1005 else
1006 {
1007 /* Iterate the pages. */
1008 for (unsigned i = 0; i < cPages; i++)
1009 {
1010 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
1011 AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM);
1012
1013 LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i]));
1014
1015 Assert(PGM_PAGE_IS_BALLOONED(pPage));
1016
1017 /* Change back to zero page. */
1018 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1019 }
1020
1021 /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */
1022 }
1023
1024 /* Notify GMM about the balloon change. */
1025 rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages);
1026 if (RT_SUCCESS(rc))
1027 {
1028 if (!fInflate)
1029 {
1030 Assert(pVM->pgm.s.cBalloonedPages >= cPages);
1031 pVM->pgm.s.cBalloonedPages -= cPages;
1032 }
1033 else
1034 pVM->pgm.s.cBalloonedPages += cPages;
1035 }
1036
1037 pgmUnlock(pVM);
1038
1039 /* Flush the recompiler's TLB as well. */
1040 for (VMCPUID i = 0; i < pVM->cCpus; i++)
1041 CPUMSetChangedFlags(&pVM->aCpus[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
1042
1043 AssertLogRelRC(rc);
1044 return rc;
1045}
1046
1047
1048/**
1049 * Frees a range of ram pages, replacing them with ZERO pages; helper for PGMR3PhysFreeRamPages
1050 *
1051 * @returns VBox status code.
1052 * @param pVM The VM handle.
1053 * @param fInflate Inflate or deflate memory balloon
1054 * @param cPages Number of pages to free
1055 * @param paPhysPage Array of guest physical addresses
1056 */
1057static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
1058{
1059 uintptr_t paUser[3];
1060
1061 paUser[0] = fInflate;
1062 paUser[1] = cPages;
1063 paUser[2] = (uintptr_t)paPhysPage;
1064 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
1065 AssertRC(rc);
1066
1067 /* Made a copy in PGMR3PhysFreeRamPages; free it here. */
1068 RTMemFree(paPhysPage);
1069}
1070
1071#endif /* 64-bit host && (Windows || Solaris || Linux || FreeBSD) */
1072
1073/**
1074 * Inflate or deflate a memory balloon
1075 *
1076 * @returns VBox status code.
1077 * @param pVM The VM handle.
1078 * @param fInflate Inflate or deflate memory balloon
1079 * @param cPages Number of pages to free
1080 * @param paPhysPage Array of guest physical addresses
1081 */
1082VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
1083{
1084 /* This must match GMMR0Init; currently we only support memory ballooning on all 64-bit hosts except Mac OS X */
1085#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
1086 int rc;
1087
1088 /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */
1089 AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER);
1090
1091 /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock.
1092 * In the SMP case we post a request packet to postpone the job.
1093 */
1094 if (pVM->cCpus > 1)
1095 {
1096 unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]);
1097 RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage);
1098 AssertReturn(paPhysPageCopy, VERR_NO_MEMORY);
1099
1100 memcpy(paPhysPageCopy, paPhysPage, cbPhysPage);
1101
1102 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4, pVM, fInflate, cPages, paPhysPageCopy);
1103 AssertRC(rc);
1104 }
1105 else
1106 {
1107 uintptr_t paUser[3];
1108
1109 paUser[0] = fInflate;
1110 paUser[1] = cPages;
1111 paUser[2] = (uintptr_t)paPhysPage;
1112 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
1113 AssertRC(rc);
1114 }
1115 return rc;
1116
1117#else
1118 NOREF(pVM); NOREF(fInflate); NOREF(cPages); NOREF(paPhysPage);
1119 return VERR_NOT_IMPLEMENTED;
1120#endif
1121}
1122
1123
1124/**
1125 * Rendezvous callback used by PGMR3WriteProtectRAM that write protects all
1126 * physical RAM.
1127 *
1128 * This is only called on one of the EMTs while the other ones are waiting for
1129 * it to complete this function.
1130 *
1131 * @returns VINF_SUCCESS (VBox strict status code).
1132 * @param pVM The VM handle.
1133 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1134 * @param pvUser User parameter, unused.
1135 */
1136static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
1137{
1138 int rc = VINF_SUCCESS;
1139 NOREF(pvUser); NOREF(pVCpu);
1140
1141 pgmLock(pVM);
1142#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1143 pgmPoolResetDirtyPages(pVM);
1144#endif
1145
1146 /** @todo pointless to write protect the physical page pointed to by RSP. */
1147
1148 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1149 pRam;
1150 pRam = pRam->CTX_SUFF(pNext))
1151 {
1152 uint32_t cPages = pRam->cb >> PAGE_SHIFT;
1153 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1154 {
1155 PPGMPAGE pPage = &pRam->aPages[iPage];
1156 PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
1157
1158 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
1159 || enmPageType == PGMPAGETYPE_MMIO2)
1160 {
1161 /*
1162 * A RAM page.
1163 */
1164 switch (PGM_PAGE_GET_STATE(pPage))
1165 {
1166 case PGM_PAGE_STATE_ALLOCATED:
1167 /** @todo Optimize this: Don't always re-enable write
1168 * monitoring if the page is known to be very busy. */
1169 if (PGM_PAGE_IS_WRITTEN_TO(pPage))
1170 {
1171 PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage);
1172 /* Remember this dirty page for the next (memory) sync. */
1173 PGM_PAGE_SET_FT_DIRTY(pPage);
1174 }
1175
1176 pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1177 break;
1178
1179 case PGM_PAGE_STATE_SHARED:
1180 AssertFailed();
1181 break;
1182
1183 case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */
1184 default:
1185 break;
1186 }
1187 }
1188 }
1189 }
1190 pgmR3PoolWriteProtectPages(pVM);
1191 PGM_INVL_ALL_VCPU_TLBS(pVM);
1192 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1193 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
1194
1195 pgmUnlock(pVM);
1196 return rc;
1197}
1198
1199/**
1200 * Protect all physical RAM to monitor writes
1201 *
1202 * @returns VBox status code.
1203 * @param pVM The VM handle.
1204 */
1205VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM)
1206{
1207 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
1208
1209 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL);
1210 AssertRC(rc);
1211 return rc;
1212}
1213
1214/**
1215 * Enumerate all dirty FT pages.
1216 *
1217 * @returns VBox status code.
1218 * @param pVM The VM handle.
1219 * @param pfnEnum Enumerate callback handler.
1220 * @param pvUser Enumerate callback handler parameter.
1221 */
1222VMMR3DECL(int) PGMR3PhysEnumDirtyFTPages(PVM pVM, PFNPGMENUMDIRTYFTPAGES pfnEnum, void *pvUser)
1223{
1224 int rc = VINF_SUCCESS;
1225
1226 pgmLock(pVM);
1227 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1228 pRam;
1229 pRam = pRam->CTX_SUFF(pNext))
1230 {
1231 uint32_t cPages = pRam->cb >> PAGE_SHIFT;
1232 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1233 {
1234 PPGMPAGE pPage = &pRam->aPages[iPage];
1235 PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
1236
1237 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
1238 || enmPageType == PGMPAGETYPE_MMIO2)
1239 {
1240 /*
1241 * A RAM page.
1242 */
1243 switch (PGM_PAGE_GET_STATE(pPage))
1244 {
1245 case PGM_PAGE_STATE_ALLOCATED:
1246 case PGM_PAGE_STATE_WRITE_MONITORED:
1247 if ( !PGM_PAGE_IS_WRITTEN_TO(pPage) /* not very recently updated? */
1248 && PGM_PAGE_IS_FT_DIRTY(pPage))
1249 {
1250 unsigned cbPageRange = PAGE_SIZE;
1251 unsigned iPageClean = iPage + 1;
1252 RTGCPHYS GCPhysPage = pRam->GCPhys + iPage * PAGE_SIZE;
1253 uint8_t *pu8Page = NULL;
1254 PGMPAGEMAPLOCK Lock;
1255
1256 /* Find the next clean page, so we can merge adjacent dirty pages. */
1257 for (; iPageClean < cPages; iPageClean++)
1258 {
1259 PPGMPAGE pPageNext = &pRam->aPages[iPageClean];
1260 if ( RT_UNLIKELY(PGM_PAGE_GET_TYPE(pPageNext) != PGMPAGETYPE_RAM)
1261 || PGM_PAGE_GET_STATE(pPageNext) != PGM_PAGE_STATE_ALLOCATED
1262 || PGM_PAGE_IS_WRITTEN_TO(pPageNext)
1263 || !PGM_PAGE_IS_FT_DIRTY(pPageNext)
1264 /* Crossing a chunk boundary? */
1265 || (GCPhysPage & GMM_PAGEID_IDX_MASK) != ((GCPhysPage + cbPageRange) & GMM_PAGEID_IDX_MASK)
1266 )
1267 break;
1268
1269 cbPageRange += PAGE_SIZE;
1270 }
1271
1272 rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhysPage, (const void **)&pu8Page, &Lock);
1273 if (RT_SUCCESS(rc))
1274 {
1275 /** @todo this is risky; the range might be changed, but little choice as the sync
1276 * costs a lot of time. */
1277 pgmUnlock(pVM);
1278 pfnEnum(pVM, GCPhysPage, pu8Page, cbPageRange, pvUser);
1279 pgmLock(pVM);
1280 PGMPhysReleasePageMappingLock(pVM, &Lock);
1281 }
1282
1283 for (iPage; iPage < iPageClean; iPage++)
1284 PGM_PAGE_CLEAR_FT_DIRTY(&pRam->aPages[iPage]);
1285
1286 iPage = iPageClean - 1;
1287 }
1288 break;
1289 }
1290 }
1291 }
1292 }
1293 pgmUnlock(pVM);
1294 return rc;
1295}
1296
1297
1298/**
1299 * Gets the number of ram ranges.
1300 *
1301 * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid.
1302 * @param pVM The VM handle.
1303 */
1304VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM)
1305{
1306 VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX);
1307
1308 pgmLock(pVM);
1309 uint32_t cRamRanges = 0;
1310 for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext))
1311 cRamRanges++;
1312 pgmUnlock(pVM);
1313 return cRamRanges;
1314}
1315
1316
1317/**
1318 * Get information about a range.
1319 *
1320 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE.
1321 * @param pVM The VM handle
1322 * @param iRange The ordinal of the range.
1323 * @param pGCPhysStart Where to return the start of the range. Optional.
1324 * @param pGCPhysLast Where to return the address of the last byte in the
1325 * range. Optional.
1326 * @param pfIsMmio Where to indicate that this is a pure MMIO range.
1327 * Optional.
1328 */
1329VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast,
1330 const char **ppszDesc, bool *pfIsMmio)
1331{
1332 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1333
1334 pgmLock(pVM);
1335 uint32_t iCurRange = 0;
1336 for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext), iCurRange++)
1337 if (iCurRange == iRange)
1338 {
1339 if (pGCPhysStart)
1340 *pGCPhysStart = pCur->GCPhys;
1341 if (pGCPhysLast)
1342 *pGCPhysLast = pCur->GCPhysLast;
1343 if (ppszDesc)
1344 *ppszDesc = pCur->pszDesc;
1345 if (pfIsMmio)
1346 *pfIsMmio = !!(pCur->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO);
1347
1348 pgmUnlock(pVM);
1349 return VINF_SUCCESS;
1350 }
1351 pgmUnlock(pVM);
1352 return VERR_OUT_OF_RANGE;
1353}
1354
1355
1356/**
1357 * Query the amount of free memory inside VMMR0
1358 *
1359 * @returns VBox status code.
1360 * @param pVM The VM handle.
1361 * @param pcbAllocMem Where to return the amount of memory allocated
1362 * by VMs.
1363 * @param pcbFreeMem Where to return the amount of memory that is
1364 * allocated from the host but not currently used
1365 * by any VMs.
1366 * @param pcbBallonedMem Where to return the sum of memory that is
1367 * currently ballooned by the VMs.
1368 * @param pcbSharedMem Where to return the amount of memory that is
1369 * currently shared.
1370 */
1371VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PVM pVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem,
1372 uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem)
1373{
1374 uint64_t cAllocPages = 0;
1375 uint64_t cFreePages = 0;
1376 uint64_t cBalloonPages = 0;
1377 uint64_t cSharedPages = 0;
1378 int rc = GMMR3QueryHypervisorMemoryStats(pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages);
1379 AssertRCReturn(rc, rc);
1380
1381 if (pcbAllocMem)
1382 *pcbAllocMem = cAllocPages * _4K;
1383
1384 if (pcbFreeMem)
1385 *pcbFreeMem = cFreePages * _4K;
1386
1387 if (pcbBallonedMem)
1388 *pcbBallonedMem = cBalloonPages * _4K;
1389
1390 if (pcbSharedMem)
1391 *pcbSharedMem = cSharedPages * _4K;
1392
1393 Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n",
1394 cAllocPages, cFreePages, cBalloonPages, cSharedPages));
1395 return VINF_SUCCESS;
1396}
1397
1398
1399/**
1400 * Query memory stats for the VM.
1401 *
1402 * @returns VBox status code.
1403 * @param pVM The VM handle.
1404 * @param pcbTotalMem Where to return total amount memory the VM may
1405 * possibly use.
1406 * @param pcbPrivateMem Where to return the amount of private memory
1407 * currently allocated.
1408 * @param pcbSharedMem Where to return the amount of actually shared
1409 * memory currently used by the VM.
1410 * @param pcbZeroMem Where to return the amount of memory backed by
1411 * zero pages.
1412 *
1413 * @remarks The total mem is normally larger than the sum of the three
1414 * components. There are two reasons for this, first the amount of
1415 * shared memory is what we're sure is shared instead of what could
1416 * possibly be shared with someone. Secondly, because the total may
1417 * include some pure MMIO pages that doesn't go into any of the three
1418 * sub-counts.
1419 *
1420 * @todo Why do we return reused shared pages instead of anything that could
1421 * potentially be shared? Doesn't this mean the first VM gets a much
1422 * lower number of shared pages?
1423 */
1424VMMR3DECL(int) PGMR3QueryMemoryStats(PVM pVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem,
1425 uint64_t *pcbSharedMem, uint64_t *pcbZeroMem)
1426{
1427 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1428
1429 if (pcbTotalMem)
1430 *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * PAGE_SIZE;
1431
1432 if (pcbPrivateMem)
1433 *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * PAGE_SIZE;
1434
1435 if (pcbSharedMem)
1436 *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * PAGE_SIZE;
1437
1438 if (pcbZeroMem)
1439 *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * PAGE_SIZE;
1440
1441 Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages));
1442 return VINF_SUCCESS;
1443}
1444
1445
1446/**
1447 * PGMR3PhysRegisterRam worker that initializes and links a RAM range.
1448 *
1449 * @param pVM The VM handle.
1450 * @param pNew The new RAM range.
1451 * @param GCPhys The address of the RAM range.
1452 * @param GCPhysLast The last address of the RAM range.
1453 * @param RCPtrNew The RC address if the range is floating. NIL_RTRCPTR
1454 * if in HMA.
1455 * @param R0PtrNew Ditto for R0.
1456 * @param pszDesc The description.
1457 * @param pPrev The previous RAM range (for linking).
1458 */
1459static void pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast,
1460 RTRCPTR RCPtrNew, RTR0PTR R0PtrNew, const char *pszDesc, PPGMRAMRANGE pPrev)
1461{
1462 /*
1463 * Initialize the range.
1464 */
1465 pNew->pSelfR0 = R0PtrNew != NIL_RTR0PTR ? R0PtrNew : MMHyperCCToR0(pVM, pNew);
1466 pNew->pSelfRC = RCPtrNew != NIL_RTRCPTR ? RCPtrNew : MMHyperCCToRC(pVM, pNew);
1467 pNew->GCPhys = GCPhys;
1468 pNew->GCPhysLast = GCPhysLast;
1469 pNew->cb = GCPhysLast - GCPhys + 1;
1470 pNew->pszDesc = pszDesc;
1471 pNew->fFlags = RCPtrNew != NIL_RTRCPTR ? PGM_RAM_RANGE_FLAGS_FLOATING : 0;
1472 pNew->pvR3 = NULL;
1473 pNew->paLSPages = NULL;
1474
1475 uint32_t const cPages = pNew->cb >> PAGE_SHIFT;
1476 RTGCPHYS iPage = cPages;
1477 while (iPage-- > 0)
1478 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM);
1479
1480 /* Update the page count stats. */
1481 pVM->pgm.s.cZeroPages += cPages;
1482 pVM->pgm.s.cAllPages += cPages;
1483
1484 /*
1485 * Link it.
1486 */
1487 pgmR3PhysLinkRamRange(pVM, pNew, pPrev);
1488}
1489
1490
1491/**
1492 * Relocate a floating RAM range.
1493 *
1494 * @copydoc FNPGMRELOCATE.
1495 */
1496static DECLCALLBACK(bool) pgmR3PhysRamRangeRelocate(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, PGMRELOCATECALL enmMode, void *pvUser)
1497{
1498 PPGMRAMRANGE pRam = (PPGMRAMRANGE)pvUser;
1499 Assert(pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING);
1500 Assert(pRam->pSelfRC == GCPtrOld + PAGE_SIZE);
1501
1502 switch (enmMode)
1503 {
1504 case PGMRELOCATECALL_SUGGEST:
1505 return true;
1506
1507 case PGMRELOCATECALL_RELOCATE:
1508 {
1509 /*
1510 * Update myself, then relink all the ranges and flush the RC TLB.
1511 */
1512 pgmLock(pVM);
1513
1514 pRam->pSelfRC = (RTRCPTR)(GCPtrNew + PAGE_SIZE);
1515
1516 pgmR3PhysRelinkRamRanges(pVM);
1517 for (unsigned i = 0; i < PGM_RAMRANGE_TLB_ENTRIES; i++)
1518 pVM->pgm.s.apRamRangesTlbRC[i] = NIL_RTRCPTR;
1519
1520 pgmUnlock(pVM);
1521 return true;
1522 }
1523
1524 default:
1525 AssertFailedReturn(false);
1526 }
1527}
1528
1529
1530/**
1531 * PGMR3PhysRegisterRam worker that registers a high chunk.
1532 *
1533 * @returns VBox status code.
1534 * @param pVM The VM handle.
1535 * @param GCPhys The address of the RAM.
1536 * @param cRamPages The number of RAM pages to register.
1537 * @param cbChunk The size of the PGMRAMRANGE guest mapping.
1538 * @param iChunk The chunk number.
1539 * @param pszDesc The RAM range description.
1540 * @param ppPrev Previous RAM range pointer. In/Out.
1541 */
1542static int pgmR3PhysRegisterHighRamChunk(PVM pVM, RTGCPHYS GCPhys, uint32_t cRamPages,
1543 uint32_t cbChunk, uint32_t iChunk, const char *pszDesc,
1544 PPGMRAMRANGE *ppPrev)
1545{
1546 const char *pszDescChunk = iChunk == 0
1547 ? pszDesc
1548 : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, iChunk + 1);
1549 AssertReturn(pszDescChunk, VERR_NO_MEMORY);
1550
1551 /*
1552 * Allocate memory for the new chunk.
1553 */
1554 size_t const cChunkPages = RT_ALIGN_Z(RT_UOFFSETOF(PGMRAMRANGE, aPages[cRamPages]), PAGE_SIZE) >> PAGE_SHIFT;
1555 PSUPPAGE paChunkPages = (PSUPPAGE)RTMemTmpAllocZ(sizeof(SUPPAGE) * cChunkPages);
1556 AssertReturn(paChunkPages, VERR_NO_TMP_MEMORY);
1557 RTR0PTR R0PtrChunk = NIL_RTR0PTR;
1558 void *pvChunk = NULL;
1559 int rc = SUPR3PageAllocEx(cChunkPages, 0 /*fFlags*/, &pvChunk,
1560#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
1561 VMMIsHwVirtExtForced(pVM) ? &R0PtrChunk : NULL,
1562#else
1563 NULL,
1564#endif
1565 paChunkPages);
1566 if (RT_SUCCESS(rc))
1567 {
1568#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
1569 if (!VMMIsHwVirtExtForced(pVM))
1570 R0PtrChunk = NIL_RTR0PTR;
1571#else
1572 R0PtrChunk = (uintptr_t)pvChunk;
1573#endif
1574 memset(pvChunk, 0, cChunkPages << PAGE_SHIFT);
1575
1576 PPGMRAMRANGE pNew = (PPGMRAMRANGE)pvChunk;
1577
1578 /*
1579 * Create a mapping and map the pages into it.
1580 * We push these in below the HMA.
1581 */
1582 RTGCPTR GCPtrChunkMap = pVM->pgm.s.GCPtrPrevRamRangeMapping - cbChunk;
1583 rc = PGMR3MapPT(pVM, GCPtrChunkMap, cbChunk, 0 /*fFlags*/, pgmR3PhysRamRangeRelocate, pNew, pszDescChunk);
1584 if (RT_SUCCESS(rc))
1585 {
1586 pVM->pgm.s.GCPtrPrevRamRangeMapping = GCPtrChunkMap;
1587
1588 RTGCPTR const GCPtrChunk = GCPtrChunkMap + PAGE_SIZE;
1589 RTGCPTR GCPtrPage = GCPtrChunk;
1590 for (uint32_t iPage = 0; iPage < cChunkPages && RT_SUCCESS(rc); iPage++, GCPtrPage += PAGE_SIZE)
1591 rc = PGMMap(pVM, GCPtrPage, paChunkPages[iPage].Phys, PAGE_SIZE, 0);
1592 if (RT_SUCCESS(rc))
1593 {
1594 /*
1595 * Ok, init and link the range.
1596 */
1597 pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhys + ((RTGCPHYS)cRamPages << PAGE_SHIFT) - 1,
1598 (RTRCPTR)GCPtrChunk, R0PtrChunk, pszDescChunk, *ppPrev);
1599 *ppPrev = pNew;
1600 }
1601 }
1602
1603 if (RT_FAILURE(rc))
1604 SUPR3PageFreeEx(pvChunk, cChunkPages);
1605 }
1606
1607 RTMemTmpFree(paChunkPages);
1608 return rc;
1609}
1610
1611
1612/**
1613 * Sets up a range RAM.
1614 *
1615 * This will check for conflicting registrations, make a resource
1616 * reservation for the memory (with GMM), and setup the per-page
1617 * tracking structures (PGMPAGE).
1618 *
1619 * @returns VBox status code.
1620 * @param pVM The VM handle.
1621 * @param GCPhys The physical address of the RAM.
1622 * @param cb The size of the RAM.
1623 * @param pszDesc The description - not copied, so, don't free or change it.
1624 */
1625VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc)
1626{
1627 /*
1628 * Validate input.
1629 */
1630 Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc));
1631 AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
1632 AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
1633 AssertReturn(cb > 0, VERR_INVALID_PARAMETER);
1634 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
1635 AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER);
1636 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
1637 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
1638
1639 pgmLock(pVM);
1640
1641 /*
1642 * Find range location and check for conflicts.
1643 * (We don't lock here because the locking by EMT is only required on update.)
1644 */
1645 PPGMRAMRANGE pPrev = NULL;
1646 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
1647 while (pRam && GCPhysLast >= pRam->GCPhys)
1648 {
1649 if ( GCPhysLast >= pRam->GCPhys
1650 && GCPhys <= pRam->GCPhysLast)
1651 AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
1652 GCPhys, GCPhysLast, pszDesc,
1653 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
1654 VERR_PGM_RAM_CONFLICT);
1655
1656 /* next */
1657 pPrev = pRam;
1658 pRam = pRam->pNextR3;
1659 }
1660
1661 /*
1662 * Register it with GMM (the API bitches).
1663 */
1664 const RTGCPHYS cPages = cb >> PAGE_SHIFT;
1665 int rc = MMR3IncreaseBaseReservation(pVM, cPages);
1666 if (RT_FAILURE(rc))
1667 {
1668 pgmUnlock(pVM);
1669 return rc;
1670 }
1671
1672 if ( GCPhys >= _4G
1673 && cPages > 256)
1674 {
1675 /*
1676 * The PGMRAMRANGE structures for the high memory can get very big.
1677 * In order to avoid SUPR3PageAllocEx allocation failures due to the
1678 * allocation size limit there and also to avoid being unable to find
1679 * guest mapping space for them, we split this memory up into 4MB in
1680 * (potential) raw-mode configs and 16MB chunks in forced AMD-V/VT-x
1681 * mode.
1682 *
1683 * The first and last page of each mapping are guard pages and marked
1684 * not-present. So, we've got 4186112 and 16769024 bytes available for
1685 * the PGMRAMRANGE structure.
1686 *
1687 * Note! The sizes used here will influence the saved state.
1688 */
1689 uint32_t cbChunk;
1690 uint32_t cPagesPerChunk;
1691 if (VMMIsHwVirtExtForced(pVM))
1692 {
1693 cbChunk = 16U*_1M;
1694 cPagesPerChunk = 1048048; /* max ~1048059 */
1695 AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 1048048 < 16U*_1M - PAGE_SIZE * 2);
1696 }
1697 else
1698 {
1699 cbChunk = 4U*_1M;
1700 cPagesPerChunk = 261616; /* max ~261627 */
1701 AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 261616 < 4U*_1M - PAGE_SIZE * 2);
1702 }
1703 AssertRelease(RT_UOFFSETOF(PGMRAMRANGE, aPages[cPagesPerChunk]) + PAGE_SIZE * 2 <= cbChunk);
1704
1705 RTGCPHYS cPagesLeft = cPages;
1706 RTGCPHYS GCPhysChunk = GCPhys;
1707 uint32_t iChunk = 0;
1708 while (cPagesLeft > 0)
1709 {
1710 uint32_t cPagesInChunk = cPagesLeft;
1711 if (cPagesInChunk > cPagesPerChunk)
1712 cPagesInChunk = cPagesPerChunk;
1713
1714 rc = pgmR3PhysRegisterHighRamChunk(pVM, GCPhysChunk, cPagesInChunk, cbChunk, iChunk, pszDesc, &pPrev);
1715 AssertRCReturn(rc, rc);
1716
1717 /* advance */
1718 GCPhysChunk += (RTGCPHYS)cPagesInChunk << PAGE_SHIFT;
1719 cPagesLeft -= cPagesInChunk;
1720 iChunk++;
1721 }
1722 }
1723 else
1724 {
1725 /*
1726 * Allocate, initialize and link the new RAM range.
1727 */
1728 const size_t cbRamRange = RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]);
1729 PPGMRAMRANGE pNew;
1730 rc = MMR3HyperAllocOnceNoRel(pVM, cbRamRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew);
1731 AssertLogRelMsgRCReturn(rc, ("cbRamRange=%zu\n", cbRamRange), rc);
1732
1733 pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhysLast, NIL_RTRCPTR, NIL_RTR0PTR, pszDesc, pPrev);
1734 }
1735 pgmPhysInvalidatePageMapTLB(pVM);
1736 pgmUnlock(pVM);
1737
1738#ifdef VBOX_WITH_REM
1739 /*
1740 * Notify REM.
1741 */
1742 REMR3NotifyPhysRamRegister(pVM, GCPhys, cb, REM_NOTIFY_PHYS_RAM_FLAGS_RAM);
1743#endif
1744
1745 return VINF_SUCCESS;
1746}
1747
1748
1749/**
1750 * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM.
1751 *
1752 * We do this late in the init process so that all the ROM and MMIO ranges have
1753 * been registered already and we don't go wasting memory on them.
1754 *
1755 * @returns VBox status code.
1756 *
1757 * @param pVM The VM handle.
1758 */
1759int pgmR3PhysRamPreAllocate(PVM pVM)
1760{
1761 Assert(pVM->pgm.s.fRamPreAlloc);
1762 Log(("pgmR3PhysRamPreAllocate: enter\n"));
1763
1764 /*
1765 * Walk the RAM ranges and allocate all RAM pages, halt at
1766 * the first allocation error.
1767 */
1768 uint64_t cPages = 0;
1769 uint64_t NanoTS = RTTimeNanoTS();
1770 pgmLock(pVM);
1771 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
1772 {
1773 PPGMPAGE pPage = &pRam->aPages[0];
1774 RTGCPHYS GCPhys = pRam->GCPhys;
1775 uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
1776 while (cLeft-- > 0)
1777 {
1778 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1779 {
1780 switch (PGM_PAGE_GET_STATE(pPage))
1781 {
1782 case PGM_PAGE_STATE_ZERO:
1783 {
1784 int rc = pgmPhysAllocPage(pVM, pPage, GCPhys);
1785 if (RT_FAILURE(rc))
1786 {
1787 LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc));
1788 pgmUnlock(pVM);
1789 return rc;
1790 }
1791 cPages++;
1792 break;
1793 }
1794
1795 case PGM_PAGE_STATE_BALLOONED:
1796 case PGM_PAGE_STATE_ALLOCATED:
1797 case PGM_PAGE_STATE_WRITE_MONITORED:
1798 case PGM_PAGE_STATE_SHARED:
1799 /* nothing to do here. */
1800 break;
1801 }
1802 }
1803
1804 /* next */
1805 pPage++;
1806 GCPhys += PAGE_SIZE;
1807 }
1808 }
1809 pgmUnlock(pVM);
1810 NanoTS = RTTimeNanoTS() - NanoTS;
1811
1812 LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000));
1813 Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n"));
1814 return VINF_SUCCESS;
1815}
1816
1817
1818/**
1819 * Resets (zeros) the RAM.
1820 *
1821 * ASSUMES that the caller owns the PGM lock.
1822 *
1823 * @returns VBox status code.
1824 * @param pVM The VM handle.
1825 */
1826int pgmR3PhysRamReset(PVM pVM)
1827{
1828 PGM_LOCK_ASSERT_OWNER(pVM);
1829
1830 /* Reset the memory balloon. */
1831 int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
1832 AssertRC(rc);
1833
1834#ifdef VBOX_WITH_PAGE_SHARING
1835 /* Clear all registered shared modules. */
1836 rc = GMMR3ResetSharedModules(pVM);
1837 AssertRC(rc);
1838#endif
1839 /* Reset counters. */
1840 pVM->pgm.s.cReusedSharedPages = 0;
1841 pVM->pgm.s.cBalloonedPages = 0;
1842
1843 /*
1844 * We batch up pages that should be freed instead of calling GMM for
1845 * each and every one of them.
1846 */
1847 uint32_t cPendingPages = 0;
1848 PGMMFREEPAGESREQ pReq;
1849 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1850 AssertLogRelRCReturn(rc, rc);
1851
1852 /*
1853 * Walk the ram ranges.
1854 */
1855 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
1856 {
1857 uint32_t iPage = pRam->cb >> PAGE_SHIFT;
1858 AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb));
1859
1860#ifndef NO_RAM_RESET
1861 if (!pVM->pgm.s.fRamPreAlloc)
1862#else
1863 if (0)
1864#endif
1865 {
1866 /* Replace all RAM pages by ZERO pages. */
1867 while (iPage-- > 0)
1868 {
1869 PPGMPAGE pPage = &pRam->aPages[iPage];
1870 switch (PGM_PAGE_GET_TYPE(pPage))
1871 {
1872 case PGMPAGETYPE_RAM:
1873 /* Do not replace pages part of a 2 MB continuous range
1874 with zero pages, but zero them instead. */
1875 if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
1876 || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
1877 {
1878 void *pvPage;
1879 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage);
1880 AssertLogRelRCReturn(rc, rc);
1881 ASMMemZeroPage(pvPage);
1882 }
1883 else if (PGM_PAGE_IS_BALLOONED(pPage))
1884 {
1885 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
1886 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1887 }
1888 else if (!PGM_PAGE_IS_ZERO(pPage))
1889 {
1890 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1891 AssertLogRelRCReturn(rc, rc);
1892 }
1893 break;
1894
1895 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
1896 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT),
1897 true /*fDoAccounting*/);
1898 break;
1899
1900 case PGMPAGETYPE_MMIO2:
1901 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
1902 case PGMPAGETYPE_ROM:
1903 case PGMPAGETYPE_MMIO:
1904 break;
1905 default:
1906 AssertFailed();
1907 }
1908 } /* for each page */
1909 }
1910 else
1911 {
1912 /* Zero the memory. */
1913 while (iPage-- > 0)
1914 {
1915 PPGMPAGE pPage = &pRam->aPages[iPage];
1916 switch (PGM_PAGE_GET_TYPE(pPage))
1917 {
1918 case PGMPAGETYPE_RAM:
1919 switch (PGM_PAGE_GET_STATE(pPage))
1920 {
1921 case PGM_PAGE_STATE_ZERO:
1922 break;
1923
1924 case PGM_PAGE_STATE_BALLOONED:
1925 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
1926 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1927 break;
1928
1929 case PGM_PAGE_STATE_SHARED:
1930 case PGM_PAGE_STATE_WRITE_MONITORED:
1931 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1932 AssertLogRelRCReturn(rc, rc);
1933 /* no break */
1934
1935 case PGM_PAGE_STATE_ALLOCATED:
1936 {
1937 void *pvPage;
1938 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage);
1939 AssertLogRelRCReturn(rc, rc);
1940#ifndef NO_RAM_RESET
1941 ASMMemZeroPage(pvPage);
1942#endif
1943 break;
1944 }
1945 }
1946 break;
1947
1948 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
1949 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT),
1950 true /*fDoAccounting*/);
1951 break;
1952
1953 case PGMPAGETYPE_MMIO2:
1954 case PGMPAGETYPE_ROM_SHADOW:
1955 case PGMPAGETYPE_ROM:
1956 case PGMPAGETYPE_MMIO:
1957 break;
1958 default:
1959 AssertFailed();
1960
1961 }
1962 } /* for each page */
1963 }
1964
1965 }
1966
1967 /*
1968 * Finish off any pages pending freeing.
1969 */
1970 if (cPendingPages)
1971 {
1972 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
1973 AssertLogRelRCReturn(rc, rc);
1974 }
1975 GMMR3FreePagesCleanup(pReq);
1976
1977 return VINF_SUCCESS;
1978}
1979
1980/**
1981 * Frees all RAM during VM termination
1982 *
1983 * ASSUMES that the caller owns the PGM lock.
1984 *
1985 * @returns VBox status code.
1986 * @param pVM The VM handle.
1987 */
1988int pgmR3PhysRamTerm(PVM pVM)
1989{
1990 PGM_LOCK_ASSERT_OWNER(pVM);
1991
1992 /* Reset the memory balloon. */
1993 int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
1994 AssertRC(rc);
1995
1996#ifdef VBOX_WITH_PAGE_SHARING
1997 /* Clear all registered shared modules. */
1998 rc = GMMR3ResetSharedModules(pVM);
1999 AssertRC(rc);
2000#endif
2001
2002 /*
2003 * We batch up pages that should be freed instead of calling GMM for
2004 * each and every one of them.
2005 */
2006 uint32_t cPendingPages = 0;
2007 PGMMFREEPAGESREQ pReq;
2008 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2009 AssertLogRelRCReturn(rc, rc);
2010
2011 /*
2012 * Walk the ram ranges.
2013 */
2014 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
2015 {
2016 uint32_t iPage = pRam->cb >> PAGE_SHIFT;
2017 AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb));
2018
2019 /* Replace all RAM pages by ZERO pages. */
2020 while (iPage-- > 0)
2021 {
2022 PPGMPAGE pPage = &pRam->aPages[iPage];
2023 switch (PGM_PAGE_GET_TYPE(pPage))
2024 {
2025 case PGMPAGETYPE_RAM:
2026 /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */
2027 if (PGM_PAGE_IS_SHARED(pPage))
2028 {
2029 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
2030 AssertLogRelRCReturn(rc, rc);
2031 }
2032 break;
2033
2034 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2035 case PGMPAGETYPE_MMIO2:
2036 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2037 case PGMPAGETYPE_ROM:
2038 case PGMPAGETYPE_MMIO:
2039 break;
2040 default:
2041 AssertFailed();
2042 }
2043 } /* for each page */
2044 }
2045
2046 /*
2047 * Finish off any pages pending freeing.
2048 */
2049 if (cPendingPages)
2050 {
2051 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2052 AssertLogRelRCReturn(rc, rc);
2053 }
2054 GMMR3FreePagesCleanup(pReq);
2055 return VINF_SUCCESS;
2056}
2057
2058/**
2059 * This is the interface IOM is using to register an MMIO region.
2060 *
2061 * It will check for conflicts and ensure that a RAM range structure
2062 * is present before calling the PGMR3HandlerPhysicalRegister API to
2063 * register the callbacks.
2064 *
2065 * @returns VBox status code.
2066 *
2067 * @param pVM The VM handle.
2068 * @param GCPhys The start of the MMIO region.
2069 * @param cb The size of the MMIO region.
2070 * @param pfnHandlerR3 The address of the ring-3 handler. (IOMR3MMIOHandler)
2071 * @param pvUserR3 The user argument for R3.
2072 * @param pfnHandlerR0 The address of the ring-0 handler. (IOMMMIOHandler)
2073 * @param pvUserR0 The user argument for R0.
2074 * @param pfnHandlerRC The address of the RC handler. (IOMMMIOHandler)
2075 * @param pvUserRC The user argument for RC.
2076 * @param pszDesc The description of the MMIO region.
2077 */
2078VMMR3DECL(int) PGMR3PhysMMIORegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb,
2079 R3PTRTYPE(PFNPGMR3PHYSHANDLER) pfnHandlerR3, RTR3PTR pvUserR3,
2080 R0PTRTYPE(PFNPGMR0PHYSHANDLER) pfnHandlerR0, RTR0PTR pvUserR0,
2081 RCPTRTYPE(PFNPGMRCPHYSHANDLER) pfnHandlerRC, RTRCPTR pvUserRC,
2082 R3PTRTYPE(const char *) pszDesc)
2083{
2084 /*
2085 * Assert on some assumption.
2086 */
2087 VM_ASSERT_EMT(pVM);
2088 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2089 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2090 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2091 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
2092
2093 int rc = pgmLock(pVM);
2094 AssertRCReturn(rc, rc);
2095
2096 /*
2097 * Make sure there's a RAM range structure for the region.
2098 */
2099 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
2100 bool fRamExists = false;
2101 PPGMRAMRANGE pRamPrev = NULL;
2102 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2103 while (pRam && GCPhysLast >= pRam->GCPhys)
2104 {
2105 if ( GCPhysLast >= pRam->GCPhys
2106 && GCPhys <= pRam->GCPhysLast)
2107 {
2108 /* Simplification: all within the same range. */
2109 AssertLogRelMsgReturnStmt( GCPhys >= pRam->GCPhys
2110 && GCPhysLast <= pRam->GCPhysLast,
2111 ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n",
2112 GCPhys, GCPhysLast, pszDesc,
2113 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
2114 pgmUnlock(pVM),
2115 VERR_PGM_RAM_CONFLICT);
2116
2117 /* Check that it's all RAM or MMIO pages. */
2118 PCPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2119 uint32_t cLeft = cb >> PAGE_SHIFT;
2120 while (cLeft-- > 0)
2121 {
2122 AssertLogRelMsgReturnStmt( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2123 || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO,
2124 ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n",
2125 GCPhys, GCPhysLast, pszDesc, PGM_PAGE_GET_TYPE(pPage), pRam->pszDesc),
2126 pgmUnlock(pVM),
2127 VERR_PGM_RAM_CONFLICT);
2128 pPage++;
2129 }
2130
2131 /* Looks good. */
2132 fRamExists = true;
2133 break;
2134 }
2135
2136 /* next */
2137 pRamPrev = pRam;
2138 pRam = pRam->pNextR3;
2139 }
2140 PPGMRAMRANGE pNew;
2141 if (fRamExists)
2142 {
2143 pNew = NULL;
2144
2145 /*
2146 * Make all the pages in the range MMIO/ZERO pages, freeing any
2147 * RAM pages currently mapped here. This might not be 100% correct
2148 * for PCI memory, but we're doing the same thing for MMIO2 pages.
2149 */
2150 rc = pgmR3PhysFreePageRange(pVM, pRam, GCPhys, GCPhysLast, PGMPAGETYPE_MMIO);
2151 AssertRCReturnStmt(rc, pgmUnlock(pVM), rc);
2152
2153 /* Force a PGM pool flush as guest ram references have been changed. */
2154 /** @todo not entirely SMP safe; assuming for now the guest takes
2155 * care of this internally (not touch mapped mmio while changing the
2156 * mapping). */
2157 PVMCPU pVCpu = VMMGetCpu(pVM);
2158 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2159 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2160 }
2161 else
2162 {
2163
2164 /*
2165 * No RAM range, insert an ad hoc one.
2166 *
2167 * Note that we don't have to tell REM about this range because
2168 * PGMHandlerPhysicalRegisterEx will do that for us.
2169 */
2170 Log(("PGMR3PhysMMIORegister: Adding ad hoc MMIO range for %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc));
2171
2172 const uint32_t cPages = cb >> PAGE_SHIFT;
2173 const size_t cbRamRange = RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]);
2174 rc = MMHyperAlloc(pVM, RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]), 16, MM_TAG_PGM_PHYS, (void **)&pNew);
2175 AssertLogRelMsgRCReturnStmt(rc, ("cbRamRange=%zu\n", cbRamRange), pgmUnlock(pVM), rc);
2176
2177 /* Initialize the range. */
2178 pNew->pSelfR0 = MMHyperCCToR0(pVM, pNew);
2179 pNew->pSelfRC = MMHyperCCToRC(pVM, pNew);
2180 pNew->GCPhys = GCPhys;
2181 pNew->GCPhysLast = GCPhysLast;
2182 pNew->cb = cb;
2183 pNew->pszDesc = pszDesc;
2184 pNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO;
2185 pNew->pvR3 = NULL;
2186 pNew->paLSPages = NULL;
2187
2188 uint32_t iPage = cPages;
2189 while (iPage-- > 0)
2190 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO);
2191 Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO);
2192
2193 /* update the page count stats. */
2194 pVM->pgm.s.cPureMmioPages += cPages;
2195 pVM->pgm.s.cAllPages += cPages;
2196
2197 /* link it */
2198 pgmR3PhysLinkRamRange(pVM, pNew, pRamPrev);
2199 }
2200
2201 /*
2202 * Register the access handler.
2203 */
2204 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_MMIO, GCPhys, GCPhysLast,
2205 pfnHandlerR3, pvUserR3,
2206 pfnHandlerR0, pvUserR0,
2207 pfnHandlerRC, pvUserRC, pszDesc);
2208 if ( RT_FAILURE(rc)
2209 && !fRamExists)
2210 {
2211 pVM->pgm.s.cPureMmioPages -= cb >> PAGE_SHIFT;
2212 pVM->pgm.s.cAllPages -= cb >> PAGE_SHIFT;
2213
2214 /* remove the ad hoc range. */
2215 pgmR3PhysUnlinkRamRange2(pVM, pNew, pRamPrev);
2216 pNew->cb = pNew->GCPhys = pNew->GCPhysLast = NIL_RTGCPHYS;
2217 MMHyperFree(pVM, pRam);
2218 }
2219 pgmPhysInvalidatePageMapTLB(pVM);
2220
2221 pgmUnlock(pVM);
2222 return rc;
2223}
2224
2225
2226/**
2227 * This is the interface IOM is using to register an MMIO region.
2228 *
2229 * It will take care of calling PGMHandlerPhysicalDeregister and clean up
2230 * any ad hoc PGMRAMRANGE left behind.
2231 *
2232 * @returns VBox status code.
2233 * @param pVM The VM handle.
2234 * @param GCPhys The start of the MMIO region.
2235 * @param cb The size of the MMIO region.
2236 */
2237VMMR3DECL(int) PGMR3PhysMMIODeregister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb)
2238{
2239 VM_ASSERT_EMT(pVM);
2240
2241 int rc = pgmLock(pVM);
2242 AssertRCReturn(rc, rc);
2243
2244 /*
2245 * First deregister the handler, then check if we should remove the ram range.
2246 */
2247 rc = PGMHandlerPhysicalDeregister(pVM, GCPhys);
2248 if (RT_SUCCESS(rc))
2249 {
2250 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
2251 PPGMRAMRANGE pRamPrev = NULL;
2252 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2253 while (pRam && GCPhysLast >= pRam->GCPhys)
2254 {
2255 /** @todo We're being a bit too careful here. rewrite. */
2256 if ( GCPhysLast == pRam->GCPhysLast
2257 && GCPhys == pRam->GCPhys)
2258 {
2259 Assert(pRam->cb == cb);
2260
2261 /*
2262 * See if all the pages are dead MMIO pages.
2263 */
2264 uint32_t const cPages = cb >> PAGE_SHIFT;
2265 bool fAllMMIO = true;
2266 uint32_t iPage = 0;
2267 uint32_t cLeft = cPages;
2268 while (cLeft-- > 0)
2269 {
2270 PPGMPAGE pPage = &pRam->aPages[iPage];
2271 if ( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_MMIO
2272 /*|| not-out-of-action later */)
2273 {
2274 fAllMMIO = false;
2275 Assert(PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_MMIO2_ALIAS_MMIO);
2276 AssertMsgFailed(("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2277 break;
2278 }
2279 Assert(PGM_PAGE_IS_ZERO(pPage));
2280 pPage++;
2281 }
2282 if (fAllMMIO)
2283 {
2284 /*
2285 * Ad-hoc range, unlink and free it.
2286 */
2287 Log(("PGMR3PhysMMIODeregister: Freeing ad hoc MMIO range for %RGp-%RGp %s\n",
2288 GCPhys, GCPhysLast, pRam->pszDesc));
2289
2290 pVM->pgm.s.cAllPages -= cPages;
2291 pVM->pgm.s.cPureMmioPages -= cPages;
2292
2293 pgmR3PhysUnlinkRamRange2(pVM, pRam, pRamPrev);
2294 pRam->cb = pRam->GCPhys = pRam->GCPhysLast = NIL_RTGCPHYS;
2295 MMHyperFree(pVM, pRam);
2296 break;
2297 }
2298 }
2299
2300 /*
2301 * Range match? It will all be within one range (see PGMAllHandler.cpp).
2302 */
2303 if ( GCPhysLast >= pRam->GCPhys
2304 && GCPhys <= pRam->GCPhysLast)
2305 {
2306 Assert(GCPhys >= pRam->GCPhys);
2307 Assert(GCPhysLast <= pRam->GCPhysLast);
2308
2309 /*
2310 * Turn the pages back into RAM pages.
2311 */
2312 uint32_t iPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2313 uint32_t cLeft = cb >> PAGE_SHIFT;
2314 while (cLeft--)
2315 {
2316 PPGMPAGE pPage = &pRam->aPages[iPage];
2317 AssertMsg(PGM_PAGE_IS_MMIO(pPage), ("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2318 AssertMsg(PGM_PAGE_IS_ZERO(pPage), ("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2319 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO)
2320 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM);
2321 }
2322 break;
2323 }
2324
2325 /* next */
2326 pRamPrev = pRam;
2327 pRam = pRam->pNextR3;
2328 }
2329 }
2330
2331 /* Force a PGM pool flush as guest ram references have been changed. */
2332 /** todo; not entirely SMP safe; assuming for now the guest takes care of this internally (not touch mapped mmio while changing the mapping). */
2333 PVMCPU pVCpu = VMMGetCpu(pVM);
2334 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2335 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2336
2337 pgmPhysInvalidatePageMapTLB(pVM);
2338 pgmPhysInvalidRamRangeTlbs(pVM);
2339 pgmUnlock(pVM);
2340 return rc;
2341}
2342
2343
2344/**
2345 * Locate a MMIO2 range.
2346 *
2347 * @returns Pointer to the MMIO2 range.
2348 * @param pVM The VM handle.
2349 * @param pDevIns The device instance owning the region.
2350 * @param iRegion The region.
2351 */
2352DECLINLINE(PPGMMMIO2RANGE) pgmR3PhysMMIO2Find(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion)
2353{
2354 /*
2355 * Search the list.
2356 */
2357 for (PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3; pCur; pCur = pCur->pNextR3)
2358 if ( pCur->pDevInsR3 == pDevIns
2359 && pCur->iRegion == iRegion)
2360 return pCur;
2361 return NULL;
2362}
2363
2364
2365/**
2366 * Allocate and register an MMIO2 region.
2367 *
2368 * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM
2369 * associated with a device. It is also non-shared memory with a permanent
2370 * ring-3 mapping and page backing (presently).
2371 *
2372 * A MMIO2 range may overlap with base memory if a lot of RAM is configured for
2373 * the VM, in which case we'll drop the base memory pages. Presently we will
2374 * make no attempt to preserve anything that happens to be present in the base
2375 * memory that is replaced, this is of course incorrectly but it's too much
2376 * effort.
2377 *
2378 * @returns VBox status code.
2379 * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the
2380 * memory.
2381 * @retval VERR_ALREADY_EXISTS if the region already exists.
2382 *
2383 * @param pVM The VM handle.
2384 * @param pDevIns The device instance owning the region.
2385 * @param iRegion The region number. If the MMIO2 memory is a PCI
2386 * I/O region this number has to be the number of that
2387 * region. Otherwise it can be any number safe
2388 * UINT8_MAX.
2389 * @param cb The size of the region. Must be page aligned.
2390 * @param fFlags Reserved for future use, must be zero.
2391 * @param ppv Where to store the pointer to the ring-3 mapping of
2392 * the memory.
2393 * @param pszDesc The description.
2394 */
2395VMMR3DECL(int) PGMR3PhysMMIO2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS cb, uint32_t fFlags, void **ppv, const char *pszDesc)
2396{
2397 /*
2398 * Validate input.
2399 */
2400 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2401 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2402 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2403 AssertPtrReturn(ppv, VERR_INVALID_POINTER);
2404 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2405 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
2406 AssertReturn(pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion) == NULL, VERR_ALREADY_EXISTS);
2407 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2408 AssertReturn(cb, VERR_INVALID_PARAMETER);
2409 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
2410
2411 const uint32_t cPages = cb >> PAGE_SHIFT;
2412 AssertLogRelReturn(((RTGCPHYS)cPages << PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER);
2413 AssertLogRelReturn(cPages <= INT32_MAX / 2, VERR_NO_MEMORY);
2414
2415 /*
2416 * For the 2nd+ instance, mangle the description string so it's unique.
2417 */
2418 if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */
2419 {
2420 pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance);
2421 if (!pszDesc)
2422 return VERR_NO_MEMORY;
2423 }
2424
2425 /*
2426 * Try reserve and allocate the backing memory first as this is what is
2427 * most likely to fail.
2428 */
2429 int rc = MMR3AdjustFixedReservation(pVM, cPages, pszDesc);
2430 if (RT_SUCCESS(rc))
2431 {
2432 void *pvPages;
2433 PSUPPAGE paPages = (PSUPPAGE)RTMemTmpAlloc(cPages * sizeof(SUPPAGE));
2434 if (RT_SUCCESS(rc))
2435 rc = SUPR3PageAllocEx(cPages, 0 /*fFlags*/, &pvPages, NULL /*pR0Ptr*/, paPages);
2436 if (RT_SUCCESS(rc))
2437 {
2438 memset(pvPages, 0, cPages * PAGE_SIZE);
2439
2440 /*
2441 * Create the MMIO2 range record for it.
2442 */
2443 const size_t cbRange = RT_OFFSETOF(PGMMMIO2RANGE, RamRange.aPages[cPages]);
2444 PPGMMMIO2RANGE pNew;
2445 rc = MMR3HyperAllocOnceNoRel(pVM, cbRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew);
2446 AssertLogRelMsgRC(rc, ("cbRamRange=%zu\n", cbRange));
2447 if (RT_SUCCESS(rc))
2448 {
2449 pNew->pDevInsR3 = pDevIns;
2450 pNew->pvR3 = pvPages;
2451 //pNew->pNext = NULL;
2452 //pNew->fMapped = false;
2453 //pNew->fOverlapping = false;
2454 pNew->iRegion = iRegion;
2455 pNew->idSavedState = UINT8_MAX;
2456 pNew->RamRange.pSelfR0 = MMHyperCCToR0(pVM, &pNew->RamRange);
2457 pNew->RamRange.pSelfRC = MMHyperCCToRC(pVM, &pNew->RamRange);
2458 pNew->RamRange.GCPhys = NIL_RTGCPHYS;
2459 pNew->RamRange.GCPhysLast = NIL_RTGCPHYS;
2460 pNew->RamRange.pszDesc = pszDesc;
2461 pNew->RamRange.cb = cb;
2462 pNew->RamRange.fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO2;
2463 pNew->RamRange.pvR3 = pvPages;
2464 //pNew->RamRange.paLSPages = NULL;
2465
2466 uint32_t iPage = cPages;
2467 while (iPage-- > 0)
2468 {
2469 PGM_PAGE_INIT(&pNew->RamRange.aPages[iPage],
2470 paPages[iPage].Phys, NIL_GMM_PAGEID,
2471 PGMPAGETYPE_MMIO2, PGM_PAGE_STATE_ALLOCATED);
2472 }
2473
2474 /* update page count stats */
2475 pVM->pgm.s.cAllPages += cPages;
2476 pVM->pgm.s.cPrivatePages += cPages;
2477
2478 /*
2479 * Link it into the list.
2480 * Since there is no particular order, just push it.
2481 */
2482 pgmLock(pVM);
2483 pNew->pNextR3 = pVM->pgm.s.pMmio2RangesR3;
2484 pVM->pgm.s.pMmio2RangesR3 = pNew;
2485 pgmUnlock(pVM);
2486
2487 *ppv = pvPages;
2488 RTMemTmpFree(paPages);
2489 pgmPhysInvalidatePageMapTLB(pVM);
2490 return VINF_SUCCESS;
2491 }
2492
2493 SUPR3PageFreeEx(pvPages, cPages);
2494 }
2495 RTMemTmpFree(paPages);
2496 MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pszDesc);
2497 }
2498 if (pDevIns->iInstance > 0)
2499 MMR3HeapFree((void *)pszDesc);
2500 return rc;
2501}
2502
2503
2504/**
2505 * Deregisters and frees an MMIO2 region.
2506 *
2507 * Any physical (and virtual) access handlers registered for the region must
2508 * be deregistered before calling this function.
2509 *
2510 * @returns VBox status code.
2511 * @param pVM The VM handle.
2512 * @param pDevIns The device instance owning the region.
2513 * @param iRegion The region. If it's UINT32_MAX it'll be a wildcard match.
2514 */
2515VMMR3DECL(int) PGMR3PhysMMIO2Deregister(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion)
2516{
2517 /*
2518 * Validate input.
2519 */
2520 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2521 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2522 AssertReturn(iRegion <= UINT8_MAX || iRegion == UINT32_MAX, VERR_INVALID_PARAMETER);
2523
2524 pgmLock(pVM);
2525 int rc = VINF_SUCCESS;
2526 unsigned cFound = 0;
2527 PPGMMMIO2RANGE pPrev = NULL;
2528 PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3;
2529 while (pCur)
2530 {
2531 if ( pCur->pDevInsR3 == pDevIns
2532 && ( iRegion == UINT32_MAX
2533 || pCur->iRegion == iRegion))
2534 {
2535 cFound++;
2536
2537 /*
2538 * Unmap it if it's mapped.
2539 */
2540 if (pCur->fMapped)
2541 {
2542 int rc2 = PGMR3PhysMMIO2Unmap(pVM, pCur->pDevInsR3, pCur->iRegion, pCur->RamRange.GCPhys);
2543 AssertRC(rc2);
2544 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2545 rc = rc2;
2546 }
2547
2548 /*
2549 * Unlink it
2550 */
2551 PPGMMMIO2RANGE pNext = pCur->pNextR3;
2552 if (pPrev)
2553 pPrev->pNextR3 = pNext;
2554 else
2555 pVM->pgm.s.pMmio2RangesR3 = pNext;
2556 pCur->pNextR3 = NULL;
2557
2558 /*
2559 * Free the memory.
2560 */
2561 int rc2 = SUPR3PageFreeEx(pCur->pvR3, pCur->RamRange.cb >> PAGE_SHIFT);
2562 AssertRC(rc2);
2563 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2564 rc = rc2;
2565
2566 uint32_t const cPages = pCur->RamRange.cb >> PAGE_SHIFT;
2567 rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pCur->RamRange.pszDesc);
2568 AssertRC(rc2);
2569 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2570 rc = rc2;
2571
2572 /* we're leaking hyper memory here if done at runtime. */
2573#ifdef VBOX_STRICT
2574 VMSTATE const enmState = VMR3GetState(pVM);
2575 AssertMsg( enmState == VMSTATE_POWERING_OFF
2576 || enmState == VMSTATE_POWERING_OFF_LS
2577 || enmState == VMSTATE_OFF
2578 || enmState == VMSTATE_OFF_LS
2579 || enmState == VMSTATE_DESTROYING
2580 || enmState == VMSTATE_TERMINATED
2581 || enmState == VMSTATE_CREATING
2582 , ("%s\n", VMR3GetStateName(enmState)));
2583#endif
2584 /*rc = MMHyperFree(pVM, pCur);
2585 AssertRCReturn(rc, rc); - not safe, see the alloc call. */
2586
2587
2588 /* update page count stats */
2589 pVM->pgm.s.cAllPages -= cPages;
2590 pVM->pgm.s.cPrivatePages -= cPages;
2591
2592 /* next */
2593 pCur = pNext;
2594 }
2595 else
2596 {
2597 pPrev = pCur;
2598 pCur = pCur->pNextR3;
2599 }
2600 }
2601 pgmPhysInvalidatePageMapTLB(pVM);
2602 pgmUnlock(pVM);
2603 return !cFound && iRegion != UINT32_MAX ? VERR_NOT_FOUND : rc;
2604}
2605
2606
2607/**
2608 * Maps a MMIO2 region.
2609 *
2610 * This is done when a guest / the bios / state loading changes the
2611 * PCI config. The replacing of base memory has the same restrictions
2612 * as during registration, of course.
2613 *
2614 * @returns VBox status code.
2615 *
2616 * @param pVM The VM handle.
2617 * @param pDevIns The device instance owning the region.
2618 */
2619VMMR3DECL(int) PGMR3PhysMMIO2Map(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS GCPhys)
2620{
2621 /*
2622 * Validate input
2623 */
2624 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2625 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2626 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2627 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
2628 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
2629 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2630
2631 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2632 AssertReturn(pCur, VERR_NOT_FOUND);
2633 AssertReturn(!pCur->fMapped, VERR_WRONG_ORDER);
2634 Assert(pCur->RamRange.GCPhys == NIL_RTGCPHYS);
2635 Assert(pCur->RamRange.GCPhysLast == NIL_RTGCPHYS);
2636
2637 const RTGCPHYS GCPhysLast = GCPhys + pCur->RamRange.cb - 1;
2638 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2639
2640 /*
2641 * Find our location in the ram range list, checking for
2642 * restriction we don't bother implementing yet (partially overlapping).
2643 */
2644 bool fRamExists = false;
2645 PPGMRAMRANGE pRamPrev = NULL;
2646 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2647 while (pRam && GCPhysLast >= pRam->GCPhys)
2648 {
2649 if ( GCPhys <= pRam->GCPhysLast
2650 && GCPhysLast >= pRam->GCPhys)
2651 {
2652 /* completely within? */
2653 AssertLogRelMsgReturn( GCPhys >= pRam->GCPhys
2654 && GCPhysLast <= pRam->GCPhysLast,
2655 ("%RGp-%RGp (MMIO2/%s) falls partly outside %RGp-%RGp (%s)\n",
2656 GCPhys, GCPhysLast, pCur->RamRange.pszDesc,
2657 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
2658 VERR_PGM_RAM_CONFLICT);
2659 fRamExists = true;
2660 break;
2661 }
2662
2663 /* next */
2664 pRamPrev = pRam;
2665 pRam = pRam->pNextR3;
2666 }
2667 if (fRamExists)
2668 {
2669 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2670 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2671 while (cPagesLeft-- > 0)
2672 {
2673 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
2674 ("%RGp isn't a RAM page (%d) - mapping %RGp-%RGp (MMIO2/%s).\n",
2675 GCPhys, PGM_PAGE_GET_TYPE(pPage), GCPhys, GCPhysLast, pCur->RamRange.pszDesc),
2676 VERR_PGM_RAM_CONFLICT);
2677 pPage++;
2678 }
2679 }
2680 Log(("PGMR3PhysMMIO2Map: %RGp-%RGp fRamExists=%RTbool %s\n",
2681 GCPhys, GCPhysLast, fRamExists, pCur->RamRange.pszDesc));
2682
2683 /*
2684 * Make the changes.
2685 */
2686 pgmLock(pVM);
2687
2688 pCur->RamRange.GCPhys = GCPhys;
2689 pCur->RamRange.GCPhysLast = GCPhysLast;
2690 pCur->fMapped = true;
2691 pCur->fOverlapping = fRamExists;
2692
2693 if (fRamExists)
2694 {
2695/** @todo use pgmR3PhysFreePageRange here. */
2696 uint32_t cPendingPages = 0;
2697 PGMMFREEPAGESREQ pReq;
2698 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2699 AssertLogRelRCReturn(rc, rc);
2700
2701 /* replace the pages, freeing all present RAM pages. */
2702 PPGMPAGE pPageSrc = &pCur->RamRange.aPages[0];
2703 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2704 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2705 while (cPagesLeft-- > 0)
2706 {
2707 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys);
2708 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
2709
2710 RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc);
2711 PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys);
2712 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2);
2713 PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED);
2714 PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE);
2715 PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0);
2716 PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0);
2717
2718 pVM->pgm.s.cZeroPages--;
2719 GCPhys += PAGE_SIZE;
2720 pPageSrc++;
2721 pPageDst++;
2722 }
2723
2724 /* Flush physical page map TLB. */
2725 pgmPhysInvalidatePageMapTLB(pVM);
2726
2727 if (cPendingPages)
2728 {
2729 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2730 AssertLogRelRCReturn(rc, rc);
2731 }
2732 GMMR3FreePagesCleanup(pReq);
2733
2734 /* Force a PGM pool flush as guest ram references have been changed. */
2735 /** todo; not entirely SMP safe; assuming for now the guest takes care of this internally (not touch mapped mmio while changing the mapping). */
2736 PVMCPU pVCpu = VMMGetCpu(pVM);
2737 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2738 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2739
2740 pgmUnlock(pVM);
2741 }
2742 else
2743 {
2744 RTGCPHYS cb = pCur->RamRange.cb;
2745
2746 /* Clear the tracking data of pages we're going to reactivate. */
2747 PPGMPAGE pPageSrc = &pCur->RamRange.aPages[0];
2748 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2749 while (cPagesLeft-- > 0)
2750 {
2751 PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0);
2752 PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0);
2753 pPageSrc++;
2754 }
2755
2756 /* link in the ram range */
2757 pgmR3PhysLinkRamRange(pVM, &pCur->RamRange, pRamPrev);
2758 pgmUnlock(pVM);
2759
2760#ifdef VBOX_WITH_REM
2761 REMR3NotifyPhysRamRegister(pVM, GCPhys, cb, REM_NOTIFY_PHYS_RAM_FLAGS_MMIO2);
2762#endif
2763 }
2764
2765 pgmPhysInvalidatePageMapTLB(pVM);
2766 return VINF_SUCCESS;
2767}
2768
2769
2770/**
2771 * Unmaps a MMIO2 region.
2772 *
2773 * This is done when a guest / the bios / state loading changes the
2774 * PCI config. The replacing of base memory has the same restrictions
2775 * as during registration, of course.
2776 */
2777VMMR3DECL(int) PGMR3PhysMMIO2Unmap(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS GCPhys)
2778{
2779 /*
2780 * Validate input
2781 */
2782 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2783 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2784 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2785 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
2786 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
2787 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2788
2789 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2790 AssertReturn(pCur, VERR_NOT_FOUND);
2791 AssertReturn(pCur->fMapped, VERR_WRONG_ORDER);
2792 AssertReturn(pCur->RamRange.GCPhys == GCPhys, VERR_INVALID_PARAMETER);
2793 Assert(pCur->RamRange.GCPhysLast != NIL_RTGCPHYS);
2794
2795 Log(("PGMR3PhysMMIO2Unmap: %RGp-%RGp %s\n",
2796 pCur->RamRange.GCPhys, pCur->RamRange.GCPhysLast, pCur->RamRange.pszDesc));
2797
2798 /*
2799 * Unmap it.
2800 */
2801 pgmLock(pVM);
2802
2803#ifdef VBOX_WITH_REM
2804 RTGCPHYS GCPhysRangeREM;
2805 RTGCPHYS cbRangeREM;
2806 bool fInformREM;
2807#endif
2808 if (pCur->fOverlapping)
2809 {
2810 /* Restore the RAM pages we've replaced. */
2811 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2812 while (pRam->GCPhys > pCur->RamRange.GCPhysLast)
2813 pRam = pRam->pNextR3;
2814
2815 PPGMPAGE pPageDst = &pRam->aPages[(pCur->RamRange.GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2816 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2817 while (cPagesLeft-- > 0)
2818 {
2819 PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM);
2820 pVM->pgm.s.cZeroPages++;
2821 pPageDst++;
2822 }
2823
2824 /* Flush physical page map TLB. */
2825 pgmPhysInvalidatePageMapTLB(pVM);
2826#ifdef VBOX_WITH_REM
2827 GCPhysRangeREM = NIL_RTGCPHYS; /* shuts up gcc */
2828 cbRangeREM = RTGCPHYS_MAX; /* ditto */
2829 fInformREM = false;
2830#endif
2831 }
2832 else
2833 {
2834#ifdef VBOX_WITH_REM
2835 GCPhysRangeREM = pCur->RamRange.GCPhys;
2836 cbRangeREM = pCur->RamRange.cb;
2837 fInformREM = true;
2838#endif
2839 pgmR3PhysUnlinkRamRange(pVM, &pCur->RamRange);
2840 }
2841
2842 pCur->RamRange.GCPhys = NIL_RTGCPHYS;
2843 pCur->RamRange.GCPhysLast = NIL_RTGCPHYS;
2844 pCur->fOverlapping = false;
2845 pCur->fMapped = false;
2846
2847 /* Force a PGM pool flush as guest ram references have been changed. */
2848 /** @todo not entirely SMP safe; assuming for now the guest takes care
2849 * of this internally (not touch mapped mmio while changing the
2850 * mapping). */
2851 PVMCPU pVCpu = VMMGetCpu(pVM);
2852 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2853 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2854
2855 pgmPhysInvalidatePageMapTLB(pVM);
2856 pgmPhysInvalidRamRangeTlbs(pVM);
2857 pgmUnlock(pVM);
2858
2859#ifdef VBOX_WITH_REM
2860 if (fInformREM)
2861 REMR3NotifyPhysRamDeregister(pVM, GCPhysRangeREM, cbRangeREM);
2862#endif
2863
2864 return VINF_SUCCESS;
2865}
2866
2867
2868/**
2869 * Checks if the given address is an MMIO2 base address or not.
2870 *
2871 * @returns true/false accordingly.
2872 * @param pVM The VM handle.
2873 * @param pDevIns The owner of the memory, optional.
2874 * @param GCPhys The address to check.
2875 */
2876VMMR3DECL(bool) PGMR3PhysMMIO2IsBase(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys)
2877{
2878 /*
2879 * Validate input
2880 */
2881 VM_ASSERT_EMT_RETURN(pVM, false);
2882 AssertPtrReturn(pDevIns, false);
2883 AssertReturn(GCPhys != NIL_RTGCPHYS, false);
2884 AssertReturn(GCPhys != 0, false);
2885 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), false);
2886
2887 /*
2888 * Search the list.
2889 */
2890 pgmLock(pVM);
2891 for (PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3; pCur; pCur = pCur->pNextR3)
2892 if (pCur->RamRange.GCPhys == GCPhys)
2893 {
2894 Assert(pCur->fMapped);
2895 pgmUnlock(pVM);
2896 return true;
2897 }
2898 pgmUnlock(pVM);
2899 return false;
2900}
2901
2902
2903/**
2904 * Gets the HC physical address of a page in the MMIO2 region.
2905 *
2906 * This is API is intended for MMHyper and shouldn't be called
2907 * by anyone else...
2908 *
2909 * @returns VBox status code.
2910 * @param pVM The VM handle.
2911 * @param pDevIns The owner of the memory, optional.
2912 * @param iRegion The region.
2913 * @param off The page expressed an offset into the MMIO2 region.
2914 * @param pHCPhys Where to store the result.
2915 */
2916VMMR3DECL(int) PGMR3PhysMMIO2GetHCPhys(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS off, PRTHCPHYS pHCPhys)
2917{
2918 /*
2919 * Validate input
2920 */
2921 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2922 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2923 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2924
2925 pgmLock(pVM);
2926 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2927 AssertReturn(pCur, VERR_NOT_FOUND);
2928 AssertReturn(off < pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2929
2930 PCPGMPAGE pPage = &pCur->RamRange.aPages[off >> PAGE_SHIFT];
2931 *pHCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2932 pgmUnlock(pVM);
2933 return VINF_SUCCESS;
2934}
2935
2936
2937/**
2938 * Maps a portion of an MMIO2 region into kernel space (host).
2939 *
2940 * The kernel mapping will become invalid when the MMIO2 memory is deregistered
2941 * or the VM is terminated.
2942 *
2943 * @return VBox status code.
2944 *
2945 * @param pVM The VM handle.
2946 * @param pDevIns The device owning the MMIO2 memory.
2947 * @param iRegion The region.
2948 * @param off The offset into the region. Must be page aligned.
2949 * @param cb The number of bytes to map. Must be page aligned.
2950 * @param pszDesc Mapping description.
2951 * @param pR0Ptr Where to store the R0 address.
2952 */
2953VMMR3DECL(int) PGMR3PhysMMIO2MapKernel(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS off, RTGCPHYS cb,
2954 const char *pszDesc, PRTR0PTR pR0Ptr)
2955{
2956 /*
2957 * Validate input.
2958 */
2959 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2960 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2961 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2962
2963 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2964 AssertReturn(pCur, VERR_NOT_FOUND);
2965 AssertReturn(off < pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2966 AssertReturn(cb <= pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2967 AssertReturn(off + cb <= pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2968 NOREF(pszDesc);
2969
2970 /*
2971 * Pass the request on to the support library/driver.
2972 */
2973 int rc = SUPR3PageMapKernel(pCur->pvR3, off, cb, 0, pR0Ptr);
2974
2975 return rc;
2976}
2977
2978
2979/**
2980 * Worker for PGMR3PhysRomRegister.
2981 *
2982 * This is here to simplify lock management, i.e. the caller does all the
2983 * locking and we can simply return without needing to remember to unlock
2984 * anything first.
2985 *
2986 * @returns VBox status.
2987 * @param pVM The VM handle.
2988 * @param pDevIns The device instance owning the ROM.
2989 * @param GCPhys First physical address in the range.
2990 * Must be page aligned!
2991 * @param cb The size of the range (in bytes).
2992 * Must be page aligned!
2993 * @param pvBinary Pointer to the binary data backing the ROM image.
2994 * @param cbBinary The size of the binary data pvBinary points to.
2995 * This must be less or equal to @a cb.
2996 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
2997 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
2998 * @param pszDesc Pointer to description string. This must not be freed.
2999 */
3000static int pgmR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
3001 const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc)
3002{
3003 /*
3004 * Validate input.
3005 */
3006 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3007 AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
3008 AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
3009 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
3010 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3011 AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER);
3012 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
3013 AssertReturn(!(fFlags & ~(PGMPHYS_ROM_FLAGS_SHADOWED | PGMPHYS_ROM_FLAGS_PERMANENT_BINARY)), VERR_INVALID_PARAMETER);
3014 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
3015
3016 const uint32_t cPages = cb >> PAGE_SHIFT;
3017
3018 /*
3019 * Find the ROM location in the ROM list first.
3020 */
3021 PPGMROMRANGE pRomPrev = NULL;
3022 PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3;
3023 while (pRom && GCPhysLast >= pRom->GCPhys)
3024 {
3025 if ( GCPhys <= pRom->GCPhysLast
3026 && GCPhysLast >= pRom->GCPhys)
3027 AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
3028 GCPhys, GCPhysLast, pszDesc,
3029 pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc),
3030 VERR_PGM_RAM_CONFLICT);
3031 /* next */
3032 pRomPrev = pRom;
3033 pRom = pRom->pNextR3;
3034 }
3035
3036 /*
3037 * Find the RAM location and check for conflicts.
3038 *
3039 * Conflict detection is a bit different than for RAM
3040 * registration since a ROM can be located within a RAM
3041 * range. So, what we have to check for is other memory
3042 * types (other than RAM that is) and that we don't span
3043 * more than one RAM range (layz).
3044 */
3045 bool fRamExists = false;
3046 PPGMRAMRANGE pRamPrev = NULL;
3047 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
3048 while (pRam && GCPhysLast >= pRam->GCPhys)
3049 {
3050 if ( GCPhys <= pRam->GCPhysLast
3051 && GCPhysLast >= pRam->GCPhys)
3052 {
3053 /* completely within? */
3054 AssertLogRelMsgReturn( GCPhys >= pRam->GCPhys
3055 && GCPhysLast <= pRam->GCPhysLast,
3056 ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n",
3057 GCPhys, GCPhysLast, pszDesc,
3058 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
3059 VERR_PGM_RAM_CONFLICT);
3060 fRamExists = true;
3061 break;
3062 }
3063
3064 /* next */
3065 pRamPrev = pRam;
3066 pRam = pRam->pNextR3;
3067 }
3068 if (fRamExists)
3069 {
3070 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
3071 uint32_t cPagesLeft = cPages;
3072 while (cPagesLeft-- > 0)
3073 {
3074 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
3075 ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n",
3076 pRam->GCPhys + ((RTGCPHYS)(uintptr_t)(pPage - &pRam->aPages[0]) << PAGE_SHIFT),
3077 pPage, GCPhys, GCPhysLast, pszDesc), VERR_PGM_RAM_CONFLICT);
3078 Assert(PGM_PAGE_IS_ZERO(pPage));
3079 pPage++;
3080 }
3081 }
3082
3083 /*
3084 * Update the base memory reservation if necessary.
3085 */
3086 uint32_t cExtraBaseCost = fRamExists ? 0 : cPages;
3087 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3088 cExtraBaseCost += cPages;
3089 if (cExtraBaseCost)
3090 {
3091 int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost);
3092 if (RT_FAILURE(rc))
3093 return rc;
3094 }
3095
3096 /*
3097 * Allocate memory for the virgin copy of the RAM.
3098 */
3099 PGMMALLOCATEPAGESREQ pReq;
3100 int rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cPages, GMMACCOUNT_BASE);
3101 AssertRCReturn(rc, rc);
3102
3103 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3104 {
3105 pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << PAGE_SHIFT);
3106 pReq->aPages[iPage].idPage = NIL_GMM_PAGEID;
3107 pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID;
3108 }
3109
3110 rc = GMMR3AllocatePagesPerform(pVM, pReq);
3111 if (RT_FAILURE(rc))
3112 {
3113 GMMR3AllocatePagesCleanup(pReq);
3114 return rc;
3115 }
3116
3117 /*
3118 * Allocate the new ROM range and RAM range (if necessary).
3119 */
3120 PPGMROMRANGE pRomNew;
3121 rc = MMHyperAlloc(pVM, RT_OFFSETOF(PGMROMRANGE, aPages[cPages]), 0, MM_TAG_PGM_PHYS, (void **)&pRomNew);
3122 if (RT_SUCCESS(rc))
3123 {
3124 PPGMRAMRANGE pRamNew = NULL;
3125 if (!fRamExists)
3126 rc = MMHyperAlloc(pVM, RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]), sizeof(PGMPAGE), MM_TAG_PGM_PHYS, (void **)&pRamNew);
3127 if (RT_SUCCESS(rc))
3128 {
3129 /*
3130 * Initialize and insert the RAM range (if required).
3131 */
3132 PPGMROMPAGE pRomPage = &pRomNew->aPages[0];
3133 if (!fRamExists)
3134 {
3135 pRamNew->pSelfR0 = MMHyperCCToR0(pVM, pRamNew);
3136 pRamNew->pSelfRC = MMHyperCCToRC(pVM, pRamNew);
3137 pRamNew->GCPhys = GCPhys;
3138 pRamNew->GCPhysLast = GCPhysLast;
3139 pRamNew->cb = cb;
3140 pRamNew->pszDesc = pszDesc;
3141 pRamNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_ROM;
3142 pRamNew->pvR3 = NULL;
3143 pRamNew->paLSPages = NULL;
3144
3145 PPGMPAGE pPage = &pRamNew->aPages[0];
3146 for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++)
3147 {
3148 PGM_PAGE_INIT(pPage,
3149 pReq->aPages[iPage].HCPhysGCPhys,
3150 pReq->aPages[iPage].idPage,
3151 PGMPAGETYPE_ROM,
3152 PGM_PAGE_STATE_ALLOCATED);
3153
3154 pRomPage->Virgin = *pPage;
3155 }
3156
3157 pVM->pgm.s.cAllPages += cPages;
3158 pgmR3PhysLinkRamRange(pVM, pRamNew, pRamPrev);
3159 }
3160 else
3161 {
3162 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
3163 for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++)
3164 {
3165 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_ROM);
3166 PGM_PAGE_SET_HCPHYS(pVM, pPage, pReq->aPages[iPage].HCPhysGCPhys);
3167 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED);
3168 PGM_PAGE_SET_PAGEID(pVM, pPage, pReq->aPages[iPage].idPage);
3169 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
3170 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
3171 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
3172
3173 pRomPage->Virgin = *pPage;
3174 }
3175
3176 pRamNew = pRam;
3177
3178 pVM->pgm.s.cZeroPages -= cPages;
3179 }
3180 pVM->pgm.s.cPrivatePages += cPages;
3181
3182 /* Flush physical page map TLB. */
3183 pgmPhysInvalidatePageMapTLB(pVM);
3184
3185
3186 /*
3187 * !HACK ALERT! REM + (Shadowed) ROM ==> mess.
3188 *
3189 * If it's shadowed we'll register the handler after the ROM notification
3190 * so we get the access handler callbacks that we should. If it isn't
3191 * shadowed we'll do it the other way around to make REM use the built-in
3192 * ROM behavior and not the handler behavior (which is to route all access
3193 * to PGM atm).
3194 */
3195 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3196 {
3197#ifdef VBOX_WITH_REM
3198 REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, true /* fShadowed */);
3199#endif
3200 rc = PGMR3HandlerPhysicalRegister(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE, GCPhys, GCPhysLast,
3201 pgmR3PhysRomWriteHandler, pRomNew,
3202 NULL, "pgmPhysRomWriteHandler", MMHyperCCToR0(pVM, pRomNew),
3203 NULL, "pgmPhysRomWriteHandler", MMHyperCCToRC(pVM, pRomNew), pszDesc);
3204 }
3205 else
3206 {
3207 rc = PGMR3HandlerPhysicalRegister(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE, GCPhys, GCPhysLast,
3208 pgmR3PhysRomWriteHandler, pRomNew,
3209 NULL, "pgmPhysRomWriteHandler", MMHyperCCToR0(pVM, pRomNew),
3210 NULL, "pgmPhysRomWriteHandler", MMHyperCCToRC(pVM, pRomNew), pszDesc);
3211#ifdef VBOX_WITH_REM
3212 REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, false /* fShadowed */);
3213#endif
3214 }
3215 if (RT_SUCCESS(rc))
3216 {
3217 /*
3218 * Copy the image over to the virgin pages.
3219 * This must be done after linking in the RAM range.
3220 */
3221 size_t cbBinaryLeft = cbBinary;
3222 PPGMPAGE pRamPage = &pRamNew->aPages[(GCPhys - pRamNew->GCPhys) >> PAGE_SHIFT];
3223 for (uint32_t iPage = 0; iPage < cPages; iPage++, pRamPage++)
3224 {
3225 void *pvDstPage;
3226 rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << PAGE_SHIFT), &pvDstPage);
3227 if (RT_FAILURE(rc))
3228 {
3229 VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys);
3230 break;
3231 }
3232 if (cbBinaryLeft >= PAGE_SIZE)
3233 {
3234 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), PAGE_SIZE);
3235 cbBinaryLeft -= PAGE_SIZE;
3236 }
3237 else
3238 {
3239 ASMMemZeroPage(pvDstPage); /* (shouldn't be necessary, but can't hurt either) */
3240 if (cbBinaryLeft > 0)
3241 {
3242 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), cbBinaryLeft);
3243 cbBinaryLeft = 0;
3244 }
3245 }
3246 }
3247 if (RT_SUCCESS(rc))
3248 {
3249 /*
3250 * Initialize the ROM range.
3251 * Note that the Virgin member of the pages has already been initialized above.
3252 */
3253 pRomNew->GCPhys = GCPhys;
3254 pRomNew->GCPhysLast = GCPhysLast;
3255 pRomNew->cb = cb;
3256 pRomNew->fFlags = fFlags;
3257 pRomNew->idSavedState = UINT8_MAX;
3258 pRomNew->cbOriginal = cbBinary;
3259#ifdef VBOX_STRICT
3260 pRomNew->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY
3261 ? pvBinary : RTMemDup(pvBinary, cbBinary);
3262#else
3263 pRomNew->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY ? pvBinary : NULL;
3264#endif
3265 pRomNew->pszDesc = pszDesc;
3266
3267 for (unsigned iPage = 0; iPage < cPages; iPage++)
3268 {
3269 PPGMROMPAGE pPage = &pRomNew->aPages[iPage];
3270 pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE;
3271 PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW);
3272 }
3273
3274 /* update the page count stats for the shadow pages. */
3275 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3276 {
3277 pVM->pgm.s.cZeroPages += cPages;
3278 pVM->pgm.s.cAllPages += cPages;
3279 }
3280
3281 /*
3282 * Insert the ROM range, tell REM and return successfully.
3283 */
3284 pRomNew->pNextR3 = pRom;
3285 pRomNew->pNextR0 = pRom ? MMHyperCCToR0(pVM, pRom) : NIL_RTR0PTR;
3286 pRomNew->pNextRC = pRom ? MMHyperCCToRC(pVM, pRom) : NIL_RTRCPTR;
3287
3288 if (pRomPrev)
3289 {
3290 pRomPrev->pNextR3 = pRomNew;
3291 pRomPrev->pNextR0 = MMHyperCCToR0(pVM, pRomNew);
3292 pRomPrev->pNextRC = MMHyperCCToRC(pVM, pRomNew);
3293 }
3294 else
3295 {
3296 pVM->pgm.s.pRomRangesR3 = pRomNew;
3297 pVM->pgm.s.pRomRangesR0 = MMHyperCCToR0(pVM, pRomNew);
3298 pVM->pgm.s.pRomRangesRC = MMHyperCCToRC(pVM, pRomNew);
3299 }
3300
3301 pgmPhysInvalidatePageMapTLB(pVM);
3302 GMMR3AllocatePagesCleanup(pReq);
3303 return VINF_SUCCESS;
3304 }
3305
3306 /* bail out */
3307
3308 int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys);
3309 AssertRC(rc2);
3310 }
3311
3312 if (!fRamExists)
3313 {
3314 pgmR3PhysUnlinkRamRange2(pVM, pRamNew, pRamPrev);
3315 MMHyperFree(pVM, pRamNew);
3316 }
3317 }
3318 MMHyperFree(pVM, pRomNew);
3319 }
3320
3321 /** @todo Purge the mapping cache or something... */
3322 GMMR3FreeAllocatedPages(pVM, pReq);
3323 GMMR3AllocatePagesCleanup(pReq);
3324 return rc;
3325}
3326
3327
3328/**
3329 * Registers a ROM image.
3330 *
3331 * Shadowed ROM images requires double the amount of backing memory, so,
3332 * don't use that unless you have to. Shadowing of ROM images is process
3333 * where we can select where the reads go and where the writes go. On real
3334 * hardware the chipset provides means to configure this. We provide
3335 * PGMR3PhysProtectROM() for this purpose.
3336 *
3337 * A read-only copy of the ROM image will always be kept around while we
3338 * will allocate RAM pages for the changes on demand (unless all memory
3339 * is configured to be preallocated).
3340 *
3341 * @returns VBox status.
3342 * @param pVM The VM handle.
3343 * @param pDevIns The device instance owning the ROM.
3344 * @param GCPhys First physical address in the range.
3345 * Must be page aligned!
3346 * @param cb The size of the range (in bytes).
3347 * Must be page aligned!
3348 * @param pvBinary Pointer to the binary data backing the ROM image.
3349 * @param cbBinary The size of the binary data pvBinary points to.
3350 * This must be less or equal to @a cb.
3351 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
3352 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
3353 * @param pszDesc Pointer to description string. This must not be freed.
3354 *
3355 * @remark There is no way to remove the rom, automatically on device cleanup or
3356 * manually from the device yet. This isn't difficult in any way, it's
3357 * just not something we expect to be necessary for a while.
3358 */
3359VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
3360 const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc)
3361{
3362 Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n",
3363 pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc));
3364 pgmLock(pVM);
3365 int rc = pgmR3PhysRomRegister(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc);
3366 pgmUnlock(pVM);
3367 return rc;
3368}
3369
3370
3371/**
3372 * \#PF Handler callback for ROM write accesses.
3373 *
3374 * @returns VINF_SUCCESS if the handler have carried out the operation.
3375 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
3376 * @param pVM The VM handle.
3377 * @param GCPhys The physical address the guest is writing to.
3378 * @param pvPhys The HC mapping of that address.
3379 * @param pvBuf What the guest is reading/writing.
3380 * @param cbBuf How much it's reading/writing.
3381 * @param enmAccessType The access type.
3382 * @param pvUser User argument.
3383 */
3384static DECLCALLBACK(int) pgmR3PhysRomWriteHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
3385 PGMACCESSTYPE enmAccessType, void *pvUser)
3386{
3387 PPGMROMRANGE pRom = (PPGMROMRANGE)pvUser;
3388 const uint32_t iPage = (GCPhys - pRom->GCPhys) >> PAGE_SHIFT;
3389 Assert(iPage < (pRom->cb >> PAGE_SHIFT));
3390 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
3391 Log5(("pgmR3PhysRomWriteHandler: %d %c %#08RGp %#04zx\n", pRomPage->enmProt, enmAccessType == PGMACCESSTYPE_READ ? 'R' : 'W', GCPhys, cbBuf));
3392 NOREF(pvPhys);
3393
3394 if (enmAccessType == PGMACCESSTYPE_READ)
3395 {
3396 switch (pRomPage->enmProt)
3397 {
3398 /*
3399 * Take the default action.
3400 */
3401 case PGMROMPROT_READ_ROM_WRITE_IGNORE:
3402 case PGMROMPROT_READ_RAM_WRITE_IGNORE:
3403 case PGMROMPROT_READ_ROM_WRITE_RAM:
3404 case PGMROMPROT_READ_RAM_WRITE_RAM:
3405 return VINF_PGM_HANDLER_DO_DEFAULT;
3406
3407 default:
3408 AssertMsgFailedReturn(("enmProt=%d iPage=%d GCPhys=%RGp\n",
3409 pRom->aPages[iPage].enmProt, iPage, GCPhys),
3410 VERR_IPE_NOT_REACHED_DEFAULT_CASE);
3411 }
3412 }
3413 else
3414 {
3415 Assert(enmAccessType == PGMACCESSTYPE_WRITE);
3416 switch (pRomPage->enmProt)
3417 {
3418 /*
3419 * Ignore writes.
3420 */
3421 case PGMROMPROT_READ_ROM_WRITE_IGNORE:
3422 case PGMROMPROT_READ_RAM_WRITE_IGNORE:
3423 return VINF_SUCCESS;
3424
3425 /*
3426 * Write to the RAM page.
3427 */
3428 case PGMROMPROT_READ_ROM_WRITE_RAM:
3429 case PGMROMPROT_READ_RAM_WRITE_RAM: /* yes this will get here too, it's *way* simpler that way. */
3430 {
3431 /* This should be impossible now, pvPhys doesn't work cross page anylonger. */
3432 Assert(((GCPhys - pRom->GCPhys + cbBuf - 1) >> PAGE_SHIFT) == iPage);
3433
3434 /*
3435 * Take the lock, do lazy allocation, map the page and copy the data.
3436 *
3437 * Note that we have to bypass the mapping TLB since it works on
3438 * guest physical addresses and entering the shadow page would
3439 * kind of screw things up...
3440 */
3441 int rc = pgmLock(pVM);
3442 AssertRC(rc);
3443
3444 PPGMPAGE pShadowPage = &pRomPage->Shadow;
3445 if (!PGMROMPROT_IS_ROM(pRomPage->enmProt))
3446 {
3447 pShadowPage = pgmPhysGetPage(pVM, GCPhys);
3448 AssertLogRelReturn(pShadowPage, VERR_PGM_PHYS_PAGE_GET_IPE);
3449 }
3450
3451 void *pvDstPage;
3452 rc = pgmPhysPageMakeWritableAndMap(pVM, pShadowPage, GCPhys & X86_PTE_PG_MASK, &pvDstPage);
3453 if (RT_SUCCESS(rc))
3454 {
3455 memcpy((uint8_t *)pvDstPage + (GCPhys & PAGE_OFFSET_MASK), pvBuf, cbBuf);
3456 pRomPage->LiveSave.fWrittenTo = true;
3457 }
3458
3459 pgmUnlock(pVM);
3460 return rc;
3461 }
3462
3463 default:
3464 AssertMsgFailedReturn(("enmProt=%d iPage=%d GCPhys=%RGp\n",
3465 pRom->aPages[iPage].enmProt, iPage, GCPhys),
3466 VERR_IPE_NOT_REACHED_DEFAULT_CASE);
3467 }
3468 }
3469}
3470
3471
3472/**
3473 * Called by PGMR3Reset to reset the shadow, switch to the virgin,
3474 * and verify that the virgin part is untouched.
3475 *
3476 * This is done after the normal memory has been cleared.
3477 *
3478 * ASSUMES that the caller owns the PGM lock.
3479 *
3480 * @param pVM The VM handle.
3481 */
3482int pgmR3PhysRomReset(PVM pVM)
3483{
3484 PGM_LOCK_ASSERT_OWNER(pVM);
3485 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3486 {
3487 const uint32_t cPages = pRom->cb >> PAGE_SHIFT;
3488
3489 if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3490 {
3491 /*
3492 * Reset the physical handler.
3493 */
3494 int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE);
3495 AssertRCReturn(rc, rc);
3496
3497 /*
3498 * What we do with the shadow pages depends on the memory
3499 * preallocation option. If not enabled, we'll just throw
3500 * out all the dirty pages and replace them by the zero page.
3501 */
3502 if (!pVM->pgm.s.fRamPreAlloc)
3503 {
3504 /* Free the dirty pages. */
3505 uint32_t cPendingPages = 0;
3506 PGMMFREEPAGESREQ pReq;
3507 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
3508 AssertRCReturn(rc, rc);
3509
3510 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3511 if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)
3512 && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow))
3513 {
3514 Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED);
3515 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow,
3516 pRom->GCPhys + (iPage << PAGE_SHIFT));
3517 AssertLogRelRCReturn(rc, rc);
3518 }
3519
3520 if (cPendingPages)
3521 {
3522 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
3523 AssertLogRelRCReturn(rc, rc);
3524 }
3525 GMMR3FreePagesCleanup(pReq);
3526 }
3527 else
3528 {
3529 /* clear all the shadow pages. */
3530 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3531 {
3532 if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow))
3533 continue;
3534 Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow));
3535 void *pvDstPage;
3536 const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT);
3537 rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage);
3538 if (RT_FAILURE(rc))
3539 break;
3540 ASMMemZeroPage(pvDstPage);
3541 }
3542 AssertRCReturn(rc, rc);
3543 }
3544 }
3545
3546#ifdef VBOX_STRICT
3547 /*
3548 * Verify that the virgin page is unchanged if possible.
3549 */
3550 if (pRom->pvOriginal)
3551 {
3552 size_t cbSrcLeft = pRom->cbOriginal;
3553 uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal;
3554 for (uint32_t iPage = 0; iPage < cPages && cbSrcLeft > 0; iPage++, pbSrcPage += PAGE_SIZE)
3555 {
3556 const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT);
3557 void const *pvDstPage;
3558 int rc = pgmPhysPageMapReadOnly(pVM, &pRom->aPages[iPage].Virgin, GCPhys, &pvDstPage);
3559 if (RT_FAILURE(rc))
3560 break;
3561
3562 if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, PAGE_SIZE)))
3563 LogRel(("pgmR3PhysRomReset: %RGp rom page changed (%s) - loaded saved state?\n",
3564 GCPhys, pRom->pszDesc));
3565 cbSrcLeft -= RT_MIN(cbSrcLeft, PAGE_SIZE);
3566 }
3567 }
3568#endif
3569 }
3570
3571 return VINF_SUCCESS;
3572}
3573
3574
3575/**
3576 * Called by PGMR3Term to free resources.
3577 *
3578 * ASSUMES that the caller owns the PGM lock.
3579 *
3580 * @param pVM The VM handle.
3581 */
3582void pgmR3PhysRomTerm(PVM pVM)
3583{
3584#ifdef RT_STRICT
3585 /*
3586 * Free the heap copy of the original bits.
3587 */
3588 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3589 {
3590 if ( pRom->pvOriginal
3591 && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY))
3592 {
3593 RTMemFree((void *)pRom->pvOriginal);
3594 pRom->pvOriginal = NULL;
3595 }
3596 }
3597#endif
3598}
3599
3600
3601/**
3602 * Change the shadowing of a range of ROM pages.
3603 *
3604 * This is intended for implementing chipset specific memory registers
3605 * and will not be very strict about the input. It will silently ignore
3606 * any pages that are not the part of a shadowed ROM.
3607 *
3608 * @returns VBox status code.
3609 * @retval VINF_PGM_SYNC_CR3
3610 *
3611 * @param pVM The VM handle.
3612 * @param GCPhys Where to start. Page aligned.
3613 * @param cb How much to change. Page aligned.
3614 * @param enmProt The new ROM protection.
3615 */
3616VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt)
3617{
3618 /*
3619 * Check input
3620 */
3621 if (!cb)
3622 return VINF_SUCCESS;
3623 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3624 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3625 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
3626 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3627 AssertReturn(enmProt >= PGMROMPROT_INVALID && enmProt <= PGMROMPROT_END, VERR_INVALID_PARAMETER);
3628
3629 /*
3630 * Process the request.
3631 */
3632 pgmLock(pVM);
3633 int rc = VINF_SUCCESS;
3634 bool fFlushTLB = false;
3635 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3636 {
3637 if ( GCPhys <= pRom->GCPhysLast
3638 && GCPhysLast >= pRom->GCPhys
3639 && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))
3640 {
3641 /*
3642 * Iterate the relevant pages and make necessary the changes.
3643 */
3644 bool fChanges = false;
3645 uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast
3646 ? pRom->cb >> PAGE_SHIFT
3647 : (GCPhysLast - pRom->GCPhys + 1) >> PAGE_SHIFT;
3648 for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> PAGE_SHIFT;
3649 iPage < cPages;
3650 iPage++)
3651 {
3652 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
3653 if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt))
3654 {
3655 fChanges = true;
3656
3657 /* flush references to the page. */
3658 PPGMPAGE pRamPage = pgmPhysGetPage(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT));
3659 int rc2 = pgmPoolTrackUpdateGCPhys(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT), pRamPage,
3660 true /*fFlushPTEs*/, &fFlushTLB);
3661 if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2)))
3662 rc = rc2;
3663
3664 PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow;
3665 PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin;
3666
3667 *pOld = *pRamPage;
3668 *pRamPage = *pNew;
3669 /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */
3670 }
3671 pRomPage->enmProt = enmProt;
3672 }
3673
3674 /*
3675 * Reset the access handler if we made changes, no need
3676 * to optimize this.
3677 */
3678 if (fChanges)
3679 {
3680 int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys);
3681 if (RT_FAILURE(rc2))
3682 {
3683 pgmUnlock(pVM);
3684 AssertRC(rc);
3685 return rc2;
3686 }
3687 }
3688
3689 /* Advance - cb isn't updated. */
3690 GCPhys = pRom->GCPhys + (cPages << PAGE_SHIFT);
3691 }
3692 }
3693 pgmUnlock(pVM);
3694 if (fFlushTLB)
3695 PGM_INVL_ALL_VCPU_TLBS(pVM);
3696
3697 return rc;
3698}
3699
3700
3701/**
3702 * Sets the Address Gate 20 state.
3703 *
3704 * @param pVCpu The VCPU to operate on.
3705 * @param fEnable True if the gate should be enabled.
3706 * False if the gate should be disabled.
3707 */
3708VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable)
3709{
3710 LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled));
3711 if (pVCpu->pgm.s.fA20Enabled != fEnable)
3712 {
3713 pVCpu->pgm.s.fA20Enabled = fEnable;
3714 pVCpu->pgm.s.GCPhysA20Mask = ~(RTGCPHYS)(!fEnable << 20);
3715#ifdef VBOX_WITH_REM
3716 REMR3A20Set(pVCpu->pVMR3, pVCpu, fEnable);
3717#endif
3718 /** @todo we're not handling this correctly for VT-x / AMD-V. See #2911 */
3719#ifdef PGM_WITH_A20
3720 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL | PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL;
3721 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3722#endif
3723 }
3724}
3725
3726
3727/**
3728 * Tree enumeration callback for dealing with age rollover.
3729 * It will perform a simple compression of the current age.
3730 */
3731static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser)
3732{
3733 /* Age compression - ASSUMES iNow == 4. */
3734 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
3735 if (pChunk->iLastUsed >= UINT32_C(0xffffff00))
3736 pChunk->iLastUsed = 3;
3737 else if (pChunk->iLastUsed >= UINT32_C(0xfffff000))
3738 pChunk->iLastUsed = 2;
3739 else if (pChunk->iLastUsed)
3740 pChunk->iLastUsed = 1;
3741 else /* iLastUsed = 0 */
3742 pChunk->iLastUsed = 4;
3743
3744 NOREF(pvUser);
3745 return 0;
3746}
3747
3748
3749/**
3750 * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback().
3751 */
3752typedef struct PGMR3PHYSCHUNKUNMAPCB
3753{
3754 PVM pVM; /**< The VM handle. */
3755 PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */
3756} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB;
3757
3758
3759/**
3760 * Callback used to find the mapping that's been unused for
3761 * the longest time.
3762 */
3763static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser)
3764{
3765 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
3766 PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser;
3767
3768 /*
3769 * Check for locks and compare when last used.
3770 */
3771 if (pChunk->cRefs)
3772 return 0;
3773 if (pChunk->cPermRefs)
3774 return 0;
3775 if ( pArg->pChunk
3776 && pChunk->iLastUsed >= pArg->pChunk->iLastUsed)
3777 return 0;
3778
3779 /*
3780 * Check that it's not in any of the TLBs.
3781 */
3782 PVM pVM = pArg->pVM;
3783 if ( pVM->pgm.s.ChunkR3Map.Tlb.aEntries[PGM_CHUNKR3MAPTLB_IDX(pChunk->Core.Key)].idChunk
3784 == pChunk->Core.Key)
3785 {
3786 pChunk = NULL;
3787 return 0;
3788 }
3789#ifdef VBOX_STRICT
3790 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
3791 {
3792 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk != pChunk);
3793 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk != pChunk->Core.Key);
3794 }
3795#endif
3796
3797 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbHC.aEntries); i++)
3798 if (pVM->pgm.s.PhysTlbHC.aEntries[i].pMap == pChunk)
3799 return 0;
3800
3801 pArg->pChunk = pChunk;
3802 return 0;
3803}
3804
3805
3806/**
3807 * Finds a good candidate for unmapping when the ring-3 mapping cache is full.
3808 *
3809 * The candidate will not be part of any TLBs, so no need to flush
3810 * anything afterwards.
3811 *
3812 * @returns Chunk id.
3813 * @param pVM The VM handle.
3814 */
3815static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM)
3816{
3817 PGM_LOCK_ASSERT_OWNER(pVM);
3818
3819 /*
3820 * Enumerate the age tree starting with the left most node.
3821 */
3822 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3823 PGMR3PHYSCHUNKUNMAPCB Args;
3824 Args.pVM = pVM;
3825 Args.pChunk = NULL;
3826 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args);
3827 Assert(Args.pChunk);
3828 if (Args.pChunk)
3829 {
3830 Assert(Args.pChunk->cRefs == 0);
3831 Assert(Args.pChunk->cPermRefs == 0);
3832 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3833 return Args.pChunk->Core.Key;
3834 }
3835
3836 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3837 return INT32_MAX;
3838}
3839
3840
3841/**
3842 * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk
3843 *
3844 * This is only called on one of the EMTs while the other ones are waiting for
3845 * it to complete this function.
3846 *
3847 * @returns VINF_SUCCESS (VBox strict status code).
3848 * @param pVM The VM handle.
3849 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
3850 * @param pvUser User pointer. Unused
3851 *
3852 */
3853static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
3854{
3855 int rc = VINF_SUCCESS;
3856 pgmLock(pVM);
3857 NOREF(pVCpu); NOREF(pvUser);
3858
3859 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
3860 {
3861 /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */
3862 /** @todo also not really efficient to unmap a chunk that contains PD
3863 * or PT pages. */
3864 pgmR3PoolClearAllRendezvous(pVM, &pVM->aCpus[0], NULL /* no need to flush the REM TLB as we already did that above */);
3865
3866 /*
3867 * Request the ring-0 part to unmap a chunk to make space in the mapping cache.
3868 */
3869 GMMMAPUNMAPCHUNKREQ Req;
3870 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3871 Req.Hdr.cbReq = sizeof(Req);
3872 Req.pvR3 = NULL;
3873 Req.idChunkMap = NIL_GMM_CHUNKID;
3874 Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM);
3875 if (Req.idChunkUnmap != INT32_MAX)
3876 {
3877 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a);
3878 rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
3879 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a);
3880 if (RT_SUCCESS(rc))
3881 {
3882 /*
3883 * Remove the unmapped one.
3884 */
3885 PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap);
3886 AssertRelease(pUnmappedChunk);
3887 AssertRelease(!pUnmappedChunk->cRefs);
3888 AssertRelease(!pUnmappedChunk->cPermRefs);
3889 pUnmappedChunk->pv = NULL;
3890 pUnmappedChunk->Core.Key = UINT32_MAX;
3891#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
3892 MMR3HeapFree(pUnmappedChunk);
3893#else
3894 MMR3UkHeapFree(pVM, pUnmappedChunk, MM_TAG_PGM_CHUNK_MAPPING);
3895#endif
3896 pVM->pgm.s.ChunkR3Map.c--;
3897 pVM->pgm.s.cUnmappedChunks++;
3898
3899 /*
3900 * Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses).
3901 */
3902 /** todo: we should not flush chunks which include cr3 mappings. */
3903 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3904 {
3905 PPGMCPU pPGM = &pVM->aCpus[idCpu].pgm.s;
3906
3907 pPGM->pGst32BitPdR3 = NULL;
3908 pPGM->pGstPaePdptR3 = NULL;
3909 pPGM->pGstAmd64Pml4R3 = NULL;
3910#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
3911 pPGM->pGst32BitPdR0 = NIL_RTR0PTR;
3912 pPGM->pGstPaePdptR0 = NIL_RTR0PTR;
3913 pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR;
3914#endif
3915 for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++)
3916 {
3917 pPGM->apGstPaePDsR3[i] = NULL;
3918#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
3919 pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR;
3920#endif
3921 }
3922
3923 /* Flush REM TLBs. */
3924 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
3925 }
3926#ifdef VBOX_WITH_REM
3927 /* Flush REM translation blocks. */
3928 REMFlushTBs(pVM);
3929#endif
3930 }
3931 }
3932 }
3933 pgmUnlock(pVM);
3934 return rc;
3935}
3936
3937/**
3938 * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap)
3939 *
3940 * @returns VBox status code.
3941 * @param pVM The VM handle.
3942 */
3943void pgmR3PhysUnmapChunk(PVM pVM)
3944{
3945 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL);
3946 AssertRC(rc);
3947}
3948
3949
3950/**
3951 * Maps the given chunk into the ring-3 mapping cache.
3952 *
3953 * This will call ring-0.
3954 *
3955 * @returns VBox status code.
3956 * @param pVM The VM handle.
3957 * @param idChunk The chunk in question.
3958 * @param ppChunk Where to store the chunk tracking structure.
3959 *
3960 * @remarks Called from within the PGM critical section.
3961 * @remarks Can be called from any thread!
3962 */
3963int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
3964{
3965 int rc;
3966
3967 PGM_LOCK_ASSERT_OWNER(pVM);
3968
3969 /*
3970 * Move the chunk time forward.
3971 */
3972 pVM->pgm.s.ChunkR3Map.iNow++;
3973 if (pVM->pgm.s.ChunkR3Map.iNow == 0)
3974 {
3975 pVM->pgm.s.ChunkR3Map.iNow = 4;
3976 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, NULL);
3977 }
3978
3979 /*
3980 * Allocate a new tracking structure first.
3981 */
3982#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
3983 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk));
3984#else
3985 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3UkHeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk), NULL);
3986#endif
3987 AssertReturn(pChunk, VERR_NO_MEMORY);
3988 pChunk->Core.Key = idChunk;
3989 pChunk->iLastUsed = pVM->pgm.s.ChunkR3Map.iNow;
3990
3991 /*
3992 * Request the ring-0 part to map the chunk in question.
3993 */
3994 GMMMAPUNMAPCHUNKREQ Req;
3995 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3996 Req.Hdr.cbReq = sizeof(Req);
3997 Req.pvR3 = NULL;
3998 Req.idChunkMap = idChunk;
3999 Req.idChunkUnmap = NIL_GMM_CHUNKID;
4000
4001 /* Must be callable from any thread, so can't use VMMR3CallR0. */
4002 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a);
4003 rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
4004 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a);
4005 if (RT_SUCCESS(rc))
4006 {
4007 pChunk->pv = Req.pvR3;
4008
4009 /*
4010 * If we're running out of virtual address space, then we should
4011 * unmap another chunk.
4012 *
4013 * Currently, an unmap operation requires that all other virtual CPUs
4014 * are idling and not by chance making use of the memory we're
4015 * unmapping. So, we create an async unmap operation here.
4016 *
4017 * Now, when creating or restoring a saved state this wont work very
4018 * well since we may want to restore all guest RAM + a little something.
4019 * So, we have to do the unmap synchronously. Fortunately for us
4020 * though, during these operations the other virtual CPUs are inactive
4021 * and it should be safe to do this.
4022 */
4023 /** @todo Eventually we should lock all memory when used and do
4024 * map+unmap as one kernel call without any rendezvous or
4025 * other precautions. */
4026 if (pVM->pgm.s.ChunkR3Map.c + 1 >= pVM->pgm.s.ChunkR3Map.cMax)
4027 {
4028 switch (VMR3GetState(pVM))
4029 {
4030 case VMSTATE_LOADING:
4031 case VMSTATE_SAVING:
4032 {
4033 PVMCPU pVCpu = VMMGetCpu(pVM);
4034 if ( pVCpu
4035 && pVM->pgm.s.cDeprecatedPageLocks == 0)
4036 {
4037 pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL);
4038 break;
4039 }
4040 /* fall thru */
4041 }
4042 default:
4043 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
4044 AssertRC(rc);
4045 break;
4046 }
4047 }
4048
4049 /*
4050 * Update the tree. We must do this after any unmapping to make sure
4051 * the chunk we're going to return isn't unmapped by accident.
4052 */
4053 AssertPtr(Req.pvR3);
4054 bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core);
4055 AssertRelease(fRc);
4056 pVM->pgm.s.ChunkR3Map.c++;
4057 pVM->pgm.s.cMappedChunks++;
4058 }
4059 else
4060 {
4061 /** @todo this may fail because of /proc/sys/vm/max_map_count, so we
4062 * should probably restrict ourselves on linux. */
4063 AssertRC(rc);
4064#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
4065 MMR3HeapFree(pChunk);
4066#else
4067 MMR3UkHeapFree(pVM, pChunk, MM_TAG_PGM_CHUNK_MAPPING);
4068#endif
4069 pChunk = NULL;
4070 }
4071
4072 *ppChunk = pChunk;
4073 return rc;
4074}
4075
4076
4077/**
4078 * For VMMCALLRING3_PGM_MAP_CHUNK, considered internal.
4079 *
4080 * @returns see pgmR3PhysChunkMap.
4081 * @param pVM The VM handle.
4082 * @param idChunk The chunk to map.
4083 */
4084VMMR3DECL(int) PGMR3PhysChunkMap(PVM pVM, uint32_t idChunk)
4085{
4086 PPGMCHUNKR3MAP pChunk;
4087 int rc;
4088
4089 pgmLock(pVM);
4090 rc = pgmR3PhysChunkMap(pVM, idChunk, &pChunk);
4091 pgmUnlock(pVM);
4092 return rc;
4093}
4094
4095
4096/**
4097 * Invalidates the TLB for the ring-3 mapping cache.
4098 *
4099 * @param pVM The VM handle.
4100 */
4101VMMR3DECL(void) PGMR3PhysChunkInvalidateTLB(PVM pVM)
4102{
4103 pgmLock(pVM);
4104 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
4105 {
4106 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID;
4107 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL;
4108 }
4109 /* The page map TLB references chunks, so invalidate that one too. */
4110 pgmPhysInvalidatePageMapTLB(pVM);
4111 pgmUnlock(pVM);
4112}
4113
4114
4115/**
4116 * Response to VMMCALLRING3_PGM_ALLOCATE_LARGE_PAGE to allocate a large (2MB) page
4117 * for use with a nested paging PDE.
4118 *
4119 * @returns The following VBox status codes.
4120 * @retval VINF_SUCCESS on success.
4121 * @retval VINF_EM_NO_MEMORY if we're out of memory.
4122 *
4123 * @param pVM The VM handle.
4124 * @param GCPhys GC physical start address of the 2 MB range
4125 */
4126VMMR3DECL(int) PGMR3PhysAllocateLargeHandyPage(PVM pVM, RTGCPHYS GCPhys)
4127{
4128#ifdef PGM_WITH_LARGE_PAGES
4129 uint64_t u64TimeStamp1, u64TimeStamp2;
4130
4131 pgmLock(pVM);
4132
4133 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a);
4134 u64TimeStamp1 = RTTimeMilliTS();
4135 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_LARGE_HANDY_PAGE, 0, NULL);
4136 u64TimeStamp2 = RTTimeMilliTS();
4137 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a);
4138 if (RT_SUCCESS(rc))
4139 {
4140 Assert(pVM->pgm.s.cLargeHandyPages == 1);
4141
4142 uint32_t idPage = pVM->pgm.s.aLargeHandyPage[0].idPage;
4143 RTHCPHYS HCPhys = pVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys;
4144
4145 void *pv;
4146
4147 /* Map the large page into our address space.
4148 *
4149 * Note: assuming that within the 2 MB range:
4150 * - GCPhys + PAGE_SIZE = HCPhys + PAGE_SIZE (whole point of this exercise)
4151 * - user space mapping is continuous as well
4152 * - page id (GCPhys) + 1 = page id (GCPhys + PAGE_SIZE)
4153 */
4154 rc = pgmPhysPageMapByPageID(pVM, idPage, HCPhys, &pv);
4155 AssertLogRelMsg(RT_SUCCESS(rc), ("idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n", idPage, HCPhys, rc));
4156
4157 if (RT_SUCCESS(rc))
4158 {
4159 /*
4160 * Clear the pages.
4161 */
4162 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b);
4163 for (unsigned i = 0; i < _2M/PAGE_SIZE; i++)
4164 {
4165 ASMMemZeroPage(pv);
4166
4167 PPGMPAGE pPage;
4168 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
4169 AssertRC(rc);
4170
4171 Assert(PGM_PAGE_IS_ZERO(pPage));
4172 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatRZPageReplaceZero);
4173 pVM->pgm.s.cZeroPages--;
4174
4175 /*
4176 * Do the PGMPAGE modifications.
4177 */
4178 pVM->pgm.s.cPrivatePages++;
4179 PGM_PAGE_SET_HCPHYS(pVM, pPage, HCPhys);
4180 PGM_PAGE_SET_PAGEID(pVM, pPage, idPage);
4181 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED);
4182 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
4183 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
4184 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
4185
4186 /* Somewhat dirty assumption that page ids are increasing. */
4187 idPage++;
4188
4189 HCPhys += PAGE_SIZE;
4190 GCPhys += PAGE_SIZE;
4191
4192 pv = (void *)((uintptr_t)pv + PAGE_SIZE);
4193
4194 Log3(("PGMR3PhysAllocateLargePage: idPage=%#x HCPhys=%RGp\n", idPage, HCPhys));
4195 }
4196 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b);
4197
4198 /* Flush all TLBs. */
4199 PGM_INVL_ALL_VCPU_TLBS(pVM);
4200 pgmPhysInvalidatePageMapTLB(pVM);
4201 }
4202 pVM->pgm.s.cLargeHandyPages = 0;
4203 }
4204
4205 if (RT_SUCCESS(rc))
4206 {
4207 static uint32_t cTimeOut = 0;
4208 uint64_t u64TimeStampDelta = u64TimeStamp2 - u64TimeStamp1;
4209
4210 if (u64TimeStampDelta > 100)
4211 {
4212 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatLargePageOverflow);
4213 if ( ++cTimeOut > 10
4214 || u64TimeStampDelta > 1000 /* more than one second forces an early retirement from allocating large pages. */)
4215 {
4216 /* If repeated attempts to allocate a large page takes more than 100 ms, then we fall back to normal 4k pages.
4217 * E.g. Vista 64 tries to move memory around, which takes a huge amount of time.
4218 */
4219 LogRel(("PGMR3PhysAllocateLargePage: allocating large pages takes too long (last attempt %d ms; nr of timeouts %d); DISABLE\n", u64TimeStampDelta, cTimeOut));
4220 PGMSetLargePageUsage(pVM, false);
4221 }
4222 }
4223 else
4224 if (cTimeOut > 0)
4225 cTimeOut--;
4226 }
4227
4228 pgmUnlock(pVM);
4229 return rc;
4230#else
4231 return VERR_NOT_IMPLEMENTED;
4232#endif /* PGM_WITH_LARGE_PAGES */
4233}
4234
4235
4236/**
4237 * Response to VM_FF_PGM_NEED_HANDY_PAGES and VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES.
4238 *
4239 * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to
4240 * signal and clear the out of memory condition. When contracted, this API is
4241 * used to try clear the condition when the user wants to resume.
4242 *
4243 * @returns The following VBox status codes.
4244 * @retval VINF_SUCCESS on success. FFs cleared.
4245 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in
4246 * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY.
4247 *
4248 * @param pVM The VM handle.
4249 *
4250 * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing
4251 * in EM.cpp and shouldn't be propagated outside TRPM, HWACCM, EM and
4252 * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF
4253 * handler.
4254 */
4255VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM)
4256{
4257 pgmLock(pVM);
4258
4259 /*
4260 * Allocate more pages, noting down the index of the first new page.
4261 */
4262 uint32_t iClear = pVM->pgm.s.cHandyPages;
4263 AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_PGM_HANDY_PAGE_IPE);
4264 Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages)));
4265 int rcAlloc = VINF_SUCCESS;
4266 int rcSeed = VINF_SUCCESS;
4267 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
4268 while (rc == VERR_GMM_SEED_ME)
4269 {
4270 void *pvChunk;
4271 rcAlloc = rc = SUPR3PageAlloc(GMM_CHUNK_SIZE >> PAGE_SHIFT, &pvChunk);
4272 if (RT_SUCCESS(rc))
4273 {
4274 rcSeed = rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_SEED_CHUNK, (uintptr_t)pvChunk, NULL);
4275 if (RT_FAILURE(rc))
4276 SUPR3PageFree(pvChunk, GMM_CHUNK_SIZE >> PAGE_SHIFT);
4277 }
4278 if (RT_SUCCESS(rc))
4279 rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
4280 }
4281
4282 /* todo: we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */
4283 if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT
4284 && pVM->pgm.s.cHandyPages > 0)
4285 {
4286 /* Still handy pages left, so don't panic. */
4287 rc = VINF_SUCCESS;
4288 }
4289
4290 if (RT_SUCCESS(rc))
4291 {
4292 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
4293 Assert(pVM->pgm.s.cHandyPages > 0);
4294 VM_FF_CLEAR(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
4295 VM_FF_CLEAR(pVM, VM_FF_PGM_NO_MEMORY);
4296
4297#ifdef VBOX_STRICT
4298 uint32_t i;
4299 for (i = iClear; i < pVM->pgm.s.cHandyPages; i++)
4300 if ( pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID
4301 || pVM->pgm.s.aHandyPages[i].idSharedPage != NIL_GMM_PAGEID
4302 || (pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & PAGE_OFFSET_MASK))
4303 break;
4304 if (i != pVM->pgm.s.cHandyPages)
4305 {
4306 RTAssertMsg1Weak(NULL, __LINE__, __FILE__, __FUNCTION__);
4307 RTAssertMsg2Weak("i=%d iClear=%d cHandyPages=%d\n", i, iClear, pVM->pgm.s.cHandyPages);
4308 for (uint32_t j = iClear; j < pVM->pgm.s.cHandyPages; j++)
4309 RTAssertMsg2Add(("%03d: idPage=%d HCPhysGCPhys=%RHp idSharedPage=%d%\n", j,
4310 pVM->pgm.s.aHandyPages[j].idPage,
4311 pVM->pgm.s.aHandyPages[j].HCPhysGCPhys,
4312 pVM->pgm.s.aHandyPages[j].idSharedPage,
4313 j == i ? " <---" : ""));
4314 RTAssertPanic();
4315 }
4316#endif
4317 /*
4318 * Clear the pages.
4319 */
4320 while (iClear < pVM->pgm.s.cHandyPages)
4321 {
4322 PGMMPAGEDESC pPage = &pVM->pgm.s.aHandyPages[iClear];
4323 void *pv;
4324 rc = pgmPhysPageMapByPageID(pVM, pPage->idPage, pPage->HCPhysGCPhys, &pv);
4325 AssertLogRelMsgBreak(RT_SUCCESS(rc),
4326 ("%u/%u: idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n",
4327 iClear, pVM->pgm.s.cHandyPages, pPage->idPage, pPage->HCPhysGCPhys, rc));
4328 ASMMemZeroPage(pv);
4329 iClear++;
4330 Log3(("PGMR3PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
4331 }
4332 }
4333 else
4334 {
4335 uint64_t cAllocPages, cMaxPages, cBalloonPages;
4336
4337 /*
4338 * We should never get here unless there is a genuine shortage of
4339 * memory (or some internal error). Flag the error so the VM can be
4340 * suspended ASAP and the user informed. If we're totally out of
4341 * handy pages we will return failure.
4342 */
4343 /* Report the failure. */
4344 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc rcAlloc=%Rrc rcSeed=%Rrc cHandyPages=%#x\n"
4345 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
4346 rc, rcAlloc, rcSeed,
4347 pVM->pgm.s.cHandyPages,
4348 pVM->pgm.s.cAllPages,
4349 pVM->pgm.s.cPrivatePages,
4350 pVM->pgm.s.cSharedPages,
4351 pVM->pgm.s.cZeroPages));
4352
4353 if (GMMR3QueryMemoryStats(pVM, &cAllocPages, &cMaxPages, &cBalloonPages) == VINF_SUCCESS)
4354 {
4355 LogRel(("GMM: Statistics:\n"
4356 " Allocated pages: %RX64\n"
4357 " Maximum pages: %RX64\n"
4358 " Ballooned pages: %RX64\n", cAllocPages, cMaxPages, cBalloonPages));
4359 }
4360
4361 if ( rc != VERR_NO_MEMORY
4362 && rc != VERR_NO_PHYS_MEMORY
4363 && rc != VERR_LOCK_FAILED)
4364 {
4365 for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
4366 {
4367 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
4368 i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage,
4369 pVM->pgm.s.aHandyPages[i].idSharedPage));
4370 uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage;
4371 if (idPage != NIL_GMM_PAGEID)
4372 {
4373 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
4374 pRam;
4375 pRam = pRam->pNextR3)
4376 {
4377 uint32_t const cPages = pRam->cb >> PAGE_SHIFT;
4378 for (uint32_t iPage = 0; iPage < cPages; iPage++)
4379 if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage)
4380 LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n",
4381 pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc));
4382 }
4383 }
4384 }
4385 }
4386
4387 /* Set the FFs and adjust rc. */
4388 VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
4389 VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
4390 if ( rc == VERR_NO_MEMORY
4391 || rc == VERR_NO_PHYS_MEMORY
4392 || rc == VERR_LOCK_FAILED)
4393 rc = VINF_EM_NO_MEMORY;
4394 }
4395
4396 pgmUnlock(pVM);
4397 return rc;
4398}
4399
4400
4401/**
4402 * Frees the specified RAM page and replaces it with the ZERO page.
4403 *
4404 * This is used by ballooning, remapping MMIO2, RAM reset and state loading.
4405 *
4406 * @param pVM The VM handle.
4407 * @param pReq Pointer to the request.
4408 * @param pcPendingPages Where the number of pages waiting to be freed are
4409 * kept. This will normally be incremented.
4410 * @param pPage Pointer to the page structure.
4411 * @param GCPhys The guest physical address of the page, if applicable.
4412 *
4413 * @remarks The caller must own the PGM lock.
4414 */
4415int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys)
4416{
4417 /*
4418 * Assert sanity.
4419 */
4420 PGM_LOCK_ASSERT_OWNER(pVM);
4421 if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM
4422 && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW))
4423 {
4424 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
4425 return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage));
4426 }
4427
4428 /** @todo What about ballooning of large pages??! */
4429 Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE
4430 && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED);
4431
4432 if ( PGM_PAGE_IS_ZERO(pPage)
4433 || PGM_PAGE_IS_BALLOONED(pPage))
4434 return VINF_SUCCESS;
4435
4436 const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage);
4437 Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage));
4438 if (RT_UNLIKELY( idPage == NIL_GMM_PAGEID
4439 || idPage > GMM_PAGEID_LAST
4440 || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID))
4441 {
4442 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
4443 return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, pPage);
4444 }
4445
4446 /* update page count stats. */
4447 if (PGM_PAGE_IS_SHARED(pPage))
4448 pVM->pgm.s.cSharedPages--;
4449 else
4450 pVM->pgm.s.cPrivatePages--;
4451 pVM->pgm.s.cZeroPages++;
4452
4453 /* Deal with write monitored pages. */
4454 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
4455 {
4456 PGM_PAGE_SET_WRITTEN_TO(pVM, pPage);
4457 pVM->pgm.s.cWrittenToPages++;
4458 }
4459
4460 /*
4461 * pPage = ZERO page.
4462 */
4463 PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg);
4464 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
4465 PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID);
4466 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4467 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
4468 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
4469
4470 /* Flush physical page map TLB entry. */
4471 pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys);
4472
4473 /*
4474 * Make sure it's not in the handy page array.
4475 */
4476 for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
4477 {
4478 if (pVM->pgm.s.aHandyPages[i].idPage == idPage)
4479 {
4480 pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID;
4481 break;
4482 }
4483 if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage)
4484 {
4485 pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID;
4486 break;
4487 }
4488 }
4489
4490 /*
4491 * Push it onto the page array.
4492 */
4493 uint32_t iPage = *pcPendingPages;
4494 Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE);
4495 *pcPendingPages += 1;
4496
4497 pReq->aPages[iPage].idPage = idPage;
4498
4499 if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE)
4500 return VINF_SUCCESS;
4501
4502 /*
4503 * Flush the pages.
4504 */
4505 int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE);
4506 if (RT_SUCCESS(rc))
4507 {
4508 GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
4509 *pcPendingPages = 0;
4510 }
4511 return rc;
4512}
4513
4514
4515/**
4516 * Converts a GC physical address to a HC ring-3 pointer, with some
4517 * additional checks.
4518 *
4519 * @returns VBox status code.
4520 * @retval VINF_SUCCESS on success.
4521 * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write
4522 * access handler of some kind.
4523 * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all
4524 * accesses or is odd in any way.
4525 * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist.
4526 *
4527 * @param pVM The VM handle.
4528 * @param GCPhys The GC physical address to convert.
4529 * @param fWritable Whether write access is required.
4530 * @param ppv Where to store the pointer corresponding to GCPhys on
4531 * success.
4532 */
4533VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv)
4534{
4535 pgmLock(pVM);
4536
4537 PPGMRAMRANGE pRam;
4538 PPGMPAGE pPage;
4539 int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam);
4540 if (RT_SUCCESS(rc))
4541 {
4542 if (PGM_PAGE_IS_BALLOONED(pPage))
4543 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4544 else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage))
4545 rc = VINF_SUCCESS;
4546 else
4547 {
4548 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */
4549 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
4550 else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
4551 {
4552 /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work
4553 * in -norawr0 mode. */
4554 if (fWritable)
4555 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4556 }
4557 else
4558 {
4559 /* Temporarily disabled physical handler(s), since the recompiler
4560 doesn't get notified when it's reset we'll have to pretend it's
4561 operating normally. */
4562 if (pgmHandlerPhysicalIsAll(pVM, GCPhys))
4563 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
4564 else
4565 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4566 }
4567 }
4568 if (RT_SUCCESS(rc))
4569 {
4570 int rc2;
4571
4572 /* Make sure what we return is writable. */
4573 if (fWritable)
4574 switch (PGM_PAGE_GET_STATE(pPage))
4575 {
4576 case PGM_PAGE_STATE_ALLOCATED:
4577 break;
4578 case PGM_PAGE_STATE_BALLOONED:
4579 AssertFailed();
4580 break;
4581 case PGM_PAGE_STATE_ZERO:
4582 case PGM_PAGE_STATE_SHARED:
4583 if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
4584 break;
4585 case PGM_PAGE_STATE_WRITE_MONITORED:
4586 rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
4587 AssertLogRelRCReturn(rc2, rc2);
4588 break;
4589 }
4590
4591 /* Get a ring-3 mapping of the address. */
4592 PPGMPAGER3MAPTLBE pTlbe;
4593 rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
4594 AssertLogRelRCReturn(rc2, rc2);
4595 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
4596 /** @todo mapping/locking hell; this isn't horribly efficient since
4597 * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */
4598
4599 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv));
4600 }
4601 else
4602 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage));
4603
4604 /* else: handler catching all access, no pointer returned. */
4605 }
4606 else
4607 rc = VERR_PGM_PHYS_TLB_UNASSIGNED;
4608
4609 pgmUnlock(pVM);
4610 return rc;
4611}
4612
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette