VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp@ 107625

Last change on this file since 107625 was 107625, checked in by vboxsync, 9 days ago

VMM/VMMR3/PGMPhys.cpp: Fix unread variable parfait warning in PGMR3PhysMmio2GetMappingAddress() which would always return NIL_RTGCPHYS before (harmless as it isn't used anywhere), bugref:3409

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 250.6 KB
Line 
1/* $Id: PGMPhys.cpp 107625 2025-01-10 10:06:36Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Physical Memory Addressing.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_PHYS
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/iem.h>
36#include <VBox/vmm/iom.h>
37#include <VBox/vmm/mm.h>
38#include <VBox/vmm/nem.h>
39#include <VBox/vmm/stam.h>
40#include <VBox/vmm/pdmdev.h>
41#include "PGMInternal.h"
42#include <VBox/vmm/vmcc.h>
43
44#include "PGMInline.h"
45
46#include <VBox/sup.h>
47#include <VBox/param.h>
48#include <VBox/err.h>
49#include <VBox/log.h>
50#include <iprt/assert.h>
51#include <iprt/alloc.h>
52#include <iprt/asm.h>
53#ifdef VBOX_STRICT
54# include <iprt/crc.h>
55#endif
56#include <iprt/thread.h>
57#include <iprt/string.h>
58#include <iprt/system.h>
59
60
61/*********************************************************************************************************************************
62* Defined Constants And Macros *
63*********************************************************************************************************************************/
64/** The number of pages to free in one batch. */
65#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128
66
67
68
69/*********************************************************************************************************************************
70* Reading and Writing Guest Physical Memory *
71*********************************************************************************************************************************/
72
73/*
74 * PGMR3PhysReadU8-64
75 * PGMR3PhysWriteU8-64
 *
 * PGMPhysRWTmpl.h is included four times below, once per access width
 * (1, 2, 4 and 8 bytes).  Before each inclusion the four PGMPHYS* macros
 * select the read function name, the write function name, the data size in
 * bytes and the matching integer type.
 *
 * NOTE(review): the template header is not visible here; presumably it
 * defines the two functions named by PGMPHYSFN_READNAME / PGMPHYSFN_WRITENAME
 * and undefines the macros again so they can be redefined - confirm against
 * PGMPhysRWTmpl.h.
76 */
77#define PGMPHYSFN_READNAME PGMR3PhysReadU8
78#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8
79#define PGMPHYS_DATASIZE 1
80#define PGMPHYS_DATATYPE uint8_t
81#include "PGMPhysRWTmpl.h"
82
83#define PGMPHYSFN_READNAME PGMR3PhysReadU16
84#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16
85#define PGMPHYS_DATASIZE 2
86#define PGMPHYS_DATATYPE uint16_t
87#include "PGMPhysRWTmpl.h"
88
89#define PGMPHYSFN_READNAME PGMR3PhysReadU32
90#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32
91#define PGMPHYS_DATASIZE 4
92#define PGMPHYS_DATATYPE uint32_t
93#include "PGMPhysRWTmpl.h"
94
95#define PGMPHYSFN_READNAME PGMR3PhysReadU64
96#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64
97#define PGMPHYS_DATASIZE 8
98#define PGMPHYS_DATATYPE uint64_t
99#include "PGMPhysRWTmpl.h"
100
101
102/**
103 * EMT worker for PGMR3PhysReadExternal.
104 */
105static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead,
106 PGMACCESSORIGIN enmOrigin)
107{
108 VBOXSTRICTRC rcStrict = PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead, enmOrigin);
109 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
110 return VINF_SUCCESS;
111}
112
113
114/**
115 * Read from physical memory, external users.
116 *
117 * @returns VBox status code.
118 * @retval VINF_SUCCESS.
119 *
120 * @param pVM The cross context VM structure.
121 * @param GCPhys Physical address to read from.
122 * @param pvBuf Where to read into.
123 * @param cbRead How many bytes to read.
124 * @param enmOrigin Who is calling.
125 *
126 * @thread Any but EMTs.
127 */
128VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead, PGMACCESSORIGIN enmOrigin)
129{
130 VM_ASSERT_OTHER_THREAD(pVM);
131
132 AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS);
133 LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead));
134
135 PGM_LOCK_VOID(pVM);
136
137 /*
138 * Copy loop on ram ranges.
139 */
140 for (;;)
141 {
142 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
143
144 /* Inside range or not? */
145 if (pRam && GCPhys >= pRam->GCPhys)
146 {
147 /*
148 * Must work our way thru this page by page.
149 */
150 RTGCPHYS off = GCPhys - pRam->GCPhys;
151 while (off < pRam->cb)
152 {
153 unsigned iPage = off >> GUEST_PAGE_SHIFT;
154 PPGMPAGE pPage = &pRam->aPages[iPage];
155
156 /*
157 * If the page has an ALL access handler, we'll have to
158 * delegate the job to EMT.
159 */
160 if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
161 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
162 {
163 PGM_UNLOCK(pVM);
164
165 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 5,
166 pVM, &GCPhys, pvBuf, cbRead, enmOrigin);
167 }
168 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
169
170 /*
171 * Simple stuff, go ahead.
172 */
173 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
174 if (cb > cbRead)
175 cb = cbRead;
176 PGMPAGEMAPLOCK PgMpLck;
177 const void *pvSrc;
178 int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc, &PgMpLck);
179 if (RT_SUCCESS(rc))
180 {
181 memcpy(pvBuf, pvSrc, cb);
182 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
183 }
184 else
185 {
186 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n",
187 pRam->GCPhys + off, pPage, rc));
188 memset(pvBuf, 0xff, cb);
189 }
190
191 /* next page */
192 if (cb >= cbRead)
193 {
194 PGM_UNLOCK(pVM);
195 return VINF_SUCCESS;
196 }
197 cbRead -= cb;
198 off += cb;
199 GCPhys += cb;
200 pvBuf = (char *)pvBuf + cb;
201 } /* walk pages in ram range. */
202 }
203 else
204 {
205 LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead));
206
207 /*
208 * Unassigned address space.
209 */
210 size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0;
211 if (cb >= cbRead)
212 {
213 memset(pvBuf, 0xff, cbRead);
214 break;
215 }
216 memset(pvBuf, 0xff, cb);
217
218 cbRead -= cb;
219 pvBuf = (char *)pvBuf + cb;
220 GCPhys += cb;
221 }
222 } /* Ram range walk */
223
224 PGM_UNLOCK(pVM);
225
226 return VINF_SUCCESS;
227}
228
229
230/**
231 * EMT worker for PGMR3PhysWriteExternal.
232 */
233static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite,
234 PGMACCESSORIGIN enmOrigin)
235{
236 /** @todo VERR_EM_NO_MEMORY */
237 VBOXSTRICTRC rcStrict = PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite, enmOrigin);
238 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
239 return VINF_SUCCESS;
240}
241
242
243/**
244 * Write to physical memory, external users.
245 *
246 * @returns VBox status code.
247 * @retval VINF_SUCCESS.
248 * @retval VERR_EM_NO_MEMORY.
249 *
250 * @param pVM The cross context VM structure.
251 * @param GCPhys Physical address to write to.
252 * @param pvBuf What to write.
253 * @param cbWrite How many bytes to write.
254 * @param enmOrigin Who is calling.
255 *
256 * @thread Any but EMTs.
257 */
258VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, PGMACCESSORIGIN enmOrigin)
259{
260 VM_ASSERT_OTHER_THREAD(pVM);
261
262 AssertMsg(!pVM->pgm.s.fNoMorePhysWrites,
263 ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x enmOrigin=%d\n",
264 GCPhys, cbWrite, enmOrigin));
265 AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS);
266 LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite));
267
268 PGM_LOCK_VOID(pVM);
269
270 /*
271 * Copy loop on ram ranges, stop when we hit something difficult.
272 */
273 for (;;)
274 {
275 PPGMRAMRANGE const pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
276
277 /* Inside range or not? */
278 if (pRam && GCPhys >= pRam->GCPhys)
279 {
280 /*
281 * Must work our way thru this page by page.
282 */
283 RTGCPTR off = GCPhys - pRam->GCPhys;
284 while (off < pRam->cb)
285 {
286 RTGCPTR iPage = off >> GUEST_PAGE_SHIFT;
287 PPGMPAGE pPage = &pRam->aPages[iPage];
288
289 /*
290 * Is the page problematic, we have to do the work on the EMT.
291 *
292 * Allocating writable pages and access handlers are
293 * problematic, write monitored pages are simple and can be
294 * dealt with here.
295 */
296 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
297 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
298 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
299 {
300 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
301 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
302 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
303 else
304 {
305 PGM_UNLOCK(pVM);
306
307 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 5,
308 pVM, &GCPhys, pvBuf, cbWrite, enmOrigin);
309 }
310 }
311 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
312
313 /*
314 * Simple stuff, go ahead.
315 */
316 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
317 if (cb > cbWrite)
318 cb = cbWrite;
319 PGMPAGEMAPLOCK PgMpLck;
320 void *pvDst;
321 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst, &PgMpLck);
322 if (RT_SUCCESS(rc))
323 {
324 memcpy(pvDst, pvBuf, cb);
325 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
326 }
327 else
328 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n",
329 pRam->GCPhys + off, pPage, rc));
330
331 /* next page */
332 if (cb >= cbWrite)
333 {
334 PGM_UNLOCK(pVM);
335 return VINF_SUCCESS;
336 }
337
338 cbWrite -= cb;
339 off += cb;
340 GCPhys += cb;
341 pvBuf = (const char *)pvBuf + cb;
342 } /* walk pages in ram range */
343 }
344 else
345 {
346 /*
347 * Unassigned address space, skip it.
348 */
349 if (!pRam)
350 break;
351 size_t cb = pRam->GCPhys - GCPhys;
352 if (cb >= cbWrite)
353 break;
354 cbWrite -= cb;
355 pvBuf = (const char *)pvBuf + cb;
356 GCPhys += cb;
357 }
358 } /* Ram range walk */
359
360 PGM_UNLOCK(pVM);
361 return VINF_SUCCESS;
362}
363
364
365/*********************************************************************************************************************************
366* Mapping Guest Physical Memory *
367*********************************************************************************************************************************/
368
369/**
370 * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable.
371 *
372 * @returns see PGMR3PhysGCPhys2CCPtrExternal
373 * @param pVM The cross context VM structure.
374 * @param pGCPhys Pointer to the guest physical address.
375 * @param ppv Where to store the mapping address.
376 * @param pLock Where to store the lock.
377 */
378static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
379{
380 /*
381 * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with
382 * an access handler after it succeeds.
383 */
384 int rc = PGM_LOCK(pVM);
385 AssertRCReturn(rc, rc);
386
387 rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock);
388 if (RT_SUCCESS(rc))
389 {
390 PPGMPAGEMAPTLBE pTlbe;
391 int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe);
392 AssertFatalRC(rc2);
393 PPGMPAGE pPage = pTlbe->pPage;
394 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
395 {
396 PGMPhysReleasePageMappingLock(pVM, pLock);
397 rc = VERR_PGM_PHYS_PAGE_RESERVED;
398 }
399 else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
400#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
401 || pgmPoolIsDirtyPage(pVM, *pGCPhys)
402#endif
403 )
404 {
405 /* We *must* flush any corresponding pgm pool page here, otherwise we'll
406 * not be informed about writes and keep bogus gst->shw mappings around.
407 */
408 pgmPoolFlushPageByGCPhys(pVM, *pGCPhys);
409 Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage));
410 /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has
411 * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */
412 }
413 }
414
415 PGM_UNLOCK(pVM);
416 return rc;
417}
418
419
420/**
421 * Requests the mapping of a guest page into ring-3, external threads.
422 *
423 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
424 * release it.
425 *
426 * This API will assume your intention is to write to the page, and will
427 * therefore replace shared and zero pages. If you do not intend to modify the
428 * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API.
429 *
430 * @returns VBox status code.
431 * @retval VINF_SUCCESS on success.
432 * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid page but has no physical
433 * backing or if the page has any active access handlers. The caller
434 * must fall back on using PGMR3PhysWriteExternal.
435 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
436 *
437 * @param pVM The cross context VM structure.
438 * @param GCPhys The guest physical address of the page that should be mapped.
439 * @param ppv Where to store the address corresponding to GCPhys.
440 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
441 *
442 * @remark Avoid calling this API from within critical sections (other than the
443 * PGM one) because of the deadlock risk when we have to delegating the
444 * task to an EMT.
445 * @thread Any.
446 */
447VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
448{
449 AssertPtr(ppv);
450 AssertPtr(pLock);
451
452 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
453
454 int rc = PGM_LOCK(pVM);
455 AssertRCReturn(rc, rc);
456
457 /*
458 * Query the Physical TLB entry for the page (may fail).
459 */
460 PPGMPAGEMAPTLBE pTlbe;
461 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
462 if (RT_SUCCESS(rc))
463 {
464 PPGMPAGE pPage = pTlbe->pPage;
465 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
466 rc = VERR_PGM_PHYS_PAGE_RESERVED;
467 else
468 {
469 /*
470 * If the page is shared, the zero page, or being write monitored
471 * it must be converted to an page that's writable if possible.
472 * We can only deal with write monitored pages here, the rest have
473 * to be on an EMT.
474 */
475 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
476 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
477#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
478 || pgmPoolIsDirtyPage(pVM, GCPhys)
479#endif
480 )
481 {
482 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
483 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
484#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
485 && !pgmPoolIsDirtyPage(pVM, GCPhys) /** @todo we're very likely doing this twice. */
486#endif
487 )
488 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
489 else
490 {
491 PGM_UNLOCK(pVM);
492
493 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
494 pVM, &GCPhys, ppv, pLock);
495 }
496 }
497
498 /*
499 * Now, just perform the locking and calculate the return address.
500 */
501 PPGMPAGEMAP pMap = pTlbe->pMap;
502 if (pMap)
503 pMap->cRefs++;
504
505 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
506 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
507 {
508 if (cLocks == 0)
509 pVM->pgm.s.cWriteLockedPages++;
510 PGM_PAGE_INC_WRITE_LOCKS(pPage);
511 }
512 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
513 {
514 PGM_PAGE_INC_WRITE_LOCKS(pPage);
515 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
516 if (pMap)
517 pMap->cRefs++; /* Extra ref to prevent it from going away. */
518 }
519
520 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
521 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
522 pLock->pvMap = pMap;
523 }
524 }
525
526 PGM_UNLOCK(pVM);
527 return rc;
528}
529
530
531/**
532 * Requests the mapping of a guest page into ring-3, external threads.
533 *
534 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
535 * release it.
536 *
537 * @returns VBox status code.
538 * @retval VINF_SUCCESS on success.
539 * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid page but has no physical
540 * backing or if the page as an active ALL access handler. The caller
541 * must fall back on using PGMPhysRead.
542 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
543 *
544 * @param pVM The cross context VM structure.
545 * @param GCPhys The guest physical address of the page that should be mapped.
546 * @param ppv Where to store the address corresponding to GCPhys.
547 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
548 *
549 * @remark Avoid calling this API from within critical sections (other than
550 * the PGM one) because of the deadlock risk.
551 * @thread Any.
552 */
553VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock)
554{
555 int rc = PGM_LOCK(pVM);
556 AssertRCReturn(rc, rc);
557
558 /*
559 * Query the Physical TLB entry for the page (may fail).
560 */
561 PPGMPAGEMAPTLBE pTlbe;
562 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
563 if (RT_SUCCESS(rc))
564 {
565 PPGMPAGE pPage = pTlbe->pPage;
566#if 1
567 /* MMIO pages doesn't have any readable backing. */
568 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
569 rc = VERR_PGM_PHYS_PAGE_RESERVED;
570#else
571 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
572 rc = VERR_PGM_PHYS_PAGE_RESERVED;
573#endif
574 else
575 {
576 /*
577 * Now, just perform the locking and calculate the return address.
578 */
579 PPGMPAGEMAP pMap = pTlbe->pMap;
580 if (pMap)
581 pMap->cRefs++;
582
583 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
584 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
585 {
586 if (cLocks == 0)
587 pVM->pgm.s.cReadLockedPages++;
588 PGM_PAGE_INC_READ_LOCKS(pPage);
589 }
590 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
591 {
592 PGM_PAGE_INC_READ_LOCKS(pPage);
593 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage));
594 if (pMap)
595 pMap->cRefs++; /* Extra ref to prevent it from going away. */
596 }
597
598 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
599 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
600 pLock->pvMap = pMap;
601 }
602 }
603
604 PGM_UNLOCK(pVM);
605 return rc;
606}
607
608
609/**
610 * Requests the mapping of multiple guest page into ring-3, external threads.
611 *
612 * When you're done with the pages, call PGMPhysBulkReleasePageMappingLock()
613 * ASAP to release them.
614 *
615 * This API will assume your intention is to write to the pages, and will
616 * therefore replace shared and zero pages. If you do not intend to modify the
617 * pages, use the PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal() API.
618 *
619 * @returns VBox status code.
620 * @retval VINF_SUCCESS on success.
621 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
622 * backing or if any of the pages the page has any active access
623 * handlers. The caller must fall back on using PGMR3PhysWriteExternal.
624 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
625 * an invalid physical address.
626 *
627 * @param pVM The cross context VM structure.
628 * @param cPages Number of pages to lock.
629 * @param paGCPhysPages The guest physical address of the pages that
630 * should be mapped (@a cPages entries).
631 * @param papvPages Where to store the ring-3 mapping addresses
632 * corresponding to @a paGCPhysPages.
633 * @param paLocks Where to store the locking information that
634 * pfnPhysBulkReleasePageMappingLock needs (@a cPages
635 * in length).
636 *
637 * @remark Avoid calling this API from within critical sections (other than the
638 * PGM one) because of the deadlock risk when we have to delegating the
639 * task to an EMT.
640 * @thread Any.
641 */
642VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
643 void **papvPages, PPGMPAGEMAPLOCK paLocks)
644{
645 Assert(cPages > 0);
646 AssertPtr(papvPages);
647 AssertPtr(paLocks);
648
649 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
650
651 int rc = PGM_LOCK(pVM);
652 AssertRCReturn(rc, rc);
653
654 /*
655 * Lock the pages one by one.
656 * The loop body is similar to PGMR3PhysGCPhys2CCPtrExternal.
657 */
658 int32_t cNextYield = 128;
659 uint32_t iPage;
660 for (iPage = 0; iPage < cPages; iPage++)
661 {
662 if (--cNextYield > 0)
663 { /* likely */ }
664 else
665 {
666 PGM_UNLOCK(pVM);
667 ASMNopPause();
668 PGM_LOCK_VOID(pVM);
669 cNextYield = 128;
670 }
671
672 /*
673 * Query the Physical TLB entry for the page (may fail).
674 */
675 PPGMPAGEMAPTLBE pTlbe;
676 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
677 if (RT_SUCCESS(rc))
678 { }
679 else
680 break;
681 PPGMPAGE pPage = pTlbe->pPage;
682
683 /*
684 * No MMIO or active access handlers.
685 */
686 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
687 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
688 { }
689 else
690 {
691 rc = VERR_PGM_PHYS_PAGE_RESERVED;
692 break;
693 }
694
695 /*
696 * The page must be in the allocated state and not be a dirty pool page.
697 * We can handle converting a write monitored page to an allocated one, but
698 * anything more complicated must be delegated to an EMT.
699 */
700 bool fDelegateToEmt = false;
701 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
702#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
703 fDelegateToEmt = pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]);
704#else
705 fDelegateToEmt = false;
706#endif
707 else if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
708 {
709#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
710 if (!pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]))
711 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, paGCPhysPages[iPage]);
712 else
713 fDelegateToEmt = true;
714#endif
715 }
716 else
717 fDelegateToEmt = true;
718 if (!fDelegateToEmt)
719 { }
720 else
721 {
722 /* We could do this delegation in bulk, but considered too much work vs gain. */
723 PGM_UNLOCK(pVM);
724 rc = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
725 pVM, &paGCPhysPages[iPage], &papvPages[iPage], &paLocks[iPage]);
726 PGM_LOCK_VOID(pVM);
727 if (RT_FAILURE(rc))
728 break;
729 cNextYield = 128;
730 }
731
732 /*
733 * Now, just perform the locking and address calculation.
734 */
735 PPGMPAGEMAP pMap = pTlbe->pMap;
736 if (pMap)
737 pMap->cRefs++;
738
739 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
740 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
741 {
742 if (cLocks == 0)
743 pVM->pgm.s.cWriteLockedPages++;
744 PGM_PAGE_INC_WRITE_LOCKS(pPage);
745 }
746 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
747 {
748 PGM_PAGE_INC_WRITE_LOCKS(pPage);
749 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", paGCPhysPages[iPage], pPage));
750 if (pMap)
751 pMap->cRefs++; /* Extra ref to prevent it from going away. */
752 }
753
754 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
755 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
756 paLocks[iPage].pvMap = pMap;
757 }
758
759 PGM_UNLOCK(pVM);
760
761 /*
762 * On failure we must unlock any pages we managed to get already.
763 */
764 if (RT_FAILURE(rc) && iPage > 0)
765 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
766
767 return rc;
768}
769
770
771/**
772 * Requests the mapping of multiple guest page into ring-3, for reading only,
773 * external threads.
774 *
775 * When you're done with the pages, call PGMPhysReleasePageMappingLock() ASAP
776 * to release them.
777 *
778 * @returns VBox status code.
779 * @retval VINF_SUCCESS on success.
780 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
781 * backing or if any of the pages the page has an active ALL access
782 * handler. The caller must fall back on using PGMR3PhysWriteExternal.
783 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
784 * an invalid physical address.
785 *
786 * @param pVM The cross context VM structure.
787 * @param cPages Number of pages to lock.
788 * @param paGCPhysPages The guest physical address of the pages that
789 * should be mapped (@a cPages entries).
790 * @param papvPages Where to store the ring-3 mapping addresses
791 * corresponding to @a paGCPhysPages.
792 * @param paLocks Where to store the lock information that
793 * pfnPhysReleasePageMappingLock needs (@a cPages
794 * in length).
795 *
796 * @remark Avoid calling this API from within critical sections (other than
797 * the PGM one) because of the deadlock risk.
798 * @thread Any.
799 */
800VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
801 void const **papvPages, PPGMPAGEMAPLOCK paLocks)
802{
803 Assert(cPages > 0);
804 AssertPtr(papvPages);
805 AssertPtr(paLocks);
806
807 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
808
809 int rc = PGM_LOCK(pVM);
810 AssertRCReturn(rc, rc);
811
812 /*
813 * Lock the pages one by one.
814 * The loop body is similar to PGMR3PhysGCPhys2CCPtrReadOnlyExternal.
815 */
816 int32_t cNextYield = 256;
817 uint32_t iPage;
818 for (iPage = 0; iPage < cPages; iPage++)
819 {
820 if (--cNextYield > 0)
821 { /* likely */ }
822 else
823 {
824 PGM_UNLOCK(pVM);
825 ASMNopPause();
826 PGM_LOCK_VOID(pVM);
827 cNextYield = 256;
828 }
829
830 /*
831 * Query the Physical TLB entry for the page (may fail).
832 */
833 PPGMPAGEMAPTLBE pTlbe;
834 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
835 if (RT_SUCCESS(rc))
836 { }
837 else
838 break;
839 PPGMPAGE pPage = pTlbe->pPage;
840
841 /*
842 * No MMIO or active all access handlers, everything else can be accessed.
843 */
844 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
845 && !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
846 { }
847 else
848 {
849 rc = VERR_PGM_PHYS_PAGE_RESERVED;
850 break;
851 }
852
853 /*
854 * Now, just perform the locking and address calculation.
855 */
856 PPGMPAGEMAP pMap = pTlbe->pMap;
857 if (pMap)
858 pMap->cRefs++;
859
860 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
861 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
862 {
863 if (cLocks == 0)
864 pVM->pgm.s.cReadLockedPages++;
865 PGM_PAGE_INC_READ_LOCKS(pPage);
866 }
867 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
868 {
869 PGM_PAGE_INC_READ_LOCKS(pPage);
870 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", paGCPhysPages[iPage], pPage));
871 if (pMap)
872 pMap->cRefs++; /* Extra ref to prevent it from going away. */
873 }
874
875 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
876 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
877 paLocks[iPage].pvMap = pMap;
878 }
879
880 PGM_UNLOCK(pVM);
881
882 /*
883 * On failure we must unlock any pages we managed to get already.
884 */
885 if (RT_FAILURE(rc) && iPage > 0)
886 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
887
888 return rc;
889}
890
891
892/**
893 * Converts a GC physical address to a HC ring-3 pointer, with some
894 * additional checks.
895 *
896 * @returns VBox status code.
897 * @retval VINF_SUCCESS on success.
898 * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write
899 * access handler of some kind.
900 * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all
901 * accesses or is odd in any way.
902 * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist.
903 *
904 * @param pVM The cross context VM structure.
905 * @param GCPhys The GC physical address to convert. Since this is only
906 * used for filling the REM TLB, the A20 mask must be
907 * applied before calling this API.
908 * @param fWritable Whether write access is required.
909 * @param ppv Where to store the pointer corresponding to GCPhys on
910 * success.
911 */
912VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv)
913{
914 PGM_LOCK_VOID(pVM);
915 PGM_A20_ASSERT_MASKED(VMMGetCpu(pVM), GCPhys);
916
917 PPGMRAMRANGE pRam;
918 PPGMPAGE pPage;
919 int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam);
920 if (RT_SUCCESS(rc))
921 {
922 if (PGM_PAGE_IS_BALLOONED(pPage))
923 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
924 else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage))
925 rc = VINF_SUCCESS;
926 else
927 {
928 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */
929 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
930 else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
931 {
932 /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work
933 * in -norawr0 mode. */
934 if (fWritable)
935 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
936 }
937 else
938 {
939 /* Temporarily disabled physical handler(s), since the recompiler
940 doesn't get notified when it's reset we'll have to pretend it's
941 operating normally. */
942 if (pgmHandlerPhysicalIsAll(pVM, GCPhys))
943 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
944 else
945 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
946 }
947 }
948 if (RT_SUCCESS(rc))
949 {
950 int rc2;
951
952 /* Make sure what we return is writable. */
953 if (fWritable)
954 switch (PGM_PAGE_GET_STATE(pPage))
955 {
956 case PGM_PAGE_STATE_ALLOCATED:
957 break;
958 case PGM_PAGE_STATE_BALLOONED:
959 AssertFailed();
960 break;
961 case PGM_PAGE_STATE_ZERO:
962 case PGM_PAGE_STATE_SHARED:
963 if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
964 break;
965 RT_FALL_THRU();
966 case PGM_PAGE_STATE_WRITE_MONITORED:
967 rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
968 AssertLogRelRCReturn(rc2, rc2);
969 break;
970 }
971
972 /* Get a ring-3 mapping of the address. */
973 PPGMPAGER3MAPTLBE pTlbe;
974 rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
975 AssertLogRelRCReturn(rc2, rc2);
976 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
977 /** @todo mapping/locking hell; this isn't horribly efficient since
978 * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */
979
980 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv));
981 }
982 else
983 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage));
984
985 /* else: handler catching all access, no pointer returned. */
986 }
987 else
988 rc = VERR_PGM_PHYS_TLB_UNASSIGNED;
989
990 PGM_UNLOCK(pVM);
991 return rc;
992}
993
994
995
996/*********************************************************************************************************************************
997* RAM Range Management *
998*********************************************************************************************************************************/
999
1000/**
1001 * Given the range @a GCPhys thru @a GCPhysLast, find overlapping RAM range or
1002 * the correct insertion point.
1003 *
1004 * @returns Pointer to overlapping RAM range if found, NULL if not.
1005 * @param pVM The cross context VM structure.
1006 * @param GCPhys The address of the first byte in the range.
1007 * @param GCPhysLast The address of the last byte in the range.
1008 * @param pidxInsert Where to return the lookup table index to insert the
1009 * range at when returning NULL. Set to UINT32_MAX when
1010 * returning the pointer to an overlapping range.
1011 * @note Caller must own the PGM lock.
1012 */
1013static PPGMRAMRANGE pgmR3PhysRamRangeFindOverlapping(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, uint32_t *pidxInsert)
1014{
1015 PGM_LOCK_ASSERT_OWNER(pVM);
1016 uint32_t iStart = 0;
1017 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1018 for (;;)
1019 {
1020 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1021 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1022 if (GCPhysLast < GCPhysEntryFirst)
1023 {
1024 if (idxLookup > iStart)
1025 iEnd = idxLookup;
1026 else
1027 {
1028 *pidxInsert = idxLookup;
1029 return NULL;
1030 }
1031 }
1032 else
1033 {
1034 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1035 if (GCPhys > GCPhysEntryLast)
1036 {
1037 idxLookup += 1;
1038 if (idxLookup < iEnd)
1039 iStart = idxLookup;
1040 else
1041 {
1042 *pidxInsert = idxLookup;
1043 return NULL;
1044 }
1045 }
1046 else
1047 {
1048 /* overlap */
1049 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1050 *pidxInsert = UINT32_MAX;
1051 return pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])];
1052 }
1053 }
1054 }
1055}
1056
1057
1058/**
1059 * Given the range @a GCPhys thru @a GCPhysLast, find the lookup table entry
1060 * that's overlapping it.
1061 *
1062 * @returns The lookup table index of the overlapping entry, UINT32_MAX if not
1063 * found.
1064 * @param pVM The cross context VM structure.
1065 * @param GCPhys The address of the first byte in the range.
1066 * @param GCPhysLast The address of the last byte in the range.
1067 * @note Caller must own the PGM lock.
1068 */
1069static uint32_t pgmR3PhysRamRangeFindOverlappingIndex(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast)
1070{
1071 PGM_LOCK_ASSERT_OWNER(pVM);
1072 uint32_t iStart = 0;
1073 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1074 for (;;)
1075 {
1076 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1077 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1078 if (GCPhysLast < GCPhysEntryFirst)
1079 {
1080 if (idxLookup > iStart)
1081 iEnd = idxLookup;
1082 else
1083 return UINT32_MAX;
1084 }
1085 else
1086 {
1087 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1088 if (GCPhys > GCPhysEntryLast)
1089 {
1090 idxLookup += 1;
1091 if (idxLookup < iEnd)
1092 iStart = idxLookup;
1093 else
1094 return UINT32_MAX;
1095 }
1096 else
1097 {
1098 /* overlap */
1099 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1100 return idxLookup;
1101 }
1102 }
1103 }
1104}
1105
1106
1107/**
1108 * Insert @a pRam into the lookup table.
 *
 * Assigns @a GCPhys to the range, moves the lookup entries at and after the
 * insertion point up by one slot, writes the new entry, and finally stores the
 * new entry count together with the next generation number in one 64-bit
 * write.  All checks that can fail are done before anything is modified.
1109 *
1110 * @returns VBox status code.
 * @retval VERR_ALREADY_EXISTS if GCPhys / GCPhysLast of @a pRam are not both
 *         NIL_RTGCPHYS on entry.
 * @retval VERR_PGM_RAM_CONFLICT if the search finds an entry overlapping the
 *         new address range.
1111 * @param pVM The cross context VM structure.
1112 * @param pRam The RAM range to insert into the lookup table.
1113 * @param GCPhys The new mapping address to assign @a pRam on insertion.
1114 * @param pidxLookup Optional lookup table hint. This is updated.
 *                   On success it is set to the index following the new entry.
1115 * @note Caller must own PGM lock.
1116 */
1117static int pgmR3PhysRamRangeInsertLookup(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, uint32_t *pidxLookup)
1118{
1119 PGM_LOCK_ASSERT_OWNER(pVM);
1120#ifdef DEBUG_bird
1121 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, true /*fRamRelaxed*/);
1122#endif
1123 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
 /* The range must not have an address assigned yet. */
1124 AssertLogRelMsgReturn( pRam->GCPhys == NIL_RTGCPHYS
1125 && pRam->GCPhysLast == NIL_RTGCPHYS,
1126 ("GCPhys=%RGp; range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1127 GCPhys, pRam->GCPhys, pRam->cb, pRam->GCPhysLast, pRam->pszDesc),
1128 VERR_ALREADY_EXISTS);
1129 uint32_t const idRamRange = pRam->idRange;
1130 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_2);
1131
 /* The address must be guest page aligned and the range must not wrap. */
1132 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1133 RTGCPHYS const GCPhysLast = GCPhys + pRam->cb - 1U;
1134 AssertReturn(GCPhysLast > GCPhys, VERR_INTERNAL_ERROR_4);
1135 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1136
1137 /*
1138 * Find the lookup table location if necessary.
1139 */
1140 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1141 AssertLogRelMsgReturn(cLookupEntries + 1 < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1142 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1143
1144 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1145 if (cLookupEntries == 0)
1146 idxLookup = 0; /* special case: empty table */
1147 else
1148 {
 /* The hint is rejected when it lies beyond the end of the table, when the
    entry before it ends at or after GCPhys, or when the entry at it starts
    before GCPhysLast.  A binary search is done in that case. */
1149 if ( idxLookup > cLookupEntries
1150 || ( idxLookup != 0
1151 && pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast >= GCPhys)
1152 || ( idxLookup < cLookupEntries
1153 && PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]) < GCPhysLast))
1154 {
1155 PPGMRAMRANGE pOverlapping = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxLookup);
1156 AssertLogRelMsgReturn(!pOverlapping,
1157 ("GCPhys=%RGp; GCPhysLast=%RGp %s - overlaps %RGp...%RGp %s\n",
1158 GCPhys, GCPhysLast, pRam->pszDesc,
1159 pOverlapping->GCPhys, pOverlapping->GCPhysLast, pOverlapping->pszDesc),
1160 VERR_PGM_RAM_CONFLICT);
1161 AssertLogRelMsgReturn(idxLookup <= cLookupEntries, ("%#x vs %#x\n", idxLookup, cLookupEntries), VERR_INTERNAL_ERROR_5);
1162 }
1163 /* else we've got a good hint. */
1164 }
1165
1166 /*
1167 * Do the actual job.
1168 *
1169 * The moving of existing table entries is done in a way that allows other
1170 * EMTs to perform concurrent lookups with the updating.
 *
 * fUseAtomic is false (plain stores are used) while the VM is in the
 * creating state, when there is only one virtual CPU, and - on AMD64 hosts -
 * when g_CpumHostFeatures.s.fCmpXchg16b is clear.
1171 */
1172 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1173 && pVM->cCpus > 1
1174#ifdef RT_ARCH_AMD64
1175 && g_CpumHostFeatures.s.fCmpXchg16b
1176#endif
1177 ;
1178
1179 /* Signal that we're modifying the lookup table:
    The '| 1' forces the generation stored here to be odd; the final update
    at the end of the function stores idGeneration + 1.  (Presumably the
    lockless readers take an odd generation to mean "update in progress" -
    the reader side is not visible here, TODO confirm.) */
1180 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1181 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1182
1183 /* Update the RAM range entry. */
1184 pRam->GCPhys = GCPhys;
1185 pRam->GCPhysLast = GCPhysLast;
1186
1187 /* Do we need to shift any lookup table entries? */
1188 if (idxLookup != cLookupEntries)
1189 {
1190 /* We do. Make a copy of the final entry first.
    (It goes into the slot just past the current end of the table.) */
1191 uint32_t cToMove = cLookupEntries - idxLookup;
1192 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[cLookupEntries];
1193 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1194 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1195
1196 /* Then increase the table size. This will ensure that anyone starting
1197 a search from here on should have consistent data. */
1198 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.cLookupEntries, cLookupEntries + 1);
1199
1200 /* Transfer the rest of the entries.
    (Working downwards from the end, each slot receives a copy of the entry
    below it; the final entry was already handled above, hence the -1.) */
1201 cToMove -= 1;
1202 if (cToMove > 0)
1203 {
1204 if (!fUseAtomic)
1205 do
1206 {
1207 pCur -= 1;
1208 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1209 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1210 cToMove -= 1;
1211 } while (cToMove > 0);
1212 else
1213 {
1214#if RTASM_HAVE_WRITE_U128 >= 2
1215 do
1216 {
1217 pCur -= 1;
1218 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[-1].u128Normal);
1219 cToMove -= 1;
1220 } while (cToMove > 0);
1221
1222#else
 /* Fallback using 128-bit compare-exchange: the value read from a slot in
    the previous step (u64PrevHi:u64PrevLo) is passed as the expected old
    content when that slot is overwritten.  The second AssertStmt argument
    stores the two halves separately should the exchange not succeed
    (per AssertStmt semantics - confirm for non-strict builds). */
1223 uint64_t u64PrevLo = pCur[-1].u128Normal.s.Lo;
1224 uint64_t u64PrevHi = pCur[-1].u128Normal.s.Hi;
1225 do
1226 {
1227 pCur -= 1;
1228 uint64_t const u64CurLo = pCur[-1].u128Normal.s.Lo;
1229 uint64_t const u64CurHi = pCur[-1].u128Normal.s.Hi;
1230 uint128_t uOldIgn;
1231 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1232 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1233 u64PrevLo = u64CurLo;
1234 u64PrevHi = u64CurHi;
1235 cToMove -= 1;
1236 } while (cToMove > 0);
1237#endif
1238 }
1239 }
1240 }
1241
1242 /*
1243 * Write the new entry.
 *
 * The range ID is OR'ed into the page aligned address to form
 * GCPhysFirstAndId (the alignment of GCPhys was checked at the top).
1244 */
1245 PGMRAMRANGELOOKUPENTRY *pInsert = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1246 if (!fUseAtomic)
1247 {
1248 pInsert->GCPhysFirstAndId = idRamRange | GCPhys;
1249 pInsert->GCPhysLast = GCPhysLast;
1250 }
1251 else
1252 {
1253 PGMRAMRANGELOOKUPENTRY NewEntry;
1254 NewEntry.GCPhysFirstAndId = idRamRange | GCPhys;
1255 NewEntry.GCPhysLast = GCPhysLast;
1256 ASMAtomicWriteU128v2(&pInsert->u128Volatile.u, NewEntry.u128Normal.s.Hi, NewEntry.u128Normal.s.Lo);
1257 }
1258
1259 /*
1260 * Update the generation and count in one go, signaling the end of the updating.
1261 */
1262 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1263 GenAndCount.cLookupEntries = cLookupEntries + 1;
1264 GenAndCount.idGeneration = idGeneration + 1;
1265 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1266
 /* Hand back the index following the new entry as the next hint. */
1267 if (pidxLookup)
1268 *pidxLookup = idxLookup + 1;
1269
1270#ifdef DEBUG_bird
1271 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1272#endif
1273 return VINF_SUCCESS;
1274}
1275
1276
1277/**
1278 * Removes @a pRam from the lookup table.
 *
 * Locates the entry of the range (via the hint or a binary search), moves all
 * following entries down by one slot, marks the range as unmapped by setting
 * its GCPhys / GCPhysLast to NIL_RTGCPHYS, and finally stores the decremented
 * entry count together with the next generation number in one 64-bit write.
 * All checks that can fail are done before anything is modified.
1279 *
1280 * @returns VBox status code.
 * @retval VERR_NOT_FOUND if the range has no address assigned, or if the
 *         binary search does not find a matching entry.
1281 * @param pVM The cross context VM structure.
1282 * @param pRam The RAM range to remove from the lookup table.
1283 * @param pidxLookup Optional lookup table hint. This is updated.
 *                   On success it is set to the removed index plus one.
1284 * @note Caller must own PGM lock.
1285 */
1286static int pgmR3PhysRamRangeRemoveLookup(PVM pVM, PPGMRAMRANGE pRam, uint32_t *pidxLookup)
1287{
1288 PGM_LOCK_ASSERT_OWNER(pVM);
1289 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
1290
1291 RTGCPHYS const GCPhys = pRam->GCPhys;
1292 RTGCPHYS const GCPhysLast = pRam->GCPhysLast;
 /* Both being NIL means the range is not mapped: report it as not found. */
1293 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1294 || GCPhysLast != NIL_RTGCPHYS,
1295 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1296 VERR_NOT_FOUND);
 /* Sanity: first/last must be consistent with the size and page aligned. */
1297 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1298 && GCPhysLast == GCPhys + pRam->cb - 1U
1299 && (GCPhys & GUEST_PAGE_OFFSET_MASK) == 0
1300 && (GCPhysLast & GUEST_PAGE_OFFSET_MASK) == GUEST_PAGE_OFFSET_MASK
1301 && GCPhysLast > GCPhys,
1302 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1303 VERR_INTERNAL_ERROR_5);
1304 uint32_t const idRamRange = pRam->idRange;
1305 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_4);
1306 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1307
1308 /*
1309 * Find the lookup table location.
1310 */
1311 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1312 AssertLogRelMsgReturn( cLookupEntries > 0
1313 && cLookupEntries < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1314 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1315
 /* The hint is only used if the entry at it matches this range exactly
    (same last address, same first address + ID); otherwise do a binary
    search. */
1316 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1317 if ( idxLookup >= cLookupEntries
1318 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast != GCPhysLast
1319 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysFirstAndId != (GCPhys | idRamRange))
1320 {
1321 uint32_t iStart = 0;
1322 uint32_t iEnd = cLookupEntries;
1323 for (;;)
1324 {
1325 idxLookup = iStart + (iEnd - iStart) / 2;
1326 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1327 if (GCPhysLast < GCPhysEntryFirst)
1328 {
 /* Our range is below this entry; fail if there is nothing further left. */
1329 AssertLogRelMsgReturn(idxLookup > iStart,
1330 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1331 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1332 VERR_NOT_FOUND);
1333 iEnd = idxLookup;
1334 }
1335 else
1336 {
1337 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1338 if (GCPhys > GCPhysEntryLast)
1339 {
 /* Our range is above this entry; fail if there is nothing further right. */
1340 idxLookup += 1;
1341 AssertLogRelMsgReturn(idxLookup < iEnd,
1342 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1343 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1344 VERR_NOT_FOUND);
1345 iStart = idxLookup;
1346 }
1347 else
1348 {
 /* Overlapping entry: it must be an exact match for this range. */
1349 uint32_t const idEntry = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1350 AssertLogRelMsgReturn( GCPhysEntryFirst == GCPhys
1351 && GCPhysEntryLast == GCPhysLast
1352 && idEntry == idRamRange,
1353 ("Found: %RGp..%RGp id=%#x; Wanted: GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n",
1354 GCPhysEntryFirst, GCPhysEntryLast, idEntry,
1355 GCPhys, pRam->cb, GCPhysLast, pRam->idRange, pRam->pszDesc),
1356 VERR_NOT_FOUND);
1357 break;
1358 }
1359 }
1360 }
1361 }
1362 /* else we've got a good hint. */
1363
1364 /*
1365 * Do the actual job.
1366 *
1367 * The moving of existing table entries is done in a way that allows other
1368 * EMTs to perform concurrent lookups with the updating.
 *
 * fUseAtomic is false (plain stores are used) while the VM is in the
 * creating state, when there is only one virtual CPU, and - on AMD64 hosts -
 * when g_CpumHostFeatures.s.fCmpXchg16b is clear.
1369 */
1370 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1371 && pVM->cCpus > 1
1372#ifdef RT_ARCH_AMD64
1373 && g_CpumHostFeatures.s.fCmpXchg16b
1374#endif
1375 ;
1376
1377 /* Signal that we're modifying the lookup table:
    The '| 1' forces the generation stored here to be odd; the final update
    at the end of the function stores idGeneration + 1.  (Presumably the
    lockless readers take an odd generation to mean "update in progress" -
    the reader side is not visible here, TODO confirm.) */
1378 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1379 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1380
1381 /* Do we need to shift any lookup table entries? (This is a lot simpler
1382 than insertion.)
    Each slot from idxLookup upwards receives a copy of the entry above it.
    The old last slot is not touched; it falls outside the table once the
    count is decremented at the end of the function. */
1383 if (idxLookup + 1U < cLookupEntries)
1384 {
1385 uint32_t cToMove = cLookupEntries - idxLookup - 1U;
1386 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1387 if (!fUseAtomic)
1388 do
1389 {
1390 pCur->GCPhysFirstAndId = pCur[1].GCPhysFirstAndId;
1391 pCur->GCPhysLast = pCur[1].GCPhysLast;
1392 pCur += 1;
1393 cToMove -= 1;
1394 } while (cToMove > 0);
1395 else
1396 {
1397#if RTASM_HAVE_WRITE_U128 >= 2
1398 do
1399 {
1400 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[1].u128Normal);
1401 pCur += 1;
1402 cToMove -= 1;
1403 } while (cToMove > 0);
1404
1405#else
 /* Fallback using 128-bit compare-exchange: the value last read from a slot
    (u64PrevHi:u64PrevLo) is passed as the expected old content when that
    slot is overwritten.  The second AssertStmt argument stores the two
    halves separately should the exchange not succeed (per AssertStmt
    semantics - confirm for non-strict builds). */
1406 uint64_t u64PrevLo = pCur->u128Normal.s.Lo;
1407 uint64_t u64PrevHi = pCur->u128Normal.s.Hi;
1408 do
1409 {
1410 uint64_t const u64CurLo = pCur[1].u128Normal.s.Lo;
1411 uint64_t const u64CurHi = pCur[1].u128Normal.s.Hi;
1412 uint128_t uOldIgn;
1413 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1414 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1415 u64PrevLo = u64CurLo;
1416 u64PrevHi = u64CurHi;
1417 pCur += 1;
1418 cToMove -= 1;
1419 } while (cToMove > 0);
1420#endif
1421 }
1422 }
1423
1424 /* Update the RAM range entry to indicate that it is no longer mapped.
1425 The GCPhys member is accessed by the lockless TLB lookup code, so update
1426 it last and atomically to be on the safe side. */
1427 pRam->GCPhysLast = NIL_RTGCPHYS;
1428 ASMAtomicWriteU64(&pRam->GCPhys, NIL_RTGCPHYS);
1429
1430 /*
1431 * Update the generation and count in one go, signaling the end of the updating.
1432 */
1433 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1434 GenAndCount.cLookupEntries = cLookupEntries - 1;
1435 GenAndCount.idGeneration = idGeneration + 1;
1436 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1437
 /* NOTE(review): the entries that followed the removed one were moved down
    by one slot above, so the entry that used to follow it now sits at
    idxLookup, not at idxLookup + 1.  A hint that does not match only costs a
    binary search on the next call (see the hint check above) - confirm
    whether the + 1 is what the callers want. */
1438 if (pidxLookup)
1439 *pidxLookup = idxLookup + 1;
1440
1441 return VINF_SUCCESS;
1442}
1443
1444
1445/**
1446 * Gets the number of ram ranges.
1447 *
1448 * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid.
1449 * @param pVM The cross context VM structure.
1450 */
1451VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM)
1452{
1453 VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX);
1454
1455 PGM_LOCK_VOID(pVM);
1456 uint32_t const cRamRanges = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1457 PGM_UNLOCK(pVM);
1458 return cRamRanges;
1459}
1460
1461
1462/**
1463 * Get information about a range.
1464 *
1465 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE.
1466 * @param pVM The cross context VM structure.
1467 * @param iRange The ordinal of the range.
1468 * @param pGCPhysStart Where to return the start of the range. Optional.
1469 * @param pGCPhysLast Where to return the address of the last byte in the
1470 * range. Optional.
1471 * @param ppszDesc Where to return the range description. Optional.
1472 * @param pfIsMmio Where to indicate that this is a pure MMIO range.
1473 * Optional.
1474 */
1475VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast,
1476 const char **ppszDesc, bool *pfIsMmio)
1477{
1478 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1479
1480 PGM_LOCK_VOID(pVM);
1481 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1482 if (iRange < cLookupEntries)
1483 {
1484 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[iRange]);
1485 Assert(idRamRange && idRamRange <= pVM->pgm.s.idRamRangeMax);
1486 PGMRAMRANGE const * const pRamRange = pVM->pgm.s.apRamRanges[idRamRange];
1487 AssertPtr(pRamRange);
1488
1489 if (pGCPhysStart)
1490 *pGCPhysStart = pRamRange->GCPhys;
1491 if (pGCPhysLast)
1492 *pGCPhysLast = pRamRange->GCPhysLast;
1493 if (ppszDesc)
1494 *ppszDesc = pRamRange->pszDesc;
1495 if (pfIsMmio)
1496 *pfIsMmio = !!(pRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO);
1497
1498 PGM_UNLOCK(pVM);
1499 return VINF_SUCCESS;
1500 }
1501 PGM_UNLOCK(pVM);
1502 return VERR_OUT_OF_RANGE;
1503}
1504
1505
1506/**
1507 * Gets RAM ranges that are supposed to be zero'ed at boot.
1508 *
1509 * This function gets all RAM ranges that are not ad hoc (ROM, MMIO, MMIO2) memory.
1510 * The RAM hole (if any) is -NOT- included because we don't return 0s when it is
1511 * read anyway.
1512 *
1513 * @returns VBox status code.
1514 * @param pVM The cross context VM structure.
1515 * @param pRanges Where to store the physical RAM ranges.
1516 * @param cMaxRanges The maximum ranges that can be stored.
1517 */
1518VMMR3_INT_DECL(int) PGMR3PhysGetRamBootZeroedRanges(PVM pVM, PPGMPHYSRANGES pRanges, uint32_t cMaxRanges)
1519{
1520 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1521 AssertPtrReturn(pRanges, VERR_INVALID_PARAMETER);
1522 AssertReturn(cMaxRanges > 0, VERR_INVALID_PARAMETER);
1523
1524 int rc = VINF_SUCCESS;
1525 uint32_t idxRange = 0;
1526 PGM_LOCK_VOID(pVM);
1527
1528 /*
1529 * The primary purpose of this API is the GIM Hyper-V hypercall which recommends (not
1530 * requires) that the largest ranges are reported earlier. Therefore, here we iterate
1531 * the ranges in reverse because in PGM the largest range is generally at the end.
1532 */
1533 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1534 for (int32_t idxLookup = cLookupEntries - 1; idxLookup >= 0; idxLookup--)
1535 {
1536 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1537 Assert(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1538 PPGMRAMRANGE const pCur = pVM->pgm.s.apRamRanges[idRamRange];
1539 AssertContinue(pCur);
1540
1541 if (!PGM_RAM_RANGE_IS_AD_HOC(pCur))
1542 {
1543 if (idxRange < cMaxRanges)
1544 {
1545 /* Combine with previous range if it is contiguous, otherwise add it as a new range. */
1546 if ( idxRange > 0
1547 && pRanges->aRanges[idxRange - 1].GCPhysStart == pCur->GCPhysLast + 1U)
1548 {
1549 pRanges->aRanges[idxRange - 1].GCPhysStart = pCur->GCPhys;
1550 pRanges->aRanges[idxRange - 1].cPages += (pCur->cb >> GUEST_PAGE_SHIFT);
1551 }
1552 else
1553 {
1554 pRanges->aRanges[idxRange].GCPhysStart = pCur->GCPhys;
1555 pRanges->aRanges[idxRange].cPages = pCur->cb >> GUEST_PAGE_SHIFT;
1556 ++idxRange;
1557 }
1558 }
1559 else
1560 {
1561 rc = VERR_BUFFER_OVERFLOW;
1562 break;
1563 }
1564 }
1565 }
1566 pRanges->cRanges = idxRange;
1567 PGM_UNLOCK(pVM);
1568 return rc;
1569}
1570
1571
1572/*********************************************************************************************************************************
1573* RAM *
1574*********************************************************************************************************************************/
1575
1576/**
1577 * Frees the specified RAM page and replaces it with the ZERO page.
1578 *
1579 * This is used by ballooning, remapping MMIO2, RAM reset and state loading.
1580 *
1581 * @param pVM The cross context VM structure.
1582 * @param pReq Pointer to the request. This is NULL when doing a
1583 * bulk free in NEM memory mode.
1584 * @param pcPendingPages Where the number of pages waiting to be freed are
1585 * kept. This will normally be incremented. This is
1586 * NULL when doing a bulk free in NEM memory mode.
1587 * @param pPage Pointer to the page structure.
1588 * @param GCPhys The guest physical address of the page, if applicable.
1589 * @param enmNewType New page type for NEM notification, since several
1590 * callers will change the type upon successful return.
1591 *
1592 * @remarks The caller must own the PGM lock.
1593 */
1594int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys,
1595 PGMPAGETYPE enmNewType)
1596{
1597 RT_NOREF(enmNewType, pcPendingPages);
1598
1599 /*
1600 * Assert sanity.
1601 */
1602 PGM_LOCK_ASSERT_OWNER(pVM);
1603 if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM
1604 && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW))
1605 {
1606 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1607 return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage));
1608 }
1609
1610 /** @todo What about ballooning of large pages??! */
1611 Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE
1612 && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED);
1613
1614 if ( PGM_PAGE_IS_ZERO(pPage)
1615 || PGM_PAGE_IS_BALLOONED(pPage))
1616 return VINF_SUCCESS;
1617
1618 const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage);
1619 Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage));
1620 if (RT_UNLIKELY(!PGM_IS_IN_NEM_MODE(pVM)
1621 ? idPage == NIL_GMM_PAGEID
1622 || idPage > GMM_PAGEID_LAST
1623 || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID
1624 : idPage != NIL_GMM_PAGEID))
1625 {
1626 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1627 return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, pPage);
1628 }
1629#if defined(VBOX_WITH_NATIVE_NEM) && !defined(VBOX_WITH_ONLY_PGM_NEM_MODE)
1630 const RTHCPHYS HCPhysPrev = PGM_PAGE_GET_HCPHYS(pPage);
1631#endif
1632
1633 /* update page count stats. */
1634 if (PGM_PAGE_IS_SHARED(pPage))
1635 pVM->pgm.s.cSharedPages--;
1636 else
1637 pVM->pgm.s.cPrivatePages--;
1638 pVM->pgm.s.cZeroPages++;
1639
1640 /* Deal with write monitored pages. */
1641 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
1642 {
1643 PGM_PAGE_SET_WRITTEN_TO(pVM, pPage);
1644 pVM->pgm.s.cWrittenToPages++;
1645 }
1646 PGM_PAGE_CLEAR_CODE_PAGE(pVM, pPage); /* No callback needed, IEMTlbInvalidateAllPhysicalAllCpus is called below. */
1647
1648 /*
1649 * pPage = ZERO page.
1650 */
1651 PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg);
1652 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1653 PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID);
1654 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
1655 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
1656 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
1657
1658 /* Flush physical page map TLB entry. */
1659 pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys);
1660 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_FREED); /// @todo move to the perform step.
1661
1662#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1663# ifdef VBOX_WITH_PGM_NEM_MODE
1664 /*
1665 * Skip the rest if we're doing a bulk free in NEM memory mode.
1666 */
1667 if (!pReq)
1668 return VINF_SUCCESS;
1669 AssertLogRelReturn(!pVM->pgm.s.fNemMode, VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
1670# endif
1671
1672# ifdef VBOX_WITH_NATIVE_NEM
1673 /* Notify NEM. */
1674 /** @todo Remove this one? */
1675 if (VM_IS_NEM_ENABLED(pVM))
1676 {
1677 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage);
1678 NEMHCNotifyPhysPageChanged(pVM, GCPhys, HCPhysPrev, pVM->pgm.s.HCPhysZeroPg, pVM->pgm.s.abZeroPg,
1679 pgmPhysPageCalcNemProtection(pPage, enmNewType), enmNewType, &u2State);
1680 PGM_PAGE_SET_NEM_STATE(pPage, u2State);
1681 }
1682# endif
1683
1684 /*
1685 * Make sure it's not in the handy page array.
1686 */
1687 for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
1688 {
1689 if (pVM->pgm.s.aHandyPages[i].idPage == idPage)
1690 {
1691 pVM->pgm.s.aHandyPages[i].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
1692 pVM->pgm.s.aHandyPages[i].fZeroed = false;
1693 pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID;
1694 break;
1695 }
1696 if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage)
1697 {
1698 pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID;
1699 break;
1700 }
1701 }
1702
1703 /*
1704 * Push it onto the page array.
1705 */
1706 uint32_t iPage = *pcPendingPages;
1707 Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE);
1708 *pcPendingPages += 1;
1709
1710 pReq->aPages[iPage].idPage = idPage;
1711
1712 if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE)
1713 return VINF_SUCCESS;
1714
1715 /*
1716 * Flush the pages.
1717 */
1718 int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE);
1719 if (RT_SUCCESS(rc))
1720 {
1721 GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1722 *pcPendingPages = 0;
1723 }
1724 return rc;
1725
1726#else /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1727 Assert(!pReq); RT_NOREF(pReq);
1728 return VINF_SUCCESS;
1729#endif /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1730}
1731
1732
1733/**
1734 * Frees a range of pages, replacing them with MMIO ZERO pages.
1735 *
1736 * @returns VBox status code.
1737 * @param pVM The cross context VM structure.
1738 * @param pRam The RAM range in which the pages resides.
1739 * @param GCPhys The address of the first page.
1740 * @param GCPhysLast The address of the last page.
1741 * @param pvMmio2 Pointer to the ring-3 mapping of any MMIO2 memory that
1742 * will replace the pages we're freeing up.
1743 */
1744static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, void *pvMmio2)
1745{
1746 PGM_LOCK_ASSERT_OWNER(pVM);
1747 /** @todo pvMmio2 is always NULL. */
1748
1749#ifdef VBOX_WITH_PGM_NEM_MODE
1750 /*
1751 * In simplified memory mode we don't actually free the memory,
1752 * we just unmap it and let NEM do any unlocking of it.
1753 */
1754# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1755 if (pVM->pgm.s.fNemMode)
1756# endif
1757 {
1758 Assert(VM_IS_NEM_ENABLED(pVM) || VM_IS_EXEC_ENGINE_IEM(pVM));
1759 uint8_t u2State = 0; /* (We don't support UINT8_MAX here.) */
1760 if (VM_IS_NEM_ENABLED(pVM))
1761 {
1762 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1763 int rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify,
1764 pRam->pbR3 ? pRam->pbR3 + GCPhys - pRam->GCPhys : NULL,
1765 pvMmio2, &u2State, NULL /*puNemRange*/);
1766 AssertLogRelRCReturn(rc, rc);
1767 }
1768
1769 /* Iterate the pages. */
1770 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1771 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1772 while (cPagesLeft-- > 0)
1773 {
1774 int rc = pgmPhysFreePage(pVM, NULL, NULL, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1775 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1776
1777 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1778 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1779
1780 GCPhys += GUEST_PAGE_SIZE;
1781 pPageDst++;
1782 }
1783 return VINF_SUCCESS;
1784 }
1785#else /* !VBOX_WITH_PGM_NEM_MODE */
1786 RT_NOREF(pvMmio2);
1787#endif /* !VBOX_WITH_PGM_NEM_MODE */
1788#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1789
1790 /*
1791 * Regular mode.
1792 */
1793 /* Prepare. */
1794 uint32_t cPendingPages = 0;
1795 PGMMFREEPAGESREQ pReq;
1796 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1797 AssertLogRelRCReturn(rc, rc);
1798
1799# ifdef VBOX_WITH_NATIVE_NEM
1800 /* Tell NEM up-front. */
1801 uint8_t u2State = UINT8_MAX;
1802 if (VM_IS_NEM_ENABLED(pVM))
1803 {
1804 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1805 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify, NULL, pvMmio2,
1806 &u2State, NULL /*puNemRange*/);
1807 AssertLogRelRCReturnStmt(rc, GMMR3FreePagesCleanup(pReq), rc);
1808 }
1809# endif
1810
1811 /* Iterate the pages. */
1812 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1813 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1814 while (cPagesLeft-- > 0)
1815 {
1816 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1817 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1818
1819 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1820# ifdef VBOX_WITH_NATIVE_NEM
1821 if (u2State != UINT8_MAX)
1822 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1823# endif
1824
1825 GCPhys += GUEST_PAGE_SIZE;
1826 pPageDst++;
1827 }
1828
1829 /* Finish pending and cleanup. */
1830 if (cPendingPages)
1831 {
1832 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
1833 AssertLogRelRCReturn(rc, rc);
1834 }
1835 GMMR3FreePagesCleanup(pReq);
1836
1837 return rc;
1838#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
1839}
1840
1841
1842/**
1843 * Wrapper around VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE.
1844 */
1845static int pgmR3PhysAllocateRamRange(PVM pVM, PVMCPU pVCpu, uint32_t cGuestPages, uint32_t fFlags, PPGMRAMRANGE *ppRamRange)
1846{
1847 int rc;
1848 PGMPHYSALLOCATERAMRANGEREQ AllocRangeReq;
1849 AllocRangeReq.idNewRange = UINT32_MAX / 4;
1850#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
1851 if (!SUPR3IsDriverless())
1852 {
1853 AllocRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
1854 AllocRangeReq.Hdr.cbReq = sizeof(AllocRangeReq);
1855 AllocRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
1856 AllocRangeReq.cGuestPages = cGuestPages;
1857 AllocRangeReq.fFlags = fFlags;
1858 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE, 0 /*u64Arg*/, &AllocRangeReq.Hdr);
1859 }
1860 else
1861#endif
1862 rc = pgmPhysRamRangeAllocCommon(pVM, cGuestPages, fFlags, &AllocRangeReq.idNewRange);
1863 if (RT_SUCCESS(rc))
1864 {
1865 Assert(AllocRangeReq.idNewRange != 0);
1866 Assert(AllocRangeReq.idNewRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1867 AssertPtr(pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange]);
1868 *ppRamRange = pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange];
1869 return VINF_SUCCESS;
1870 }
1871
1872 RT_NOREF(pVCpu);
1873 *ppRamRange = NULL;
1874 return rc;
1875}
1876
1877
1878/**
1879 * PGMR3PhysRegisterRam worker that initializes and links a RAM range.
1880 *
1881 * In NEM mode, this will allocate the pages backing the RAM range and this may
1882 * fail. NEM registration may also fail. (In regular HM mode it won't fail.)
1883 *
1884 * @returns VBox status code.
1885 * @param pVM The cross context VM structure.
1886 * @param pNew The new RAM range.
1887 * @param GCPhys The address of the RAM range.
1888 * @param GCPhysLast The last address of the RAM range.
1889 * @param pszDesc The description.
1890 * @param pidxLookup The lookup table insertion point.
1891 */
1892static int pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast,
1893 const char *pszDesc, uint32_t *pidxLookup)
1894{
1895 /*
1896 * Initialize the range.
1897 */
1898 Assert(pNew->cb == GCPhysLast - GCPhys + 1U); RT_NOREF(GCPhysLast);
1899 pNew->pszDesc = pszDesc;
1900 pNew->uNemRange = UINT32_MAX;
1901 pNew->pbR3 = NULL;
1902 pNew->paLSPages = NULL;
1903
1904 uint32_t const cPages = pNew->cb >> GUEST_PAGE_SHIFT;
1905#ifdef VBOX_WITH_PGM_NEM_MODE
1906 if (PGM_IS_IN_NEM_MODE(pVM))
1907 {
1908 int rc = SUPR3PageAlloc(RT_ALIGN_Z(pNew->cb, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT,
1909 pVM->pgm.s.fUseLargePages ? SUP_PAGE_ALLOC_F_LARGE_PAGES : 0, (void **)&pNew->pbR3);
1910 if (RT_FAILURE(rc))
1911 return rc;
1912
1913 RTGCPHYS iPage = cPages;
1914 while (iPage-- > 0)
1915 PGM_PAGE_INIT(&pNew->aPages[iPage], UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
1916 PGMPAGETYPE_RAM, PGM_PAGE_STATE_ALLOCATED);
1917
1918 /* Update the page count stats. */
1919 pVM->pgm.s.cPrivatePages += cPages;
1920 pVM->pgm.s.cAllPages += cPages;
1921 }
1922 else
1923#endif
1924 {
1925#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1926 RTGCPHYS iPage = cPages;
1927 while (iPage-- > 0)
1928 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM);
1929
1930 /* Update the page count stats. */
1931 pVM->pgm.s.cZeroPages += cPages;
1932 pVM->pgm.s.cAllPages += cPages;
1933#endif
1934 }
1935
1936 /*
1937 * Insert it into the lookup table.
1938 */
1939 int rc = pgmR3PhysRamRangeInsertLookup(pVM, pNew, GCPhys, pidxLookup);
1940 AssertRCReturn(rc, rc);
1941
1942#ifdef VBOX_WITH_NATIVE_NEM
1943 /*
1944 * Notify NEM now that it has been linked.
1945 *
1946 * As above, it is assumed that on failure the VM creation will fail, so
1947 * no extra cleanup is needed here.
1948 */
1949 if (VM_IS_NEM_ENABLED(pVM))
1950 {
1951 uint8_t u2State = UINT8_MAX;
1952 rc = NEMR3NotifyPhysRamRegister(pVM, GCPhys, pNew->cb, pNew->pbR3, &u2State, &pNew->uNemRange);
1953 if (RT_SUCCESS(rc) && u2State != UINT8_MAX)
1954 pgmPhysSetNemStateForPages(&pNew->aPages[0], cPages, u2State);
1955 return rc;
1956 }
1957#endif
1958 return VINF_SUCCESS;
1959}
1960
1961
1962/**
1963 * Worker for PGMR3PhysRegisterRam called with the PGM lock.
1964 *
1965 * The caller releases the lock.
1966 */
1967static int pgmR3PhysRegisterRamWorker(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc,
1968 uint32_t const cRamRanges, RTGCPHYS const GCPhysLast)
1969{
1970#ifdef VBOX_STRICT
1971 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1972#endif
1973
1974 /*
1975 * Check that we've got enough free RAM ranges.
1976 */
1977 AssertLogRelMsgReturn((uint64_t)pVM->pgm.s.idRamRangeMax + cRamRanges + 1 <= RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup),
1978 ("idRamRangeMax=%#RX32 vs GCPhys=%RGp cb=%RGp / %#RX32 ranges (%s)\n",
1979 pVM->pgm.s.idRamRangeMax, GCPhys, cb, cRamRanges, pszDesc),
1980 VERR_PGM_TOO_MANY_RAM_RANGES);
1981
1982 /*
1983 * Check for conflicts via the lookup table. We search it backwards,
1984 * assuming that memory is added in ascending order by address.
1985 */
1986 uint32_t idxLookup = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1987 while (idxLookup)
1988 {
1989 if (GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast)
1990 break;
1991 idxLookup--;
1992 RTGCPHYS const GCPhysCur = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1993 AssertLogRelMsgReturn( GCPhysLast < GCPhysCur
1994 || GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1995 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
1996 GCPhys, GCPhysLast, pszDesc, GCPhysCur, pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1997 pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])]->pszDesc),
1998 VERR_PGM_RAM_CONFLICT);
1999 }
2000
2001 /*
2002 * Register it with GMM (the API bitches).
2003 */
2004 const RTGCPHYS cPages = cb >> GUEST_PAGE_SHIFT;
2005 int rc = MMR3IncreaseBaseReservation(pVM, cPages);
2006 if (RT_FAILURE(rc))
2007 return rc;
2008
2009 /*
2010 * Create the required chunks.
2011 */
2012 RTGCPHYS cPagesLeft = cPages;
2013 RTGCPHYS GCPhysChunk = GCPhys;
2014 uint32_t idxChunk = 0;
2015 while (cPagesLeft > 0)
2016 {
2017 uint32_t cPagesInChunk = cPagesLeft;
2018 if (cPagesInChunk > PGM_MAX_PAGES_PER_RAM_RANGE)
2019 cPagesInChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2020
2021 const char *pszDescChunk = idxChunk == 0
2022 ? pszDesc
2023 : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, idxChunk + 1);
2024 AssertReturn(pszDescChunk, VERR_NO_MEMORY);
2025
2026 /*
2027 * Allocate a RAM range.
2028 */
2029 PPGMRAMRANGE pNew = NULL;
2030 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPagesInChunk, 0 /*fFlags*/, &pNew);
2031 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2032 ("pgmR3PhysAllocateRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2033 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2034 rc);
2035
2036 /*
2037 * Ok, init and link the range.
2038 */
2039 rc = pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhysChunk,
2040 GCPhysChunk + ((RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT) - 1U,
2041 pszDescChunk, &idxLookup);
2042 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2043 ("pgmR3PhysInitAndLinkRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2044 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2045 rc);
2046
2047 /* advance */
2048 GCPhysChunk += (RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT;
2049 cPagesLeft -= cPagesInChunk;
2050 idxChunk++;
2051 }
2052
2053 return rc;
2054}
2055
2056
2057/**
2058 * Sets up a range RAM.
2059 *
2060 * This will check for conflicting registrations, make a resource reservation
2061 * for the memory (with GMM), and setup the per-page tracking structures
2062 * (PGMPAGE).
2063 *
2064 * @returns VBox status code.
2065 * @param pVM The cross context VM structure.
2066 * @param GCPhys The physical address of the RAM.
2067 * @param cb The size of the RAM.
2068 * @param pszDesc The description - not copied, so, don't free or change it.
2069 */
2070VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc)
2071{
2072 /*
2073 * Validate input.
2074 */
2075 Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc));
2076 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
2077 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
2078 AssertReturn(cb > 0, VERR_INVALID_PARAMETER);
2079 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
2080 AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER);
2081 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2082 PVMCPU const pVCpu = VMMGetCpu(pVM);
2083 AssertReturn(pVCpu, VERR_VM_THREAD_NOT_EMT);
2084 AssertReturn(pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2085
2086 /*
2087 * Calculate the number of RAM ranges required.
2088 * See also pgmPhysMmio2CalcChunkCount.
2089 */
2090 uint32_t const cPagesPerChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2091 uint32_t const cRamRanges = (uint32_t)(((cb >> GUEST_PAGE_SHIFT) + cPagesPerChunk - 1) / cPagesPerChunk);
2092 AssertLogRelMsgReturn(cRamRanges * (RTGCPHYS)cPagesPerChunk * GUEST_PAGE_SIZE >= cb,
2093 ("cb=%RGp cRamRanges=%#RX32 cPagesPerChunk=%#RX32\n", cb, cRamRanges, cPagesPerChunk),
2094 VERR_OUT_OF_RANGE);
2095
2096 PGM_LOCK_VOID(pVM);
2097
2098 int rc = pgmR3PhysRegisterRamWorker(pVM, pVCpu, GCPhys, cb, pszDesc, cRamRanges, GCPhysLast);
2099#ifdef VBOX_STRICT
2100 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2101#endif
2102
2103 PGM_UNLOCK(pVM);
2104 return rc;
2105}
2106
2107
2108#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2109/**
2110 * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM.
2111 *
2112 * We do this late in the init process so that all the ROM and MMIO ranges have
2113 * been registered already and we don't go wasting memory on them.
2114 *
2115 * @returns VBox status code.
2116 *
2117 * @param pVM The cross context VM structure.
2118 */
2119int pgmR3PhysRamPreAllocate(PVM pVM)
2120{
2121 Assert(pVM->pgm.s.fRamPreAlloc);
2122 Log(("pgmR3PhysRamPreAllocate: enter\n"));
2123# ifdef VBOX_WITH_PGM_NEM_MODE
2124 if (VM_IS_NEM_ENABLED(pVM))
2125 {
2126 LogRel(("PGM: Pre-alloc ignored in NEM mode.\n"));
2127 return VINF_SUCCESS;
2128 }
2129# endif
2130
2131 /*
2132 * Walk the RAM ranges and allocate all RAM pages, halt at
2133 * the first allocation error.
2134 */
2135 uint64_t cPages = 0;
2136 uint64_t NanoTS = RTTimeNanoTS();
2137 PGM_LOCK_VOID(pVM);
2138 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2139 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2140 {
2141 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2142 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2143 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2144 AssertContinue(pRam);
2145
2146 PPGMPAGE pPage = &pRam->aPages[0];
2147 RTGCPHYS GCPhys = pRam->GCPhys;
2148 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
2149 while (cLeft-- > 0)
2150 {
2151 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2152 {
2153 switch (PGM_PAGE_GET_STATE(pPage))
2154 {
2155 case PGM_PAGE_STATE_ZERO:
2156 {
2157 int rc = pgmPhysAllocPage(pVM, pPage, GCPhys);
2158 if (RT_FAILURE(rc))
2159 {
2160 LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc));
2161 PGM_UNLOCK(pVM);
2162 return rc;
2163 }
2164 cPages++;
2165 break;
2166 }
2167
2168 case PGM_PAGE_STATE_BALLOONED:
2169 case PGM_PAGE_STATE_ALLOCATED:
2170 case PGM_PAGE_STATE_WRITE_MONITORED:
2171 case PGM_PAGE_STATE_SHARED:
2172 /* nothing to do here. */
2173 break;
2174 }
2175 }
2176
2177 /* next */
2178 pPage++;
2179 GCPhys += GUEST_PAGE_SIZE;
2180 }
2181 }
2182 PGM_UNLOCK(pVM);
2183 NanoTS = RTTimeNanoTS() - NanoTS;
2184
2185 LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000));
2186 Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n"));
2187 return VINF_SUCCESS;
2188}
2189#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
2190
2191
2192/**
2193 * Checks shared page checksums.
2194 *
2195 * @param pVM The cross context VM structure.
2196 */
2197void pgmR3PhysAssertSharedPageChecksums(PVM pVM)
2198{
2199#ifdef VBOX_STRICT
2200 PGM_LOCK_VOID(pVM);
2201
2202 if (pVM->pgm.s.cSharedPages > 0)
2203 {
2204 /*
2205 * Walk the ram ranges.
2206 */
2207 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2208 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2209 {
2210 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2211 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2212 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2213 AssertContinue(pRam);
2214
2215 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2216 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb,
2217 ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2218
2219 while (iPage-- > 0)
2220 {
2221 PPGMPAGE pPage = &pRam->aPages[iPage];
2222 if (PGM_PAGE_IS_SHARED(pPage))
2223 {
2224 uint32_t u32Checksum = pPage->s.u2Unused0/* | ((uint32_t)pPage->s.u2Unused1 << 8)*/;
2225 if (!u32Checksum)
2226 {
2227 RTGCPHYS GCPhysPage = pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT);
2228 void const *pvPage;
2229 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhysPage, &pvPage);
2230 if (RT_SUCCESS(rc))
2231 {
2232 uint32_t u32Checksum2 = RTCrc32(pvPage, GUEST_PAGE_SIZE);
2233# if 0
2234 AssertMsg((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum, ("GCPhysPage=%RGp\n", GCPhysPage));
2235# else
2236 if ((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum)
2237 LogFlow(("shpg %#x @ %RGp %#x [OK]\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2238 else
2239 AssertMsgFailed(("shpg %#x @ %RGp %#x\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2240# endif
2241 }
2242 else
2243 AssertRC(rc);
2244 }
2245 }
2246
2247 } /* for each page */
2248
2249 } /* for each ram range */
2250 }
2251
2252 PGM_UNLOCK(pVM);
2253#endif /* VBOX_STRICT */
2254 NOREF(pVM);
2255}
2256
2257
2258/**
2259 * Resets the physical memory state.
2260 *
2261 * ASSUMES that the caller owns the PGM lock.
2262 *
2263 * @returns VBox status code.
2264 * @param pVM The cross context VM structure.
2265 */
2266int pgmR3PhysRamReset(PVM pVM)
2267{
2268 PGM_LOCK_ASSERT_OWNER(pVM);
2269
2270#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2271 /* Reset the memory balloon. */
2272 int rc1 = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2273 AssertRC(rc1);
2274#endif
2275
2276#ifdef VBOX_WITH_PAGE_SHARING
2277 /* Clear all registered shared modules. */
2278 pgmR3PhysAssertSharedPageChecksums(pVM);
2279 int rc2 = GMMR3ResetSharedModules(pVM);
2280 AssertRC(rc2);
2281#endif
2282 /* Reset counters. */
2283 pVM->pgm.s.cReusedSharedPages = 0;
2284 pVM->pgm.s.cBalloonedPages = 0;
2285
2286 return VINF_SUCCESS;
2287}
2288
2289
2290/**
2291 * Resets (zeros) the RAM after all devices and components have been reset.
2292 *
2293 * ASSUMES that the caller owns the PGM lock.
2294 *
2295 * @returns VBox status code.
2296 * @param pVM The cross context VM structure.
2297 */
2298int pgmR3PhysRamZeroAll(PVM pVM)
2299{
2300 PGM_LOCK_ASSERT_OWNER(pVM);
2301
2302 /*
2303 * We batch up pages that should be freed instead of calling GMM for
2304 * each and every one of them.
2305 */
2306 uint32_t cPendingPages = 0;
2307 PGMMFREEPAGESREQ pReq;
2308 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2309 AssertLogRelRCReturn(rc, rc);
2310
2311 /*
2312 * Walk the ram ranges.
2313 */
2314 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2315 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2316 {
2317 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2318 Assert(pRam || idRamRange == 0);
2319 if (!pRam) continue;
2320 Assert(pRam->idRange == idRamRange);
2321
2322 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2323 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2324
2325#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2326 if ( !pVM->pgm.s.fRamPreAlloc
2327# ifdef VBOX_WITH_PGM_NEM_MODE
2328 && !pVM->pgm.s.fNemMode
2329# endif
2330 && pVM->pgm.s.fZeroRamPagesOnReset)
2331 {
2332 /* Replace all RAM pages by ZERO pages. */
2333 while (iPage-- > 0)
2334 {
2335 PPGMPAGE pPage = &pRam->aPages[iPage];
2336 switch (PGM_PAGE_GET_TYPE(pPage))
2337 {
2338 case PGMPAGETYPE_RAM:
2339 /* Do not replace pages part of a 2 MB continuous range
2340 with zero pages, but zero them instead. */
2341 if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
2342 || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2343 {
2344 void *pvPage;
2345 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2346 AssertLogRelRCReturn(rc, rc);
2347 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2348 }
2349 else if (PGM_PAGE_IS_BALLOONED(pPage))
2350 {
2351 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2352 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2353 }
2354 else if (!PGM_PAGE_IS_ZERO(pPage))
2355 {
2356 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2357 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2358 AssertLogRelRCReturn(rc, rc);
2359 }
2360 break;
2361
2362 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2363 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2364 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2365 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2366 break;
2367
2368 case PGMPAGETYPE_MMIO2:
2369 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2370 case PGMPAGETYPE_ROM:
2371 case PGMPAGETYPE_MMIO:
2372 break;
2373 default:
2374 AssertFailed();
2375 }
2376 } /* for each page */
2377 }
2378 else
2379#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
2380
2381 {
2382 /* Zero the memory. */
2383 while (iPage-- > 0)
2384 {
2385 PPGMPAGE pPage = &pRam->aPages[iPage];
2386 switch (PGM_PAGE_GET_TYPE(pPage))
2387 {
2388 case PGMPAGETYPE_RAM:
2389 switch (PGM_PAGE_GET_STATE(pPage))
2390 {
2391 case PGM_PAGE_STATE_ZERO:
2392 break;
2393
2394 case PGM_PAGE_STATE_BALLOONED:
2395 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2396 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2397 break;
2398
2399 case PGM_PAGE_STATE_SHARED:
2400 case PGM_PAGE_STATE_WRITE_MONITORED:
2401 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
2402 AssertLogRelRCReturn(rc, rc);
2403 RT_FALL_THRU();
2404
2405 case PGM_PAGE_STATE_ALLOCATED:
2406 if (pVM->pgm.s.fZeroRamPagesOnReset)
2407 {
2408 void *pvPage;
2409 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2410 AssertLogRelRCReturn(rc, rc);
2411 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2412 }
2413 break;
2414 }
2415 break;
2416
2417 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2418 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2419 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2420 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2421 break;
2422
2423 case PGMPAGETYPE_MMIO2:
2424 case PGMPAGETYPE_ROM_SHADOW:
2425 case PGMPAGETYPE_ROM:
2426 case PGMPAGETYPE_MMIO:
2427 break;
2428 default:
2429 AssertFailed();
2430
2431 }
2432 } /* for each page */
2433 }
2434 }
2435
2436 /*
2437 * Finish off any pages pending freeing.
2438 */
2439 if (cPendingPages)
2440 {
2441 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2442 AssertLogRelRCReturn(rc, rc);
2443 }
2444 GMMR3FreePagesCleanup(pReq);
2445
2446 /*
2447 * Flush the IEM TLB, just to be sure it really is done.
2448 */
2449 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ZERO_ALL);
2450
2451 return VINF_SUCCESS;
2452}
2453
2454
2455/**
2456 * Frees all RAM during VM termination
2457 *
2458 * ASSUMES that the caller owns the PGM lock.
2459 *
2460 * @returns VBox status code.
2461 * @param pVM The cross context VM structure.
2462 */
2463int pgmR3PhysRamTerm(PVM pVM)
2464{
2465 int rc;
2466 PGM_LOCK_ASSERT_OWNER(pVM);
2467
2468#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2469 /* Reset the memory balloon. */
2470 rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2471 AssertRC(rc);
2472#endif
2473
2474#ifdef VBOX_WITH_PAGE_SHARING
2475
2476 /*
2477 * Clear all registered shared modules.
2478 */
2479 pgmR3PhysAssertSharedPageChecksums(pVM);
2480 rc = GMMR3ResetSharedModules(pVM);
2481 AssertRC(rc);
2482
2483 /*
2484 * Flush the handy pages updates to make sure no shared pages are hiding
2485 * in there. (Not unlikely if the VM shuts down, apparently.)
2486 */
2487# ifdef VBOX_WITH_PGM_NEM_MODE
2488 if (!pVM->pgm.s.fNemMode)
2489# endif
2490 rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_FLUSH_HANDY_PAGES, 0, NULL);
2491#endif
2492
2493 /*
2494 * We batch up pages that should be freed instead of calling GMM for
2495 * each and every one of them.
2496 */
2497 uint32_t cPendingPages = 0;
2498 PGMMFREEPAGESREQ pReq;
2499 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2500 AssertLogRelRCReturn(rc, rc);
2501
2502 /*
2503 * Walk the ram ranges.
2504 */
2505 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2506 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2507 {
2508 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2509 Assert(pRam || idRamRange == 0);
2510 if (!pRam) continue;
2511 Assert(pRam->idRange == idRamRange);
2512
2513 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2514 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2515
2516 while (iPage-- > 0)
2517 {
2518 PPGMPAGE pPage = &pRam->aPages[iPage];
2519 switch (PGM_PAGE_GET_TYPE(pPage))
2520 {
2521 case PGMPAGETYPE_RAM:
2522 /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */
2523 /** @todo change this to explicitly free private pages here. */
2524 if (PGM_PAGE_IS_SHARED(pPage))
2525 {
2526 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2527 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2528 AssertLogRelRCReturn(rc, rc);
2529 }
2530 break;
2531
2532 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2533 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
2534 case PGMPAGETYPE_MMIO2:
2535 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2536 case PGMPAGETYPE_ROM:
2537 case PGMPAGETYPE_MMIO:
2538 break;
2539 default:
2540 AssertFailed();
2541 }
2542 } /* for each page */
2543 }
2544
2545 /*
2546 * Finish off any pages pending freeing.
2547 */
2548 if (cPendingPages)
2549 {
2550 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2551 AssertLogRelRCReturn(rc, rc);
2552 }
2553 GMMR3FreePagesCleanup(pReq);
2554 return VINF_SUCCESS;
2555}
2556
2557
2558
2559/*********************************************************************************************************************************
2560* MMIO *
2561*********************************************************************************************************************************/
2562
2563/**
2564 * This is the interface IOM is using to register an MMIO region (unmapped).
2565 *
2566 *
2567 * @returns VBox status code.
2568 *
2569 * @param pVM The cross context VM structure.
2570 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2571 * @param cb The size of the MMIO region.
2572 * @param pszDesc The description of the MMIO region.
2573 * @param pidRamRange Where to return the RAM range ID for the MMIO region
2574 * on success.
2575 * @thread EMT(0)
2576 */
2577VMMR3_INT_DECL(int) PGMR3PhysMmioRegister(PVM pVM, PVMCPU pVCpu, RTGCPHYS cb, const char *pszDesc, uint16_t *pidRamRange)
2578{
2579 /*
2580 * Assert assumptions.
2581 */
2582 AssertPtrReturn(pidRamRange, VERR_INVALID_POINTER);
2583 *pidRamRange = UINT16_MAX;
2584 AssertReturn(pVCpu == VMMGetCpu(pVM) && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2585 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
2586 /// @todo AssertReturn(!pVM->pgm.s.fRamRangesFrozen, VERR_WRONG_ORDER);
2587 AssertReturn(cb <= ((RTGCPHYS)PGM_MAX_PAGES_PER_RAM_RANGE << GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
2588 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2589 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2590 AssertReturn(*pszDesc != '\0', VERR_INVALID_POINTER);
2591
2592 /*
2593 * Take the PGM lock and allocate an ad-hoc MMIO RAM range.
2594 */
2595 int rc = PGM_LOCK(pVM);
2596 AssertRCReturn(rc, rc);
2597
2598 uint32_t const cPages = cb >> GUEST_PAGE_SHIFT;
2599 PPGMRAMRANGE pNew = NULL;
2600 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPages, PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, &pNew);
2601 AssertLogRelMsg(RT_SUCCESS(rc), ("pgmR3PhysAllocateRamRange failed: cPages=%#RX32 (%s): %Rrc\n", cPages, pszDesc, rc));
2602 if (RT_SUCCESS(rc))
2603 {
2604 /* Initialize the range. */
2605 pNew->pszDesc = pszDesc;
2606 pNew->uNemRange = UINT32_MAX;
2607 pNew->pbR3 = NULL;
2608 pNew->paLSPages = NULL;
2609 Assert(pNew->fFlags == PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO && pNew->cb == cb);
2610
2611 uint32_t iPage = cPages;
2612 while (iPage-- > 0)
2613 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO);
2614 Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO);
2615
2616 /* update the page count stats. */
2617 pVM->pgm.s.cPureMmioPages += cPages;
2618 pVM->pgm.s.cAllPages += cPages;
2619
2620 /*
2621 * Set the return value, release lock and return to IOM.
2622 */
2623 *pidRamRange = pNew->idRange;
2624 }
2625
2626 PGM_UNLOCK(pVM);
2627 return rc;
2628}
2629
2630
2631/**
2632 * Worker for PGMR3PhysMmioMap that's called owning the lock.
2633 */
2634static int pgmR3PhysMmioMapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb, RTGCPHYS const GCPhysLast,
2635 PPGMRAMRANGE const pMmioRamRange, PGMPHYSHANDLERTYPE const hType, uint64_t const uUser)
2636{
2637 /* Check that the range isn't mapped already. */
2638 AssertLogRelMsgReturn(pMmioRamRange->GCPhys == NIL_RTGCPHYS,
2639 ("desired %RGp mapping for '%s' - already mapped at %RGp!\n",
2640 GCPhys, pMmioRamRange->pszDesc, pMmioRamRange->GCPhys),
2641 VERR_ALREADY_EXISTS);
2642
2643 /*
2644 * Now, check if this falls into a regular RAM range or if we should use
2645 * the ad-hoc one (idRamRange).
2646 */
2647 int rc;
2648 uint32_t idxInsert = UINT32_MAX;
2649 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
2650 if (pOverlappingRange)
2651 {
2652 /* Simplification: all within the same range. */
2653 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
2654 && GCPhysLast <= pOverlappingRange->GCPhysLast,
2655 ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n",
2656 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2657 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2658 VERR_PGM_RAM_CONFLICT);
2659
2660 /* Check that is isn't an ad hoc range, but a real RAM range. */
2661 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
2662 ("%RGp-%RGp (MMIO/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
2663 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2664 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2665 VERR_PGM_RAM_CONFLICT);
2666
2667 /* Check that it's all RAM or MMIO pages. */
2668 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
2669 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2670 while (cLeft-- > 0)
2671 {
2672 AssertLogRelMsgReturn( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2673 || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO, /** @todo MMIO type isn't right */
2674 ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n",
2675 GCPhys, GCPhysLast, pMmioRamRange->pszDesc, pOverlappingRange->GCPhys,
2676 PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
2677 VERR_PGM_RAM_CONFLICT);
2678 pPage++;
2679 }
2680
2681 /*
2682 * Make all the pages in the range MMIO/ZERO pages, freeing any
2683 * RAM pages currently mapped here. This might not be 100% correct
2684 * for PCI memory, but we're doing the same thing for MMIO2 pages.
2685 */
2686 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
2687 AssertRCReturn(rc, rc);
2688
2689 /* Force a PGM pool flush as guest ram references have been changed. */
2690 /** @todo not entirely SMP safe; assuming for now the guest takes
2691 * care of this internally (not touch mapped mmio while changing the
2692 * mapping). */
2693 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2694 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2695 }
2696 else
2697 {
2698 /*
2699 * No RAM range, use the ad hoc one (idRamRange).
2700 *
2701 * Note that we don't have to tell REM about this range because
2702 * PGMHandlerPhysicalRegisterEx will do that for us.
2703 */
2704 AssertLogRelReturn(idxInsert <= pVM->pgm.s.RamRangeUnion.cLookupEntries, VERR_INTERNAL_ERROR_4);
2705 Log(("PGMR3PhysMmioMap: Inserting ad hoc MMIO range #%x for %RGp-%RGp %s\n",
2706 pMmioRamRange->idRange, GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2707
2708 Assert(PGM_PAGE_GET_TYPE(&pMmioRamRange->aPages[0]) == PGMPAGETYPE_MMIO);
2709
2710 /* We ASSUME that all the pages in the ad-hoc range are in the proper
2711 state and all that and that we don't need to re-initialize them here. */
2712
2713#ifdef VBOX_WITH_NATIVE_NEM
2714 /* Notify NEM. */
2715 if (VM_IS_NEM_ENABLED(pVM))
2716 {
2717 uint8_t u2State = 0; /* (must have valid state as there can't be anything to preserve) */
2718 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, cb, 0 /*fFlags*/, NULL, NULL, &u2State, &pMmioRamRange->uNemRange);
2719 AssertLogRelRCReturn(rc, rc);
2720
2721 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2722 while (iPage-- > 0)
2723 PGM_PAGE_SET_NEM_STATE(&pMmioRamRange->aPages[iPage], u2State);
2724 }
2725#endif
2726 /* Insert it into the lookup table (may in theory fail). */
2727 rc = pgmR3PhysRamRangeInsertLookup(pVM, pMmioRamRange, GCPhys, &idxInsert);
2728 }
2729 if (RT_SUCCESS(rc))
2730 {
2731 /*
2732 * Register the access handler.
2733 */
2734 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, hType, uUser, pMmioRamRange->pszDesc);
2735 if (RT_SUCCESS(rc))
2736 {
2737#ifdef VBOX_WITH_NATIVE_NEM
2738 /* Late NEM notification (currently not used by anyone). */
2739 if (VM_IS_NEM_ENABLED(pVM))
2740 {
2741 if (pOverlappingRange)
2742 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2743 pOverlappingRange->pbR3 + (uintptr_t)(GCPhys - pOverlappingRange->GCPhys),
2744 NULL /*pvMmio2*/, NULL /*puNemRange*/);
2745 else
2746 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2747 &pMmioRamRange->uNemRange);
2748 AssertLogRelRC(rc);
2749 }
2750 if (RT_SUCCESS(rc))
2751#endif
2752 {
2753 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2754 return VINF_SUCCESS;
2755 }
2756
2757#ifdef VBOX_WITH_NATIVE_NEM
2758 /*
2759 * Failed, so revert it all as best as we can (the memory content in
2760 * the overlapping case is gone).
2761 */
2762 PGMHandlerPhysicalDeregister(pVM, GCPhys);
2763#endif
2764 }
2765 }
2766
2767 if (!pOverlappingRange)
2768 {
2769#ifdef VBOX_WITH_NATIVE_NEM
2770 /* Notify NEM about the sudden removal of the RAM range we just told it about. */
2771 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2772 NULL /*pu2State*/, &pMmioRamRange->uNemRange);
2773#endif
2774
2775 /* Remove the ad hoc range from the lookup table. */
2776 idxInsert -= 1;
2777 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxInsert);
2778 }
2779
2780 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2781 return rc;
2782}
2783
2784
2785/**
2786 * This is the interface IOM is using to map an MMIO region.
2787 *
2788 * It will check for conflicts and ensure that a RAM range structure
2789 * is present before calling the PGMR3HandlerPhysicalRegister API to
2790 * register the callbacks.
2791 *
2792 * @returns VBox status code.
2793 *
2794 * @param pVM The cross context VM structure.
2795 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2796 * @param GCPhys The start of the MMIO region.
2797 * @param cb The size of the MMIO region.
2798 * @param idRamRange The RAM range ID for the MMIO region as returned by
2799 * PGMR3PhysMmioRegister().
2800 * @param hType The physical access handler type registration.
2801 * @param uUser The user argument.
2802 * @thread EMT(pVCpu)
2803 */
2804VMMR3_INT_DECL(int) PGMR3PhysMmioMap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange,
2805 PGMPHYSHANDLERTYPE hType, uint64_t uUser)
2806{
2807 /*
2808 * Assert on some assumption.
2809 */
2810 VMCPU_ASSERT_EMT(pVCpu);
2811 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2812 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2813 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2814 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2815#ifdef VBOX_STRICT
2816 PCPGMPHYSHANDLERTYPEINT pType = pgmHandlerPhysicalTypeHandleToPtr(pVM, hType);
2817 Assert(pType);
2818 Assert(pType->enmKind == PGMPHYSHANDLERKIND_MMIO);
2819#endif
2820 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2821 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2822 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2823 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2824 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2825
2826 /*
2827 * Take the PGM lock and do the work.
2828 */
2829 int rc = PGM_LOCK(pVM);
2830 AssertRCReturn(rc, rc);
2831
2832 rc = pgmR3PhysMmioMapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange, hType, uUser);
2833#ifdef VBOX_STRICT
2834 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2835#endif
2836
2837 PGM_UNLOCK(pVM);
2838 return rc;
2839}
2840
2841
2842/**
2843 * Worker for PGMR3PhysMmioUnmap that's called with the PGM lock held.
2844 */
2845static int pgmR3PhysMmioUnmapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb,
2846 RTGCPHYS const GCPhysLast, PPGMRAMRANGE const pMmioRamRange)
2847{
2848 /*
2849 * Lookup the RAM range containing the region to make sure it is actually mapped.
2850 */
2851 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhysLast);
2852 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
2853 ("MMIO range not found at %RGp LB %RGp! (%s)\n", GCPhys, cb, pMmioRamRange->pszDesc),
2854 VERR_NOT_FOUND);
2855
2856 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2857 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
2858 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
2859 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_4);
2860
2861 AssertLogRelMsgReturn(pLookupRange == pMmioRamRange || !PGM_RAM_RANGE_IS_AD_HOC(pLookupRange),
2862 ("MMIO unmap mixup at %RGp LB %RGp (%s) vs %RGp LB %RGp (%s)\n",
2863 GCPhys, cb, pMmioRamRange->pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
2864 VERR_NOT_FOUND);
2865
2866 /*
2867 * Deregister the handler. This should reset any aliases, so an ad hoc
2868 * range will only contain MMIO type pages afterwards.
2869 */
2870 int rc = PGMHandlerPhysicalDeregister(pVM, GCPhys);
2871 if (RT_SUCCESS(rc))
2872 {
2873 if (pLookupRange != pMmioRamRange)
2874 {
2875 /*
2876 * Turn the pages back into RAM pages.
2877 */
2878 Log(("pgmR3PhysMmioUnmapLocked: Reverting MMIO range %RGp-%RGp (%s) in %RGp-%RGp (%s) to RAM.\n",
2879 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2880 pLookupRange->GCPhys, pLookupRange->GCPhysLast, pLookupRange->pszDesc));
2881
2882 RTGCPHYS const offRange = GCPhys - pLookupRange->GCPhys;
2883 uint32_t iPage = offRange >> GUEST_PAGE_SHIFT;
2884 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2885 while (cLeft--)
2886 {
2887 PPGMPAGE pPage = &pLookupRange->aPages[iPage];
2888 AssertMsg( (PGM_PAGE_IS_MMIO(pPage) && PGM_PAGE_IS_ZERO(pPage))
2889 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO2_ALIAS_MMIO
2890 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO
2891 , ("%RGp %R[pgmpage]\n", pLookupRange->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), pPage));
2892/** @todo this isn't entirely correct, is it now... aliases must be converted
2893 * to zero pages as they won't be. however, shouldn't
2894 * PGMHandlerPhysicalDeregister deal with this already? */
2895 if (PGM_PAGE_IS_MMIO_OR_ALIAS(pPage))
2896 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM);
2897 iPage++;
2898 }
2899
2900#ifdef VBOX_WITH_NATIVE_NEM
2901 /* Notify REM (failure will probably leave things in a non-working state). */
2902 if (VM_IS_NEM_ENABLED(pVM))
2903 {
2904 uint8_t u2State = UINT8_MAX;
2905 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2906 pLookupRange->pbR3 ? pLookupRange->pbR3 + GCPhys - pLookupRange->GCPhys : NULL,
2907 NULL, &u2State, &pLookupRange->uNemRange);
2908 AssertLogRelRC(rc);
2909 /** @todo status code propagation here... This is likely fatal, right? */
2910 if (u2State != UINT8_MAX)
2911 pgmPhysSetNemStateForPages(&pLookupRange->aPages[(GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT],
2912 cb >> GUEST_PAGE_SHIFT, u2State);
2913 }
2914#endif
2915 }
2916 else
2917 {
2918 /*
2919 * Unlink the ad hoc range.
2920 */
2921#ifdef VBOX_STRICT
2922 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2923 while (iPage-- > 0)
2924 {
2925 PPGMPAGE const pPage = &pMmioRamRange->aPages[iPage];
2926 Assert(PGM_PAGE_IS_MMIO(pPage));
2927 }
2928#endif
2929
2930 Log(("pgmR3PhysMmioUnmapLocked: Unmapping ad hoc MMIO range for %RGp-%RGp %s\n",
2931 GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2932
2933#ifdef VBOX_WITH_NATIVE_NEM
2934 if (VM_IS_NEM_ENABLED(pVM)) /* Notify REM before we unlink the range. */
2935 {
2936 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, 0 /*fFlags*/,
2937 NULL, NULL, NULL, &pMmioRamRange->uNemRange);
2938 AssertLogRelRCReturn(rc, rc); /* we're up the creek if this hits. */
2939 }
2940#endif
2941
2942 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxLookup);
2943 }
2944 }
2945
2946 /* Force a PGM pool flush as guest ram references have been changed. */
2947 /** @todo Not entirely SMP safe; assuming for now the guest takes care of
2948 * this internally (not touch mapped mmio while changing the mapping). */
2949 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2950 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2951
2952 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2953 /*pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
2954
2955 return rc;
2956}
2957
2958
2959/**
2960 * This is the interface IOM is using to register an MMIO region.
2961 *
2962 * It will take care of calling PGMHandlerPhysicalDeregister and clean up
2963 * any ad hoc PGMRAMRANGE left behind.
2964 *
2965 * @returns VBox status code.
2966 * @param pVM The cross context VM structure.
2967 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2968 * @param GCPhys The start of the MMIO region.
2969 * @param cb The size of the MMIO region.
2970 * @param idRamRange The RAM range ID for the MMIO region as returned by
2971 * PGMR3PhysMmioRegister().
2972 * @thread EMT(pVCpu)
2973 */
2974VMMR3_INT_DECL(int) PGMR3PhysMmioUnmap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange)
2975{
2976 /*
2977 * Input validation.
2978 */
2979 VMCPU_ASSERT_EMT(pVCpu);
2980 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2981 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2982 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2983 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2984 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2985 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2986 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2987 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2988 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2989
2990 /*
2991 * Take the PGM lock and do what's asked.
2992 */
2993 int rc = PGM_LOCK(pVM);
2994 AssertRCReturn(rc, rc);
2995
2996 rc = pgmR3PhysMmioUnmapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange);
2997#ifdef VBOX_STRICT
2998 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2999#endif
3000
3001 PGM_UNLOCK(pVM);
3002 return rc;
3003}
3004
3005
3006
3007/*********************************************************************************************************************************
3008* MMIO2 *
3009*********************************************************************************************************************************/
3010
3011/**
3012 * Validates the claim to an MMIO2 range and returns the pointer to it.
3013 *
3014 * @returns The MMIO2 entry index on success, negative error status on failure.
3015 * @param pVM The cross context VM structure.
3016 * @param pDevIns The device instance owning the region.
3017 * @param hMmio2 Handle to look up.
3018 * @param pcChunks Where to return the number of chunks associated with
3019 * this handle.
3020 */
3021static int32_t pgmR3PhysMmio2ResolveHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t *pcChunks)
3022{
3023 *pcChunks = 0;
3024 uint32_t const idxFirst = hMmio2 - 1U;
3025 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3026 AssertReturn(idxFirst < cMmio2Ranges, VERR_INVALID_HANDLE);
3027
3028 PPGMREGMMIO2RANGE const pFirst = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3029 AssertReturn(pFirst->idMmio2 == hMmio2, VERR_INVALID_HANDLE);
3030 AssertReturn((pFirst->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK), VERR_INVALID_HANDLE);
3031 AssertReturn(pFirst->pDevInsR3 == pDevIns && RT_VALID_PTR(pDevIns), VERR_NOT_OWNER);
3032
3033 /* Figure out how many chunks this handle spans. */
3034 if (pFirst->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3035 *pcChunks = 1;
3036 else
3037 {
3038 uint32_t cChunks = 1;
3039 for (uint32_t idx = idxFirst + 1;; idx++)
3040 {
3041 cChunks++;
3042 AssertReturn(idx < cMmio2Ranges, VERR_INTERNAL_ERROR_2);
3043 PPGMREGMMIO2RANGE const pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3044 AssertLogRelMsgReturn( pCur->pDevInsR3 == pDevIns
3045 && pCur->idMmio2 == idx + 1
3046 && pCur->iSubDev == pFirst->iSubDev
3047 && pCur->iRegion == pFirst->iRegion
3048 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK),
3049 ("cur: %p/%#x/%#x/%#x/%#x/%s; first: %p/%#x/%#x/%#x/%#x/%s\n",
3050 pCur->pDevInsR3, pCur->idMmio2, pCur->iSubDev, pCur->iRegion, pCur->fFlags,
3051 pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc,
3052 pDevIns, idx + 1, pFirst->iSubDev, pFirst->iRegion, pFirst->fFlags,
3053 pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc),
3054 VERR_INTERNAL_ERROR_3);
3055 if (pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3056 break;
3057 }
3058 *pcChunks = cChunks;
3059 }
3060
3061 return (int32_t)idxFirst;
3062}
3063
3064
3065/**
3066 * Check if a device has already registered a MMIO2 region.
3067 *
3068 * @returns NULL if not registered, otherwise pointer to the MMIO2.
3069 * @param pVM The cross context VM structure.
3070 * @param pDevIns The device instance owning the region.
3071 * @param iSubDev The sub-device number.
3072 * @param iRegion The region.
3073 */
3074DECLINLINE(PPGMREGMMIO2RANGE) pgmR3PhysMmio2Find(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion)
3075{
3076 /*
3077 * Search the array. There shouldn't be many entries.
3078 */
3079 uint32_t idx = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3080 while (idx-- > 0)
3081 if (RT_LIKELY( pVM->pgm.s.aMmio2Ranges[idx].pDevInsR3 != pDevIns
3082 || pVM->pgm.s.aMmio2Ranges[idx].iRegion != iRegion
3083 || pVM->pgm.s.aMmio2Ranges[idx].iSubDev != iSubDev))
3084 { /* likely */ }
3085 else
3086 return &pVM->pgm.s.aMmio2Ranges[idx];
3087 return NULL;
3088}
3089
3090/**
3091 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Map.
3092 */
3093static int pgmR3PhysMmio2EnableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3094{
3095 int rc = VINF_SUCCESS;
3096 while (cChunks-- > 0)
3097 {
3098 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3099 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3100
3101 Assert(!(pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING));
3102 int rc2 = pgmHandlerPhysicalExRegister(pVM, pMmio2->pPhysHandlerR3, pRamRange->GCPhys, pRamRange->GCPhysLast);
3103 if (RT_SUCCESS(rc2))
3104 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_IS_TRACKING;
3105 else
3106 AssertLogRelMsgFailedStmt(("%#RGp-%#RGp %s failed -> %Rrc\n",
3107 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3108 rc = RT_SUCCESS(rc) ? rc2 : rc);
3109
3110 idx++;
3111 }
3112 return rc;
3113}
3114
3115
3116/**
3117 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Unmap.
3118 */
3119static int pgmR3PhysMmio2DisableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3120{
3121 int rc = VINF_SUCCESS;
3122 while (cChunks-- > 0)
3123 {
3124 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3125 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3126 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING)
3127 {
3128 int rc2 = pgmHandlerPhysicalExDeregister(pVM, pMmio2->pPhysHandlerR3);
3129 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3130 ("%#RGp-%#RGp %s failed -> %Rrc\n",
3131 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3132 rc = RT_SUCCESS(rc) ? rc2 : rc);
3133 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_TRACKING;
3134 }
3135 idx++;
3136 }
3137 return rc;
3138}
3139
#if 0 // temp

/**
 * Common worker PGMR3PhysMmio2PreRegister & PGMR3PhysMMIO2Register that links a
 * complete registration entry into the lists and lookup tables.
 *
 * @note    Disabled code (compiled out by the enclosing \#if 0).
 *
 * @param   pVM     The cross context VM structure.
 * @param   pNew    The new MMIO / MMIO2 registration to link (first chunk).
 */
static void pgmR3PhysMmio2Link(PVM pVM, PPGMREGMMIO2RANGE pNew)
{
    Assert(pNew->idMmio2 != UINT8_MAX);

    /*
     * The registration may consist of several chunks chained together, so
     * locate the final chunk first.  (The list order doesn't matter, the
     * whole chain is inserted at the head below.)
     */
    PPGMREGMMIO2RANGE pTail = pNew;
    while (!(pTail->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK))
    {
        Assert(pTail->pNextR3);
        Assert(pTail->pNextR3->pDevInsR3 == pNew->pDevInsR3);
        Assert(pTail->pNextR3->iSubDev   == pNew->iSubDev);
        Assert(pTail->pNextR3->iRegion   == pNew->iRegion);
        Assert(pTail->pNextR3->idMmio2   == pTail->idMmio2 + 1);
        pTail = pTail->pNextR3;
    }

    PGM_LOCK_VOID(pVM);

    /* Put the chain of chunks at the head of the list. */
    pTail->pNextR3 = pVM->pgm.s.pRegMmioRangesR3;
    pVM->pgm.s.pRegMmioRangesR3 = pNew;

    /* Enter each chunk into the MMIO2 range/page ID tables. */
    PPGMREGMMIO2RANGE pCur  = pNew;
    uint8_t           idCur = pNew->idMmio2;
    for (;; pCur = pCur->pNextR3, idCur++)
    {
        Assert(pVM->pgm.s.apMmio2RangesR3[idCur - 1] == NULL);
        Assert(pVM->pgm.s.apMmio2RangesR0[idCur - 1] == NIL_RTR0PTR);
        pVM->pgm.s.apMmio2RangesR3[idCur - 1] = pCur;
        pVM->pgm.s.apMmio2RangesR0[idCur - 1] = pCur->RamRange.pSelfR0 - RT_UOFFSETOF(PGMREGMMIO2RANGE, RamRange);
        if (pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
            break;
    }

    pgmPhysInvalidatePageMapTLB(pVM);
    PGM_UNLOCK(pVM);
}
#endif
3195
3196
3197/**
3198 * Allocate and register an MMIO2 region.
3199 *
3200 * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM
3201 * associated with a device. It is also non-shared memory with a permanent
3202 * ring-3 mapping and page backing (presently).
3203 *
3204 * A MMIO2 range may overlap with base memory if a lot of RAM is configured for
3205 * the VM, in which case we'll drop the base memory pages. Presently we will
3206 * make no attempt to preserve anything that happens to be present in the base
3207 * memory that is replaced, this is of course incorrect but it's too much
3208 * effort.
3209 *
3210 * @returns VBox status code.
3211 * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the
3212 * memory.
3213 * @retval VERR_ALREADY_EXISTS if the region already exists.
3214 *
3215 * @param pVM The cross context VM structure.
3216 * @param pDevIns The device instance owning the region.
3217 * @param iSubDev The sub-device number.
3218 * @param iRegion The region number. If the MMIO2 memory is a PCI
3219 * I/O region this number has to be the number of that
3220 * region. Otherwise it can be any number save
3221 * UINT8_MAX.
3222 * @param cb The size of the region. Must be page aligned.
3223 * @param fFlags Reserved for future use, must be zero.
3224 * @param pszDesc The description.
3225 * @param ppv Where to store the pointer to the ring-3 mapping of
3226 * the memory.
3227 * @param phRegion Where to return the MMIO2 region handle. Optional.
3228 * @thread EMT(0)
3229 *
3230 * @note Only callable at VM creation time and during VM state loading.
3231 * The latter is for PCNet saved state compatibility with pre 4.3.6
3232 * state.
3233 */
3234VMMR3_INT_DECL(int) PGMR3PhysMmio2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cb,
3235 uint32_t fFlags, const char *pszDesc, void **ppv, PGMMMIO2HANDLE *phRegion)
3236{
3237 /*
3238 * Validate input.
3239 */
3240 AssertPtrReturn(ppv, VERR_INVALID_POINTER);
3241 *ppv = NULL;
3242 if (phRegion)
3243 {
3244 AssertPtrReturn(phRegion, VERR_INVALID_POINTER);
3245 *phRegion = NIL_PGMMMIO2HANDLE;
3246 }
3247 PVMCPU const pVCpu = VMMGetCpu(pVM);
3248 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3249 VMSTATE const enmVMState = VMR3GetState(pVM);
3250 AssertMsgReturn(enmVMState == VMSTATE_CREATING || enmVMState == VMSTATE_LOADING,
3251 ("state %s, expected CREATING or LOADING\n", VMGetStateName(enmVMState)),
3252 VERR_VM_INVALID_VM_STATE);
3253
3254 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3255 AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER);
3256 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
3257
3258 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
3259 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
3260
3261 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3262 AssertReturn(cb, VERR_INVALID_PARAMETER);
3263 AssertReturn(!(fFlags & ~PGMPHYS_MMIO2_FLAGS_VALID_MASK), VERR_INVALID_FLAGS);
3264
3265 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
3266 AssertLogRelReturn(((RTGCPHYS)cGuestPages << GUEST_PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER);
3267 AssertLogRelReturn(cGuestPages <= PGM_MAX_PAGES_PER_MMIO2_REGION, VERR_OUT_OF_RANGE);
3268 AssertLogRelReturn(cGuestPages <= (MM_MMIO_64_MAX >> GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
3269
3270 AssertReturn(pgmR3PhysMmio2Find(pVM, pDevIns, iSubDev, iRegion) == NULL, VERR_ALREADY_EXISTS);
3271
3272 /*
3273 * For the 2nd+ instance, mangle the description string so it's unique.
3274 */
3275 if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */
3276 {
3277 pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance);
3278 if (!pszDesc)
3279 return VERR_NO_MEMORY;
3280 }
3281
3282 /*
3283 * Check that we've got sufficient MMIO2 ID space for this request (the
3284 * allocation will be done later once we've got the backing memory secured,
3285 * but given the EMT0 restriction, that's not going to be a problem).
3286 *
3287 * The zero ID is not used as it could be confused with NIL_GMM_PAGEID, so
3288 * the IDs goes from 1 thru PGM_MAX_MMIO2_RANGES.
3289 */
3290 unsigned const cChunks = pgmPhysMmio2CalcChunkCount(cb, NULL);
3291
3292 int rc = PGM_LOCK(pVM);
3293 AssertRCReturn(rc, rc);
3294
3295 AssertCompile(PGM_MAX_MMIO2_RANGES < 255);
3296 uint8_t const idMmio2 = pVM->pgm.s.cMmio2Ranges + 1;
3297 AssertLogRelReturnStmt(idMmio2 + cChunks <= PGM_MAX_MMIO2_RANGES, PGM_UNLOCK(pVM), VERR_PGM_TOO_MANY_MMIO2_RANGES);
3298
3299 /*
3300 * Try reserve and allocate the backing memory first as this is what is
3301 * most likely to fail.
3302 */
3303 rc = MMR3AdjustFixedReservation(pVM, cGuestPages, pszDesc);
3304 if (RT_SUCCESS(rc))
3305 {
3306 /*
3307 * If we're in driverless we'll be doing the work here, otherwise we
3308 * must call ring-0 to do the job as we'll need physical addresses
3309 * and maybe a ring-0 mapping address for it all.
3310 */
3311#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3312 if (!SUPR3IsDriverless())
3313 {
3314 PGMPHYSMMIO2REGISTERREQ Mmio2RegReq;
3315 Mmio2RegReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3316 Mmio2RegReq.Hdr.cbReq = sizeof(Mmio2RegReq);
3317 Mmio2RegReq.cbGuestPage = GUEST_PAGE_SIZE;
3318 Mmio2RegReq.cGuestPages = cGuestPages;
3319 Mmio2RegReq.idMmio2 = idMmio2;
3320 Mmio2RegReq.cChunks = cChunks;
3321 Mmio2RegReq.iSubDev = (uint8_t)iSubDev;
3322 Mmio2RegReq.iRegion = (uint8_t)iRegion;
3323 Mmio2RegReq.fFlags = fFlags;
3324 Mmio2RegReq.pDevIns = pDevIns;
3325 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_REGISTER, 0 /*u64Arg*/, &Mmio2RegReq.Hdr);
3326 }
3327 else
3328#endif
3329 rc = pgmPhysMmio2RegisterWorker(pVM, cGuestPages, idMmio2, cChunks, pDevIns, iSubDev, iRegion, fFlags);
3330 if (RT_SUCCESS(rc))
3331 {
3332 Assert(idMmio2 + cChunks - 1 == pVM->pgm.s.cMmio2Ranges);
3333
3334 /*
3335 * There are two things left to do:
3336 * 1. Add the description to the associated RAM ranges.
3337 * 2. Pre-allocate access handlers for dirty bit tracking if necessary.
3338 */
3339 bool const fNeedHandler = (fFlags & PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES)
3340#ifdef VBOX_WITH_PGM_NEM_MODE
3341 && (!VM_IS_NEM_ENABLED(pVM) || !NEMR3IsMmio2DirtyPageTrackingSupported(pVM))
3342#endif
3343 ;
3344 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
3345 {
3346 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idxChunk + idMmio2 - 1];
3347 Assert(pMmio2->idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
3348 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apRamRanges[pMmio2->idRamRange];
3349 Assert(pRamRange->pbR3 == pMmio2->pbR3);
3350 Assert(pRamRange->cb == pMmio2->cbReal);
3351
3352 pRamRange->pszDesc = pszDesc; /** @todo mangle this if we got more than one chunk */
3353 if (fNeedHandler)
3354 {
3355 rc = pgmHandlerPhysicalExCreate(pVM, pVM->pgm.s.hMmio2DirtyPhysHandlerType, pMmio2->idMmio2,
3356 pszDesc, &pMmio2->pPhysHandlerR3);
3357 AssertLogRelMsgReturnStmt(RT_SUCCESS(rc),
3358 ("idMmio2=%#x idxChunk=%#x rc=%Rc\n", idMmio2, idxChunk, rc),
3359 PGM_UNLOCK(pVM),
3360 rc); /* PGMR3Term will take care of it all. */
3361 }
3362 }
3363
3364 /*
3365 * Done!
3366 */
3367 if (phRegion)
3368 *phRegion = idMmio2;
3369 *ppv = pVM->pgm.s.aMmio2Ranges[idMmio2 - 1].pbR3;
3370
3371 PGM_UNLOCK(pVM);
3372 return VINF_SUCCESS;
3373 }
3374
3375 MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3376 }
3377 if (pDevIns->iInstance > 0)
3378 MMR3HeapFree((void *)pszDesc);
3379 return rc;
3380}
3381
3382/**
3383 * Deregisters and frees an MMIO2 region.
3384 *
3385 * Any physical access handlers registered for the region must be deregistered
3386 * before calling this function.
3387 *
3388 * @returns VBox status code.
3389 * @param pVM The cross context VM structure.
3390 * @param pDevIns The device instance owning the region.
3391 * @param hMmio2 The MMIO2 handle to deregister, or NIL if all
3392 * regions for the given device is to be deregistered.
3393 * @thread EMT(0)
3394 *
3395 * @note Only callable during VM state loading. This is to jettison an unused
3396 * MMIO2 section present in PCNet saved state prior to VBox v4.3.6.
3397 */
3398VMMR3_INT_DECL(int) PGMR3PhysMmio2Deregister(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
3399{
3400 /*
3401 * Validate input.
3402 */
3403 PVMCPU const pVCpu = VMMGetCpu(pVM);
3404 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3405 VMSTATE const enmVMState = VMR3GetState(pVM);
3406 AssertMsgReturn(enmVMState == VMSTATE_LOADING,
3407 ("state %s, expected LOADING\n", VMGetStateName(enmVMState)),
3408 VERR_VM_INVALID_VM_STATE);
3409
3410 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3411
3412 /*
3413 * Take the PGM lock and scan for registrations matching the requirements.
3414 * We do this backwards to more easily reduce the cMmio2Ranges count when
3415 * stuff is removed.
3416 */
3417 PGM_LOCK_VOID(pVM);
3418
3419 int rc = VINF_SUCCESS;
3420 unsigned cFound = 0;
3421 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3422 uint32_t idx = cMmio2Ranges;
3423 while (idx-- > 0)
3424 {
3425 PPGMREGMMIO2RANGE pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3426 if ( pCur->pDevInsR3 == pDevIns
3427 && ( hMmio2 == NIL_PGMMMIO2HANDLE
3428 || pCur->idMmio2 == hMmio2))
3429 {
3430 cFound++;
3431
3432 /*
3433 * Wind back the first chunk for this registration.
3434 */
3435 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK, ("idx=%u fFlags=%#x\n", idx, pCur->fFlags),
3436 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3437 uint32_t cGuestPages = pCur->cbReal >> GUEST_PAGE_SHIFT;
3438 uint32_t cChunks = 1;
3439 while ( idx > 0
3440 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK))
3441 {
3442 AssertLogRelMsgReturnStmt( pCur[-1].pDevInsR3 == pDevIns
3443 && pCur[-1].iRegion == pCur->iRegion
3444 && pCur[-1].iSubDev == pCur->iSubDev,
3445 ("[%u]: %p/%#x/%#x/fl=%#x; [%u]: %p/%#x/%#x/fl=%#x; cChunks=%#x\n",
3446 idx - 1, pCur[-1].pDevInsR3, pCur[-1].iRegion, pCur[-1].iSubDev, pCur[-1].fFlags,
3447 idx, pCur->pDevInsR3, pCur->iRegion, pCur->iSubDev, pCur->fFlags, cChunks),
3448 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3449 cChunks++;
3450 pCur--;
3451 idx--;
3452 cGuestPages += pCur->cbReal >> GUEST_PAGE_SHIFT;
3453 }
3454 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK,
3455 ("idx=%u fFlags=%#x cChunks=%#x\n", idx, pCur->fFlags, cChunks),
3456 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3457
3458 /*
3459 * Unmap it if it's mapped.
3460 */
3461 if (pCur->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
3462 {
3463 int rc2 = PGMR3PhysMmio2Unmap(pVM, pCur->pDevInsR3, idx + 1, pCur->GCPhys);
3464 AssertRC(rc2);
3465 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
3466 rc = rc2;
3467 }
3468
3469 /*
3470 * Destroy access handlers.
3471 */
3472 for (uint32_t iChunk = 0; iChunk < cChunks; iChunk++)
3473 if (pCur[iChunk].pPhysHandlerR3)
3474 {
3475 pgmHandlerPhysicalExDestroy(pVM, pCur[iChunk].pPhysHandlerR3);
3476 pCur[iChunk].pPhysHandlerR3 = NULL;
3477 }
3478
3479 /*
3480 * Call kernel mode / worker to do the actual deregistration.
3481 */
3482 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idx] ? pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc : NULL;
3483 int rc2;
3484#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3485 if (!SUPR3IsDriverless())
3486 {
3487 PGMPHYSMMIO2DEREGISTERREQ Mmio2DeregReq;
3488 Mmio2DeregReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3489 Mmio2DeregReq.Hdr.cbReq = sizeof(Mmio2DeregReq);
3490 Mmio2DeregReq.idMmio2 = idx + 1;
3491 Mmio2DeregReq.cChunks = cChunks;
3492 Mmio2DeregReq.pDevIns = pDevIns;
3493 rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER, 0 /*u64Arg*/, &Mmio2DeregReq.Hdr);
3494 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3495 ("VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3496 rc2, idx, cChunks, pszDesc),
3497 rc = RT_SUCCESS(rc) ? rc2 : rc);
3498 pgmPhysInvalidRamRangeTlbs(pVM); /* Ensure no stale pointers in the ring-3 RAM range TLB. */
3499 }
3500 else
3501#endif
3502 {
3503 Assert(PGM_IS_IN_NEM_MODE(pVM));
3504 rc2 = pgmPhysMmio2DeregisterWorker(pVM, idx, cChunks, pDevIns);
3505 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3506 ("pgmPhysMmio2DeregisterWorker: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3507 rc2, idx, cChunks, pszDesc),
3508 rc = RT_SUCCESS(rc) ? rc2 : rc);
3509 }
3510 if (RT_FAILURE(rc2))
3511 {
3512 LogRel(("PGMR3PhysMmio2Deregister: Deregistration failed: %Rrc; cChunks=%u %s\n", rc, cChunks, pszDesc));
3513 if (RT_SUCCESS(rc))
3514 rc = rc2;
3515 }
3516
3517 /*
3518 * Adjust the memory reservation.
3519 */
3520 if (!PGM_IS_IN_NEM_MODE(pVM) && RT_SUCCESS(rc2))
3521 {
3522 rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3523 AssertLogRelMsgStmt(RT_SUCCESS(rc2), ("rc=%Rrc cGuestPages=%#x\n", rc, cGuestPages),
3524 rc = RT_SUCCESS(rc) ? rc2 : rc);
3525 }
3526
3527 /* Are we done? */
3528 if (hMmio2 != NIL_PGMMMIO2HANDLE)
3529 break;
3530 }
3531 }
3532 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3533 PGM_UNLOCK(pVM);
3534 return !cFound && hMmio2 != NIL_PGMMMIO2HANDLE ? VERR_NOT_FOUND : rc;
3535}
3536
3537
3538/**
3539 * Worker form PGMR3PhysMmio2Map.
3540 */
3541static int pgmR3PhysMmio2MapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks,
3542 RTGCPHYS const GCPhys, RTGCPHYS const GCPhysLast)
3543{
3544 /*
3545 * Validate the mapped status now that we've got the lock.
3546 */
3547 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3548 {
3549 AssertReturn( pVM->pgm.s.aMmio2Ranges[idx].GCPhys == NIL_RTGCPHYS
3550 && !(pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED),
3551 VERR_WRONG_ORDER);
3552 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3553 AssertReturn(pRamRange->GCPhys == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3554 AssertReturn(pRamRange->GCPhysLast == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3555 Assert(pRamRange->pbR3 == pVM->pgm.s.aMmio2Ranges[idx].pbR3);
3556 Assert(pRamRange->idRange == pVM->pgm.s.aMmio2Ranges[idx].idRamRange);
3557 }
3558
3559 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc;
3560#ifdef VBOX_WITH_NATIVE_NEM
3561 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3562 | (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3563 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3564#endif
3565
3566 /*
3567 * Now, check if this falls into a regular RAM range or if we should use
3568 * the ad-hoc one.
3569 *
3570 * Note! For reasons of simplictly, we're considering the whole MMIO2 area
3571 * here rather than individual chunks.
3572 */
3573 int rc = VINF_SUCCESS;
3574 uint32_t idxInsert = UINT32_MAX;
3575 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
3576 if (pOverlappingRange)
3577 {
3578 /* Simplification: all within the same range. */
3579 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
3580 && GCPhysLast <= pOverlappingRange->GCPhysLast,
3581 ("%RGp-%RGp (MMIO2/%s) falls partly outside %RGp-%RGp (%s)\n",
3582 GCPhys, GCPhysLast, pszDesc,
3583 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3584 VERR_PGM_RAM_CONFLICT);
3585
3586 /* Check that is isn't an ad hoc range, but a real RAM range. */
3587 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
3588 ("%RGp-%RGp (MMIO2/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
3589 GCPhys, GCPhysLast, pszDesc,
3590 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3591 VERR_PGM_RAM_CONFLICT);
3592
3593 /* There can only be one MMIO2 chunk matching here! */
3594 AssertLogRelMsgReturn(cChunks == 1,
3595 ("%RGp-%RGp (MMIO2/%s) consists of %u chunks whereas the RAM (%s) somehow doesn't!\n",
3596 GCPhys, GCPhysLast, pszDesc, cChunks, pOverlappingRange->pszDesc),
3597 VERR_PGM_PHYS_MMIO_EX_IPE);
3598
3599 /* Check that it's all RAM pages. */
3600 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3601 uint32_t const cMmio2Pages = pVM->pgm.s.apMmio2RamRanges[idxFirst]->cb >> GUEST_PAGE_SHIFT;
3602 uint32_t cPagesLeft = cMmio2Pages;
3603 while (cPagesLeft-- > 0)
3604 {
3605 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
3606 ("%RGp-%RGp (MMIO2/%s): %RGp is not a RAM page - type=%d desc=%s\n", GCPhys, GCPhysLast,
3607 pszDesc, pOverlappingRange->GCPhys, PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
3608 VERR_PGM_RAM_CONFLICT);
3609 pPage++;
3610 }
3611
3612#ifdef VBOX_WITH_PGM_NEM_MODE
3613 /* We cannot mix MMIO2 into a RAM range in simplified memory mode because pOverlappingRange->pbR3 can't point
3614 both at the RAM and MMIO2, so we won't ever write & read from the actual MMIO2 memory if we try. */
3615 AssertLogRelMsgReturn(!VM_IS_NEM_ENABLED(pVM),
3616 ("Putting %s at %RGp-%RGp is not possible in NEM mode because existing %RGp-%RGp (%s) mapping\n",
3617 pszDesc, GCPhys, GCPhysLast,
3618 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3619 VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
3620#endif
3621
3622 /*
3623 * Make all the pages in the range MMIO/ZERO pages, freeing any
3624 * RAM pages currently mapped here. This might not be 100% correct,
3625 * but so what, we do the same from MMIO...
3626 */
3627 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
3628 AssertRCReturn(rc, rc);
3629
3630 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - inside %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc,
3631 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc));
3632
3633 /*
3634 * We're all in for mapping it now. Update the MMIO2 range to reflect it.
3635 */
3636 pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys = GCPhys;
3637 pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags |= PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED;
3638
3639 /*
3640 * Replace the pages in the range.
3641 */
3642 PPGMPAGE pPageSrc = &pVM->pgm.s.apMmio2RamRanges[idxFirst]->aPages[0];
3643 PPGMPAGE pPageDst = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3644 cPagesLeft = cMmio2Pages;
3645 while (cPagesLeft-- > 0)
3646 {
3647 Assert(PGM_PAGE_IS_MMIO(pPageDst));
3648
3649 RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc); RT_NOREF_PV(HCPhys);
3650 uint32_t const idPage = PGM_PAGE_GET_PAGEID(pPageSrc);
3651 PGM_PAGE_SET_PAGEID(pVM, pPageDst, idPage);
3652 PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys);
3653 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2);
3654 PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED);
3655 PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE);
3656 PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0);
3657 PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0);
3658 /* NEM state is not relevant, see VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE above. */
3659
3660 pVM->pgm.s.cZeroPages--;
3661 pPageSrc++;
3662 pPageDst++;
3663 }
3664
3665 /* Force a PGM pool flush as guest ram references have been changed. */
3666 /** @todo not entirely SMP safe; assuming for now the guest takes
3667 * care of this internally (not touch mapped mmio while changing the
3668 * mapping). */
3669 PVMCPU pVCpu = VMMGetCpu(pVM);
3670 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3671 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3672 }
3673 else
3674 {
3675 /*
3676 * No RAM range, insert the ones prepared during registration.
3677 */
3678 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - no RAM overlap\n", GCPhys, GCPhysLast, pszDesc));
3679 RTGCPHYS GCPhysCur = GCPhys;
3680 uint32_t iChunk = 0;
3681 uint32_t idx = idxFirst;
3682 for (; iChunk < cChunks; iChunk++, idx++)
3683 {
3684 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3685 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3686 Assert(pRamRange->idRange == pMmio2->idRamRange);
3687 Assert(pMmio2->GCPhys == NIL_RTGCPHYS);
3688
3689#ifdef VBOX_WITH_NATIVE_NEM
3690 /* Tell NEM and get the new NEM state for the pages. */
3691 uint8_t u2NemState = 0;
3692 if (VM_IS_NEM_ENABLED(pVM))
3693 {
3694 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL /*pvRam*/, pRamRange->pbR3,
3695 &u2NemState, &pRamRange->uNemRange);
3696 AssertLogRelMsgBreak(RT_SUCCESS(rc),
3697 ("%RGp LB %RGp fFlags=%#x (%s)\n",
3698 GCPhysCur, pRamRange->cb, pMmio2->fFlags, pRamRange->pszDesc));
3699 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED; /* Set this early to indicate that NEM has been notified. */
3700 }
3701#endif
3702
3703 /* Clear the tracking data of pages we're going to reactivate. */
3704 PPGMPAGE pPageSrc = &pRamRange->aPages[0];
3705 uint32_t cPagesLeft = pRamRange->cb >> GUEST_PAGE_SHIFT;
3706 while (cPagesLeft-- > 0)
3707 {
3708 PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0);
3709 PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0);
3710#ifdef VBOX_WITH_NATIVE_NEM
3711 PGM_PAGE_SET_NEM_STATE(pPageSrc, u2NemState);
3712#endif
3713 pPageSrc++;
3714 }
3715
3716 /* Insert the RAM range into the lookup table. */
3717 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhysCur, &idxInsert);
3718 AssertRCBreak(rc);
3719
3720 /* Mark the range as fully mapped. */
3721 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_OVERLAPPING;
3722 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED;
3723 pMmio2->GCPhys = GCPhysCur;
3724
3725 /* Advance. */
3726 GCPhysCur += pRamRange->cb;
3727 }
3728 if (RT_FAILURE(rc))
3729 {
3730 /*
3731 * Bail out anything we've done so far.
3732 */
3733 idxInsert -= 1;
3734 do
3735 {
3736 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3737 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3738
3739#ifdef VBOX_WITH_NATIVE_NEM
3740 if ( VM_IS_NEM_ENABLED(pVM)
3741 && (pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED))
3742 {
3743 uint8_t u2NemState = UINT8_MAX;
3744 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL, pRamRange->pbR3,
3745 &u2NemState, &pRamRange->uNemRange);
3746 if (u2NemState != UINT8_MAX)
3747 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2NemState);
3748 }
3749#endif
3750 if (pMmio2->GCPhys != NIL_RTGCPHYS)
3751 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
3752
3753 pMmio2->GCPhys = NIL_RTGCPHYS;
3754 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_MAPPED;
3755
3756 idx--;
3757 } while (iChunk-- > 0);
3758 return rc;
3759 }
3760 }
3761
3762 /*
3763 * If the range have dirty page monitoring enabled, enable that.
3764 *
3765 * We ignore failures here for now because if we fail, the whole mapping
3766 * will have to be reversed and we'll end up with nothing at all on the
3767 * screen and a grumpy guest, whereas if we just go on, we'll only have
3768 * visual distortions to gripe about. There will be something in the
3769 * release log.
3770 */
3771 if ( pVM->pgm.s.aMmio2Ranges[idxFirst].pPhysHandlerR3
3772 && (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3773 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
3774
3775 /* Flush physical page map TLB. */
3776 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3777
3778#ifdef VBOX_WITH_NATIVE_NEM
3779 /*
3780 * Late NEM notification (currently unused).
3781 */
3782 if (VM_IS_NEM_ENABLED(pVM))
3783 {
3784 if (pOverlappingRange)
3785 {
3786 uint8_t * const pbRam = pOverlappingRange->pbR3 ? &pOverlappingRange->pbR3[GCPhys - pOverlappingRange->GCPhys] : NULL;
3787 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, GCPhysLast - GCPhys + 1U,
3788 fNemFlags | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE, pbRam,
3789 pVM->pgm.s.aMmio2Ranges[idxFirst].pbR3, NULL /*puNemRange*/);
3790 }
3791 else
3792 {
3793 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3794 {
3795 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3796 Assert(pVM->pgm.s.aMmio2Ranges[idx].GCPhys == pRamRange->GCPhys);
3797
3798 rc = NEMR3NotifyPhysMmioExMapLate(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags, NULL /*pvRam*/,
3799 pRamRange->pbR3, &pRamRange->uNemRange);
3800 AssertRCBreak(rc);
3801 }
3802 }
3803 AssertLogRelRCReturnStmt(rc,
3804 PGMR3PhysMmio2Unmap(pVM, pVM->pgm.s.aMmio2Ranges[idxFirst].pDevInsR3, idxFirst + 1, GCPhys),
3805 rc);
3806 }
3807#endif
3808
3809 return VINF_SUCCESS;
3810}
3811
3812
3813/**
3814 * Maps a MMIO2 region.
3815 *
3816 * This is typically done when a guest / the bios / state loading changes the
3817 * PCI config. The replacing of base memory has the same restrictions as during
3818 * registration, of course.
3819 *
3820 * @returns VBox status code.
3821 *
3822 * @param pVM The cross context VM structure.
3823 * @param pDevIns The device instance owning the region.
3824 * @param hMmio2 The handle of the region to map.
3825 * @param GCPhys The guest-physical address to be remapped.
3826 */
3827VMMR3_INT_DECL(int) PGMR3PhysMmio2Map(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
3828{
3829 /*
3830 * Validate input.
3831 */
3832 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
3833 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3834 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
3835 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
3836 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3837 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
3838
3839 uint32_t cChunks = 0;
3840 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
3841 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
3842
3843 /* Gather the full range size so we can validate the mapping address properly. */
3844 RTGCPHYS cbRange = 0;
3845 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3846 cbRange += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
3847
3848 RTGCPHYS const GCPhysLast = GCPhys + cbRange - 1;
3849 AssertLogRelReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3850
3851 /*
3852 * Take the PGM lock and call worker.
3853 */
3854 int rc = PGM_LOCK(pVM);
3855 AssertRCReturn(rc, rc);
3856
3857 rc = pgmR3PhysMmio2MapLocked(pVM, idxFirst, cChunks, GCPhys, GCPhysLast);
3858#ifdef VBOX_STRICT
3859 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
3860#endif
3861
3862 PGM_UNLOCK(pVM);
3863 return rc;
3864}
3865
3866
3867/**
3868 * Worker form PGMR3PhysMmio2Map.
3869 */
3870static int pgmR3PhysMmio2UnmapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks, RTGCPHYS const GCPhysIn)
3871{
3872 /*
3873 * Validate input.
3874 */
3875 RTGCPHYS cbRange = 0;
3876 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3877 {
3878 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3879 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3880 AssertReturn(pMmio2->idRamRange == pRamRange->idRange, VERR_INTERNAL_ERROR_3);
3881 AssertReturn(pMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED, VERR_WRONG_ORDER);
3882 AssertReturn(pMmio2->GCPhys != NIL_RTGCPHYS, VERR_WRONG_ORDER);
3883 cbRange += pRamRange->cb;
3884 }
3885
3886 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3887 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
3888 const char * const pszDesc = pFirstRamRange->pszDesc;
3889 AssertLogRelMsgReturn(GCPhysIn == pFirstMmio2->GCPhys || GCPhysIn == NIL_RTGCPHYS,
3890 ("GCPhys=%RGp, actual address is %RGp\n", GCPhysIn, pFirstMmio2->GCPhys),
3891 VERR_MISMATCH);
3892 RTGCPHYS const GCPhys = pFirstMmio2->GCPhys; /* (it's always NIL_RTGCPHYS) */
3893 Log(("PGMR3PhysMmio2Unmap: %RGp-%RGp %s\n", GCPhys, GCPhys + cbRange - 1U, pszDesc));
3894
3895 uint16_t const fOldFlags = pFirstMmio2->fFlags;
3896 Assert(fOldFlags & PGMREGMMIO2RANGE_F_MAPPED);
3897
3898 /* Find the first entry in the lookup table and verify the overlapping flag. */
3899 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhys + pFirstRamRange->cb - 1U);
3900 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
3901 ("MMIO2 range not found at %RGp LB %RGp in the lookup table! (%s)\n",
3902 GCPhys, pFirstRamRange->cb, pszDesc),
3903 VERR_INTERNAL_ERROR_2);
3904
3905 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
3906 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
3907 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
3908 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_3);
3909
3910 AssertLogRelMsgReturn(fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING
3911 ? pLookupRange != pFirstRamRange : pLookupRange == pFirstRamRange,
3912 ("MMIO2 unmap mixup at %RGp LB %RGp fl=%#x (%s) vs %RGp LB %RGp (%s)\n",
3913 GCPhys, cbRange, fOldFlags, pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
3914 VERR_INTERNAL_ERROR_4);
3915
3916 /*
3917 * If monitoring dirty pages, we must deregister the handlers first.
3918 */
3919 if ( pFirstMmio2->pPhysHandlerR3
3920 && (fOldFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3921 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
3922
3923 /*
3924 * Unmap it.
3925 */
3926 int rcRet = VINF_SUCCESS;
3927#ifdef VBOX_WITH_NATIVE_NEM
3928 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3929 | (fOldFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3930 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3931#endif
3932 if (fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING)
3933 {
3934 /*
3935 * We've replaced RAM, replace with zero pages.
3936 *
3937 * Note! This is where we might differ a little from a real system, because
3938 * it's likely to just show the RAM pages as they were before the
3939 * MMIO2 region was mapped here.
3940 */
3941 /* Only one chunk allowed when overlapping! */
3942 Assert(cChunks == 1);
3943 /* No NEM stuff should ever get here, see assertion in the mapping function. */
3944 AssertReturn(!VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
3945
3946 /* Restore the RAM pages we've replaced. */
3947 PPGMPAGE pPageDst = &pLookupRange->aPages[(pFirstRamRange->GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT];
3948 uint32_t cPagesLeft = pFirstRamRange->cb >> GUEST_PAGE_SHIFT;
3949 pVM->pgm.s.cZeroPages += cPagesLeft;
3950 while (cPagesLeft-- > 0)
3951 {
3952 PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM);
3953 pPageDst++;
3954 }
3955
3956 /* Update range state. */
3957 pFirstMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3958 pFirstMmio2->GCPhys = NIL_RTGCPHYS;
3959 Assert(pFirstRamRange->GCPhys == NIL_RTGCPHYS);
3960 Assert(pFirstRamRange->GCPhysLast == NIL_RTGCPHYS);
3961 }
3962 else
3963 {
3964 /*
3965 * Unlink the chunks related to the MMIO/MMIO2 region.
3966 */
3967 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3968 {
3969 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3970 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3971 Assert(pMmio2->idRamRange == pRamRange->idRange);
3972 Assert(pMmio2->GCPhys == pRamRange->GCPhys);
3973
3974#ifdef VBOX_WITH_NATIVE_NEM
3975 if (VM_IS_NEM_ENABLED(pVM)) /* Notify NEM. */
3976 {
3977 uint8_t u2State = UINT8_MAX;
3978 int rc = NEMR3NotifyPhysMmioExUnmap(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags,
3979 NULL, pMmio2->pbR3, &u2State, &pRamRange->uNemRange);
3980 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3981 ("NEMR3NotifyPhysMmioExUnmap failed: %Rrc - GCPhys=RGp LB %RGp fNemFlags=%#x pbR3=%p %s\n",
3982 rc, pRamRange->GCPhys, pRamRange->cb, fNemFlags, pMmio2->pbR3, pRamRange->pszDesc),
3983 rcRet = rc);
3984 if (u2State != UINT8_MAX)
3985 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2State);
3986 }
3987#endif
3988
3989 int rc = pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxLookup);
3990 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3991 ("pgmR3PhysRamRangeRemoveLookup failed: %Rrc - GCPhys=%RGp LB %RGp %s\n",
3992 rc, pRamRange->GCPhys, pRamRange->cb, pRamRange->pszDesc),
3993 rcRet = rc);
3994
3995 pMmio2->GCPhys = NIL_RTGCPHYS;
3996 pMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3997 Assert(pRamRange->GCPhys == NIL_RTGCPHYS);
3998 Assert(pRamRange->GCPhysLast == NIL_RTGCPHYS);
3999 }
4000 }
4001
4002 /* Force a PGM pool flush as guest ram references have been changed. */
4003 /** @todo not entirely SMP safe; assuming for now the guest takes care
4004 * of this internally (not touch mapped mmio while changing the
4005 * mapping). */
4006 PVMCPU pVCpu = VMMGetCpu(pVM);
4007 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
4008 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4009
4010 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4011 /* pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
4012
4013 return rcRet;
4014}
4015
4016
4017/**
4018 * Unmaps an MMIO2 region.
4019 *
4020 * This is typically done when a guest / the bios / state loading changes the
4021 * PCI config. The replacing of base memory has the same restrictions as during
4022 * registration, of course.
4023 */
4024VMMR3_INT_DECL(int) PGMR3PhysMmio2Unmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
4025{
4026 /*
4027 * Validate input
4028 */
4029 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4030 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4031 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
4032 if (GCPhys != NIL_RTGCPHYS)
4033 {
4034 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
4035 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
4036 }
4037
4038 uint32_t cChunks = 0;
4039 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4040 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4041
4042
4043 /*
4044 * Take the PGM lock and call worker.
4045 */
4046 int rc = PGM_LOCK(pVM);
4047 AssertRCReturn(rc, rc);
4048
4049 rc = pgmR3PhysMmio2UnmapLocked(pVM, idxFirst, cChunks, GCPhys);
4050#ifdef VBOX_STRICT
4051 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
4052#endif
4053
4054 PGM_UNLOCK(pVM);
4055 return rc;
4056}
4057
4058
4059/**
4060 * Reduces the mapping size of a MMIO2 region.
4061 *
4062 * This is mainly for dealing with old saved states after changing the default
4063 * size of a mapping region. See PDMDevHlpMmio2Reduce and
4064 * PDMPCIDEV::pfnRegionLoadChangeHookR3.
4065 *
4066 * The region must not currently be mapped when making this call. The VM state
4067 * must be state restore or VM construction.
4068 *
4069 * @returns VBox status code.
4070 * @param pVM The cross context VM structure.
4071 * @param pDevIns The device instance owning the region.
4072 * @param hMmio2 The handle of the region to reduce.
4073 * @param cbRegion The new mapping size.
4074 */
4075VMMR3_INT_DECL(int) PGMR3PhysMmio2Reduce(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS cbRegion)
4076{
4077 /*
4078 * Validate input
4079 */
4080 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4081 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE && hMmio2 != 0 && hMmio2 <= RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges),
4082 VERR_INVALID_HANDLE);
4083 AssertReturn(cbRegion >= GUEST_PAGE_SIZE, VERR_INVALID_PARAMETER);
4084 AssertReturn(!(cbRegion & GUEST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
4085
4086 PVMCPU const pVCpu = VMMGetCpu(pVM);
4087 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4088
4089 VMSTATE const enmVmState = VMR3GetState(pVM);
4090 AssertLogRelMsgReturn( enmVmState == VMSTATE_CREATING
4091 || enmVmState == VMSTATE_LOADING,
4092 ("enmVmState=%d (%s)\n", enmVmState, VMR3GetStateName(enmVmState)),
4093 VERR_VM_INVALID_VM_STATE);
4094
4095 /*
4096 * Grab the PGM lock and validate the request properly.
4097 */
4098 int rc = PGM_LOCK(pVM);
4099 AssertRCReturn(rc, rc);
4100
4101 uint32_t cChunks = 0;
4102 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4103 if ((int32_t)idxFirst >= 0)
4104 {
4105 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4106 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
4107 if ( !(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4108 && pFirstMmio2->GCPhys == NIL_RTGCPHYS)
4109 {
4110 /*
4111 * NOTE! Current implementation does not support multiple ranges.
4112 * Implement when there is a real world need and thus a testcase.
4113 */
4114 if (cChunks == 1)
4115 {
4116 /*
4117 * The request has to be within the initial size.
4118 */
4119 if (cbRegion <= pFirstMmio2->cbReal)
4120 {
4121 /*
4122 * All we have to do is modify the size stored in the RAM range,
4123 * as it is the one used when mapping it and such.
4124 * The two page counts stored in PGMR0PERVM remain unchanged.
4125 */
4126 Log(("PGMR3PhysMmio2Reduce: %s changes from %#RGp bytes (%#RGp) to %#RGp bytes.\n",
4127 pFirstRamRange->pszDesc, pFirstRamRange->cb, pFirstMmio2->cbReal, cbRegion));
4128 pFirstRamRange->cb = cbRegion;
4129 rc = VINF_SUCCESS;
4130 }
4131 else
4132 {
4133 AssertLogRelMsgFailed(("MMIO2/%s: cbRegion=%#RGp > cbReal=%#RGp\n",
4134 pFirstRamRange->pszDesc, cbRegion, pFirstMmio2->cbReal));
4135 rc = VERR_OUT_OF_RANGE;
4136 }
4137 }
4138 else
4139 {
4140 AssertLogRelMsgFailed(("MMIO2/%s: more than one chunk: %d (flags=%#x)\n",
4141 pFirstRamRange->pszDesc, cChunks, pFirstMmio2->fFlags));
4142 rc = VERR_NOT_SUPPORTED;
4143 }
4144 }
4145 else
4146 {
4147 AssertLogRelMsgFailed(("MMIO2/%s: cannot change size of mapped range: %RGp..%RGp\n", pFirstRamRange->pszDesc,
4148 pFirstMmio2->GCPhys, pFirstMmio2->GCPhys + pFirstRamRange->cb - 1U));
4149 rc = VERR_WRONG_ORDER;
4150 }
4151 }
4152 else
4153 rc = (int32_t)idxFirst;
4154
4155 PGM_UNLOCK(pVM);
4156 return rc;
4157}
4158
4159
4160/**
4161 * Validates @a hMmio2, making sure it belongs to @a pDevIns.
4162 *
4163 * @returns VBox status code.
4164 * @param pVM The cross context VM structure.
4165 * @param pDevIns The device which allegedly owns @a hMmio2.
4166 * @param hMmio2 The handle to validate.
4167 */
4168VMMR3_INT_DECL(int) PGMR3PhysMmio2ValidateHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4169{
4170 /*
4171 * Validate input
4172 */
4173 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4174 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4175
4176 /*
4177 * Just do this the simple way.
4178 */
4179 int rc = PGM_LOCK_VOID(pVM);
4180 AssertRCReturn(rc, rc);
4181 uint32_t cChunks;
4182 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4183 PGM_UNLOCK(pVM);
4184 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4185 return VINF_SUCCESS;
4186}
4187
4188
4189/**
4190 * Gets the mapping address of an MMIO2 region.
4191 *
4192 * @returns Mapping address, NIL_RTGCPHYS if not mapped or invalid handle.
4193 *
4194 * @param pVM The cross context VM structure.
4195 * @param pDevIns The device owning the MMIO2 handle.
4196 * @param hMmio2 The region handle.
4197 */
4198VMMR3_INT_DECL(RTGCPHYS) PGMR3PhysMmio2GetMappingAddress(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4199{
4200 RTGCPHYS GCPhysRet = NIL_RTGCPHYS;
4201
4202 int rc = PGM_LOCK_VOID(pVM);
4203 AssertRCReturn(rc, NIL_RTGCPHYS);
4204
4205 uint32_t cChunks;
4206 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4207 if ((int32_t)idxFirst >= 0)
4208 GCPhysRet = pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys;
4209
4210 PGM_UNLOCK(pVM);
4211 return GCPhysRet;
4212}
4213
4214
4215/**
4216 * Worker for PGMR3PhysMmio2QueryAndResetDirtyBitmap.
4217 *
4218 * Called holding the PGM lock.
4219 */
4220static int pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4221 void *pvBitmap, size_t cbBitmap)
4222{
4223 /*
4224 * Continue validation.
4225 */
4226 uint32_t cChunks;
4227 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4228 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4229 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4230 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4231
4232 int rc = VINF_SUCCESS;
4233 if (cbBitmap || pvBitmap)
4234 {
4235 /*
4236 * Check the bitmap size and collect all the dirty flags.
4237 */
4238 RTGCPHYS cbTotal = 0;
4239 uint16_t fTotalDirty = 0;
4240 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4241 {
4242 /* Not using cbReal here, because NEM is not in on the creating, only the mapping. */
4243 cbTotal += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
4244 fTotalDirty |= pVM->pgm.s.aMmio2Ranges[idx].fFlags;
4245 }
4246 size_t const cbTotalBitmap = RT_ALIGN_T(cbTotal, GUEST_PAGE_SIZE * 64, RTGCPHYS) / GUEST_PAGE_SIZE / 8;
4247
4248 AssertPtrReturn(pvBitmap, VERR_INVALID_POINTER);
4249 AssertReturn(RT_ALIGN_P(pvBitmap, sizeof(uint64_t)) == pvBitmap, VERR_INVALID_POINTER);
4250 AssertReturn(cbBitmap == cbTotalBitmap, VERR_INVALID_PARAMETER);
4251
4252#ifdef VBOX_WITH_PGM_NEM_MODE
4253 /*
4254 * If there is no physical handler we must be in NEM mode and NEM
4255 * taking care of the dirty bit collecting.
4256 */
4257 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4258 {
4259/** @todo This does not integrate at all with --execute-all-in-iem, leaving the
4260 * screen blank when using it together with --driverless. Fixing this won't be
4261 * entirely easy as we take the PGM_PAGE_HNDL_PHYS_STATE_DISABLED page status to
4262 * mean a dirty page. */
4263 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4264 uint8_t *pbBitmap = (uint8_t *)pvBitmap;
4265 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4266 {
4267 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4268 size_t const cbBitmapChunk = (pRamRange->cb / GUEST_PAGE_SIZE + 7) / 8;
4269 Assert((RTGCPHYS)cbBitmapChunk * GUEST_PAGE_SIZE * 8 == pRamRange->cb);
4270 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4271 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4272 pRamRange->uNemRange, pbBitmap, cbBitmapChunk);
4273 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4274 rc = rc2;
4275 pbBitmap += pRamRange->cb / GUEST_PAGE_SIZE / 8;
4276 }
4277 }
4278 else
4279#endif
4280 if (fTotalDirty & PGMREGMMIO2RANGE_F_IS_DIRTY)
4281 {
4282 if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4283 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4284 {
4285 /*
4286 * Reset each chunk, gathering dirty bits.
4287 */
4288 RT_BZERO(pvBitmap, cbBitmap); /* simpler for now. */
4289 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4290 {
4291 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4292 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4293 {
4294 int rc2 = pgmHandlerPhysicalResetMmio2WithBitmap(pVM, pMmio2->GCPhys, pvBitmap, iPageNo);
4295 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4296 rc = rc2;
4297 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4298 }
4299 iPageNo += pVM->pgm.s.apMmio2RamRanges[idx]->cb >> GUEST_PAGE_SHIFT;
4300 }
4301 }
4302 else
4303 {
4304 /*
4305 * If not mapped or tracking is disabled, we return the
4306 * PGMREGMMIO2RANGE_F_IS_DIRTY status for all pages. We cannot
4307 * get more accurate data than that after unmapping or disabling.
4308 */
4309 RT_BZERO(pvBitmap, cbBitmap);
4310 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4311 {
4312 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4313 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4314 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4315 {
4316 ASMBitSetRange(pvBitmap, iPageNo, iPageNo + (pRamRange->cb >> GUEST_PAGE_SHIFT));
4317 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4318 }
4319 iPageNo += pRamRange->cb >> GUEST_PAGE_SHIFT;
4320 }
4321 }
4322 }
4323 /*
4324 * No dirty chunks.
4325 */
4326 else
4327 RT_BZERO(pvBitmap, cbBitmap);
4328 }
4329 /*
4330 * No bitmap. Reset the region if tracking is currently enabled.
4331 */
4332 else if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4333 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4334 {
4335#ifdef VBOX_WITH_PGM_NEM_MODE
4336 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4337 {
4338 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4339 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4340 {
4341 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4342 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4343 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4344 pRamRange->uNemRange, NULL, 0);
4345 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4346 rc = rc2;
4347 }
4348 }
4349 else
4350#endif
4351 {
4352 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4353 {
4354 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4355 int rc2 = PGMHandlerPhysicalReset(pVM, pVM->pgm.s.aMmio2Ranges[idx].GCPhys);
4356 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4357 rc = rc2;
4358 }
4359 }
4360 }
4361
4362 return rc;
4363}
4364
4365
4366/**
4367 * Queries the dirty page bitmap and resets the monitoring.
4368 *
4369 * The PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES flag must be specified when
4370 * creating the range for this to work.
4371 *
4372 * @returns VBox status code.
4373 * @retval VERR_INVALID_FUNCTION if not created using
4374 * PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES.
4375 * @param pVM The cross context VM structure.
4376 * @param pDevIns The device owning the MMIO2 handle.
4377 * @param hMmio2 The region handle.
4378 * @param pvBitmap The output bitmap. Must be 8-byte aligned. Ignored
4379 * when @a cbBitmap is zero.
4380 * @param cbBitmap The size of the bitmap. Must be the size of the whole
4381 * MMIO2 range, rounded up to the nearest 8 bytes.
4382 * When zero only a reset is done.
4383 */
4384VMMR3_INT_DECL(int) PGMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4385 void *pvBitmap, size_t cbBitmap)
4386{
4387 /*
4388 * Do some basic validation before grapping the PGM lock and continuing.
4389 */
4390 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4391 AssertReturn(RT_ALIGN_Z(cbBitmap, sizeof(uint64_t)) == cbBitmap, VERR_INVALID_PARAMETER);
4392 int rc = PGM_LOCK(pVM);
4393 if (RT_SUCCESS(rc))
4394 {
4395 STAM_PROFILE_START(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4396 rc = pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(pVM, pDevIns, hMmio2, pvBitmap, cbBitmap);
4397 STAM_PROFILE_STOP(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4398 PGM_UNLOCK(pVM);
4399 }
4400 return rc;
4401}
4402
4403
4404/**
4405 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking
4406 *
4407 * Called owning the PGM lock.
4408 */
4409static int pgmR3PhysMmio2ControlDirtyPageTrackingLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4410{
4411 /*
4412 * Continue validation.
4413 */
4414 uint32_t cChunks;
4415 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4416 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4417 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4418 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4419
4420#ifdef VBOX_WITH_PGM_NEM_MODE
4421 /*
4422 * This is a nop if NEM is responsible for doing the tracking, we simply
4423 * leave the tracking on all the time there.
4424 */
4425 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4426 {
4427 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4428 return VINF_SUCCESS;
4429 }
4430#endif
4431
4432 /*
4433 * Anything needing doing?
4434 */
4435 if (fEnabled != RT_BOOL(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4436 {
4437 LogFlowFunc(("fEnabled=%RTbool %s\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4438
4439 /*
4440 * Update the PGMREGMMIO2RANGE_F_TRACKING_ENABLED flag.
4441 */
4442 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4443 if (fEnabled)
4444 pVM->pgm.s.aMmio2Ranges[idx].fFlags |= PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4445 else
4446 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4447
4448 /*
4449 * Enable/disable handlers if currently mapped.
4450 *
4451 * We ignore status codes here as we've already changed the flags and
4452 * returning a failure status now would be confusing. Besides, the two
4453 * functions will continue past failures. As argued in the mapping code,
4454 * it's in the release log.
4455 */
4456 if (pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4457 {
4458 if (fEnabled)
4459 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
4460 else
4461 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
4462 }
4463 }
4464 else
4465 LogFlowFunc(("fEnabled=%RTbool %s - no change\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4466
4467 return VINF_SUCCESS;
4468}
4469
4470
4471/**
4472 * Controls the dirty page tracking for an MMIO2 range.
4473 *
4474 * @returns VBox status code.
4475 * @param pVM The cross context VM structure.
4476 * @param pDevIns The device owning the MMIO2 memory.
4477 * @param hMmio2 The handle of the region.
4478 * @param fEnabled The new tracking state.
4479 */
4480VMMR3_INT_DECL(int) PGMR3PhysMmio2ControlDirtyPageTracking(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4481{
4482 /*
4483 * Do some basic validation before grapping the PGM lock and continuing.
4484 */
4485 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4486 int rc = PGM_LOCK(pVM);
4487 if (RT_SUCCESS(rc))
4488 {
4489 rc = pgmR3PhysMmio2ControlDirtyPageTrackingLocked(pVM, pDevIns, hMmio2, fEnabled);
4490 PGM_UNLOCK(pVM);
4491 }
4492 return rc;
4493}
4494
4495
4496/**
4497 * Changes the region number of an MMIO2 region.
4498 *
4499 * This is only for dealing with save state issues, nothing else.
4500 *
4501 * @return VBox status code.
4502 *
4503 * @param pVM The cross context VM structure.
4504 * @param pDevIns The device owning the MMIO2 memory.
4505 * @param hMmio2 The handle of the region.
4506 * @param iNewRegion The new region index.
4507 *
4508 * @thread EMT(0)
4509 * @sa @bugref{9359}
4510 */
4511VMMR3_INT_DECL(int) PGMR3PhysMmio2ChangeRegionNo(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t iNewRegion)
4512{
4513 /*
4514 * Validate input.
4515 */
4516 VM_ASSERT_EMT0_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4517 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_LOADING, VERR_VM_INVALID_VM_STATE);
4518 AssertReturn(iNewRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
4519
4520 int rc = PGM_LOCK(pVM);
4521 AssertRCReturn(rc, rc);
4522
4523 /* Validate and resolve the handle. */
4524 uint32_t cChunks;
4525 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4526 if ((int32_t)idxFirst >= 0)
4527 {
4528 /* Check that the new range number is unused. */
4529 PPGMREGMMIO2RANGE const pConflict = pgmR3PhysMmio2Find(pVM, pDevIns, pVM->pgm.s.aMmio2Ranges[idxFirst].iSubDev,
4530 iNewRegion);
4531 if (!pConflict)
4532 {
4533 /*
4534 * Make the change.
4535 */
4536 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4537 pVM->pgm.s.aMmio2Ranges[idx].iRegion = (uint8_t)iNewRegion;
4538 rc = VINF_SUCCESS;
4539 }
4540 else
4541 {
4542 AssertLogRelMsgFailed(("MMIO2/%s: iNewRegion=%d conflicts with %s\n", pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc,
4543 iNewRegion, pVM->pgm.s.apMmio2RamRanges[pConflict->idRamRange]->pszDesc));
4544 rc = VERR_RESOURCE_IN_USE;
4545 }
4546 }
4547 else
4548 rc = (int32_t)idxFirst;
4549
4550 PGM_UNLOCK(pVM);
4551 return rc;
4552}
4553
4554
4555
4556/*********************************************************************************************************************************
4557* ROM *
4558*********************************************************************************************************************************/
4559
4560/**
4561 * Worker for PGMR3PhysRomRegister.
4562 *
4563 * This is here to simplify lock management, i.e. the caller does all the
4564 * locking and we can simply return without needing to remember to unlock
4565 * anything first.
4566 *
4567 * @returns VBox status code.
4568 * @param pVM The cross context VM structure.
4569 * @param pDevIns The device instance owning the ROM.
4570 * @param GCPhys First physical address in the range.
4571 * Must be page aligned!
4572 * @param cb The size of the range (in bytes).
4573 * Must be page aligned!
4574 * @param pvBinary Pointer to the binary data backing the ROM image.
4575 * @param cbBinary The size of the binary data pvBinary points to.
4576 * This must be less or equal to @a cb.
4577 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
4578 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
4579 * @param pszDesc Pointer to description string. This must not be freed.
4580 */
4581static int pgmR3PhysRomRegisterLocked(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
4582 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
4583{
4584 /*
4585 * Validate input.
4586 */
4587 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4588 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
4589 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
4590 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
4591 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
4592 AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER);
4593 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
4594 AssertReturn(!(fFlags & ~PGMPHYS_ROM_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
4595
4596 PVMCPU const pVCpu = VMMGetCpu(pVM);
4597 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4598 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
4599
4600 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
4601 AssertReturn(cGuestPages <= PGM_MAX_PAGES_PER_ROM_RANGE, VERR_OUT_OF_RANGE);
4602
4603#ifdef VBOX_WITH_PGM_NEM_MODE
4604 const uint32_t cHostPages = RT_ALIGN_T(cb, HOST_PAGE_SIZE, RTGCPHYS) >> HOST_PAGE_SHIFT;
4605#endif
4606
4607 /*
4608 * Make sure we've got a free ROM range.
4609 */
4610 uint8_t const idRomRange = pVM->pgm.s.cRomRanges;
4611 AssertLogRelReturn(idRomRange < RT_ELEMENTS(pVM->pgm.s.apRomRanges), VERR_PGM_TOO_MANY_ROM_RANGES);
4612
4613 /*
4614 * Look thru the existing ROM range and make sure there aren't any
4615 * overlapping registration.
4616 */
4617 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
4618 for (uint32_t idx = 0; idx < cRomRanges; idx++)
4619 {
4620 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
4621 AssertLogRelMsgReturn( GCPhys > pRom->GCPhysLast
4622 || GCPhysLast < pRom->GCPhys,
4623 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
4624 GCPhys, GCPhysLast, pszDesc,
4625 pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc),
4626 VERR_PGM_RAM_CONFLICT);
4627 }
4628
4629 /*
4630 * Find the RAM location and check for conflicts.
4631 *
4632 * Conflict detection is a bit different than for RAM registration since a
4633 * ROM can be located within a RAM range. So, what we have to check for is
4634 * other memory types (other than RAM that is) and that we don't span more
4635 * than one RAM range (lazy).
4636 */
4637 uint32_t idxInsert = UINT32_MAX;
4638 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
4639 if (pOverlappingRange)
4640 {
4641 /* completely within? */
4642 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
4643 && GCPhysLast <= pOverlappingRange->GCPhysLast,
4644 ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n",
4645 GCPhys, GCPhysLast, pszDesc,
4646 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4647 VERR_PGM_RAM_CONFLICT);
4648
4649 /* Check that is isn't an ad hoc range, but a real RAM range. */
4650 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
4651 ("%RGp-%RGp (ROM/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
4652 GCPhys, GCPhysLast, pszDesc,
4653 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4654 VERR_PGM_RAM_CONFLICT);
4655
4656 /* All the pages must be RAM pages. */
4657 PPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
4658 uint32_t cPagesLeft = cGuestPages;
4659 while (cPagesLeft-- > 0)
4660 {
4661 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
4662 ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n",
4663 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4664 VERR_PGM_RAM_CONFLICT);
4665 AssertLogRelMsgReturn(PGM_PAGE_IS_ZERO(pPage) || PGM_IS_IN_NEM_MODE(pVM),
4666 ("%RGp (%R[pgmpage]) is not a ZERO page - registering %RGp-%RGp (%s).\n",
4667 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4668 VERR_PGM_UNEXPECTED_PAGE_STATE);
4669 pPage++;
4670 }
4671 }
4672
4673 /*
4674 * Update the base memory reservation if necessary.
4675 */
4676 uint32_t const cExtraBaseCost = (pOverlappingRange ? 0 : cGuestPages)
4677 + (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? cGuestPages : 0);
4678 if (cExtraBaseCost)
4679 {
4680 int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost);
4681 AssertRCReturn(rc, rc);
4682 }
4683
4684#ifdef VBOX_WITH_NATIVE_NEM
4685 /*
4686 * Early NEM notification before we've made any changes or anything.
4687 */
4688 uint32_t const fNemNotify = (pOverlappingRange ? NEM_NOTIFY_PHYS_ROM_F_REPLACE : 0)
4689 | (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? NEM_NOTIFY_PHYS_ROM_F_SHADOW : 0);
4690 uint8_t u2NemState = UINT8_MAX;
4691 uint32_t uNemRange = 0;
4692 if (VM_IS_NEM_ENABLED(pVM))
4693 {
4694 int rc = NEMR3NotifyPhysRomRegisterEarly(pVM, GCPhys, cGuestPages << GUEST_PAGE_SHIFT,
4695 pOverlappingRange
4696 ? PGM_RAMRANGE_CALC_PAGE_R3PTR(pOverlappingRange, GCPhys) : NULL,
4697 fNemNotify, &u2NemState,
4698 pOverlappingRange ? &pOverlappingRange->uNemRange : &uNemRange);
4699 AssertLogRelRCReturn(rc, rc);
4700 }
4701#endif
4702
4703 /*
4704 * Allocate memory for the virgin copy of the RAM. In simplified memory
4705 * mode, we allocate memory for any ad-hoc RAM range and for shadow pages.
4706 */
4707 int rc;
4708 PGMMALLOCATEPAGESREQ pReq = NULL;
4709#ifdef VBOX_WITH_PGM_NEM_MODE
4710 void *pvRam = NULL;
4711 void *pvAlt = NULL;
4712 if (PGM_IS_IN_NEM_MODE(pVM))
4713 {
4714 if (!pOverlappingRange)
4715 {
4716 rc = SUPR3PageAlloc(cHostPages, 0, &pvRam);
4717 if (RT_FAILURE(rc))
4718 return rc;
4719 }
4720 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4721 {
4722 rc = SUPR3PageAlloc(cHostPages, 0, &pvAlt);
4723 if (RT_FAILURE(rc))
4724 {
4725 if (pvRam)
4726 SUPR3PageFree(pvRam, cHostPages);
4727 return rc;
4728 }
4729 }
4730 }
4731 else
4732#endif
4733 {
4734#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4735 rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cGuestPages, GMMACCOUNT_BASE);
4736 AssertRCReturn(rc, rc);
4737
4738 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
4739 {
4740 pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << GUEST_PAGE_SHIFT);
4741 pReq->aPages[iPage].fZeroed = false;
4742 pReq->aPages[iPage].idPage = NIL_GMM_PAGEID;
4743 pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID;
4744 }
4745
4746 rc = GMMR3AllocatePagesPerform(pVM, pReq);
4747 if (RT_FAILURE(rc))
4748 {
4749 GMMR3AllocatePagesCleanup(pReq);
4750 return rc;
4751 }
4752#endif
4753 }
4754
4755 /*
4756 * Allocate a RAM range if required.
4757 * Note! We don't clean up the RAM range here on failure, VM destruction does that.
4758 */
4759 rc = VINF_SUCCESS;
4760 PPGMRAMRANGE pRamRange = NULL;
4761 if (!pOverlappingRange)
4762 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cGuestPages, PGM_RAM_RANGE_FLAGS_AD_HOC_ROM, &pRamRange);
4763 if (RT_SUCCESS(rc))
4764 {
4765 /*
4766 * Allocate a ROM range.
4767 * Note! We don't clean up the ROM range here on failure, VM destruction does that.
4768 */
4769#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
4770 if (!SUPR3IsDriverless())
4771 {
4772 PGMPHYSROMALLOCATERANGEREQ RomRangeReq;
4773 RomRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
4774 RomRangeReq.Hdr.cbReq = sizeof(RomRangeReq);
4775 RomRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
4776 RomRangeReq.cGuestPages = cGuestPages;
4777 RomRangeReq.idRomRange = idRomRange;
4778 RomRangeReq.fFlags = fFlags;
4779 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ROM_ALLOCATE_RANGE, 0 /*u64Arg*/, &RomRangeReq.Hdr);
4780 }
4781 else
4782#endif
4783 rc = pgmPhysRomRangeAllocCommon(pVM, cGuestPages, idRomRange, fFlags);
4784 }
4785 if (RT_SUCCESS(rc))
4786 {
4787 /*
4788 * Initialize and map the RAM range (if required).
4789 */
4790 PPGMROMRANGE const pRomRange = pVM->pgm.s.apRomRanges[idRomRange];
4791 AssertPtr(pRomRange);
4792 uint32_t const idxFirstRamPage = pOverlappingRange ? (GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT : 0;
4793 PPGMROMPAGE pRomPage = &pRomRange->aPages[0];
4794 if (!pOverlappingRange)
4795 {
4796 /* Initialize the new RAM range and insert it into the lookup table. */
4797 pRamRange->pszDesc = pszDesc;
4798#ifdef VBOX_WITH_NATIVE_NEM
4799 pRamRange->uNemRange = uNemRange;
4800#endif
4801
4802 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4803#ifdef VBOX_WITH_PGM_NEM_MODE
4804 if (PGM_IS_IN_NEM_MODE(pVM))
4805 {
4806 AssertPtr(pvRam); Assert(pReq == NULL);
4807 pRamRange->pbR3 = (uint8_t *)pvRam;
4808 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4809 {
4810 PGM_PAGE_INIT(pRamPage, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4811 PGMPAGETYPE_ROM, PGM_PAGE_STATE_ALLOCATED);
4812 pRomPage->Virgin = *pRamPage;
4813 }
4814 }
4815 else
4816#endif
4817 {
4818#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4819 Assert(!pRamRange->pbR3); Assert(!pvRam);
4820 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4821 {
4822 PGM_PAGE_INIT(pRamPage,
4823 pReq->aPages[iPage].HCPhysGCPhys,
4824 pReq->aPages[iPage].idPage,
4825 PGMPAGETYPE_ROM,
4826 PGM_PAGE_STATE_ALLOCATED);
4827
4828 pRomPage->Virgin = *pRamPage;
4829 }
4830#endif
4831 }
4832
4833 pVM->pgm.s.cAllPages += cGuestPages;
4834 pVM->pgm.s.cPrivatePages += cGuestPages;
4835
4836 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhys, &idxInsert);
4837 }
4838 else
4839 {
4840 /* Insert the ROM into an existing RAM range. */
4841 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
4842#ifdef VBOX_WITH_PGM_NEM_MODE
4843 if (PGM_IS_IN_NEM_MODE(pVM))
4844 {
4845 Assert(pvRam == NULL); Assert(pReq == NULL);
4846 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4847 {
4848 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
4849 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
4850 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
4851 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4852 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4853 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4854 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4855 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4856
4857 pRomPage->Virgin = *pRamPage;
4858 }
4859 }
4860 else
4861#endif
4862 {
4863#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4864 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4865 {
4866 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4867 PGM_PAGE_SET_HCPHYS(pVM, pRamPage, pReq->aPages[iPage].HCPhysGCPhys);
4868 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4869 PGM_PAGE_SET_PAGEID(pVM, pRamPage, pReq->aPages[iPage].idPage);
4870 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4871 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4872 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4873
4874 pRomPage->Virgin = *pRamPage;
4875 }
4876 pVM->pgm.s.cZeroPages -= cGuestPages;
4877 pVM->pgm.s.cPrivatePages += cGuestPages;
4878#endif
4879 }
4880 pRamRange = pOverlappingRange;
4881 }
4882
4883 if (RT_SUCCESS(rc))
4884 {
4885#ifdef VBOX_WITH_NATIVE_NEM
4886 /* Set the NEM state of the pages if needed. */
4887 if (u2NemState != UINT8_MAX)
4888 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4889#endif
4890
4891 /* Flush physical page map TLB. */
4892 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4893
4894 /*
4895 * Register the ROM access handler.
4896 */
4897 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, pVM->pgm.s.hRomPhysHandlerType, idRomRange, pszDesc);
4898 if (RT_SUCCESS(rc))
4899 {
4900 /*
4901 * Copy the image over to the virgin pages.
4902 * This must be done after linking in the RAM range.
4903 */
4904 size_t cbBinaryLeft = cbBinary;
4905 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4906 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
4907 {
4908 void *pvDstPage;
4909 rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << GUEST_PAGE_SHIFT), &pvDstPage);
4910 if (RT_FAILURE(rc))
4911 {
4912 VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys);
4913 break;
4914 }
4915 if (cbBinaryLeft >= GUEST_PAGE_SIZE)
4916 {
4917 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), GUEST_PAGE_SIZE);
4918 cbBinaryLeft -= GUEST_PAGE_SIZE;
4919 }
4920 else
4921 {
4922 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE); /* (shouldn't be necessary, but can't hurt either) */
4923 if (cbBinaryLeft > 0)
4924 {
4925 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), cbBinaryLeft);
4926 cbBinaryLeft = 0;
4927 }
4928 }
4929 }
4930 if (RT_SUCCESS(rc))
4931 {
4932 /*
4933 * Initialize the ROM range.
4934 * Note that the Virgin member of the pages has already been initialized above.
4935 */
4936 Assert(pRomRange->cb == cb);
4937 Assert(pRomRange->fFlags == fFlags);
4938 Assert(pRomRange->idSavedState == UINT8_MAX);
4939 pRomRange->GCPhys = GCPhys;
4940 pRomRange->GCPhysLast = GCPhysLast;
4941 pRomRange->cbOriginal = cbBinary;
4942 pRomRange->pszDesc = pszDesc;
4943#ifdef VBOX_WITH_PGM_NEM_MODE
4944 pRomRange->pbR3Alternate = (uint8_t *)pvAlt;
4945#endif
4946 pRomRange->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY
4947 ? pvBinary : RTMemDup(pvBinary, cbBinary);
4948 if (pRomRange->pvOriginal)
4949 {
4950 for (unsigned iPage = 0; iPage < cGuestPages; iPage++)
4951 {
4952 PPGMROMPAGE const pPage = &pRomRange->aPages[iPage];
4953 pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE;
4954#ifdef VBOX_WITH_PGM_NEM_MODE
4955 if (PGM_IS_IN_NEM_MODE(pVM))
4956 PGM_PAGE_INIT(&pPage->Shadow, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4957 PGMPAGETYPE_ROM_SHADOW, PGM_PAGE_STATE_ALLOCATED);
4958 else
4959#endif
4960 PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW);
4961 }
4962
4963 /* update the page count stats for the shadow pages. */
4964 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4965 {
4966 if (PGM_IS_IN_NEM_MODE(pVM))
4967 pVM->pgm.s.cPrivatePages += cGuestPages;
4968 else
4969 pVM->pgm.s.cZeroPages += cGuestPages;
4970 pVM->pgm.s.cAllPages += cGuestPages;
4971 }
4972
4973#ifdef VBOX_WITH_NATIVE_NEM
4974 /*
4975 * Notify NEM again.
4976 */
4977 if (VM_IS_NEM_ENABLED(pVM))
4978 {
4979 u2NemState = UINT8_MAX;
4980 rc = NEMR3NotifyPhysRomRegisterLate(pVM, GCPhys, cb, PGM_RAMRANGE_CALC_PAGE_R3PTR(pRamRange, GCPhys),
4981 fNemNotify, &u2NemState, &pRamRange->uNemRange);
4982 if (u2NemState != UINT8_MAX)
4983 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4984 }
4985 else
4986#endif
4987 GMMR3AllocatePagesCleanup(pReq);
4988 if (RT_SUCCESS(rc))
4989 {
4990 /*
4991 * Done!
4992 */
4993#ifdef VBOX_STRICT
4994 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
4995#endif
4996 return rc;
4997 }
4998
4999 /*
5000 * bail out
5001 */
5002#ifdef VBOX_WITH_NATIVE_NEM
5003 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5004 {
5005 Assert(VM_IS_NEM_ENABLED(pVM));
5006 pVM->pgm.s.cPrivatePages -= cGuestPages;
5007 pVM->pgm.s.cAllPages -= cGuestPages;
5008 }
5009#endif
5010 }
5011 else
5012 rc = VERR_NO_MEMORY;
5013 }
5014
5015 int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys);
5016 AssertRC(rc2);
5017 }
5018
5019 idxInsert -= 1;
5020 if (!pOverlappingRange)
5021 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
5022 }
5023 /* else: lookup insertion failed. */
5024
5025 if (pOverlappingRange)
5026 {
5027 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
5028#ifdef VBOX_WITH_PGM_NEM_MODE
5029 if (PGM_IS_IN_NEM_MODE(pVM))
5030 {
5031 Assert(pvRam == NULL); Assert(pReq == NULL);
5032 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
5033 {
5034 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
5035 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
5036 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
5037 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_RAM);
5038 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
5039 }
5040 }
5041 else
5042#endif
5043 {
5044#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5045 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
5046 PGM_PAGE_INIT_ZERO(pRamPage, pVM, PGMPAGETYPE_RAM);
5047 pVM->pgm.s.cZeroPages += cGuestPages;
5048 pVM->pgm.s.cPrivatePages -= cGuestPages;
5049#endif
5050 }
5051 }
5052 }
5053 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
5054 pgmPhysInvalidRamRangeTlbs(pVM);
5055
5056#ifdef VBOX_WITH_PGM_NEM_MODE
5057 if (PGM_IS_IN_NEM_MODE(pVM))
5058 {
5059 Assert(!pReq);
5060 if (pvRam)
5061 SUPR3PageFree(pvRam, cHostPages);
5062 if (pvAlt)
5063 SUPR3PageFree(pvAlt, cHostPages);
5064 }
5065 else
5066#endif
5067 {
5068#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5069 GMMR3FreeAllocatedPages(pVM, pReq);
5070 GMMR3AllocatePagesCleanup(pReq);
5071#endif
5072 }
5073
5074 /* We don't bother to actually free either the ROM nor the RAM ranges
5075 themselves, as already mentioned above, we'll leave that to the VM
5076 termination cleanup code. */
5077 return rc;
5078}
5079
5080
5081/**
5082 * Registers a ROM image.
5083 *
5084 * Shadowed ROM images requires double the amount of backing memory, so,
5085 * don't use that unless you have to. Shadowing of ROM images is process
5086 * where we can select where the reads go and where the writes go. On real
5087 * hardware the chipset provides means to configure this. We provide
5088 * PGMR3PhysRomProtect() for this purpose.
5089 *
5090 * A read-only copy of the ROM image will always be kept around while we
5091 * will allocate RAM pages for the changes on demand (unless all memory
5092 * is configured to be preallocated).
5093 *
5094 * @returns VBox status code.
5095 * @param pVM The cross context VM structure.
5096 * @param pDevIns The device instance owning the ROM.
5097 * @param GCPhys First physical address in the range.
5098 * Must be page aligned!
5099 * @param cb The size of the range (in bytes).
5100 * Must be page aligned!
5101 * @param pvBinary Pointer to the binary data backing the ROM image.
5102 * @param cbBinary The size of the binary data pvBinary points to.
5103 * This must be less or equal to @a cb.
5104 * @param fFlags Mask of flags, PGMPHYS_ROM_FLAGS_XXX.
5105 * @param pszDesc Pointer to description string. This must not be freed.
5106 *
5107 * @remark There is no way to remove the rom, automatically on device cleanup or
5108 * manually from the device yet. This isn't difficult in any way, it's
5109 * just not something we expect to be necessary for a while.
5110 */
5111VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
5112 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
5113{
5114 Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n",
5115 pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc));
5116 PGM_LOCK_VOID(pVM);
5117
5118 int rc = pgmR3PhysRomRegisterLocked(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc);
5119
5120 PGM_UNLOCK(pVM);
5121 return rc;
5122}
5123
5124
5125/**
5126 * Called by PGMR3MemSetup to reset the shadow, switch to the virgin, and verify
5127 * that the virgin part is untouched.
5128 *
5129 * This is done after the normal memory has been cleared.
5130 *
5131 * ASSUMES that the caller owns the PGM lock.
5132 *
5133 * @param pVM The cross context VM structure.
5134 */
5135int pgmR3PhysRomReset(PVM pVM)
5136{
5137 PGM_LOCK_ASSERT_OWNER(pVM);
5138 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5139 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5140 {
5141 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5142 uint32_t const cGuestPages = pRom->cb >> GUEST_PAGE_SHIFT;
5143
5144 if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5145 {
5146 /*
5147 * Reset the physical handler.
5148 */
5149 int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE);
5150 AssertRCReturn(rc, rc);
5151
5152 /*
5153 * What we do with the shadow pages depends on the memory
5154 * preallocation option. If not enabled, we'll just throw
5155 * out all the dirty pages and replace them by the zero page.
5156 */
5157#ifdef VBOX_WITH_PGM_NEM_MODE
5158 if (PGM_IS_IN_NEM_MODE(pVM))
5159 {
5160 /* Clear all the shadow pages (currently using alternate backing). */
5161 RT_BZERO(pRom->pbR3Alternate, pRom->cb);
5162 }
5163# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5164 else
5165# endif
5166#endif
5167#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5168 if (!pVM->pgm.s.fRamPreAlloc)
5169 {
5170 /* Free the dirty pages. */
5171 uint32_t cPendingPages = 0;
5172 PGMMFREEPAGESREQ pReq;
5173 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5174 AssertRCReturn(rc, rc);
5175
5176 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5177 if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)
5178 && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow))
5179 {
5180 Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED);
5181 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow,
5182 pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT),
5183 (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pRom->aPages[iPage].Shadow));
5184 AssertLogRelRCReturn(rc, rc);
5185 }
5186
5187 if (cPendingPages)
5188 {
5189 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5190 AssertLogRelRCReturn(rc, rc);
5191 }
5192 GMMR3FreePagesCleanup(pReq);
5193 }
5194 else
5195 {
5196 /* clear all the shadow pages. */
5197 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5198 {
5199 if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow))
5200 continue;
5201 Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow));
5202 void *pvDstPage;
5203 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5204 rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage);
5205 if (RT_FAILURE(rc))
5206 break;
5207 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE);
5208 }
5209 AssertRCReturn(rc, rc);
5210 }
5211#endif
5212 }
5213
5214 /*
5215 * Restore the original ROM pages after a saved state load.
5216 * Also, in strict builds check that ROM pages remain unmodified.
5217 */
5218#ifndef VBOX_STRICT
5219 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5220#endif
5221 {
5222 size_t cbSrcLeft = pRom->cbOriginal;
5223 uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal;
5224 uint32_t cRestored = 0;
5225 for (uint32_t iPage = 0; iPage < cGuestPages && cbSrcLeft > 0; iPage++, pbSrcPage += GUEST_PAGE_SIZE)
5226 {
5227 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5228 PPGMPAGE const pPage = pgmPhysGetPage(pVM, GCPhys);
5229 void const *pvDstPage = NULL;
5230 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhys, &pvDstPage);
5231 if (RT_FAILURE(rc))
5232 break;
5233
5234 if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE)))
5235 {
5236 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5237 {
5238 void *pvDstPageW = NULL;
5239 rc = pgmPhysPageMap(pVM, pPage, GCPhys, &pvDstPageW);
5240 AssertLogRelRCReturn(rc, rc);
5241 memcpy(pvDstPageW, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE));
5242 cRestored++;
5243 }
5244 else
5245 LogRel(("pgmR3PhysRomReset: %RGp: ROM page changed (%s)\n", GCPhys, pRom->pszDesc));
5246 }
5247 cbSrcLeft -= RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE);
5248 }
5249 if (cRestored > 0)
5250 LogRel(("PGM: ROM \"%s\": Reloaded %u of %u pages.\n", pRom->pszDesc, cRestored, cGuestPages));
5251 }
5252 }
5253
5254 /* Clear the ROM restore flag now as we only need to do this once after
5255 loading saved state. */
5256 pVM->pgm.s.fRestoreRomPagesOnReset = false;
5257
5258 return VINF_SUCCESS;
5259}
5260
5261
5262/**
5263 * Called by PGMR3Term to free resources.
5264 *
5265 * ASSUMES that the caller owns the PGM lock.
5266 *
5267 * @param pVM The cross context VM structure.
5268 */
5269void pgmR3PhysRomTerm(PVM pVM)
5270{
5271 /*
5272 * Free the heap copy of the original bits.
5273 */
5274 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5275 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5276 {
5277 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5278 if ( pRom->pvOriginal
5279 && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY))
5280 {
5281 RTMemFree((void *)pRom->pvOriginal);
5282 pRom->pvOriginal = NULL;
5283 }
5284 }
5285}
5286
5287
5288/**
5289 * Change the shadowing of a range of ROM pages.
5290 *
5291 * This is intended for implementing chipset specific memory registers
5292 * and will not be very strict about the input. It will silently ignore
5293 * any pages that are not the part of a shadowed ROM.
5294 *
5295 * @returns VBox status code.
5296 * @retval VINF_PGM_SYNC_CR3
5297 *
5298 * @param pVM The cross context VM structure.
5299 * @param GCPhys Where to start. Page aligned.
5300 * @param cb How much to change. Page aligned.
5301 * @param enmProt The new ROM protection.
5302 */
5303VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt)
5304{
5305 LogFlow(("PGMR3PhysRomProtect: GCPhys=%RGp cb=%RGp enmProt=%d\n", GCPhys, cb, enmProt));
5306
5307 /*
5308 * Check input
5309 */
5310 if (!cb)
5311 return VINF_SUCCESS;
5312 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5313 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5314 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
5315 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
5316 AssertReturn(enmProt >= PGMROMPROT_INVALID && enmProt <= PGMROMPROT_END, VERR_INVALID_PARAMETER);
5317
5318 /*
5319 * Process the request.
5320 */
5321 PGM_LOCK_VOID(pVM);
5322 int rc = VINF_SUCCESS;
5323 bool fFlushTLB = false;
5324 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5325 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5326 {
5327 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5328 if ( GCPhys <= pRom->GCPhysLast
5329 && GCPhysLast >= pRom->GCPhys
5330 && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))
5331 {
5332 /*
5333 * Iterate the relevant pages and make necessary the changes.
5334 */
5335#ifdef VBOX_WITH_NATIVE_NEM
5336 PPGMRAMRANGE const pRam = pgmPhysGetRange(pVM, GCPhys);
5337 AssertPtrReturn(pRam, VERR_INTERNAL_ERROR_3);
5338#endif
5339 bool fChanges = false;
5340 uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast
5341 ? pRom->cb >> GUEST_PAGE_SHIFT
5342 : (GCPhysLast - pRom->GCPhys + 1) >> GUEST_PAGE_SHIFT;
5343 for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> GUEST_PAGE_SHIFT;
5344 iPage < cPages;
5345 iPage++)
5346 {
5347 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
5348 if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt))
5349 {
5350 fChanges = true;
5351
5352 /* flush references to the page. */
5353 RTGCPHYS const GCPhysPage = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5354 PPGMPAGE pRamPage = pgmPhysGetPage(pVM, GCPhysPage);
5355 int rc2 = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pRamPage, true /*fFlushPTEs*/, &fFlushTLB);
5356 if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2)))
5357 rc = rc2;
5358#ifdef VBOX_WITH_NATIVE_NEM
5359 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pRamPage);
5360#endif
5361
5362 PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow;
5363 PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin;
5364
5365 *pOld = *pRamPage;
5366 *pRamPage = *pNew;
5367 /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */
5368
5369#ifdef VBOX_WITH_NATIVE_NEM
5370# ifdef VBOX_WITH_PGM_NEM_MODE
5371 /* In simplified mode we have to switch the page data around too. */
5372 if (PGM_IS_IN_NEM_MODE(pVM))
5373 {
5374 uint8_t abPage[GUEST_PAGE_SIZE];
5375 uint8_t * const pbRamPage = PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage);
5376 memcpy(abPage, &pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], sizeof(abPage));
5377 memcpy(&pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], pbRamPage, sizeof(abPage));
5378 memcpy(pbRamPage, abPage, sizeof(abPage));
5379 }
5380# endif
5381 /* Tell NEM about the backing and protection change. */
5382 if (VM_IS_NEM_ENABLED(pVM))
5383 {
5384 PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pNew);
5385 NEMHCNotifyPhysPageChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pOld), PGM_PAGE_GET_HCPHYS(pNew),
5386 PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage),
5387 pgmPhysPageCalcNemProtection(pRamPage, enmType), enmType, &u2State);
5388 PGM_PAGE_SET_NEM_STATE(pRamPage, u2State);
5389 }
5390#endif
5391 }
5392 pRomPage->enmProt = enmProt;
5393 }
5394
5395 /*
5396 * Reset the access handler if we made changes, no need to optimize this.
5397 */
5398 if (fChanges)
5399 {
5400 int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys);
5401 if (RT_FAILURE(rc2))
5402 {
5403 PGM_UNLOCK(pVM);
5404 AssertRC(rc);
5405 return rc2;
5406 }
5407
5408 /* Explicitly flush IEM. Not sure if this is really necessary, but better
5409 be on the safe side. This shouldn't be a high volume flush source. */
5410 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ROM_PROTECT);
5411 }
5412
5413 /* Advance - cb isn't updated. */
5414 GCPhys = pRom->GCPhys + (cPages << GUEST_PAGE_SHIFT);
5415 }
5416 }
5417 PGM_UNLOCK(pVM);
5418 if (fFlushTLB)
5419 PGM_INVL_ALL_VCPU_TLBS(pVM);
5420
5421 return rc;
5422}
5423
5424
5425
5426/*********************************************************************************************************************************
5427* Ballooning *
5428*********************************************************************************************************************************/
5429
5430#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5431
5432/**
5433 * Rendezvous callback used by PGMR3ChangeMemBalloon that changes the memory balloon size
5434 *
5435 * This is only called on one of the EMTs while the other ones are waiting for
5436 * it to complete this function.
5437 *
5438 * @returns VINF_SUCCESS (VBox strict status code).
5439 * @param pVM The cross context VM structure.
5440 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5441 * @param pvUser User parameter
5442 */
5443static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5444{
5445 uintptr_t *paUser = (uintptr_t *)pvUser;
5446 bool fInflate = !!paUser[0];
5447 unsigned cPages = paUser[1];
5448 RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2];
5449 int rc = VINF_SUCCESS;
5450
5451 Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages));
5452 PGM_LOCK_VOID(pVM);
5453
5454 if (fInflate)
5455 {
5456 /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */
5457 pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL);
5458
5459 /* Replace pages with ZERO pages. */
5460 uint32_t cPendingPages = 0;
5461 PGMMFREEPAGESREQ pReq = NULL;
5462 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5463 if (RT_FAILURE(rc))
5464 {
5465 PGM_UNLOCK(pVM);
5466 AssertLogRelRC(rc);
5467 return rc;
5468 }
5469
5470 /* Iterate the pages. */
5471 for (unsigned i = 0; i < cPages; i++)
5472 {
5473 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5474 if ( pPage == NULL
5475 || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM)
5476 {
5477 Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0));
5478 break;
5479 }
5480
5481 LogFlow(("balloon page: %RGp\n", paPhysPage[i]));
5482
5483 /* Flush the shadow PT if this page was previously used as a guest page table. */
5484 pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]);
5485
5486 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i], (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage));
5487 if (RT_FAILURE(rc))
5488 {
5489 PGM_UNLOCK(pVM);
5490 AssertLogRelRC(rc);
5491 return rc;
5492 }
5493 Assert(PGM_PAGE_IS_ZERO(pPage));
5494 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED);
5495 }
5496
5497 if (cPendingPages)
5498 {
5499 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5500 if (RT_FAILURE(rc))
5501 {
5502 PGM_UNLOCK(pVM);
5503 AssertLogRelRC(rc);
5504 return rc;
5505 }
5506 }
5507 GMMR3FreePagesCleanup(pReq);
5508 }
5509 else
5510 {
5511 /* Iterate the pages. */
5512 for (unsigned i = 0; i < cPages; i++)
5513 {
5514 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5515 AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM);
5516
5517 LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i]));
5518
5519 Assert(PGM_PAGE_IS_BALLOONED(pPage));
5520
5521 /* Change back to zero page. (NEM does not need to be informed.) */
5522 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
5523 }
5524
5525 /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */
5526 }
5527
5528#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5529 /* Notify GMM about the balloon change. */
5530 rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages);
5531 if (RT_SUCCESS(rc))
5532 {
5533 if (!fInflate)
5534 {
5535 Assert(pVM->pgm.s.cBalloonedPages >= cPages);
5536 pVM->pgm.s.cBalloonedPages -= cPages;
5537 }
5538 else
5539 pVM->pgm.s.cBalloonedPages += cPages;
5540 }
5541#endif
5542
5543 PGM_UNLOCK(pVM);
5544
5545 /* Flush the recompiler's TLB as well. */
5546 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5547 CPUMSetChangedFlags(pVM->apCpusR3[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5548
5549 AssertLogRelRC(rc);
5550 return rc;
5551}
5552
5553
5554/**
5555 * Frees a range of ram pages, replacing them with ZERO pages; helper for PGMR3PhysFreeRamPages
5556 *
5557 * @param pVM The cross context VM structure.
5558 * @param fInflate Inflate or deflate memory balloon
5559 * @param cPages Number of pages to free
5560 * @param paPhysPage Array of guest physical addresses
5561 */
5562static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5563{
5564 uintptr_t paUser[3];
5565
5566 paUser[0] = fInflate;
5567 paUser[1] = cPages;
5568 paUser[2] = (uintptr_t)paPhysPage;
5569 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5570 AssertRC(rc);
5571
5572 /* Made a copy in PGMR3PhysFreeRamPages; free it here. */
5573 RTMemFree(paPhysPage);
5574}
5575
5576#endif /* 64-bit host && (Windows || Solaris || Linux || FreeBSD) */
5577
5578/**
5579 * Inflate or deflate a memory balloon
5580 *
5581 * @returns VBox status code.
5582 * @param pVM The cross context VM structure.
5583 * @param fInflate Inflate or deflate memory balloon
5584 * @param cPages Number of pages to free
5585 * @param paPhysPage Array of guest physical addresses
5586 */
5587VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5588{
5589 /* This must match GMMR0Init; currently we only support memory ballooning on all 64-bit hosts except Mac OS X */
5590#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5591 int rc;
5592
5593 /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */
5594 AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER);
5595
5596 /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock.
5597 * In the SMP case we post a request packet to postpone the job.
5598 */
5599 if (pVM->cCpus > 1)
5600 {
5601 unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]);
5602 RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage);
5603 AssertReturn(paPhysPageCopy, VERR_NO_MEMORY);
5604
5605 memcpy(paPhysPageCopy, paPhysPage, cbPhysPage);
5606
5607 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4,
5608 pVM, fInflate, cPages, paPhysPageCopy);
5609 AssertRC(rc);
5610 }
5611 else
5612 {
5613 uintptr_t paUser[3];
5614
5615 paUser[0] = fInflate;
5616 paUser[1] = cPages;
5617 paUser[2] = (uintptr_t)paPhysPage;
5618 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5619 AssertRC(rc);
5620 }
5621 return rc;
5622
5623#else
5624 NOREF(pVM); NOREF(fInflate); NOREF(cPages); NOREF(paPhysPage);
5625 return VERR_NOT_IMPLEMENTED;
5626#endif
5627}
5628
5629
5630
5631/*********************************************************************************************************************************
5632* Write Monitoring *
5633*********************************************************************************************************************************/
5634
5635/**
5636 * Rendezvous callback used by PGMR3WriteProtectRAM that write protects all
5637 * physical RAM.
5638 *
5639 * This is only called on one of the EMTs while the other ones are waiting for
5640 * it to complete this function.
5641 *
5642 * @returns VINF_SUCCESS (VBox strict status code).
5643 * @param pVM The cross context VM structure.
5644 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5645 * @param pvUser User parameter, unused.
5646 */
5647static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5648{
5649 int rc = VINF_SUCCESS;
5650 NOREF(pvUser); NOREF(pVCpu);
5651
5652 PGM_LOCK_VOID(pVM);
5653#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5654 pgmPoolResetDirtyPages(pVM);
5655#endif
5656
5657 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
5658 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
5659 {
5660 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
5661 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
5662 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
5663 AssertContinue(pRam);
5664
5665 uint32_t cPages = pRam->cb >> GUEST_PAGE_SHIFT;
5666 for (uint32_t iPage = 0; iPage < cPages; iPage++)
5667 {
5668 PPGMPAGE const pPage = &pRam->aPages[iPage];
5669 PGMPAGETYPE const enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
5670
5671 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
5672 || enmPageType == PGMPAGETYPE_MMIO2)
5673 {
5674 /*
5675 * A RAM page.
5676 */
5677 switch (PGM_PAGE_GET_STATE(pPage))
5678 {
5679 case PGM_PAGE_STATE_ALLOCATED:
5680 /** @todo Optimize this: Don't always re-enable write
5681 * monitoring if the page is known to be very busy. */
5682 if (PGM_PAGE_IS_WRITTEN_TO(pPage))
5683 PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage);
5684
5685 pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
5686 break;
5687
5688 case PGM_PAGE_STATE_SHARED:
5689 AssertFailed();
5690 break;
5691
5692 case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */
5693 default:
5694 break;
5695 }
5696 }
5697 }
5698 }
5699 pgmR3PoolWriteProtectPages(pVM);
5700 PGM_INVL_ALL_VCPU_TLBS(pVM);
5701 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
5702 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5703
5704 PGM_UNLOCK(pVM);
5705 return rc;
5706}
5707
5708/**
5709 * Protect all physical RAM to monitor writes
5710 *
5711 * @returns VBox status code.
5712 * @param pVM The cross context VM structure.
5713 */
5714VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM)
5715{
5716 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
5717
5718 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL);
5719 AssertRC(rc);
5720 return rc;
5721}
5722
5723
5724/*********************************************************************************************************************************
5725* Stats. *
5726*********************************************************************************************************************************/
5727
5728/**
5729 * Query the amount of free memory inside VMMR0
5730 *
5731 * @returns VBox status code.
5732 * @param pUVM The user mode VM handle.
5733 * @param pcbAllocMem Where to return the amount of memory allocated
5734 * by VMs.
5735 * @param pcbFreeMem Where to return the amount of memory that is
5736 * allocated from the host but not currently used
5737 * by any VMs.
5738 * @param pcbBallonedMem Where to return the sum of memory that is
5739 * currently ballooned by the VMs.
5740 * @param pcbSharedMem Where to return the amount of memory that is
5741 * currently shared.
5742 */
5743VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PUVM pUVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem,
5744 uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem)
5745{
5746 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5747 VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE);
5748
5749 uint64_t cAllocPages = 0;
5750 uint64_t cFreePages = 0;
5751 uint64_t cBalloonPages = 0;
5752 uint64_t cSharedPages = 0;
5753#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5754 if (!SUPR3IsDriverless())
5755 {
5756 int rc = GMMR3QueryHypervisorMemoryStats(pUVM->pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages);
5757 AssertRCReturn(rc, rc);
5758 }
5759#endif
5760
5761 if (pcbAllocMem)
5762 *pcbAllocMem = cAllocPages * _4K;
5763
5764 if (pcbFreeMem)
5765 *pcbFreeMem = cFreePages * _4K;
5766
5767 if (pcbBallonedMem)
5768 *pcbBallonedMem = cBalloonPages * _4K;
5769
5770 if (pcbSharedMem)
5771 *pcbSharedMem = cSharedPages * _4K;
5772
5773 Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n",
5774 cAllocPages, cFreePages, cBalloonPages, cSharedPages));
5775 return VINF_SUCCESS;
5776}
5777
5778
5779/**
5780 * Query memory stats for the VM.
5781 *
5782 * @returns VBox status code.
5783 * @param pUVM The user mode VM handle.
5784 * @param pcbTotalMem Where to return total amount memory the VM may
5785 * possibly use.
5786 * @param pcbPrivateMem Where to return the amount of private memory
5787 * currently allocated.
5788 * @param pcbSharedMem Where to return the amount of actually shared
5789 * memory currently used by the VM.
5790 * @param pcbZeroMem Where to return the amount of memory backed by
5791 * zero pages.
5792 *
5793 * @remarks The total mem is normally larger than the sum of the three
5794 * components. There are two reasons for this, first the amount of
5795 * shared memory is what we're sure is shared instead of what could
5796 * possibly be shared with someone. Secondly, because the total may
5797 * include some pure MMIO pages that doesn't go into any of the three
5798 * sub-counts.
5799 *
5800 * @todo Why do we return reused shared pages instead of anything that could
5801 * potentially be shared? Doesn't this mean the first VM gets a much
5802 * lower number of shared pages?
5803 */
5804VMMR3DECL(int) PGMR3QueryMemoryStats(PUVM pUVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem,
5805 uint64_t *pcbSharedMem, uint64_t *pcbZeroMem)
5806{
5807 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5808 PVM pVM = pUVM->pVM;
5809 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
5810
5811 if (pcbTotalMem)
5812 *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * GUEST_PAGE_SIZE;
5813
5814 if (pcbPrivateMem)
5815 *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * GUEST_PAGE_SIZE;
5816
5817 if (pcbSharedMem)
5818 *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * GUEST_PAGE_SIZE;
5819
5820 if (pcbZeroMem)
5821 *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * GUEST_PAGE_SIZE;
5822
5823 Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages));
5824 return VINF_SUCCESS;
5825}
5826
5827
5828
5829/*********************************************************************************************************************************
5830* Chunk Mappings and Page Allocation *
5831*********************************************************************************************************************************/
5832#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5833
5834/**
5835 * Tree enumeration callback for dealing with age rollover.
5836 * It will perform a simple compression of the current age.
5837 */
5838static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser)
5839{
5840 /* Age compression - ASSUMES iNow == 4. */
5841 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5842 if (pChunk->iLastUsed >= UINT32_C(0xffffff00))
5843 pChunk->iLastUsed = 3;
5844 else if (pChunk->iLastUsed >= UINT32_C(0xfffff000))
5845 pChunk->iLastUsed = 2;
5846 else if (pChunk->iLastUsed)
5847 pChunk->iLastUsed = 1;
5848 else /* iLastUsed = 0 */
5849 pChunk->iLastUsed = 4;
5850
5851 NOREF(pvUser);
5852 return 0;
5853}
5854
5855
5856/**
5857 * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback().
5858 */
5859typedef struct PGMR3PHYSCHUNKUNMAPCB
5860{
5861 PVM pVM; /**< Pointer to the VM. */
5862 PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */
5863} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB;
5864
5865
5866/**
5867 * Callback used to find the mapping that's been unused for
5868 * the longest time.
5869 */
5870static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser)
5871{
5872 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5873 PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser;
5874
5875 /*
5876 * Check for locks and compare when last used.
5877 */
5878 if (pChunk->cRefs)
5879 return 0;
5880 if (pChunk->cPermRefs)
5881 return 0;
5882 if ( pArg->pChunk
5883 && pChunk->iLastUsed >= pArg->pChunk->iLastUsed)
5884 return 0;
5885
5886 /*
5887 * Check that it's not in any of the TLBs.
5888 */
5889 PVM pVM = pArg->pVM;
5890 if ( pVM->pgm.s.ChunkR3Map.Tlb.aEntries[PGM_CHUNKR3MAPTLB_IDX(pChunk->Core.Key)].idChunk
5891 == pChunk->Core.Key)
5892 {
5893 pChunk = NULL;
5894 return 0;
5895 }
5896# ifdef VBOX_STRICT
5897 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
5898 {
5899 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk != pChunk);
5900 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk != pChunk->Core.Key);
5901 }
5902# endif
5903
5904# if 0 /* This is too much work with the PGMCPU::PhysTlb as well. We flush them all instead. */
5905 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbR3.aEntries); i++)
5906 if (pVM->pgm.s.PhysTlbR3.aEntries[i].pMap == pChunk)
5907 return 0;
5908# endif
5909
5910 pArg->pChunk = pChunk;
5911 return 0;
5912}
5913
5914
5915/**
5916 * Finds a good candidate for unmapping when the ring-3 mapping cache is full.
5917 *
5918 * The candidate will not be part of any TLBs, so no need to flush
5919 * anything afterwards.
5920 *
5921 * @returns Chunk id.
5922 * @param pVM The cross context VM structure.
5923 */
5924static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM)
5925{
5926 PGM_LOCK_ASSERT_OWNER(pVM);
5927
5928 /*
5929 * Enumerate the age tree starting with the left most node.
5930 */
5931 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5932 PGMR3PHYSCHUNKUNMAPCB Args;
5933 Args.pVM = pVM;
5934 Args.pChunk = NULL;
5935 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args);
5936 Assert(Args.pChunk);
5937 if (Args.pChunk)
5938 {
5939 Assert(Args.pChunk->cRefs == 0);
5940 Assert(Args.pChunk->cPermRefs == 0);
5941 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5942 return Args.pChunk->Core.Key;
5943 }
5944
5945 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5946 return INT32_MAX;
5947}
5948
5949
5950/**
5951 * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk
5952 *
5953 * This is only called on one of the EMTs while the other ones are waiting for
5954 * it to complete this function.
5955 *
5956 * @returns VINF_SUCCESS (VBox strict status code).
5957 * @param pVM The cross context VM structure.
5958 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5959 * @param pvUser User pointer. Unused
5960 *
5961 */
5962static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5963{
5964 int rc = VINF_SUCCESS;
5965 PGM_LOCK_VOID(pVM);
5966 NOREF(pVCpu); NOREF(pvUser);
5967
5968 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
5969 {
5970 /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */
5971 /** @todo also not really efficient to unmap a chunk that contains PD
5972 * or PT pages. */
5973 pgmR3PoolClearAllRendezvous(pVM, pVM->apCpusR3[0], NULL /* no need to flush the REM TLB as we already did that above */);
5974
5975 /*
5976 * Request the ring-0 part to unmap a chunk to make space in the mapping cache.
5977 */
5978 GMMMAPUNMAPCHUNKREQ Req;
5979 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
5980 Req.Hdr.cbReq = sizeof(Req);
5981 Req.pvR3 = NULL;
5982 Req.idChunkMap = NIL_GMM_CHUNKID;
5983 Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM);
5984 if (Req.idChunkUnmap != INT32_MAX)
5985 {
5986 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkUnmap, a);
5987 rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
5988 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkUnmap, a);
5989 if (RT_SUCCESS(rc))
5990 {
5991 /*
5992 * Remove the unmapped one.
5993 */
5994 PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap);
5995 AssertRelease(pUnmappedChunk);
5996 AssertRelease(!pUnmappedChunk->cRefs);
5997 AssertRelease(!pUnmappedChunk->cPermRefs);
5998 pUnmappedChunk->pv = NULL;
5999 pUnmappedChunk->Core.Key = UINT32_MAX;
6000 MMR3HeapFree(pUnmappedChunk);
6001 pVM->pgm.s.ChunkR3Map.c--;
6002 pVM->pgm.s.cUnmappedChunks++;
6003
6004 /*
6005 * Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses).
6006 */
6007 /** @todo We should not flush chunks which include cr3 mappings. */
6008 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
6009 {
6010 PPGMCPU pPGM = &pVM->apCpusR3[idCpu]->pgm.s;
6011
6012 pPGM->pGst32BitPdR3 = NULL;
6013 pPGM->pGstPaePdptR3 = NULL;
6014 pPGM->pGstAmd64Pml4R3 = NULL;
6015 pPGM->pGstEptPml4R3 = NULL;
6016 pPGM->pGst32BitPdR0 = NIL_RTR0PTR;
6017 pPGM->pGstPaePdptR0 = NIL_RTR0PTR;
6018 pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR;
6019 pPGM->pGstEptPml4R0 = NIL_RTR0PTR;
6020 for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++)
6021 {
6022 pPGM->apGstPaePDsR3[i] = NULL;
6023 pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR;
6024 }
6025
6026 /* Flush REM TLBs. */
6027 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
6028 }
6029
6030 pgmR3PhysChunkInvalidateTLB(pVM, true /*fInRendezvous*/); /* includes pgmPhysInvalidatePageMapTLB call */
6031 }
6032 }
6033 }
6034 PGM_UNLOCK(pVM);
6035 return rc;
6036}
6037
6038/**
6039 * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap)
6040 *
6041 * @param pVM The cross context VM structure.
6042 */
6043static DECLCALLBACK(void) pgmR3PhysUnmapChunk(PVM pVM)
6044{
6045 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL);
6046 AssertRC(rc);
6047}
6048
6049
6050/**
6051 * Maps the given chunk into the ring-3 mapping cache.
6052 *
6053 * This will call ring-0.
6054 *
6055 * @returns VBox status code.
6056 * @param pVM The cross context VM structure.
6057 * @param idChunk The chunk in question.
6058 * @param ppChunk Where to store the chunk tracking structure.
6059 *
6060 * @remarks Called from within the PGM critical section.
6061 * @remarks Can be called from any thread!
6062 */
6063int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
6064{
6065 int rc;
6066
6067 PGM_LOCK_ASSERT_OWNER(pVM);
6068
6069 /*
6070 * Move the chunk time forward.
6071 */
6072 pVM->pgm.s.ChunkR3Map.iNow++;
6073 if (pVM->pgm.s.ChunkR3Map.iNow == 0)
6074 {
6075 pVM->pgm.s.ChunkR3Map.iNow = 4;
6076 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, NULL);
6077 }
6078
6079 /*
6080 * Allocate a new tracking structure first.
6081 */
6082 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk));
6083 AssertReturn(pChunk, VERR_NO_MEMORY);
6084 pChunk->Core.Key = idChunk;
6085 pChunk->iLastUsed = pVM->pgm.s.ChunkR3Map.iNow;
6086
6087 /*
6088 * Request the ring-0 part to map the chunk in question.
6089 */
6090 GMMMAPUNMAPCHUNKREQ Req;
6091 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
6092 Req.Hdr.cbReq = sizeof(Req);
6093 Req.pvR3 = NULL;
6094 Req.idChunkMap = idChunk;
6095 Req.idChunkUnmap = NIL_GMM_CHUNKID;
6096
6097 /* Must be callable from any thread, so can't use VMMR3CallR0. */
6098 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkMap, a);
6099 rc = SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
6100 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkMap, a);
6101 if (RT_SUCCESS(rc))
6102 {
6103 pChunk->pv = Req.pvR3;
6104
6105 /*
6106 * If we're running out of virtual address space, then we should
6107 * unmap another chunk.
6108 *
6109 * Currently, an unmap operation requires that all other virtual CPUs
6110 * are idling and not by chance making use of the memory we're
6111 * unmapping. So, we create an async unmap operation here.
6112 *
6113 * Now, when creating or restoring a saved state this wont work very
6114 * well since we may want to restore all guest RAM + a little something.
6115 * So, we have to do the unmap synchronously. Fortunately for us
6116 * though, during these operations the other virtual CPUs are inactive
6117 * and it should be safe to do this.
6118 */
6119 /** @todo Eventually we should lock all memory when used and do
6120 * map+unmap as one kernel call without any rendezvous or
6121 * other precautions. */
6122 if (pVM->pgm.s.ChunkR3Map.c + 1 >= pVM->pgm.s.ChunkR3Map.cMax)
6123 {
6124 switch (VMR3GetState(pVM))
6125 {
6126 case VMSTATE_LOADING:
6127 case VMSTATE_SAVING:
6128 {
6129 PVMCPU pVCpu = VMMGetCpu(pVM);
6130 if ( pVCpu
6131 && pVM->pgm.s.cDeprecatedPageLocks == 0)
6132 {
6133 pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL);
6134 break;
6135 }
6136 }
6137 RT_FALL_THRU();
6138 default:
6139 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
6140 AssertRC(rc);
6141 break;
6142 }
6143 }
6144
6145 /*
6146 * Update the tree. We must do this after any unmapping to make sure
6147 * the chunk we're going to return isn't unmapped by accident.
6148 */
6149 AssertPtr(Req.pvR3);
6150 bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core);
6151 AssertRelease(fRc);
6152 pVM->pgm.s.ChunkR3Map.c++;
6153 pVM->pgm.s.cMappedChunks++;
6154 }
6155 else
6156 {
6157 /** @todo this may fail because of /proc/sys/vm/max_map_count, so we
6158 * should probably restrict ourselves on linux. */
6159 AssertRC(rc);
6160 MMR3HeapFree(pChunk);
6161 pChunk = NULL;
6162 }
6163
6164 *ppChunk = pChunk;
6165 return rc;
6166}
6167
6168
6169/**
6170 * Invalidates the TLB for the ring-3 mapping cache.
6171 *
6172 * @param pVM The cross context VM structure.
6173 * @param fInRendezvous Set if we're in a rendezvous.
6174 */
6175DECLHIDDEN(void) pgmR3PhysChunkInvalidateTLB(PVM pVM, bool fInRendezvous)
6176{
6177 PGM_LOCK_VOID(pVM);
6178 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
6179 {
6180 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID;
6181 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL;
6182 }
6183 /* The page map TLB references chunks, so invalidate that one too. */
6184 pgmPhysInvalidatePageMapTLB(pVM, fInRendezvous);
6185 PGM_UNLOCK(pVM);
6186}
6187
6188
6189/**
6190 * Response to VM_FF_PGM_NEED_HANDY_PAGES and helper for pgmPhysEnsureHandyPage.
6191 *
6192 * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to
6193 * signal and clear the out of memory condition. When called, this API is used
6194 * to try clear the condition when the user wants to resume.
6195 *
6196 * @returns The following VBox status codes.
6197 * @retval VINF_SUCCESS on success. FFs cleared.
6198 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in
6199 * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY.
6200 *
6201 * @param pVM The cross context VM structure.
6202 *
6203 * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing
6204 * in EM.cpp and shouldn't be propagated outside TRPM, HM, EM and
6205 * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF
6206 * handler.
6207 */
6208VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM)
6209{
6210 PGM_LOCK_VOID(pVM);
6211
6212 /*
6213 * Allocate more pages, noting down the index of the first new page.
6214 */
6215 uint32_t iClear = pVM->pgm.s.cHandyPages;
6216 AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_PGM_HANDY_PAGE_IPE);
6217 Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages)));
6218 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
6219 /** @todo we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */
6220 if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT
6221 && pVM->pgm.s.cHandyPages > 0)
6222 {
6223 /* Still handy pages left, so don't panic. */
6224 rc = VINF_SUCCESS;
6225 }
6226
6227 if (RT_SUCCESS(rc))
6228 {
6229 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
6230 Assert(pVM->pgm.s.cHandyPages > 0);
6231# ifdef VBOX_STRICT
6232 uint32_t i;
6233 for (i = iClear; i < pVM->pgm.s.cHandyPages; i++)
6234 if ( pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID
6235 || pVM->pgm.s.aHandyPages[i].idSharedPage != NIL_GMM_PAGEID
6236 || (pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & GUEST_PAGE_OFFSET_MASK))
6237 break;
6238 if (i != pVM->pgm.s.cHandyPages)
6239 {
6240 RTAssertMsg1Weak(NULL, __LINE__, __FILE__, __FUNCTION__);
6241 RTAssertMsg2Weak("i=%d iClear=%d cHandyPages=%d\n", i, iClear, pVM->pgm.s.cHandyPages);
6242 for (uint32_t j = iClear; j < pVM->pgm.s.cHandyPages; j++)
6243 RTAssertMsg2Add("%03d: idPage=%d HCPhysGCPhys=%RHp idSharedPage=%d%s\n", j,
6244 pVM->pgm.s.aHandyPages[j].idPage,
6245 pVM->pgm.s.aHandyPages[j].HCPhysGCPhys,
6246 pVM->pgm.s.aHandyPages[j].idSharedPage,
6247 j == i ? " <---" : "");
6248 RTAssertPanic();
6249 }
6250# endif
6251 }
6252 else
6253 {
6254 /*
6255 * We should never get here unless there is a genuine shortage of
6256 * memory (or some internal error). Flag the error so the VM can be
6257 * suspended ASAP and the user informed. If we're totally out of
6258 * handy pages we will return failure.
6259 */
6260 /* Report the failure. */
6261 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
6262 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
6263 rc, pVM->pgm.s.cHandyPages,
6264 pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cSharedPages, pVM->pgm.s.cZeroPages));
6265
6266 if ( rc != VERR_NO_MEMORY
6267 && rc != VERR_NO_PHYS_MEMORY
6268 && rc != VERR_LOCK_FAILED)
6269 for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
6270 {
6271 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
6272 i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage,
6273 pVM->pgm.s.aHandyPages[i].idSharedPage));
6274 uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage;
6275 if (idPage != NIL_GMM_PAGEID)
6276 {
6277 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
6278 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
6279 {
6280 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
6281 Assert(pRam || idRamRange == 0);
6282 if (!pRam) continue;
6283 Assert(pRam->idRange == idRamRange);
6284
6285 uint32_t const cPages = pRam->cb >> GUEST_PAGE_SHIFT;
6286 for (uint32_t iPage = 0; iPage < cPages; iPage++)
6287 if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage)
6288 LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n",
6289 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc));
6290 }
6291 }
6292 }
6293
6294 if (rc == VERR_NO_MEMORY)
6295 {
6296 uint64_t cbHostRamAvail = 0;
6297 int rc2 = RTSystemQueryAvailableRam(&cbHostRamAvail);
6298 if (RT_SUCCESS(rc2))
6299 LogRel(("Host RAM: %RU64MB available\n", cbHostRamAvail / _1M));
6300 else
6301 LogRel(("Cannot determine the amount of available host memory\n"));
6302 }
6303
6304 /* Set the FFs and adjust rc. */
6305 VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
6306 VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
6307 if ( rc == VERR_NO_MEMORY
6308 || rc == VERR_NO_PHYS_MEMORY
6309 || rc == VERR_LOCK_FAILED)
6310 rc = VINF_EM_NO_MEMORY;
6311 }
6312
6313 PGM_UNLOCK(pVM);
6314 return rc;
6315}
6316
6317#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
6318
6319
6320/*********************************************************************************************************************************
6321* Other Stuff *
6322*********************************************************************************************************************************/
6323
6324#ifdef VBOX_VMM_TARGET_X86
6325/**
6326 * Sets the Address Gate 20 state.
6327 *
6328 * @param pVCpu The cross context virtual CPU structure.
6329 * @param fEnable True if the gate should be enabled.
6330 * False if the gate should be disabled.
6331 */
6332VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable)
6333{
6334 LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled));
6335 if (pVCpu->pgm.s.fA20Enabled != fEnable)
6336 {
6337# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6338 PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
6339 if ( CPUMIsGuestInVmxRootMode(pCtx)
6340 && !fEnable)
6341 {
6342 Log(("Cannot enter A20M mode while in VMX root mode\n"));
6343 return;
6344 }
6345# endif
6346 pVCpu->pgm.s.fA20Enabled = fEnable;
6347 pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!fEnable << 20);
6348 if (VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)))
6349 NEMR3NotifySetA20(pVCpu, fEnable);
6350# ifdef PGM_WITH_A20
6351 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
6352 pgmR3RefreshShadowModeAfterA20Change(pVCpu);
6353 HMFlushTlb(pVCpu);
6354# endif
6355# if 0 /* PGMGetPage will apply the A20 mask to the GCPhys it returns, so we must invalid both sides of the TLB. */
6356 IEMTlbInvalidateAllPhysical(pVCpu);
6357# else
6358 IEMTlbInvalidateAllGlobal(pVCpu);
6359# endif
6360 STAM_REL_COUNTER_INC(&pVCpu->pgm.s.cA20Changes);
6361 }
6362}
6363#endif /* VBOX_VMM_TARGET_X86 */
6364
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette