VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp@ 109008

Last change on this file since 109008 was 108968, checked in by vboxsync, 7 days ago

VMM,Main,Devices: Respect VBOX_VMM_TARGET_ARMV8 correctly on amd64 hosts (for IEM debugging purposes). jiraref:VBP-1598

1/* $Id: PGMPhys.cpp 108968 2025-04-14 20:45:36Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Physical Memory Addressing.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_PHYS
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/iem.h>
36#include <VBox/vmm/iom.h>
37#include <VBox/vmm/mm.h>
38#include <VBox/vmm/nem.h>
39#include <VBox/vmm/stam.h>
40#include <VBox/vmm/pdmdev.h>
41#include "PGMInternal.h"
42#include <VBox/vmm/vmcc.h>
43
44#include "PGMInline.h"
45
46#include <VBox/sup.h>
47#include <VBox/param.h>
48#include <VBox/err.h>
49#include <VBox/log.h>
50#include <iprt/assert.h>
51#include <iprt/alloc.h>
52#include <iprt/asm.h>
53#ifdef VBOX_STRICT
54# include <iprt/crc.h>
55#endif
56#include <iprt/thread.h>
57#include <iprt/string.h>
58#include <iprt/system.h>
59
60
61/*********************************************************************************************************************************
62* Defined Constants And Macros *
63*********************************************************************************************************************************/
64/** The number of pages to free in one batch. */
65#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128
66
67
68
69/*********************************************************************************************************************************
70* Reading and Writing Guest Physical Memory *
71*********************************************************************************************************************************/
72
73/*
74 * PGMR3PhysReadU8-64
75 * PGMR3PhysWriteU8-64
76 */
77#define PGMPHYSFN_READNAME PGMR3PhysReadU8
78#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8
79#define PGMPHYS_DATASIZE 1
80#define PGMPHYS_DATATYPE uint8_t
81#include "PGMPhysRWTmpl.h"
82
83#define PGMPHYSFN_READNAME PGMR3PhysReadU16
84#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16
85#define PGMPHYS_DATASIZE 2
86#define PGMPHYS_DATATYPE uint16_t
87#include "PGMPhysRWTmpl.h"
88
89#define PGMPHYSFN_READNAME PGMR3PhysReadU32
90#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32
91#define PGMPHYS_DATASIZE 4
92#define PGMPHYS_DATATYPE uint32_t
93#include "PGMPhysRWTmpl.h"
94
95#define PGMPHYSFN_READNAME PGMR3PhysReadU64
96#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64
97#define PGMPHYS_DATASIZE 8
98#define PGMPHYS_DATATYPE uint64_t
99#include "PGMPhysRWTmpl.h"
100
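/*
 * Illustrative sketch (not from the original sources): the #define/#include
 * blocks above instantiate PGMPhysRWTmpl.h once per access size.  The fragment
 * below only illustrates the multiple-inclusion technique in general; the
 * header name and all identifiers are made up and are NOT the real
 * PGMPhysRWTmpl.h contents.
 */
#if 0
/* Consumer side - instantiate the template for two sizes: */
# define EXAMPLE_DATATYPE uint16_t
# define EXAMPLE_READNAME exampleReadU16
# include "example-read-tmpl.h"

# define EXAMPLE_DATATYPE uint32_t
# define EXAMPLE_READNAME exampleReadU32
# include "example-read-tmpl.h"

/* Template side ("example-read-tmpl.h") - one function per inclusion: */
static EXAMPLE_DATATYPE EXAMPLE_READNAME(const uint8_t *pb)
{
    EXAMPLE_DATATYPE uValue;
    memcpy(&uValue, pb, sizeof(uValue)); /* unaligned-safe copy */
    return uValue;
}
# undef EXAMPLE_DATATYPE
# undef EXAMPLE_READNAME
#endif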
101
102/**
103 * EMT worker for PGMR3PhysReadExternal.
104 */
105static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead,
106 PGMACCESSORIGIN enmOrigin)
107{
108 VBOXSTRICTRC rcStrict = PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead, enmOrigin);
109 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
110 return VINF_SUCCESS;
111}
112
113
114/**
115 * Read from physical memory, external users.
116 *
117 * @returns VBox status code.
118 * @retval VINF_SUCCESS.
119 *
120 * @param pVM The cross context VM structure.
121 * @param GCPhys Physical address to read from.
122 * @param pvBuf Where to read into.
123 * @param cbRead How many bytes to read.
124 * @param enmOrigin Who is calling.
125 *
126 * @thread Any but EMTs.
127 */
128VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead, PGMACCESSORIGIN enmOrigin)
129{
130 VM_ASSERT_OTHER_THREAD(pVM);
131
132 AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS);
133 LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead));
134
135 PGM_LOCK_VOID(pVM);
136
137 /*
138 * Copy loop on ram ranges.
139 */
140 for (;;)
141 {
142 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
143
144 /* Inside range or not? */
145 if (pRam && GCPhys >= pRam->GCPhys)
146 {
147 /*
148 * Must work our way thru this page by page.
149 */
150 RTGCPHYS off = GCPhys - pRam->GCPhys;
151 while (off < pRam->cb)
152 {
153 unsigned iPage = off >> GUEST_PAGE_SHIFT;
154 PPGMPAGE pPage = &pRam->aPages[iPage];
155
156 /*
157 * If the page has an ALL access handler, we'll have to
158 * delegate the job to EMT.
159 */
160 if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
161 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
162 {
163 PGM_UNLOCK(pVM);
164
165 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 5,
166 pVM, &GCPhys, pvBuf, cbRead, enmOrigin);
167 }
168 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
169
170 /*
171 * Simple stuff, go ahead.
172 */
173 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
174 if (cb > cbRead)
175 cb = cbRead;
176 PGMPAGEMAPLOCK PgMpLck;
177 const void *pvSrc;
178 int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc, &PgMpLck);
179 if (RT_SUCCESS(rc))
180 {
181 memcpy(pvBuf, pvSrc, cb);
182 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
183 }
184 else
185 {
186 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n",
187 pRam->GCPhys + off, pPage, rc));
188 memset(pvBuf, 0xff, cb);
189 }
190
191 /* next page */
192 if (cb >= cbRead)
193 {
194 PGM_UNLOCK(pVM);
195 return VINF_SUCCESS;
196 }
197 cbRead -= cb;
198 off += cb;
199 GCPhys += cb;
200 pvBuf = (char *)pvBuf + cb;
201 } /* walk pages in ram range. */
202 }
203 else
204 {
205 LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead));
206
207 /*
208 * Unassigned address space.
209 */
210 size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0;
211 if (cb >= cbRead)
212 {
213 memset(pvBuf, 0xff, cbRead);
214 break;
215 }
216 memset(pvBuf, 0xff, cb);
217
218 cbRead -= cb;
219 pvBuf = (char *)pvBuf + cb;
220 GCPhys += cb;
221 }
222 } /* Ram range walk */
223
224 PGM_UNLOCK(pVM);
225
226 return VINF_SUCCESS;
227}
228
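/*
 * Illustrative sketch (not from the original sources): how a non-EMT caller,
 * e.g. a device worker thread, might use PGMR3PhysReadExternal.  The buffer
 * size and the PGMACCESSORIGIN_DEVICE origin value are assumptions made for
 * the example.
 */
#if 0
static int exampleReadGuestHeader(PVM pVM, RTGCPHYS GCPhysHdr)
{
    uint8_t abHdr[64];
    int rc = PGMR3PhysReadExternal(pVM, GCPhysHdr, abHdr, sizeof(abHdr), PGMACCESSORIGIN_DEVICE);
    if (RT_SUCCESS(rc))
    {
        /* Unassigned address space reads back as 0xff, so a sanity check may be in order. */
        Log(("exampleReadGuestHeader: first byte %#x\n", abHdr[0]));
    }
    return rc;
}
#endif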
229
230/**
231 * EMT worker for PGMR3PhysWriteExternal.
232 */
233static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite,
234 PGMACCESSORIGIN enmOrigin)
235{
236 /** @todo VERR_EM_NO_MEMORY */
237 VBOXSTRICTRC rcStrict = PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite, enmOrigin);
238 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
239 return VINF_SUCCESS;
240}
241
242
243/**
244 * Write to physical memory, external users.
245 *
246 * @returns VBox status code.
247 * @retval VINF_SUCCESS.
248 * @retval VERR_EM_NO_MEMORY.
249 *
250 * @param pVM The cross context VM structure.
251 * @param GCPhys Physical address to write to.
252 * @param pvBuf What to write.
253 * @param cbWrite How many bytes to write.
254 * @param enmOrigin Who is calling.
255 *
256 * @thread Any but EMTs.
257 */
258VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, PGMACCESSORIGIN enmOrigin)
259{
260 VM_ASSERT_OTHER_THREAD(pVM);
261
262 AssertMsg(!pVM->pgm.s.fNoMorePhysWrites,
263 ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x enmOrigin=%d\n",
264 GCPhys, cbWrite, enmOrigin));
265 AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS);
266 LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite));
267
268 PGM_LOCK_VOID(pVM);
269
270 /*
271 * Copy loop on ram ranges, stop when we hit something difficult.
272 */
273 for (;;)
274 {
275 PPGMRAMRANGE const pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
276
277 /* Inside range or not? */
278 if (pRam && GCPhys >= pRam->GCPhys)
279 {
280 /*
281 * Must work our way thru this page by page.
282 */
283 RTGCPTR off = GCPhys - pRam->GCPhys;
284 while (off < pRam->cb)
285 {
286 RTGCPTR iPage = off >> GUEST_PAGE_SHIFT;
287 PPGMPAGE pPage = &pRam->aPages[iPage];
288
289 /*
290 * If the page is problematic, we have to do the work on the EMT.
291 *
292 * Allocating writable pages and access handlers are
293 * problematic, write monitored pages are simple and can be
294 * dealt with here.
295 */
296 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
297 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
298 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
299 {
300 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
301 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
302 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
303 else
304 {
305 PGM_UNLOCK(pVM);
306
307 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 5,
308 pVM, &GCPhys, pvBuf, cbWrite, enmOrigin);
309 }
310 }
311 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
312
313 /*
314 * Simple stuff, go ahead.
315 */
316 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
317 if (cb > cbWrite)
318 cb = cbWrite;
319 PGMPAGEMAPLOCK PgMpLck;
320 void *pvDst;
321 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst, &PgMpLck);
322 if (RT_SUCCESS(rc))
323 {
324 memcpy(pvDst, pvBuf, cb);
325 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
326 }
327 else
328 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n",
329 pRam->GCPhys + off, pPage, rc));
330
331 /* next page */
332 if (cb >= cbWrite)
333 {
334 PGM_UNLOCK(pVM);
335 return VINF_SUCCESS;
336 }
337
338 cbWrite -= cb;
339 off += cb;
340 GCPhys += cb;
341 pvBuf = (const char *)pvBuf + cb;
342 } /* walk pages in ram range */
343 }
344 else
345 {
346 /*
347 * Unassigned address space, skip it.
348 */
349 if (!pRam)
350 break;
351 size_t cb = pRam->GCPhys - GCPhys;
352 if (cb >= cbWrite)
353 break;
354 cbWrite -= cb;
355 pvBuf = (const char *)pvBuf + cb;
356 GCPhys += cb;
357 }
358 } /* Ram range walk */
359
360 PGM_UNLOCK(pVM);
361 return VINF_SUCCESS;
362}
363
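/*
 * Illustrative sketch (not from the original sources): the matching non-EMT
 * write path.  Pages with active handlers are delegated to an EMT by
 * PGMR3PhysWriteExternal itself, so the caller only checks the status code.
 * The origin value is again an assumption.
 */
#if 0
static int exampleWriteGuestBuffer(PVM pVM, RTGCPHYS GCPhysDst, const void *pvData, size_t cbData)
{
    int rc = PGMR3PhysWriteExternal(pVM, GCPhysDst, pvData, cbData, PGMACCESSORIGIN_DEVICE);
    if (RT_FAILURE(rc))
        Log(("exampleWriteGuestBuffer: %RGp %d -> %Rrc\n", GCPhysDst, cbData, rc));
    return rc;
}
#endif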
364
365/*********************************************************************************************************************************
366* Mapping Guest Physical Memory *
367*********************************************************************************************************************************/
368
369/**
370 * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable.
371 *
372 * @returns see PGMR3PhysGCPhys2CCPtrExternal
373 * @param pVM The cross context VM structure.
374 * @param pGCPhys Pointer to the guest physical address.
375 * @param ppv Where to store the mapping address.
376 * @param pLock Where to store the lock.
377 */
378static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
379{
380 /*
381 * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with
382 * an access handler after it succeeds.
383 */
384 int rc = PGM_LOCK(pVM);
385 AssertRCReturn(rc, rc);
386
387 rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock);
388 if (RT_SUCCESS(rc))
389 {
390 PPGMPAGEMAPTLBE pTlbe;
391 int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe);
392 AssertFatalRC(rc2);
393 PPGMPAGE pPage = pTlbe->pPage;
394 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
395 {
396 PGMPhysReleasePageMappingLock(pVM, pLock);
397 rc = VERR_PGM_PHYS_PAGE_RESERVED;
398 }
399 else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
400#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
401 || pgmPoolIsDirtyPage(pVM, *pGCPhys)
402#endif
403 )
404 {
405 /* We *must* flush any corresponding pgm pool page here, otherwise we'll
406 * not be informed about writes and keep bogus gst->shw mappings around.
407 */
408#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
409 pgmPoolFlushPageByGCPhys(pVM, *pGCPhys);
410#endif
411 Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage));
412 /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has
413 * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */
414 }
415 }
416
417 PGM_UNLOCK(pVM);
418 return rc;
419}
420
421
422/**
423 * Requests the mapping of a guest page into ring-3, external threads.
424 *
425 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
426 * release it.
427 *
428 * This API will assume your intention is to write to the page, and will
429 * therefore replace shared and zero pages. If you do not intend to modify the
430 * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API.
431 *
432 * @returns VBox status code.
433 * @retval VINF_SUCCESS on success.
434 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
435 * backing or if the page has any active access handlers. The caller
436 * must fall back on using PGMR3PhysWriteExternal.
437 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
438 *
439 * @param pVM The cross context VM structure.
440 * @param GCPhys The guest physical address of the page that should be mapped.
441 * @param ppv Where to store the address corresponding to GCPhys.
442 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
443 *
444 * @remark Avoid calling this API from within critical sections (other than the
445 * PGM one) because of the deadlock risk when we have to delegate the
446 * task to an EMT.
447 * @thread Any.
448 */
449VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
450{
451 AssertPtr(ppv);
452 AssertPtr(pLock);
453
454 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
455
456 int rc = PGM_LOCK(pVM);
457 AssertRCReturn(rc, rc);
458
459 /*
460 * Query the Physical TLB entry for the page (may fail).
461 */
462 PPGMPAGEMAPTLBE pTlbe;
463 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
464 if (RT_SUCCESS(rc))
465 {
466 PPGMPAGE pPage = pTlbe->pPage;
467 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
468 rc = VERR_PGM_PHYS_PAGE_RESERVED;
469 else
470 {
471 /*
472 * If the page is shared, the zero page, or being write monitored,
473 * it must be converted to a page that's writable if possible.
474 * We can only deal with write monitored pages here, the rest have
475 * to be on an EMT.
476 */
477 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
478 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
479#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
480 || pgmPoolIsDirtyPage(pVM, GCPhys)
481#endif
482 )
483 {
484 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
485 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
486#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
487 && !pgmPoolIsDirtyPage(pVM, GCPhys) /** @todo we're very likely doing this twice. */
488#endif
489 )
490 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
491 else
492 {
493 PGM_UNLOCK(pVM);
494
495 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
496 pVM, &GCPhys, ppv, pLock);
497 }
498 }
499
500 /*
501 * Now, just perform the locking and calculate the return address.
502 */
503 PPGMPAGEMAP pMap = pTlbe->pMap;
504 if (pMap)
505 pMap->cRefs++;
506
507 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
508 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
509 {
510 if (cLocks == 0)
511 pVM->pgm.s.cWriteLockedPages++;
512 PGM_PAGE_INC_WRITE_LOCKS(pPage);
513 }
514 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
515 {
516 PGM_PAGE_INC_WRITE_LOCKS(pPage);
517 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
518 if (pMap)
519 pMap->cRefs++; /* Extra ref to prevent it from going away. */
520 }
521
522 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
523 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
524 pLock->pvMap = pMap;
525 }
526 }
527
528 PGM_UNLOCK(pVM);
529 return rc;
530}
531
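/*
 * Illustrative sketch (not from the original sources): the map-modify-release
 * pattern for PGMR3PhysGCPhys2CCPtrExternal, with the documented fallback to
 * PGMR3PhysWriteExternal when the page is reserved/handled.  Assumes the
 * 32-bit value does not cross a page boundary; the origin value is an
 * assumption.
 */
#if 0
static int exampleWriteU32ViaMapping(PVM pVM, RTGCPHYS GCPhys, uint32_t u32Value)
{
    void          *pv;
    PGMPAGEMAPLOCK Lock;
    int rc = PGMR3PhysGCPhys2CCPtrExternal(pVM, GCPhys, &pv, &Lock);
    if (RT_SUCCESS(rc))
    {
        *(uint32_t *)pv = u32Value;
        PGMPhysReleasePageMappingLock(pVM, &Lock); /* release ASAP, as the docs require */
    }
    else if (rc == VERR_PGM_PHYS_PAGE_RESERVED)
        rc = PGMR3PhysWriteExternal(pVM, GCPhys, &u32Value, sizeof(u32Value), PGMACCESSORIGIN_DEVICE);
    return rc;
}
#endif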
532
533/**
534 * Requests the mapping of a guest page into ring-3, external threads.
535 *
536 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
537 * release it.
538 *
539 * @returns VBox status code.
540 * @retval VINF_SUCCESS on success.
541 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
542 * backing or if the page has an active ALL access handler. The caller
543 * must fall back on using PGMPhysRead.
544 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
545 *
546 * @param pVM The cross context VM structure.
547 * @param GCPhys The guest physical address of the page that should be mapped.
548 * @param ppv Where to store the address corresponding to GCPhys.
549 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
550 *
551 * @remark Avoid calling this API from within critical sections (other than
552 * the PGM one) because of the deadlock risk.
553 * @thread Any.
554 */
555VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock)
556{
557 int rc = PGM_LOCK(pVM);
558 AssertRCReturn(rc, rc);
559
560 /*
561 * Query the Physical TLB entry for the page (may fail).
562 */
563 PPGMPAGEMAPTLBE pTlbe;
564 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
565 if (RT_SUCCESS(rc))
566 {
567 PPGMPAGE pPage = pTlbe->pPage;
568#if 1
569 /* MMIO pages don't have any readable backing. */
570 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
571 rc = VERR_PGM_PHYS_PAGE_RESERVED;
572#else
573 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
574 rc = VERR_PGM_PHYS_PAGE_RESERVED;
575#endif
576 else
577 {
578 /*
579 * Now, just perform the locking and calculate the return address.
580 */
581 PPGMPAGEMAP pMap = pTlbe->pMap;
582 if (pMap)
583 pMap->cRefs++;
584
585 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
586 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
587 {
588 if (cLocks == 0)
589 pVM->pgm.s.cReadLockedPages++;
590 PGM_PAGE_INC_READ_LOCKS(pPage);
591 }
592 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
593 {
594 PGM_PAGE_INC_READ_LOCKS(pPage);
595 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage));
596 if (pMap)
597 pMap->cRefs++; /* Extra ref to prevent it from going away. */
598 }
599
600 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
601 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
602 pLock->pvMap = pMap;
603 }
604 }
605
606 PGM_UNLOCK(pVM);
607 return rc;
608}
609
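/*
 * Illustrative sketch (not from the original sources): the read-only
 * counterpart; note the const mapping pointer and the read-lock that is taken
 * instead of a write-lock.  Assumes the value does not cross a page boundary.
 */
#if 0
static int examplePeekU32(PVM pVM, RTGCPHYS GCPhys, uint32_t *pu32)
{
    void const    *pv;
    PGMPAGEMAPLOCK Lock;
    int rc = PGMR3PhysGCPhys2CCPtrReadOnlyExternal(pVM, GCPhys, &pv, &Lock);
    if (RT_SUCCESS(rc))
    {
        *pu32 = *(uint32_t const *)pv;
        PGMPhysReleasePageMappingLock(pVM, &Lock);
    }
    return rc;
}
#endif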
610
611/**
612 * Requests the mapping of multiple guest pages into ring-3, external threads.
613 *
614 * When you're done with the pages, call PGMPhysBulkReleasePageMappingLock()
615 * ASAP to release them.
616 *
617 * This API will assume your intention is to write to the pages, and will
618 * therefore replace shared and zero pages. If you do not intend to modify the
619 * pages, use the PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal() API.
620 *
621 * @returns VBox status code.
622 * @retval VINF_SUCCESS on success.
623 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
624 * backing or if any of the pages has any active access
625 * handlers. The caller must fall back on using PGMR3PhysWriteExternal.
626 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
627 * an invalid physical address.
628 *
629 * @param pVM The cross context VM structure.
630 * @param cPages Number of pages to lock.
631 * @param paGCPhysPages The guest physical address of the pages that
632 * should be mapped (@a cPages entries).
633 * @param papvPages Where to store the ring-3 mapping addresses
634 * corresponding to @a paGCPhysPages.
635 * @param paLocks Where to store the locking information that
636 * pfnPhysBulkReleasePageMappingLock needs (@a cPages
637 * in length).
638 *
639 * @remark Avoid calling this API from within critical sections (other than the
640 * PGM one) because of the deadlock risk when we have to delegate the
641 * task to an EMT.
642 * @thread Any.
643 */
644VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
645 void **papvPages, PPGMPAGEMAPLOCK paLocks)
646{
647 Assert(cPages > 0);
648 AssertPtr(papvPages);
649 AssertPtr(paLocks);
650
651 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
652
653 int rc = PGM_LOCK(pVM);
654 AssertRCReturn(rc, rc);
655
656 /*
657 * Lock the pages one by one.
658 * The loop body is similar to PGMR3PhysGCPhys2CCPtrExternal.
659 */
660 int32_t cNextYield = 128;
661 uint32_t iPage;
662 for (iPage = 0; iPage < cPages; iPage++)
663 {
664 if (--cNextYield > 0)
665 { /* likely */ }
666 else
667 {
668 PGM_UNLOCK(pVM);
669 ASMNopPause();
670 PGM_LOCK_VOID(pVM);
671 cNextYield = 128;
672 }
673
674 /*
675 * Query the Physical TLB entry for the page (may fail).
676 */
677 PPGMPAGEMAPTLBE pTlbe;
678 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
679 if (RT_SUCCESS(rc))
680 { }
681 else
682 break;
683 PPGMPAGE pPage = pTlbe->pPage;
684
685 /*
686 * No MMIO or active access handlers.
687 */
688 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
689 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
690 { }
691 else
692 {
693 rc = VERR_PGM_PHYS_PAGE_RESERVED;
694 break;
695 }
696
697 /*
698 * The page must be in the allocated state and not be a dirty pool page.
699 * We can handle converting a write monitored page to an allocated one, but
700 * anything more complicated must be delegated to an EMT.
701 */
702 bool fDelegateToEmt = false;
703 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
704#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
705 fDelegateToEmt = pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]);
706#else
707 fDelegateToEmt = false;
708#endif
709 else if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
710 {
711#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
712 if (!pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]))
713 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, paGCPhysPages[iPage]);
714 else
715 fDelegateToEmt = true;
716#endif
717 }
718 else
719 fDelegateToEmt = true;
720 if (!fDelegateToEmt)
721 { }
722 else
723 {
724 /* We could do this delegation in bulk, but considered too much work vs gain. */
725 PGM_UNLOCK(pVM);
726 rc = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
727 pVM, &paGCPhysPages[iPage], &papvPages[iPage], &paLocks[iPage]);
728 PGM_LOCK_VOID(pVM);
729 if (RT_FAILURE(rc))
730 break;
731 cNextYield = 128;
732 }
733
734 /*
735 * Now, just perform the locking and address calculation.
736 */
737 PPGMPAGEMAP pMap = pTlbe->pMap;
738 if (pMap)
739 pMap->cRefs++;
740
741 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
742 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
743 {
744 if (cLocks == 0)
745 pVM->pgm.s.cWriteLockedPages++;
746 PGM_PAGE_INC_WRITE_LOCKS(pPage);
747 }
748 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
749 {
750 PGM_PAGE_INC_WRITE_LOCKS(pPage);
751 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", paGCPhysPages[iPage], pPage));
752 if (pMap)
753 pMap->cRefs++; /* Extra ref to prevent it from going away. */
754 }
755
756 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
757 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
758 paLocks[iPage].pvMap = pMap;
759 }
760
761 PGM_UNLOCK(pVM);
762
763 /*
764 * On failure we must unlock any pages we managed to get already.
765 */
766 if (RT_FAILURE(rc) && iPage > 0)
767 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
768
769 return rc;
770}
771
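/*
 * Illustrative sketch (not from the original sources): locking a small batch
 * of scattered guest pages for writing and releasing them again.  On failure
 * the API has already released whatever it managed to lock, so only the
 * success path needs the bulk release call.
 */
#if 0
static int exampleTouchPages(PVM pVM, PCRTGCPHYS paGCPhysPages, uint32_t cPages)
{
    AssertReturn(cPages <= 16, VERR_INVALID_PARAMETER);
    void          *apvPages[16];
    PGMPAGEMAPLOCK aLocks[16];
    int rc = PGMR3PhysBulkGCPhys2CCPtrExternal(pVM, cPages, paGCPhysPages, apvPages, aLocks);
    if (RT_SUCCESS(rc))
    {
        for (uint32_t i = 0; i < cPages; i++)
            *(uint8_t *)apvPages[i] = 0; /* touch the first byte of each page */
        PGMPhysBulkReleasePageMappingLocks(pVM, cPages, aLocks);
    }
    return rc;
}
#endif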
772
773/**
774 * Requests the mapping of multiple guest pages into ring-3, for reading only,
775 * external threads.
776 *
777 * When you're done with the pages, call PGMPhysReleasePageMappingLock() ASAP
778 * to release them.
779 *
780 * @returns VBox status code.
781 * @retval VINF_SUCCESS on success.
782 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
783 * backing or if any of the pages has an active ALL access
784 * handler. The caller must fall back on using PGMR3PhysReadExternal.
785 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
786 * an invalid physical address.
787 *
788 * @param pVM The cross context VM structure.
789 * @param cPages Number of pages to lock.
790 * @param paGCPhysPages The guest physical address of the pages that
791 * should be mapped (@a cPages entries).
792 * @param papvPages Where to store the ring-3 mapping addresses
793 * corresponding to @a paGCPhysPages.
794 * @param paLocks Where to store the lock information that
795 * pfnPhysReleasePageMappingLock needs (@a cPages
796 * in length).
797 *
798 * @remark Avoid calling this API from within critical sections (other than
799 * the PGM one) because of the deadlock risk.
800 * @thread Any.
801 */
802VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
803 void const **papvPages, PPGMPAGEMAPLOCK paLocks)
804{
805 Assert(cPages > 0);
806 AssertPtr(papvPages);
807 AssertPtr(paLocks);
808
809 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
810
811 int rc = PGM_LOCK(pVM);
812 AssertRCReturn(rc, rc);
813
814 /*
815 * Lock the pages one by one.
816 * The loop body is similar to PGMR3PhysGCPhys2CCPtrReadOnlyExternal.
817 */
818 int32_t cNextYield = 256;
819 uint32_t iPage;
820 for (iPage = 0; iPage < cPages; iPage++)
821 {
822 if (--cNextYield > 0)
823 { /* likely */ }
824 else
825 {
826 PGM_UNLOCK(pVM);
827 ASMNopPause();
828 PGM_LOCK_VOID(pVM);
829 cNextYield = 256;
830 }
831
832 /*
833 * Query the Physical TLB entry for the page (may fail).
834 */
835 PPGMPAGEMAPTLBE pTlbe;
836 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
837 if (RT_SUCCESS(rc))
838 { }
839 else
840 break;
841 PPGMPAGE pPage = pTlbe->pPage;
842
843 /*
844 * No MMIO or active all access handlers, everything else can be accessed.
845 */
846 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
847 && !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
848 { }
849 else
850 {
851 rc = VERR_PGM_PHYS_PAGE_RESERVED;
852 break;
853 }
854
855 /*
856 * Now, just perform the locking and address calculation.
857 */
858 PPGMPAGEMAP pMap = pTlbe->pMap;
859 if (pMap)
860 pMap->cRefs++;
861
862 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
863 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
864 {
865 if (cLocks == 0)
866 pVM->pgm.s.cReadLockedPages++;
867 PGM_PAGE_INC_READ_LOCKS(pPage);
868 }
869 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
870 {
871 PGM_PAGE_INC_READ_LOCKS(pPage);
872 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", paGCPhysPages[iPage], pPage));
873 if (pMap)
874 pMap->cRefs++; /* Extra ref to prevent it from going away. */
875 }
876
877 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
878 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
879 paLocks[iPage].pvMap = pMap;
880 }
881
882 PGM_UNLOCK(pVM);
883
884 /*
885 * On failure we must unlock any pages we managed to get already.
886 */
887 if (RT_FAILURE(rc) && iPage > 0)
888 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
889
890 return rc;
891}
892
893
894/**
895 * Converts a GC physical address to a HC ring-3 pointer, with some
896 * additional checks.
897 *
898 * @returns VBox status code.
899 * @retval VINF_SUCCESS on success.
900 * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write
901 * access handler of some kind.
902 * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all
903 * accesses or is odd in any way.
904 * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist.
905 *
906 * @param pVM The cross context VM structure.
907 * @param GCPhys The GC physical address to convert. Since this is only
908 * used for filling the REM TLB, the A20 mask must be
909 * applied before calling this API.
910 * @param fWritable Whether write access is required.
911 * @param ppv Where to store the pointer corresponding to GCPhys on
912 * success.
913 */
914VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv)
915{
916 PGM_LOCK_VOID(pVM);
917 PGM_A20_ASSERT_MASKED(VMMGetCpu(pVM), GCPhys);
918
919 PPGMRAMRANGE pRam;
920 PPGMPAGE pPage;
921 int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam);
922 if (RT_SUCCESS(rc))
923 {
924 if (PGM_PAGE_IS_BALLOONED(pPage))
925 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
926 else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage))
927 rc = VINF_SUCCESS;
928 else
929 {
930 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */
931 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
932 else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
933 {
934 /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work
935 * in -norawr0 mode. */
936 if (fWritable)
937 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
938 }
939 else
940 {
941 /* Temporarily disabled physical handler(s), since the recompiler
942 doesn't get notified when it's reset, we'll have to pretend it's
943 operating normally. */
944 if (pgmHandlerPhysicalIsAll(pVM, GCPhys))
945 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
946 else
947 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
948 }
949 }
950 if (RT_SUCCESS(rc))
951 {
952 int rc2;
953
954 /* Make sure what we return is writable. */
955 if (fWritable)
956 switch (PGM_PAGE_GET_STATE(pPage))
957 {
958 case PGM_PAGE_STATE_ALLOCATED:
959 break;
960 case PGM_PAGE_STATE_BALLOONED:
961 AssertFailed();
962 break;
963 case PGM_PAGE_STATE_ZERO:
964 case PGM_PAGE_STATE_SHARED:
965 if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
966 break;
967 RT_FALL_THRU();
968 case PGM_PAGE_STATE_WRITE_MONITORED:
969 rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
970 AssertLogRelRCReturn(rc2, rc2);
971 break;
972 }
973
974 /* Get a ring-3 mapping of the address. */
975 PPGMPAGER3MAPTLBE pTlbe;
976 rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
977 AssertLogRelRCReturn(rc2, rc2);
978 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
979 /** @todo mapping/locking hell; this isn't horribly efficient since
980 * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */
981
982 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv));
983 }
984 else
985 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage));
986
987 /* else: handler catching all access, no pointer returned. */
988 }
989 else
990 rc = VERR_PGM_PHYS_TLB_UNASSIGNED;
991
992 PGM_UNLOCK(pVM);
993 return rc;
994}
995
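/*
 * Illustrative sketch (not from the original sources): how a software TLB
 * loader might consume PGMR3PhysTlbGCPhys2Ptr, treating the "catch write"
 * status as a request to mark the cached entry read-only.  Remember that the
 * A20 mask must already be applied to the address, per the docs above.
 */
#if 0
static int exampleLoadTlbEntry(PVM pVM, RTGCPHYS GCPhysMasked, void **ppv, bool *pfReadOnly)
{
    int rc = PGMR3PhysTlbGCPhys2Ptr(pVM, GCPhysMasked & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK, true /*fWritable*/, ppv);
    if (rc == VINF_SUCCESS)
        *pfReadOnly = false;
    else if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
    {
        *pfReadOnly = true; /* writes must still go through PGM so the handler fires */
        rc = VINF_SUCCESS;
    }
    /* VERR_PGM_PHYS_TLB_CATCH_ALL / VERR_PGM_PHYS_TLB_UNASSIGNED: don't cache a pointer. */
    return rc;
}
#endif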
996
997
998/*********************************************************************************************************************************
999* RAM Range Management *
1000*********************************************************************************************************************************/
1001
1002/**
1003 * Given the range @a GCPhys thru @a GCPhysLast, find overlapping RAM range or
1004 * the correct insertion point.
1005 *
1006 * @returns Pointer to overlapping RAM range if found, NULL if not.
1007 * @param pVM The cross context VM structure.
1008 * @param GCPhys The address of the first byte in the range.
1009 * @param GCPhysLast The address of the last byte in the range.
1010 * @param pidxInsert Where to return the lookup table index to insert the
1011 * range at when returning NULL. Set to UINT32_MAX when
1012 * returning the pointer to an overlapping range.
1013 * @note Caller must own the PGM lock.
1014 */
1015static PPGMRAMRANGE pgmR3PhysRamRangeFindOverlapping(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, uint32_t *pidxInsert)
1016{
1017 PGM_LOCK_ASSERT_OWNER(pVM);
1018 uint32_t iStart = 0;
1019 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1020 for (;;)
1021 {
1022 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1023 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1024 if (GCPhysLast < GCPhysEntryFirst)
1025 {
1026 if (idxLookup > iStart)
1027 iEnd = idxLookup;
1028 else
1029 {
1030 *pidxInsert = idxLookup;
1031 return NULL;
1032 }
1033 }
1034 else
1035 {
1036 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1037 if (GCPhys > GCPhysEntryLast)
1038 {
1039 idxLookup += 1;
1040 if (idxLookup < iEnd)
1041 iStart = idxLookup;
1042 else
1043 {
1044 *pidxInsert = idxLookup;
1045 return NULL;
1046 }
1047 }
1048 else
1049 {
1050 /* overlap */
1051 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1052 *pidxInsert = UINT32_MAX;
1053 return pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])];
1054 }
1055 }
1056 }
1057}
1058
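/*
 * Illustrative sketch (not from the original sources): the same
 * binary-search-for-overlap idea on a plain, sorted, non-overlapping interval
 * table, returning either the overlapping index or the insertion point.
 */
#if 0
typedef struct EXAMPLEINTERVAL { uint64_t uFirst, uLast; } EXAMPLEINTERVAL;

static int32_t exampleFindOverlapOrInsertPoint(EXAMPLEINTERVAL const *paIntervals, uint32_t cIntervals,
                                               uint64_t uFirst, uint64_t uLast, uint32_t *pidxInsert)
{
    uint32_t iStart = 0;
    uint32_t iEnd   = cIntervals;
    while (iStart < iEnd)
    {
        uint32_t const idx = iStart + (iEnd - iStart) / 2;
        if (uLast < paIntervals[idx].uFirst)
            iEnd = idx;                     /* wanted range lies entirely below this entry */
        else if (uFirst > paIntervals[idx].uLast)
            iStart = idx + 1;               /* wanted range lies entirely above this entry */
        else
            return (int32_t)idx;            /* overlap */
    }
    *pidxInsert = iStart;                   /* no overlap; this is where it would go */
    return -1;
}
#endif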
1059
1060/**
1061 * Given the range @a GCPhys thru @a GCPhysLast, find the lookup table entry
1062 * that's overlapping it.
1063 *
1064 * @returns The lookup table index of the overlapping entry, UINT32_MAX if not
1065 * found.
1066 * @param pVM The cross context VM structure.
1067 * @param GCPhys The address of the first byte in the range.
1068 * @param GCPhysLast The address of the last byte in the range.
1069 * @note Caller must own the PGM lock.
1070 */
1071static uint32_t pgmR3PhysRamRangeFindOverlappingIndex(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast)
1072{
1073 PGM_LOCK_ASSERT_OWNER(pVM);
1074 uint32_t iStart = 0;
1075 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1076 for (;;)
1077 {
1078 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1079 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1080 if (GCPhysLast < GCPhysEntryFirst)
1081 {
1082 if (idxLookup > iStart)
1083 iEnd = idxLookup;
1084 else
1085 return UINT32_MAX;
1086 }
1087 else
1088 {
1089 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1090 if (GCPhys > GCPhysEntryLast)
1091 {
1092 idxLookup += 1;
1093 if (idxLookup < iEnd)
1094 iStart = idxLookup;
1095 else
1096 return UINT32_MAX;
1097 }
1098 else
1099 {
1100 /* overlap */
1101 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1102 return idxLookup;
1103 }
1104 }
1105 }
1106}
1107
1108
1109/**
1110 * Insert @a pRam into the lookup table.
1111 *
1112 * @returns VBox status code.
1113 * @param pVM The cross context VM structure.
1114 * @param pRam The RAM range to insert into the lookup table.
1115 * @param GCPhys The new mapping address to assign @a pRam on insertion.
1116 * @param pidxLookup Optional lookup table hint. This is updated.
1117 * @note Caller must own PGM lock.
1118 */
1119static int pgmR3PhysRamRangeInsertLookup(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, uint32_t *pidxLookup)
1120{
1121 PGM_LOCK_ASSERT_OWNER(pVM);
1122#ifdef DEBUG_bird
1123 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, true /*fRamRelaxed*/);
1124#endif
1125 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
1126 AssertLogRelMsgReturn( pRam->GCPhys == NIL_RTGCPHYS
1127 && pRam->GCPhysLast == NIL_RTGCPHYS,
1128 ("GCPhys=%RGp; range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1129 GCPhys, pRam->GCPhys, pRam->cb, pRam->GCPhysLast, pRam->pszDesc),
1130 VERR_ALREADY_EXISTS);
1131 uint32_t const idRamRange = pRam->idRange;
1132 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_2);
1133
1134 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1135 RTGCPHYS const GCPhysLast = GCPhys + pRam->cb - 1U;
1136 AssertReturn(GCPhysLast > GCPhys, VERR_INTERNAL_ERROR_4);
1137 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1138
1139 /*
1140 * Find the lookup table location if necessary.
1141 */
1142 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1143 AssertLogRelMsgReturn(cLookupEntries + 1 < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1144 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1145
1146 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1147 if (cLookupEntries == 0)
1148 idxLookup = 0; /* special case: empty table */
1149 else
1150 {
1151 if ( idxLookup > cLookupEntries
1152 || ( idxLookup != 0
1153 && pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast >= GCPhys)
1154 || ( idxLookup < cLookupEntries
1155 && PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]) < GCPhysLast))
1156 {
1157 PPGMRAMRANGE pOverlapping = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxLookup);
1158 AssertLogRelMsgReturn(!pOverlapping,
1159 ("GCPhys=%RGp; GCPhysLast=%RGp %s - overlaps %RGp...%RGp %s\n",
1160 GCPhys, GCPhysLast, pRam->pszDesc,
1161 pOverlapping->GCPhys, pOverlapping->GCPhysLast, pOverlapping->pszDesc),
1162 VERR_PGM_RAM_CONFLICT);
1163 AssertLogRelMsgReturn(idxLookup <= cLookupEntries, ("%#x vs %#x\n", idxLookup, cLookupEntries), VERR_INTERNAL_ERROR_5);
1164 }
1165 /* else we've got a good hint. */
1166 }
1167
1168 /*
1169 * Do the actual job.
1170 *
1171 * The moving of existing table entries is done in a way that allows other
1172 * EMTs to perform concurrent lookups with the updating.
1173 */
1174 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1175 && pVM->cCpus > 1
1176#ifdef RT_ARCH_AMD64
1177 && g_CpumHostFeatures.s.fCmpXchg16b
1178#endif
1179 ;
1180
1181 /* Signal that we're modifying the lookup table: */
1182 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1183 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1184
1185 /* Update the RAM range entry. */
1186 pRam->GCPhys = GCPhys;
1187 pRam->GCPhysLast = GCPhysLast;
1188
1189 /* Do we need to shift any lookup table entries? */
1190 if (idxLookup != cLookupEntries)
1191 {
1192 /* We do. Make a copy of the final entry first. */
1193 uint32_t cToMove = cLookupEntries - idxLookup;
1194 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[cLookupEntries];
1195 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1196 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1197
1198 /* Then increase the table size. This will ensure that anyone starting
1199 a search from here on should have consistent data. */
1200 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.cLookupEntries, cLookupEntries + 1);
1201
1202 /* Transfer the rest of the entries. */
1203 cToMove -= 1;
1204 if (cToMove > 0)
1205 {
1206 if (!fUseAtomic)
1207 do
1208 {
1209 pCur -= 1;
1210 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1211 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1212 cToMove -= 1;
1213 } while (cToMove > 0);
1214 else
1215 {
1216#if RTASM_HAVE_WRITE_U128 >= 2
1217 do
1218 {
1219 pCur -= 1;
1220 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[-1].u128Normal);
1221 cToMove -= 1;
1222 } while (cToMove > 0);
1223
1224#else
1225 uint64_t u64PrevLo = pCur[-1].u128Normal.s.Lo;
1226 uint64_t u64PrevHi = pCur[-1].u128Normal.s.Hi;
1227 do
1228 {
1229 pCur -= 1;
1230 uint64_t const u64CurLo = pCur[-1].u128Normal.s.Lo;
1231 uint64_t const u64CurHi = pCur[-1].u128Normal.s.Hi;
1232 uint128_t uOldIgn;
1233 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1234 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1235 u64PrevLo = u64CurLo;
1236 u64PrevHi = u64CurHi;
1237 cToMove -= 1;
1238 } while (cToMove > 0);
1239#endif
1240 }
1241 }
1242 }
1243
1244 /*
1245 * Write the new entry.
1246 */
1247 PGMRAMRANGELOOKUPENTRY *pInsert = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1248 if (!fUseAtomic)
1249 {
1250 pInsert->GCPhysFirstAndId = idRamRange | GCPhys;
1251 pInsert->GCPhysLast = GCPhysLast;
1252 }
1253 else
1254 {
1255 PGMRAMRANGELOOKUPENTRY NewEntry;
1256 NewEntry.GCPhysFirstAndId = idRamRange | GCPhys;
1257 NewEntry.GCPhysLast = GCPhysLast;
1258 ASMAtomicWriteU128v2(&pInsert->u128Volatile.u, NewEntry.u128Normal.s.Hi, NewEntry.u128Normal.s.Lo);
1259 }
1260
1261 /*
1262 * Update the generation and count in one go, signaling the end of the updating.
1263 */
1264 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1265 GenAndCount.cLookupEntries = cLookupEntries + 1;
1266 GenAndCount.idGeneration = idGeneration + 1;
1267 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1268
1269 if (pidxLookup)
1270 *pidxLookup = idxLookup + 1;
1271
1272#ifdef DEBUG_bird
1273 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1274#endif
1275 return VINF_SUCCESS;
1276}
1277
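/*
 * Illustrative sketch (not from the original sources): the lockless reader
 * side implied by the generation/count protocol above - skip or retry while
 * the generation is odd (update in progress) or changes across the scan.  The
 * real lookup code lives elsewhere in PGM; this only shows the seqlock-style
 * pattern, using a linear scan for brevity.
 */
#if 0
static PPGMRAMRANGE exampleLocklessRangeLookup(PVM pVM, RTGCPHYS GCPhys)
{
    for (;;)
    {
        uint32_t const idGen = ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.idGeneration);
        if (!(idGen & 1)) /* odd generation means an update is in progress */
        {
            uint32_t const cEntries = ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.cLookupEntries);
            PPGMRAMRANGE   pRam     = NULL;
            for (uint32_t i = 0; i < cEntries; i++)
                if (   GCPhys >= PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[i])
                    && GCPhys <= pVM->pgm.s.aRamRangeLookup[i].GCPhysLast)
                {
                    pRam = pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[i])];
                    break;
                }
            if (ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.idGeneration) == idGen)
                return pRam; /* the table didn't change while we scanned it */
        }
        ASMNopPause();
    }
}
#endif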
1278
1279/**
1280 * Removes @a pRam from the lookup table.
1281 *
1282 * @returns VBox status code.
1283 * @param pVM The cross context VM structure.
1284 * @param pRam The RAM range to remove from the lookup table.
1285 * @param pidxLookup Optional lookup table hint. This is updated.
1286 * @note Caller must own PGM lock.
1287 */
1288static int pgmR3PhysRamRangeRemoveLookup(PVM pVM, PPGMRAMRANGE pRam, uint32_t *pidxLookup)
1289{
1290 PGM_LOCK_ASSERT_OWNER(pVM);
1291 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
1292
1293 RTGCPHYS const GCPhys = pRam->GCPhys;
1294 RTGCPHYS const GCPhysLast = pRam->GCPhysLast;
1295 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1296 || GCPhysLast != NIL_RTGCPHYS,
1297 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1298 VERR_NOT_FOUND);
1299 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1300 && GCPhysLast == GCPhys + pRam->cb - 1U
1301 && (GCPhys & GUEST_PAGE_OFFSET_MASK) == 0
1302 && (GCPhysLast & GUEST_PAGE_OFFSET_MASK) == GUEST_PAGE_OFFSET_MASK
1303 && GCPhysLast > GCPhys,
1304 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1305 VERR_INTERNAL_ERROR_5);
1306 uint32_t const idRamRange = pRam->idRange;
1307 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_4);
1308 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1309
1310 /*
1311 * Find the lookup table location.
1312 */
1313 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1314 AssertLogRelMsgReturn( cLookupEntries > 0
1315 && cLookupEntries < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1316 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1317
1318 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1319 if ( idxLookup >= cLookupEntries
1320 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast != GCPhysLast
1321 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysFirstAndId != (GCPhys | idRamRange))
1322 {
1323 uint32_t iStart = 0;
1324 uint32_t iEnd = cLookupEntries;
1325 for (;;)
1326 {
1327 idxLookup = iStart + (iEnd - iStart) / 2;
1328 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1329 if (GCPhysLast < GCPhysEntryFirst)
1330 {
1331 AssertLogRelMsgReturn(idxLookup > iStart,
1332 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1333 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1334 VERR_NOT_FOUND);
1335 iEnd = idxLookup;
1336 }
1337 else
1338 {
1339 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1340 if (GCPhys > GCPhysEntryLast)
1341 {
1342 idxLookup += 1;
1343 AssertLogRelMsgReturn(idxLookup < iEnd,
1344 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1345 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1346 VERR_NOT_FOUND);
1347 iStart = idxLookup;
1348 }
1349 else
1350 {
1351 uint32_t const idEntry = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1352 AssertLogRelMsgReturn( GCPhysEntryFirst == GCPhys
1353 && GCPhysEntryLast == GCPhysLast
1354 && idEntry == idRamRange,
1355 ("Found: %RGp..%RGp id=%#x; Wanted: GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n",
1356 GCPhysEntryFirst, GCPhysEntryLast, idEntry,
1357 GCPhys, pRam->cb, GCPhysLast, pRam->idRange, pRam->pszDesc),
1358 VERR_NOT_FOUND);
1359 break;
1360 }
1361 }
1362 }
1363 }
1364 /* else we've got a good hint. */
1365
1366 /*
1367 * Do the actual job.
1368 *
1369 * The moving of existing table entries is done in a way that allows other
1370 * EMTs to perform concurrent lookups with the updating.
1371 */
1372 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1373 && pVM->cCpus > 1
1374#ifdef RT_ARCH_AMD64
1375 && g_CpumHostFeatures.s.fCmpXchg16b
1376#endif
1377 ;
1378
1379 /* Signal that we're modifying the lookup table: */
1380 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1381 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1382
1383 /* Do we need to shift any lookup table entries? (This is a lot simpler
1384 than insertion.) */
1385 if (idxLookup + 1U < cLookupEntries)
1386 {
1387 uint32_t cToMove = cLookupEntries - idxLookup - 1U;
1388 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1389 if (!fUseAtomic)
1390 do
1391 {
1392 pCur->GCPhysFirstAndId = pCur[1].GCPhysFirstAndId;
1393 pCur->GCPhysLast = pCur[1].GCPhysLast;
1394 pCur += 1;
1395 cToMove -= 1;
1396 } while (cToMove > 0);
1397 else
1398 {
1399#if RTASM_HAVE_WRITE_U128 >= 2
1400 do
1401 {
1402 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[1].u128Normal);
1403 pCur += 1;
1404 cToMove -= 1;
1405 } while (cToMove > 0);
1406
1407#else
1408 uint64_t u64PrevLo = pCur->u128Normal.s.Lo;
1409 uint64_t u64PrevHi = pCur->u128Normal.s.Hi;
1410 do
1411 {
1412 uint64_t const u64CurLo = pCur[1].u128Normal.s.Lo;
1413 uint64_t const u64CurHi = pCur[1].u128Normal.s.Hi;
1414 uint128_t uOldIgn;
1415 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1416 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1417 u64PrevLo = u64CurLo;
1418 u64PrevHi = u64CurHi;
1419 pCur += 1;
1420 cToMove -= 1;
1421 } while (cToMove > 0);
1422#endif
1423 }
1424 }
1425
1426 /* Update the RAM range entry to indicate that it is no longer mapped.
1427 The GCPhys member is accessed by the lockless TLB lookup code, so update
1428 it last and atomically to be on the safe side. */
1429 pRam->GCPhysLast = NIL_RTGCPHYS;
1430 ASMAtomicWriteU64(&pRam->GCPhys, NIL_RTGCPHYS);
1431
1432 /*
1433 * Update the generation and count in one go, signaling the end of the updating.
1434 */
1435 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1436 GenAndCount.cLookupEntries = cLookupEntries - 1;
1437 GenAndCount.idGeneration = idGeneration + 1;
1438 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1439
1440 if (pidxLookup)
1441 *pidxLookup = idxLookup + 1;
1442
1443 return VINF_SUCCESS;
1444}
1445
1446
1447/**
1448 * Gets the number of ram ranges.
1449 *
1450 * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid.
1451 * @param pVM The cross context VM structure.
1452 */
1453VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM)
1454{
1455 VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX);
1456
1457 PGM_LOCK_VOID(pVM);
1458 uint32_t const cRamRanges = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1459 PGM_UNLOCK(pVM);
1460 return cRamRanges;
1461}
1462
1463
1464/**
1465 * Get information about a range.
1466 *
1467 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE.
1468 * @param pVM The cross context VM structure.
1469 * @param iRange The ordinal of the range.
1470 * @param pGCPhysStart Where to return the start of the range. Optional.
1471 * @param pGCPhysLast Where to return the address of the last byte in the
1472 * range. Optional.
1473 * @param ppszDesc Where to return the range description. Optional.
1474 * @param pfIsMmio Where to indicate that this is a pure MMIO range.
1475 * Optional.
1476 */
1477VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast,
1478 const char **ppszDesc, bool *pfIsMmio)
1479{
1480 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1481
1482 PGM_LOCK_VOID(pVM);
1483 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1484 if (iRange < cLookupEntries)
1485 {
1486 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[iRange]);
1487 Assert(idRamRange && idRamRange <= pVM->pgm.s.idRamRangeMax);
1488 PGMRAMRANGE const * const pRamRange = pVM->pgm.s.apRamRanges[idRamRange];
1489 AssertPtr(pRamRange);
1490
1491 if (pGCPhysStart)
1492 *pGCPhysStart = pRamRange->GCPhys;
1493 if (pGCPhysLast)
1494 *pGCPhysLast = pRamRange->GCPhysLast;
1495 if (ppszDesc)
1496 *ppszDesc = pRamRange->pszDesc;
1497 if (pfIsMmio)
1498 *pfIsMmio = !!(pRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO);
1499
1500 PGM_UNLOCK(pVM);
1501 return VINF_SUCCESS;
1502 }
1503 PGM_UNLOCK(pVM);
1504 return VERR_OUT_OF_RANGE;
1505}
1506
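/*
 * Illustrative sketch (not from the original sources): enumerating the RAM
 * ranges with the two informational APIs above, e.g. for a release-log dump.
 */
#if 0
static void exampleDumpRamRanges(PVM pVM)
{
    uint32_t const cRanges = PGMR3PhysGetRamRangeCount(pVM);
    for (uint32_t i = 0; i < cRanges; i++)
    {
        RTGCPHYS    GCPhysStart, GCPhysLast;
        const char *pszDesc;
        bool        fIsMmio;
        if (RT_SUCCESS(PGMR3PhysGetRange(pVM, i, &GCPhysStart, &GCPhysLast, &pszDesc, &fIsMmio)))
            LogRel(("Range %u: %RGp-%RGp %s%s\n", i, GCPhysStart, GCPhysLast, pszDesc, fIsMmio ? " (MMIO)" : ""));
    }
}
#endif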
1507
1508/**
1509 * Gets RAM ranges that are supposed to be zero'ed at boot.
1510 *
1511 * This function gets all RAM ranges that are not ad hoc (ROM, MMIO, MMIO2) memory.
1512 * The RAM hole (if any) is -NOT- included because we don't return 0s when it is
1513 * read anyway.
1514 *
1515 * @returns VBox status code.
1516 * @param pVM The cross context VM structure.
1517 * @param pRanges Where to store the physical RAM ranges.
1518 * @param cMaxRanges The maximum ranges that can be stored.
1519 */
1520VMMR3_INT_DECL(int) PGMR3PhysGetRamBootZeroedRanges(PVM pVM, PPGMPHYSRANGES pRanges, uint32_t cMaxRanges)
1521{
1522 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1523 AssertPtrReturn(pRanges, VERR_INVALID_PARAMETER);
1524 AssertReturn(cMaxRanges > 0, VERR_INVALID_PARAMETER);
1525
1526 int rc = VINF_SUCCESS;
1527 uint32_t idxRange = 0;
1528 PGM_LOCK_VOID(pVM);
1529
1530 /*
1531 * The primary purpose of this API is the GIM Hyper-V hypercall which recommends (not
1532 * requires) that the largest ranges are reported earlier. Therefore, here we iterate
1533 * the ranges in reverse because in PGM the largest range is generally at the end.
1534 */
1535 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1536 for (int32_t idxLookup = cLookupEntries - 1; idxLookup >= 0; idxLookup--)
1537 {
1538 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1539 Assert(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1540 PPGMRAMRANGE const pCur = pVM->pgm.s.apRamRanges[idRamRange];
1541 AssertContinue(pCur);
1542
1543 if (!PGM_RAM_RANGE_IS_AD_HOC(pCur))
1544 {
1545 if (idxRange < cMaxRanges)
1546 {
1547 /* Combine with previous range if it is contiguous, otherwise add it as a new range. */
1548 if ( idxRange > 0
1549 && pRanges->aRanges[idxRange - 1].GCPhysStart == pCur->GCPhysLast + 1U)
1550 {
1551 pRanges->aRanges[idxRange - 1].GCPhysStart = pCur->GCPhys;
1552 pRanges->aRanges[idxRange - 1].cPages += (pCur->cb >> GUEST_PAGE_SHIFT);
1553 }
1554 else
1555 {
1556 pRanges->aRanges[idxRange].GCPhysStart = pCur->GCPhys;
1557 pRanges->aRanges[idxRange].cPages = pCur->cb >> GUEST_PAGE_SHIFT;
1558 ++idxRange;
1559 }
1560 }
1561 else
1562 {
1563 rc = VERR_BUFFER_OVERFLOW;
1564 break;
1565 }
1566 }
1567 }
1568 pRanges->cRanges = idxRange;
1569 PGM_UNLOCK(pVM);
1570 return rc;
1571}
1572
1573
1574/*********************************************************************************************************************************
1575* RAM *
1576*********************************************************************************************************************************/
1577
1578/**
1579 * Frees the specified RAM page and replaces it with the ZERO page.
1580 *
1581 * This is used by ballooning, remapping MMIO2, RAM reset and state loading.
1582 *
1583 * @param pVM The cross context VM structure.
1584 * @param pReq Pointer to the request. This is NULL when doing a
1585 * bulk free in NEM memory mode.
1586 * @param pcPendingPages Where the number of pages waiting to be freed is
1587 * kept. This will normally be incremented. This is
1588 * NULL when doing a bulk free in NEM memory mode.
1589 * @param pPage Pointer to the page structure.
1590 * @param GCPhys The guest physical address of the page, if applicable.
1591 * @param enmNewType New page type for NEM notification, since several
1592 * callers will change the type upon successful return.
1593 *
1594 * @remarks The caller must own the PGM lock.
1595 */
1596int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys,
1597 PGMPAGETYPE enmNewType)
1598{
1599 RT_NOREF(enmNewType, pcPendingPages);
1600
1601 /*
1602 * Assert sanity.
1603 */
1604 PGM_LOCK_ASSERT_OWNER(pVM);
1605 if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM
1606 && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW))
1607 {
1608 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1609 return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage));
1610 }
1611
1612 /** @todo What about ballooning of large pages??! */
1613 Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE
1614 && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED);
1615
1616 if ( PGM_PAGE_IS_ZERO(pPage)
1617 || PGM_PAGE_IS_BALLOONED(pPage))
1618 return VINF_SUCCESS;
1619
1620 const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage);
1621 Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage));
1622 if (RT_UNLIKELY(!PGM_IS_IN_NEM_MODE(pVM)
1623 ? idPage == NIL_GMM_PAGEID
1624 || idPage > GMM_PAGEID_LAST
1625 || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID
1626 : idPage != NIL_GMM_PAGEID))
1627 {
1628 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1629 return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, pPage);
1630 }
1631#if defined(VBOX_WITH_NATIVE_NEM) && !defined(VBOX_WITH_ONLY_PGM_NEM_MODE)
1632 const RTHCPHYS HCPhysPrev = PGM_PAGE_GET_HCPHYS(pPage);
1633#endif
1634
1635 /* update page count stats. */
1636 if (PGM_PAGE_IS_SHARED(pPage))
1637 pVM->pgm.s.cSharedPages--;
1638 else
1639 pVM->pgm.s.cPrivatePages--;
1640 pVM->pgm.s.cZeroPages++;
1641
1642 /* Deal with write monitored pages. */
1643 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
1644 {
1645 PGM_PAGE_SET_WRITTEN_TO(pVM, pPage);
1646 pVM->pgm.s.cWrittenToPages++;
1647 }
1648 PGM_PAGE_CLEAR_CODE_PAGE(pVM, pPage); /* No callback needed, IEMTlbInvalidateAllPhysicalAllCpus is called below. */
1649
1650 /*
1651 * pPage = ZERO page.
1652 */
1653 PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg);
1654 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1655 PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID);
1656 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
1657 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
1658 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
1659
1660 /* Flush physical page map TLB entry. */
1661 pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys);
1662 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_FREED); /// @todo move to the perform step.
1663
1664#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1665# ifdef VBOX_WITH_PGM_NEM_MODE
1666 /*
1667 * Skip the rest if we're doing a bulk free in NEM memory mode.
1668 */
1669 if (!pReq)
1670 return VINF_SUCCESS;
1671 AssertLogRelReturn(!pVM->pgm.s.fNemMode, VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
1672# endif
1673
1674# ifdef VBOX_WITH_NATIVE_NEM
1675 /* Notify NEM. */
1676 /** @todo Remove this one? */
1677 if (VM_IS_NEM_ENABLED(pVM))
1678 {
1679 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage);
1680 NEMHCNotifyPhysPageChanged(pVM, GCPhys, HCPhysPrev, pVM->pgm.s.HCPhysZeroPg, pVM->pgm.s.abZeroPg,
1681 pgmPhysPageCalcNemProtection(pPage, enmNewType), enmNewType, &u2State);
1682 PGM_PAGE_SET_NEM_STATE(pPage, u2State);
1683 }
1684# endif
1685
1686 /*
1687 * Make sure it's not in the handy page array.
1688 */
1689 for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
1690 {
1691 if (pVM->pgm.s.aHandyPages[i].idPage == idPage)
1692 {
1693 pVM->pgm.s.aHandyPages[i].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
1694 pVM->pgm.s.aHandyPages[i].fZeroed = false;
1695 pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID;
1696 break;
1697 }
1698 if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage)
1699 {
1700 pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID;
1701 break;
1702 }
1703 }
1704
1705 /*
1706 * Push it onto the page array.
1707 */
1708 uint32_t iPage = *pcPendingPages;
1709 Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE);
1710 *pcPendingPages += 1;
1711
1712 pReq->aPages[iPage].idPage = idPage;
1713
1714 if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE)
1715 return VINF_SUCCESS;
1716
1717 /*
1718 * Flush the pages.
1719 */
1720 int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE);
1721 if (RT_SUCCESS(rc))
1722 {
1723 GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1724 *pcPendingPages = 0;
1725 }
1726 return rc;
1727
1728#else /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1729 Assert(!pReq); RT_NOREF(pReq);
1730 return VINF_SUCCESS;
1731#endif /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1732}
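/*
 * Typical usage sketch for pgmPhysFreePage (illustrative only; it mirrors the
 * batching pattern used by pgmR3PhysFreePageRange and pgmR3PhysRamZeroAll
 * further down).  Pages are queued in a GMM free request and flushed in
 * batches of PGMPHYS_FREE_PAGE_BATCH_SIZE:
 *
 *      uint32_t         cPendingPages = 0;
 *      PGMMFREEPAGESREQ pReq;
 *      int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
 *      AssertLogRelRCReturn(rc, rc);
 *
 *      // ... for each page that should be freed:
 *      rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, GCPhysPage, PGMPAGETYPE_RAM);
 *      AssertLogRelRCReturn(rc, rc);
 *
 *      // ... when done:
 *      if (cPendingPages)
 *      {
 *          rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
 *          AssertLogRelRCReturn(rc, rc);
 *      }
 *      GMMR3FreePagesCleanup(pReq);
 *
 * In NEM memory mode a caller may instead pass pReq=NULL and pcPendingPages=NULL
 * for a bulk free, in which case the function returns right after resetting the
 * PGMPAGE to the ZERO state (see pgmR3PhysFreePageRange below).
 */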
1733
1734
1735/**
1736 * Frees a range of pages, replacing them with MMIO ZERO pages.
1737 *
1738 * @returns VBox status code.
1739 * @param pVM The cross context VM structure.
1740 * @param   pRam        The RAM range in which the pages reside.
1741 * @param GCPhys The address of the first page.
1742 * @param GCPhysLast The address of the last page.
1743 * @param pvMmio2 Pointer to the ring-3 mapping of any MMIO2 memory that
1744 * will replace the pages we're freeing up.
1745 */
1746static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, void *pvMmio2)
1747{
1748 PGM_LOCK_ASSERT_OWNER(pVM);
1749 /** @todo pvMmio2 is always NULL. */
1750
1751#ifdef VBOX_WITH_PGM_NEM_MODE
1752 /*
1753 * In simplified memory mode we don't actually free the memory,
1754 * we just unmap it and let NEM do any unlocking of it.
1755 */
1756# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1757 if (pVM->pgm.s.fNemMode)
1758# endif
1759 {
1760 Assert(VM_IS_NEM_ENABLED(pVM) || VM_IS_EXEC_ENGINE_IEM(pVM));
1761 uint8_t u2State = 0; /* (We don't support UINT8_MAX here.) */
1762 if (VM_IS_NEM_ENABLED(pVM))
1763 {
1764 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1765 int rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify,
1766 pRam->pbR3 ? pRam->pbR3 + GCPhys - pRam->GCPhys : NULL,
1767 pvMmio2, &u2State, NULL /*puNemRange*/);
1768 AssertLogRelRCReturn(rc, rc);
1769 }
1770
1771 /* Iterate the pages. */
1772 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1773 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1774 while (cPagesLeft-- > 0)
1775 {
1776 int rc = pgmPhysFreePage(pVM, NULL, NULL, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1777 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1778
1779 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1780 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1781
1782 GCPhys += GUEST_PAGE_SIZE;
1783 pPageDst++;
1784 }
1785 return VINF_SUCCESS;
1786 }
1787#else /* !VBOX_WITH_PGM_NEM_MODE */
1788 RT_NOREF(pvMmio2);
1789#endif /* !VBOX_WITH_PGM_NEM_MODE */
1790#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1791
1792 /*
1793 * Regular mode.
1794 */
1795 /* Prepare. */
1796 uint32_t cPendingPages = 0;
1797 PGMMFREEPAGESREQ pReq;
1798 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1799 AssertLogRelRCReturn(rc, rc);
1800
1801# ifdef VBOX_WITH_NATIVE_NEM
1802 /* Tell NEM up-front. */
1803 uint8_t u2State = UINT8_MAX;
1804 if (VM_IS_NEM_ENABLED(pVM))
1805 {
1806 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1807 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify, NULL, pvMmio2,
1808 &u2State, NULL /*puNemRange*/);
1809 AssertLogRelRCReturnStmt(rc, GMMR3FreePagesCleanup(pReq), rc);
1810 }
1811# endif
1812
1813 /* Iterate the pages. */
1814 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1815 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1816 while (cPagesLeft-- > 0)
1817 {
1818 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1819 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1820
1821 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1822# ifdef VBOX_WITH_NATIVE_NEM
1823 if (u2State != UINT8_MAX)
1824 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1825# endif
1826
1827 GCPhys += GUEST_PAGE_SIZE;
1828 pPageDst++;
1829 }
1830
1831 /* Finish pending and cleanup. */
1832 if (cPendingPages)
1833 {
1834 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
1835 AssertLogRelRCReturn(rc, rc);
1836 }
1837 GMMR3FreePagesCleanup(pReq);
1838
1839 return rc;
1840#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
1841}
1842
1843
1844/**
1845 * Wrapper around VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE.
1846 */
1847static int pgmR3PhysAllocateRamRange(PVM pVM, PVMCPU pVCpu, uint32_t cGuestPages, uint32_t fFlags, PPGMRAMRANGE *ppRamRange)
1848{
1849 int rc;
1850 PGMPHYSALLOCATERAMRANGEREQ AllocRangeReq;
1851 AllocRangeReq.idNewRange = UINT32_MAX / 4;
1852#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
1853 if (!SUPR3IsDriverless())
1854 {
1855 AllocRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
1856 AllocRangeReq.Hdr.cbReq = sizeof(AllocRangeReq);
1857 AllocRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
1858 AllocRangeReq.cGuestPages = cGuestPages;
1859 AllocRangeReq.fFlags = fFlags;
1860 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE, 0 /*u64Arg*/, &AllocRangeReq.Hdr);
1861 }
1862 else
1863#endif
1864 rc = pgmPhysRamRangeAllocCommon(pVM, cGuestPages, fFlags, &AllocRangeReq.idNewRange);
1865 if (RT_SUCCESS(rc))
1866 {
1867 Assert(AllocRangeReq.idNewRange != 0);
1868 Assert(AllocRangeReq.idNewRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1869 AssertPtr(pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange]);
1870 *ppRamRange = pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange];
1871 return VINF_SUCCESS;
1872 }
1873
1874 RT_NOREF(pVCpu);
1875 *ppRamRange = NULL;
1876 return rc;
1877}
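/*
 * Note: both paths above hand back the new range by ID rather than by pointer;
 * in the non-driverless case the actual allocation is done on the ring-0 side
 * (VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE) and only AllocRangeReq.idNewRange is
 * returned, which is then resolved through pVM->pgm.s.apRamRanges[].  A caller
 * sketch (this is essentially what pgmR3PhysRegisterRamWorker below does):
 *
 *      PPGMRAMRANGE pNew = NULL;
 *      rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPagesInChunk, 0, &pNew);
 *      AssertLogRelRCReturn(rc, rc);
 */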
1878
1879
1880/**
1881 * PGMR3PhysRegisterRam worker that initializes and links a RAM range.
1882 *
1883 * In NEM mode, this will allocate the pages backing the RAM range, which may
1884 * fail, as may the NEM registration. (In regular HM mode it won't fail.)
1885 *
1886 * @returns VBox status code.
1887 * @param pVM The cross context VM structure.
1888 * @param pNew The new RAM range.
1889 * @param GCPhys The address of the RAM range.
1890 * @param GCPhysLast The last address of the RAM range.
1891 * @param pszDesc The description.
1892 * @param pidxLookup The lookup table insertion point.
1893 */
1894static int pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast,
1895 const char *pszDesc, uint32_t *pidxLookup)
1896{
1897 /*
1898 * Initialize the range.
1899 */
1900 Assert(pNew->cb == GCPhysLast - GCPhys + 1U); RT_NOREF(GCPhysLast);
1901 pNew->pszDesc = pszDesc;
1902 pNew->uNemRange = UINT32_MAX;
1903 pNew->pbR3 = NULL;
1904 pNew->paLSPages = NULL;
1905
1906 uint32_t const cPages = pNew->cb >> GUEST_PAGE_SHIFT;
1907#ifdef VBOX_WITH_PGM_NEM_MODE
1908 if (PGM_IS_IN_NEM_MODE(pVM))
1909 {
1910 int rc = SUPR3PageAlloc(RT_ALIGN_Z(pNew->cb, HOST_PAGE_SIZE_DYNAMIC) >> HOST_PAGE_SHIFT_DYNAMIC,
1911 pVM->pgm.s.fUseLargePages ? SUP_PAGE_ALLOC_F_LARGE_PAGES : 0, (void **)&pNew->pbR3);
1912 if (RT_FAILURE(rc))
1913 return rc;
1914
1915 RTGCPHYS iPage = cPages;
1916 while (iPage-- > 0)
1917 PGM_PAGE_INIT(&pNew->aPages[iPage], UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
1918 PGMPAGETYPE_RAM, PGM_PAGE_STATE_ALLOCATED);
1919
1920 /* Update the page count stats. */
1921 pVM->pgm.s.cPrivatePages += cPages;
1922 pVM->pgm.s.cAllPages += cPages;
1923 }
1924 else
1925#endif
1926 {
1927#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1928 RTGCPHYS iPage = cPages;
1929 while (iPage-- > 0)
1930 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM);
1931
1932 /* Update the page count stats. */
1933 pVM->pgm.s.cZeroPages += cPages;
1934 pVM->pgm.s.cAllPages += cPages;
1935#endif
1936 }
1937
1938 /*
1939 * Insert it into the lookup table.
1940 */
1941 int rc = pgmR3PhysRamRangeInsertLookup(pVM, pNew, GCPhys, pidxLookup);
1942 AssertRCReturn(rc, rc);
1943
1944#ifdef VBOX_WITH_NATIVE_NEM
1945 /*
1946 * Notify NEM now that it has been linked.
1947 *
1948 * As above, it is assumed that on failure the VM creation will fail, so
1949 * no extra cleanup is needed here.
1950 */
1951 if (VM_IS_NEM_ENABLED(pVM))
1952 {
1953 uint8_t u2State = UINT8_MAX;
1954 rc = NEMR3NotifyPhysRamRegister(pVM, GCPhys, pNew->cb, pNew->pbR3, &u2State, &pNew->uNemRange);
1955 if (RT_SUCCESS(rc) && u2State != UINT8_MAX)
1956 pgmPhysSetNemStateForPages(&pNew->aPages[0], cPages, u2State);
1957 return rc;
1958 }
1959#endif
1960 return VINF_SUCCESS;
1961}
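/*
 * Note on the two initialization paths above: in NEM mode the backing memory is
 * allocated up front with SUPR3PageAlloc(), rounded up to the host page size
 * (which may be larger than GUEST_PAGE_SIZE, e.g. on hosts using 16 KiB pages),
 * and every PGMPAGE starts out in the ALLOCATED state.  In regular mode all
 * pages start out as ZERO pages and real backing is only allocated lazily on
 * the first write (or up front by pgmR3PhysRamPreAllocate below when
 * pre-allocation is configured).
 */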
1962
1963
1964/**
1965 * Worker for PGMR3PhysRegisterRam called with the PGM lock.
1966 *
1967 * The caller releases the lock.
1968 */
1969static int pgmR3PhysRegisterRamWorker(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc,
1970 uint32_t const cRamRanges, RTGCPHYS const GCPhysLast)
1971{
1972#ifdef VBOX_STRICT
1973 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1974#endif
1975
1976 /*
1977 * Check that we've got enough free RAM ranges.
1978 */
1979 AssertLogRelMsgReturn((uint64_t)pVM->pgm.s.idRamRangeMax + cRamRanges + 1 <= RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup),
1980 ("idRamRangeMax=%#RX32 vs GCPhys=%RGp cb=%RGp / %#RX32 ranges (%s)\n",
1981 pVM->pgm.s.idRamRangeMax, GCPhys, cb, cRamRanges, pszDesc),
1982 VERR_PGM_TOO_MANY_RAM_RANGES);
1983
1984 /*
1985 * Check for conflicts via the lookup table. We search it backwards,
1986 * assuming that memory is added in ascending order by address.
1987 */
1988 uint32_t idxLookup = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1989 while (idxLookup)
1990 {
1991 if (GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast)
1992 break;
1993 idxLookup--;
1994 RTGCPHYS const GCPhysCur = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1995 AssertLogRelMsgReturn( GCPhysLast < GCPhysCur
1996 || GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1997 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
1998 GCPhys, GCPhysLast, pszDesc, GCPhysCur, pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1999 pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])]->pszDesc),
2000 VERR_PGM_RAM_CONFLICT);
2001 }
2002
2003 /*
2004     * Register it with GMM (the API will complain if we don't).
2005 */
2006 const RTGCPHYS cPages = cb >> GUEST_PAGE_SHIFT;
2007 int rc = MMR3IncreaseBaseReservation(pVM, cPages);
2008 if (RT_FAILURE(rc))
2009 return rc;
2010
2011 /*
2012 * Create the required chunks.
2013 */
2014 RTGCPHYS cPagesLeft = cPages;
2015 RTGCPHYS GCPhysChunk = GCPhys;
2016 uint32_t idxChunk = 0;
2017 while (cPagesLeft > 0)
2018 {
2019 uint32_t cPagesInChunk = cPagesLeft;
2020 if (cPagesInChunk > PGM_MAX_PAGES_PER_RAM_RANGE)
2021 cPagesInChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2022
2023 const char *pszDescChunk = idxChunk == 0
2024 ? pszDesc
2025 : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, idxChunk + 1);
2026 AssertReturn(pszDescChunk, VERR_NO_MEMORY);
2027
2028 /*
2029 * Allocate a RAM range.
2030 */
2031 PPGMRAMRANGE pNew = NULL;
2032 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPagesInChunk, 0 /*fFlags*/, &pNew);
2033 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2034 ("pgmR3PhysAllocateRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2035 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2036 rc);
2037
2038 /*
2039 * Ok, init and link the range.
2040 */
2041 rc = pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhysChunk,
2042 GCPhysChunk + ((RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT) - 1U,
2043 pszDescChunk, &idxLookup);
2044 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2045 ("pgmR3PhysInitAndLinkRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2046 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2047 rc);
2048
2049 /* advance */
2050 GCPhysChunk += (RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT;
2051 cPagesLeft -= cPagesInChunk;
2052 idxChunk++;
2053 }
2054
2055 return rc;
2056}
2057
2058
2059/**
2060 * Sets up a RAM range.
2061 *
2062 * This will check for conflicting registrations, make a resource reservation
2063 * for the memory (with GMM), and set up the per-page tracking structures
2064 * (PGMPAGE).
2065 *
2066 * @returns VBox status code.
2067 * @param pVM The cross context VM structure.
2068 * @param GCPhys The physical address of the RAM.
2069 * @param cb The size of the RAM.
2070 * @param pszDesc The description - not copied, so, don't free or change it.
2071 */
2072VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc)
2073{
2074 /*
2075 * Validate input.
2076 */
2077 Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc));
2078 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
2079 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
2080 AssertReturn(cb > 0, VERR_INVALID_PARAMETER);
2081 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
2082 AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER);
2083 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2084 PVMCPU const pVCpu = VMMGetCpu(pVM);
2085 AssertReturn(pVCpu, VERR_VM_THREAD_NOT_EMT);
2086 AssertReturn(pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2087
2088 /*
2089 * Calculate the number of RAM ranges required.
2090 * See also pgmPhysMmio2CalcChunkCount.
2091 */
2092 uint32_t const cPagesPerChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2093 uint32_t const cRamRanges = (uint32_t)(((cb >> GUEST_PAGE_SHIFT) + cPagesPerChunk - 1) / cPagesPerChunk);
2094 AssertLogRelMsgReturn(cRamRanges * (RTGCPHYS)cPagesPerChunk * GUEST_PAGE_SIZE >= cb,
2095 ("cb=%RGp cRamRanges=%#RX32 cPagesPerChunk=%#RX32\n", cb, cRamRanges, cPagesPerChunk),
2096 VERR_OUT_OF_RANGE);
2097
2098 PGM_LOCK_VOID(pVM);
2099
2100 int rc = pgmR3PhysRegisterRamWorker(pVM, pVCpu, GCPhys, cb, pszDesc, cRamRanges, GCPhysLast);
2101#ifdef VBOX_STRICT
2102 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2103#endif
2104
2105 PGM_UNLOCK(pVM);
2106 return rc;
2107}
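/*
 * Illustrative example (hypothetical values, shown only for the calling
 * convention; the real callers live elsewhere and run on EMT(0) during VM
 * construction):
 *
 *      int rc = PGMR3PhysRegisterRam(pVM, 0, 512 * _1M, "Base RAM");
 *      AssertRCReturn(rc, rc);
 *
 * Ranges bigger than PGM_MAX_PAGES_PER_RAM_RANGE pages are split into several
 * chunks by pgmR3PhysRegisterRamWorker; the 2nd and later chunks get " (#2)",
 * " (#3)", ... appended to the description.
 */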
2108
2109
2110#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2111/**
2112 * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM.
2113 *
2114 * We do this late in the init process so that all the ROM and MMIO ranges have
2115 * been registered already and we don't go wasting memory on them.
2116 *
2117 * @returns VBox status code.
2118 *
2119 * @param pVM The cross context VM structure.
2120 */
2121int pgmR3PhysRamPreAllocate(PVM pVM)
2122{
2123 Assert(pVM->pgm.s.fRamPreAlloc);
2124 Log(("pgmR3PhysRamPreAllocate: enter\n"));
2125# ifdef VBOX_WITH_PGM_NEM_MODE
2126 if (VM_IS_NEM_ENABLED(pVM))
2127 {
2128 LogRel(("PGM: Pre-alloc ignored in NEM mode.\n"));
2129 return VINF_SUCCESS;
2130 }
2131# endif
2132
2133 /*
2134     * Walk the RAM ranges and allocate all RAM pages, halting at
2135 * the first allocation error.
2136 */
2137 uint64_t cPages = 0;
2138 uint64_t NanoTS = RTTimeNanoTS();
2139 PGM_LOCK_VOID(pVM);
2140 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2141 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2142 {
2143 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2144 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2145 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2146 AssertContinue(pRam);
2147
2148 PPGMPAGE pPage = &pRam->aPages[0];
2149 RTGCPHYS GCPhys = pRam->GCPhys;
2150 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
2151 while (cLeft-- > 0)
2152 {
2153 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2154 {
2155 switch (PGM_PAGE_GET_STATE(pPage))
2156 {
2157 case PGM_PAGE_STATE_ZERO:
2158 {
2159 int rc = pgmPhysAllocPage(pVM, pPage, GCPhys);
2160 if (RT_FAILURE(rc))
2161 {
2162 LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc));
2163 PGM_UNLOCK(pVM);
2164 return rc;
2165 }
2166 cPages++;
2167 break;
2168 }
2169
2170 case PGM_PAGE_STATE_BALLOONED:
2171 case PGM_PAGE_STATE_ALLOCATED:
2172 case PGM_PAGE_STATE_WRITE_MONITORED:
2173 case PGM_PAGE_STATE_SHARED:
2174 /* nothing to do here. */
2175 break;
2176 }
2177 }
2178
2179 /* next */
2180 pPage++;
2181 GCPhys += GUEST_PAGE_SIZE;
2182 }
2183 }
2184 PGM_UNLOCK(pVM);
2185 NanoTS = RTTimeNanoTS() - NanoTS;
2186
2187 LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000));
2188 Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n"));
2189 return VINF_SUCCESS;
2190}
2191#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
2192
2193
2194/**
2195 * Checks shared page checksums.
2196 *
2197 * @param pVM The cross context VM structure.
2198 */
2199void pgmR3PhysAssertSharedPageChecksums(PVM pVM)
2200{
2201#ifdef VBOX_STRICT
2202 PGM_LOCK_VOID(pVM);
2203
2204 if (pVM->pgm.s.cSharedPages > 0)
2205 {
2206 /*
2207 * Walk the ram ranges.
2208 */
2209 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2210 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2211 {
2212 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2213 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2214 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2215 AssertContinue(pRam);
2216
2217 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2218 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb,
2219 ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2220
2221 while (iPage-- > 0)
2222 {
2223 PPGMPAGE pPage = &pRam->aPages[iPage];
2224 if (PGM_PAGE_IS_SHARED(pPage))
2225 {
2226 uint32_t u32Checksum = pPage->s.u2Unused0/* | ((uint32_t)pPage->s.u2Unused1 << 8)*/;
2227 if (!u32Checksum)
2228 {
2229 RTGCPHYS GCPhysPage = pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT);
2230 void const *pvPage;
2231 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhysPage, &pvPage);
2232 if (RT_SUCCESS(rc))
2233 {
2234 uint32_t u32Checksum2 = RTCrc32(pvPage, GUEST_PAGE_SIZE);
2235# if 0
2236 AssertMsg((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum, ("GCPhysPage=%RGp\n", GCPhysPage));
2237# else
2238 if ((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum)
2239 LogFlow(("shpg %#x @ %RGp %#x [OK]\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2240 else
2241 AssertMsgFailed(("shpg %#x @ %RGp %#x\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2242# endif
2243 }
2244 else
2245 AssertRC(rc);
2246 }
2247 }
2248
2249 } /* for each page */
2250
2251 } /* for each ram range */
2252 }
2253
2254 PGM_UNLOCK(pVM);
2255#endif /* VBOX_STRICT */
2256 NOREF(pVM);
2257}
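/*
 * Note: the "checksum" verified above is only the low couple of bits of a
 * CRC-32 over the page content, stashed in otherwise unused PGMPAGE bits
 * (presumably when the page was turned into a shared page).  It is a cheap
 * strict-build sanity check for shared page corruption, not a full integrity
 * check.
 */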
2258
2259
2260/**
2261 * Resets the physical memory state.
2262 *
2263 * ASSUMES that the caller owns the PGM lock.
2264 *
2265 * @returns VBox status code.
2266 * @param pVM The cross context VM structure.
2267 */
2268int pgmR3PhysRamReset(PVM pVM)
2269{
2270 PGM_LOCK_ASSERT_OWNER(pVM);
2271
2272#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2273 /* Reset the memory balloon. */
2274 int rc1 = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2275 AssertRC(rc1);
2276#endif
2277
2278#ifdef VBOX_WITH_PAGE_SHARING
2279 /* Clear all registered shared modules. */
2280 pgmR3PhysAssertSharedPageChecksums(pVM);
2281 int rc2 = GMMR3ResetSharedModules(pVM);
2282 AssertRC(rc2);
2283#endif
2284 /* Reset counters. */
2285 pVM->pgm.s.cReusedSharedPages = 0;
2286 pVM->pgm.s.cBalloonedPages = 0;
2287
2288 return VINF_SUCCESS;
2289}
2290
2291
2292/**
2293 * Resets (zeros) the RAM after all devices and components have been reset.
2294 *
2295 * ASSUMES that the caller owns the PGM lock.
2296 *
2297 * @returns VBox status code.
2298 * @param pVM The cross context VM structure.
2299 */
2300int pgmR3PhysRamZeroAll(PVM pVM)
2301{
2302 PGM_LOCK_ASSERT_OWNER(pVM);
2303
2304 /*
2305 * We batch up pages that should be freed instead of calling GMM for
2306 * each and every one of them.
2307 */
2308 uint32_t cPendingPages = 0;
2309 PGMMFREEPAGESREQ pReq;
2310 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2311 AssertLogRelRCReturn(rc, rc);
2312
2313 /*
2314 * Walk the ram ranges.
2315 */
2316 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2317 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2318 {
2319 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2320 Assert(pRam || idRamRange == 0);
2321 if (!pRam) continue;
2322 Assert(pRam->idRange == idRamRange);
2323
2324 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2325 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2326
2327#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2328 if ( !pVM->pgm.s.fRamPreAlloc
2329# ifdef VBOX_WITH_PGM_NEM_MODE
2330 && !pVM->pgm.s.fNemMode
2331# endif
2332 && pVM->pgm.s.fZeroRamPagesOnReset)
2333 {
2334 /* Replace all RAM pages by ZERO pages. */
2335 while (iPage-- > 0)
2336 {
2337 PPGMPAGE pPage = &pRam->aPages[iPage];
2338 switch (PGM_PAGE_GET_TYPE(pPage))
2339 {
2340 case PGMPAGETYPE_RAM:
2341                        /* Do not replace pages that are part of a 2 MB contiguous range
2342                           with zero pages, but zero them in place instead. */
2343 if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
2344 || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2345 {
2346 void *pvPage;
2347 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2348 AssertLogRelRCReturn(rc, rc);
2349 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2350 }
2351 else if (PGM_PAGE_IS_BALLOONED(pPage))
2352 {
2353 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2354 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2355 }
2356 else if (!PGM_PAGE_IS_ZERO(pPage))
2357 {
2358 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2359 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2360 AssertLogRelRCReturn(rc, rc);
2361 }
2362 break;
2363
2364 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2365 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2366 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2367 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2368 break;
2369
2370 case PGMPAGETYPE_MMIO2:
2371 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2372 case PGMPAGETYPE_ROM:
2373 case PGMPAGETYPE_MMIO:
2374 break;
2375 default:
2376 AssertFailed();
2377 }
2378 } /* for each page */
2379 }
2380 else
2381#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
2382
2383 {
2384 /* Zero the memory. */
2385 while (iPage-- > 0)
2386 {
2387 PPGMPAGE pPage = &pRam->aPages[iPage];
2388 switch (PGM_PAGE_GET_TYPE(pPage))
2389 {
2390 case PGMPAGETYPE_RAM:
2391 switch (PGM_PAGE_GET_STATE(pPage))
2392 {
2393 case PGM_PAGE_STATE_ZERO:
2394 break;
2395
2396 case PGM_PAGE_STATE_BALLOONED:
2397 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2398 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2399 break;
2400
2401 case PGM_PAGE_STATE_SHARED:
2402 case PGM_PAGE_STATE_WRITE_MONITORED:
2403 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
2404 AssertLogRelRCReturn(rc, rc);
2405 RT_FALL_THRU();
2406
2407 case PGM_PAGE_STATE_ALLOCATED:
2408 if (pVM->pgm.s.fZeroRamPagesOnReset)
2409 {
2410 void *pvPage;
2411 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2412 AssertLogRelRCReturn(rc, rc);
2413 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2414 }
2415 break;
2416 }
2417 break;
2418
2419 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2420 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2421 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2422 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2423 break;
2424
2425 case PGMPAGETYPE_MMIO2:
2426 case PGMPAGETYPE_ROM_SHADOW:
2427 case PGMPAGETYPE_ROM:
2428 case PGMPAGETYPE_MMIO:
2429 break;
2430 default:
2431 AssertFailed();
2432
2433 }
2434 } /* for each page */
2435 }
2436 }
2437
2438 /*
2439 * Finish off any pages pending freeing.
2440 */
2441 if (cPendingPages)
2442 {
2443 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2444 AssertLogRelRCReturn(rc, rc);
2445 }
2446 GMMR3FreePagesCleanup(pReq);
2447
2448 /*
2449 * Flush the IEM TLB, just to be sure it really is done.
2450 */
2451 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ZERO_ALL);
2452
2453 return VINF_SUCCESS;
2454}
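/*
 * Summary of the reset policy implemented above: RAM pages belonging to a 2 MB
 * (large page) allocation are zeroed in place, ballooned pages simply revert to
 * the ZERO state, and other allocated RAM pages are handed back to GMM in
 * batches of PGMPHYS_FREE_PAGE_BATCH_SIZE, but only in the default
 * configuration (no pre-allocation, not NEM mode, fZeroRamPagesOnReset set).
 * In the other configurations allocated pages are zeroed in place when
 * fZeroRamPagesOnReset is set.  MMIO, MMIO2 and ROM pages are left alone here;
 * ROM shadow pages are handled by pgmR3PhysRomReset.
 */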
2455
2456
2457/**
2458 * Frees all RAM during VM termination.
2459 *
2460 * ASSUMES that the caller owns the PGM lock.
2461 *
2462 * @returns VBox status code.
2463 * @param pVM The cross context VM structure.
2464 */
2465int pgmR3PhysRamTerm(PVM pVM)
2466{
2467 int rc;
2468 PGM_LOCK_ASSERT_OWNER(pVM);
2469
2470#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2471 /* Reset the memory balloon. */
2472 rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2473 AssertRC(rc);
2474#endif
2475
2476#ifdef VBOX_WITH_PAGE_SHARING
2477
2478 /*
2479 * Clear all registered shared modules.
2480 */
2481 pgmR3PhysAssertSharedPageChecksums(pVM);
2482 rc = GMMR3ResetSharedModules(pVM);
2483 AssertRC(rc);
2484
2485 /*
2486 * Flush the handy pages updates to make sure no shared pages are hiding
2487     * Flush the handy page updates to make sure no shared pages are hiding
2488 */
2489# ifdef VBOX_WITH_PGM_NEM_MODE
2490 if (!pVM->pgm.s.fNemMode)
2491# endif
2492 rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_FLUSH_HANDY_PAGES, 0, NULL);
2493#endif
2494
2495 /*
2496 * We batch up pages that should be freed instead of calling GMM for
2497 * each and every one of them.
2498 */
2499 uint32_t cPendingPages = 0;
2500 PGMMFREEPAGESREQ pReq;
2501 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2502 AssertLogRelRCReturn(rc, rc);
2503
2504 /*
2505 * Walk the ram ranges.
2506 */
2507 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2508 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2509 {
2510 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2511 Assert(pRam || idRamRange == 0);
2512 if (!pRam) continue;
2513 Assert(pRam->idRange == idRamRange);
2514
2515 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2516 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2517
2518 while (iPage-- > 0)
2519 {
2520 PPGMPAGE pPage = &pRam->aPages[iPage];
2521 switch (PGM_PAGE_GET_TYPE(pPage))
2522 {
2523 case PGMPAGETYPE_RAM:
2524 /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */
2525 /** @todo change this to explicitly free private pages here. */
2526 if (PGM_PAGE_IS_SHARED(pPage))
2527 {
2528 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2529 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2530 AssertLogRelRCReturn(rc, rc);
2531 }
2532 break;
2533
2534 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2535 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
2536 case PGMPAGETYPE_MMIO2:
2537 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2538 case PGMPAGETYPE_ROM:
2539 case PGMPAGETYPE_MMIO:
2540 break;
2541 default:
2542 AssertFailed();
2543 }
2544 } /* for each page */
2545 }
2546
2547 /*
2548 * Finish off any pages pending freeing.
2549 */
2550 if (cPendingPages)
2551 {
2552 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2553 AssertLogRelRCReturn(rc, rc);
2554 }
2555 GMMR3FreePagesCleanup(pReq);
2556 return VINF_SUCCESS;
2557}
2558
2559
2560
2561/*********************************************************************************************************************************
2562* MMIO *
2563*********************************************************************************************************************************/
2564
2565/**
2566 * This is the interface IOM is using to register an MMIO region (unmapped).
2567 *
2568 *
2569 * @returns VBox status code.
2570 *
2571 * @param pVM The cross context VM structure.
2572 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2573 * @param cb The size of the MMIO region.
2574 * @param pszDesc The description of the MMIO region.
2575 * @param pidRamRange Where to return the RAM range ID for the MMIO region
2576 * on success.
2577 * @thread EMT(0)
2578 */
2579VMMR3_INT_DECL(int) PGMR3PhysMmioRegister(PVM pVM, PVMCPU pVCpu, RTGCPHYS cb, const char *pszDesc, uint16_t *pidRamRange)
2580{
2581 /*
2582 * Assert assumptions.
2583 */
2584 AssertPtrReturn(pidRamRange, VERR_INVALID_POINTER);
2585 *pidRamRange = UINT16_MAX;
2586 AssertReturn(pVCpu == VMMGetCpu(pVM) && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2587 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
2588 /// @todo AssertReturn(!pVM->pgm.s.fRamRangesFrozen, VERR_WRONG_ORDER);
2589 AssertReturn(cb <= ((RTGCPHYS)PGM_MAX_PAGES_PER_RAM_RANGE << GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
2590 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2591 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2592 AssertReturn(*pszDesc != '\0', VERR_INVALID_POINTER);
2593
2594 /*
2595 * Take the PGM lock and allocate an ad-hoc MMIO RAM range.
2596 */
2597 int rc = PGM_LOCK(pVM);
2598 AssertRCReturn(rc, rc);
2599
2600 uint32_t const cPages = cb >> GUEST_PAGE_SHIFT;
2601 PPGMRAMRANGE pNew = NULL;
2602 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPages, PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, &pNew);
2603 AssertLogRelMsg(RT_SUCCESS(rc), ("pgmR3PhysAllocateRamRange failed: cPages=%#RX32 (%s): %Rrc\n", cPages, pszDesc, rc));
2604 if (RT_SUCCESS(rc))
2605 {
2606 /* Initialize the range. */
2607 pNew->pszDesc = pszDesc;
2608 pNew->uNemRange = UINT32_MAX;
2609 pNew->pbR3 = NULL;
2610 pNew->paLSPages = NULL;
2611 Assert(pNew->fFlags == PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO && pNew->cb == cb);
2612
2613 uint32_t iPage = cPages;
2614 while (iPage-- > 0)
2615 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO);
2616 Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO);
2617
2618 /* update the page count stats. */
2619 pVM->pgm.s.cPureMmioPages += cPages;
2620 pVM->pgm.s.cAllPages += cPages;
2621
2622 /*
2623 * Set the return value, release lock and return to IOM.
2624 */
2625 *pidRamRange = pNew->idRange;
2626 }
2627
2628 PGM_UNLOCK(pVM);
2629 return rc;
2630}
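/*
 * MMIO registration is a two step affair for IOM: the fixed-size ad-hoc RAM
 * range is created once at VM creation time with PGMR3PhysMmioRegister(), and
 * the returned idRamRange is then used for every PGMR3PhysMmioMap() and
 * PGMR3PhysMmioUnmap() call as the guest maps and unmaps the device.  Rough
 * sketch (hypothetical variable names):
 *
 *      uint16_t idRamRange;
 *      rc = PGMR3PhysMmioRegister(pVM, pVCpu, cbRegion, pszDesc, &idRamRange);
 *      ...
 *      rc = PGMR3PhysMmioMap(pVM, pVCpu, GCPhys, cbRegion, idRamRange, hType, uUser);
 *      ...
 *      rc = PGMR3PhysMmioUnmap(pVM, pVCpu, GCPhys, cbRegion, idRamRange);
 */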
2631
2632
2633/**
2634 * Worker for PGMR3PhysMmioMap that's called owning the lock.
2635 */
2636static int pgmR3PhysMmioMapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb, RTGCPHYS const GCPhysLast,
2637 PPGMRAMRANGE const pMmioRamRange, PGMPHYSHANDLERTYPE const hType, uint64_t const uUser)
2638{
2639 /* Check that the range isn't mapped already. */
2640 AssertLogRelMsgReturn(pMmioRamRange->GCPhys == NIL_RTGCPHYS,
2641 ("desired %RGp mapping for '%s' - already mapped at %RGp!\n",
2642 GCPhys, pMmioRamRange->pszDesc, pMmioRamRange->GCPhys),
2643 VERR_ALREADY_EXISTS);
2644
2645 /*
2646 * Now, check if this falls into a regular RAM range or if we should use
2647 * the ad-hoc one (idRamRange).
2648 */
2649 int rc;
2650 uint32_t idxInsert = UINT32_MAX;
2651 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
2652 if (pOverlappingRange)
2653 {
2654 /* Simplification: all within the same range. */
2655 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
2656 && GCPhysLast <= pOverlappingRange->GCPhysLast,
2657 ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n",
2658 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2659 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2660 VERR_PGM_RAM_CONFLICT);
2661
2662        /* Check that it isn't an ad hoc range, but a real RAM range. */
2663 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
2664 ("%RGp-%RGp (MMIO/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
2665 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2666 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2667 VERR_PGM_RAM_CONFLICT);
2668
2669 /* Check that it's all RAM or MMIO pages. */
2670 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
2671 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2672 while (cLeft-- > 0)
2673 {
2674 AssertLogRelMsgReturn( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2675 || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO, /** @todo MMIO type isn't right */
2676 ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n",
2677 GCPhys, GCPhysLast, pMmioRamRange->pszDesc, pOverlappingRange->GCPhys,
2678 PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
2679 VERR_PGM_RAM_CONFLICT);
2680 pPage++;
2681 }
2682
2683 /*
2684 * Make all the pages in the range MMIO/ZERO pages, freeing any
2685 * RAM pages currently mapped here. This might not be 100% correct
2686 * for PCI memory, but we're doing the same thing for MMIO2 pages.
2687 */
2688 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
2689 AssertRCReturn(rc, rc);
2690
2691 /* Force a PGM pool flush as guest ram references have been changed. */
2692        /** @todo not entirely SMP safe; assuming for now that the guest takes
2693         * care of this internally (not touching mapped MMIO while changing
2694         * the mapping). */
2695 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2696 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2697 }
2698 else
2699 {
2700 /*
2701 * No RAM range, use the ad hoc one (idRamRange).
2702 *
2703 * Note that we don't have to tell REM about this range because
2704 * PGMHandlerPhysicalRegisterEx will do that for us.
2705 */
2706 AssertLogRelReturn(idxInsert <= pVM->pgm.s.RamRangeUnion.cLookupEntries, VERR_INTERNAL_ERROR_4);
2707 Log(("PGMR3PhysMmioMap: Inserting ad hoc MMIO range #%x for %RGp-%RGp %s\n",
2708 pMmioRamRange->idRange, GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2709
2710 Assert(PGM_PAGE_GET_TYPE(&pMmioRamRange->aPages[0]) == PGMPAGETYPE_MMIO);
2711
2712        /* We ASSUME that all the pages in the ad-hoc range are in the proper
2713           state and that we don't need to re-initialize them here. */
2714
2715#ifdef VBOX_WITH_NATIVE_NEM
2716 /* Notify NEM. */
2717 if (VM_IS_NEM_ENABLED(pVM))
2718 {
2719 uint8_t u2State = 0; /* (must have valid state as there can't be anything to preserve) */
2720 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, cb, 0 /*fFlags*/, NULL, NULL, &u2State, &pMmioRamRange->uNemRange);
2721 AssertLogRelRCReturn(rc, rc);
2722
2723 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2724 while (iPage-- > 0)
2725 PGM_PAGE_SET_NEM_STATE(&pMmioRamRange->aPages[iPage], u2State);
2726 }
2727#endif
2728 /* Insert it into the lookup table (may in theory fail). */
2729 rc = pgmR3PhysRamRangeInsertLookup(pVM, pMmioRamRange, GCPhys, &idxInsert);
2730 }
2731 if (RT_SUCCESS(rc))
2732 {
2733 /*
2734 * Register the access handler.
2735 */
2736 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, hType, uUser, pMmioRamRange->pszDesc);
2737 if (RT_SUCCESS(rc))
2738 {
2739#ifdef VBOX_WITH_NATIVE_NEM
2740 /* Late NEM notification (currently not used by anyone). */
2741 if (VM_IS_NEM_ENABLED(pVM))
2742 {
2743 if (pOverlappingRange)
2744 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2745 pOverlappingRange->pbR3 + (uintptr_t)(GCPhys - pOverlappingRange->GCPhys),
2746 NULL /*pvMmio2*/, NULL /*puNemRange*/);
2747 else
2748 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2749 &pMmioRamRange->uNemRange);
2750 AssertLogRelRC(rc);
2751 }
2752 if (RT_SUCCESS(rc))
2753#endif
2754 {
2755 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2756 return VINF_SUCCESS;
2757 }
2758
2759#ifdef VBOX_WITH_NATIVE_NEM
2760             * Failed, so revert it all as best we can (the memory content in
2761 * Failed, so revert it all as best as we can (the memory content in
2762 * the overlapping case is gone).
2763 */
2764 PGMHandlerPhysicalDeregister(pVM, GCPhys);
2765#endif
2766 }
2767 }
2768
2769 if (!pOverlappingRange)
2770 {
2771#ifdef VBOX_WITH_NATIVE_NEM
2772 /* Notify NEM about the sudden removal of the RAM range we just told it about. */
2773 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2774 NULL /*pu2State*/, &pMmioRamRange->uNemRange);
2775#endif
2776
2777 /* Remove the ad hoc range from the lookup table. */
2778 idxInsert -= 1;
2779 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxInsert);
2780 }
2781
2782 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2783 return rc;
2784}
2785
2786
2787/**
2788 * This is the interface IOM is using to map an MMIO region.
2789 *
2790 * It will check for conflicts and ensure that a RAM range structure
2791 * is present before calling the PGMR3HandlerPhysicalRegister API to
2792 * register the callbacks.
2793 *
2794 * @returns VBox status code.
2795 *
2796 * @param pVM The cross context VM structure.
2797 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2798 * @param GCPhys The start of the MMIO region.
2799 * @param cb The size of the MMIO region.
2800 * @param idRamRange The RAM range ID for the MMIO region as returned by
2801 * PGMR3PhysMmioRegister().
2802 * @param hType The physical access handler type registration.
2803 * @param uUser The user argument.
2804 * @thread EMT(pVCpu)
2805 */
2806VMMR3_INT_DECL(int) PGMR3PhysMmioMap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange,
2807 PGMPHYSHANDLERTYPE hType, uint64_t uUser)
2808{
2809 /*
2810     * Assert some basic assumptions.
2811 */
2812 VMCPU_ASSERT_EMT(pVCpu);
2813 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2814 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2815 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2816 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2817#ifdef VBOX_STRICT
2818 PCPGMPHYSHANDLERTYPEINT pType = pgmHandlerPhysicalTypeHandleToPtr(pVM, hType);
2819 Assert(pType);
2820 Assert(pType->enmKind == PGMPHYSHANDLERKIND_MMIO);
2821#endif
2822 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2823 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2824 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2825 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2826 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2827
2828 /*
2829 * Take the PGM lock and do the work.
2830 */
2831 int rc = PGM_LOCK(pVM);
2832 AssertRCReturn(rc, rc);
2833
2834 rc = pgmR3PhysMmioMapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange, hType, uUser);
2835#ifdef VBOX_STRICT
2836 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2837#endif
2838
2839 PGM_UNLOCK(pVM);
2840 return rc;
2841}
2842
2843
2844/**
2845 * Worker for PGMR3PhysMmioUnmap that's called with the PGM lock held.
2846 */
2847static int pgmR3PhysMmioUnmapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb,
2848 RTGCPHYS const GCPhysLast, PPGMRAMRANGE const pMmioRamRange)
2849{
2850 /*
2851 * Lookup the RAM range containing the region to make sure it is actually mapped.
2852 */
2853 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhysLast);
2854 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
2855 ("MMIO range not found at %RGp LB %RGp! (%s)\n", GCPhys, cb, pMmioRamRange->pszDesc),
2856 VERR_NOT_FOUND);
2857
2858 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2859 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
2860 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
2861 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_4);
2862
2863 AssertLogRelMsgReturn(pLookupRange == pMmioRamRange || !PGM_RAM_RANGE_IS_AD_HOC(pLookupRange),
2864 ("MMIO unmap mixup at %RGp LB %RGp (%s) vs %RGp LB %RGp (%s)\n",
2865 GCPhys, cb, pMmioRamRange->pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
2866 VERR_NOT_FOUND);
2867
2868 /*
2869 * Deregister the handler. This should reset any aliases, so an ad hoc
2870 * range will only contain MMIO type pages afterwards.
2871 */
2872 int rc = PGMHandlerPhysicalDeregister(pVM, GCPhys);
2873 if (RT_SUCCESS(rc))
2874 {
2875 if (pLookupRange != pMmioRamRange)
2876 {
2877 /*
2878 * Turn the pages back into RAM pages.
2879 */
2880 Log(("pgmR3PhysMmioUnmapLocked: Reverting MMIO range %RGp-%RGp (%s) in %RGp-%RGp (%s) to RAM.\n",
2881 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2882 pLookupRange->GCPhys, pLookupRange->GCPhysLast, pLookupRange->pszDesc));
2883
2884 RTGCPHYS const offRange = GCPhys - pLookupRange->GCPhys;
2885 uint32_t iPage = offRange >> GUEST_PAGE_SHIFT;
2886 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2887 while (cLeft--)
2888 {
2889 PPGMPAGE pPage = &pLookupRange->aPages[iPage];
2890 AssertMsg( (PGM_PAGE_IS_MMIO(pPage) && PGM_PAGE_IS_ZERO(pPage))
2891 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO2_ALIAS_MMIO
2892 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO
2893 , ("%RGp %R[pgmpage]\n", pLookupRange->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), pPage));
2894/** @todo This isn't entirely correct: aliased pages ought to be converted
2895 * back to zero pages here, but they won't be. Then again, shouldn't
2896 * PGMHandlerPhysicalDeregister have dealt with this already? */
2897 if (PGM_PAGE_IS_MMIO_OR_ALIAS(pPage))
2898 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM);
2899 iPage++;
2900 }
2901
2902#ifdef VBOX_WITH_NATIVE_NEM
2903            /* Notify NEM (failure will probably leave things in a non-working state). */
2904 if (VM_IS_NEM_ENABLED(pVM))
2905 {
2906 uint8_t u2State = UINT8_MAX;
2907 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2908 pLookupRange->pbR3 ? pLookupRange->pbR3 + GCPhys - pLookupRange->GCPhys : NULL,
2909 NULL, &u2State, &pLookupRange->uNemRange);
2910 AssertLogRelRC(rc);
2911 /** @todo status code propagation here... This is likely fatal, right? */
2912 if (u2State != UINT8_MAX)
2913 pgmPhysSetNemStateForPages(&pLookupRange->aPages[(GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT],
2914 cb >> GUEST_PAGE_SHIFT, u2State);
2915 }
2916#endif
2917 }
2918 else
2919 {
2920 /*
2921 * Unlink the ad hoc range.
2922 */
2923#ifdef VBOX_STRICT
2924 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2925 while (iPage-- > 0)
2926 {
2927 PPGMPAGE const pPage = &pMmioRamRange->aPages[iPage];
2928 Assert(PGM_PAGE_IS_MMIO(pPage));
2929 }
2930#endif
2931
2932 Log(("pgmR3PhysMmioUnmapLocked: Unmapping ad hoc MMIO range for %RGp-%RGp %s\n",
2933 GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2934
2935#ifdef VBOX_WITH_NATIVE_NEM
2936            if (VM_IS_NEM_ENABLED(pVM)) /* Notify NEM before we unlink the range. */
2937 {
2938 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, 0 /*fFlags*/,
2939 NULL, NULL, NULL, &pMmioRamRange->uNemRange);
2940 AssertLogRelRCReturn(rc, rc); /* we're up the creek if this hits. */
2941 }
2942#endif
2943
2944 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxLookup);
2945 }
2946 }
2947
2948 /* Force a PGM pool flush as guest ram references have been changed. */
2949    /** @todo Not entirely SMP safe; assuming for now that the guest takes care
2950     * of this internally (not touching mapped MMIO while changing the mapping). */
2951 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2952 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2953
2954 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2955 /*pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
2956
2957 return rc;
2958}
2959
2960
2961/**
2962 * This is the interface IOM is using to unmap an MMIO region.
2963 *
2964 * It will take care of calling PGMHandlerPhysicalDeregister and clean up
2965 * any ad hoc PGMRAMRANGE left behind.
2966 *
2967 * @returns VBox status code.
2968 * @param pVM The cross context VM structure.
2969 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2970 * @param GCPhys The start of the MMIO region.
2971 * @param cb The size of the MMIO region.
2972 * @param idRamRange The RAM range ID for the MMIO region as returned by
2973 * PGMR3PhysMmioRegister().
2974 * @thread EMT(pVCpu)
2975 */
2976VMMR3_INT_DECL(int) PGMR3PhysMmioUnmap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange)
2977{
2978 /*
2979 * Input validation.
2980 */
2981 VMCPU_ASSERT_EMT(pVCpu);
2982 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2983 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2984 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2985 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2986 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2987 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2988 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2989 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2990 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2991
2992 /*
2993 * Take the PGM lock and do what's asked.
2994 */
2995 int rc = PGM_LOCK(pVM);
2996 AssertRCReturn(rc, rc);
2997
2998 rc = pgmR3PhysMmioUnmapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange);
2999#ifdef VBOX_STRICT
3000 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
3001#endif
3002
3003 PGM_UNLOCK(pVM);
3004 return rc;
3005}
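/*
 * Note: pgmR3PhysMmioUnmapLocked distinguishes the same two cases as the
 * mapping code.  If the region was overlaid on a regular RAM range, the pages
 * are flipped back to the RAM type (they are ZERO pages at this point, the
 * previous content is gone).  If the ad-hoc MMIO range itself was mapped, it
 * is merely removed from the lookup table again and can be re-used by a later
 * PGMR3PhysMmioMap() call with the same idRamRange.
 */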
3006
3007
3008
3009/*********************************************************************************************************************************
3010* MMIO2 *
3011*********************************************************************************************************************************/
3012
3013/**
3014 * Validates the claim to an MMIO2 range and returns its index.
3015 *
3016 * @returns The MMIO2 entry index on success, negative error status on failure.
3017 * @param pVM The cross context VM structure.
3018 * @param pDevIns The device instance owning the region.
3019 * @param hMmio2 Handle to look up.
3020 * @param pcChunks Where to return the number of chunks associated with
3021 * this handle.
3022 */
3023static int32_t pgmR3PhysMmio2ResolveHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t *pcChunks)
3024{
3025 *pcChunks = 0;
3026 uint32_t const idxFirst = hMmio2 - 1U;
3027 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3028 AssertReturn(idxFirst < cMmio2Ranges, VERR_INVALID_HANDLE);
3029
3030 PPGMREGMMIO2RANGE const pFirst = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3031 AssertReturn(pFirst->idMmio2 == hMmio2, VERR_INVALID_HANDLE);
3032 AssertReturn((pFirst->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK), VERR_INVALID_HANDLE);
3033 AssertReturn(pFirst->pDevInsR3 == pDevIns && RT_VALID_PTR(pDevIns), VERR_NOT_OWNER);
3034
3035 /* Figure out how many chunks this handle spans. */
3036 if (pFirst->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3037 *pcChunks = 1;
3038 else
3039 {
3040 uint32_t cChunks = 1;
3041 for (uint32_t idx = idxFirst + 1;; idx++)
3042 {
3043 cChunks++;
3044 AssertReturn(idx < cMmio2Ranges, VERR_INTERNAL_ERROR_2);
3045 PPGMREGMMIO2RANGE const pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3046 AssertLogRelMsgReturn( pCur->pDevInsR3 == pDevIns
3047 && pCur->idMmio2 == idx + 1
3048 && pCur->iSubDev == pFirst->iSubDev
3049 && pCur->iRegion == pFirst->iRegion
3050 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK),
3051 ("cur: %p/%#x/%#x/%#x/%#x/%s; first: %p/%#x/%#x/%#x/%#x/%s\n",
3052 pCur->pDevInsR3, pCur->idMmio2, pCur->iSubDev, pCur->iRegion, pCur->fFlags,
3053 pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc,
3054 pDevIns, idx + 1, pFirst->iSubDev, pFirst->iRegion, pFirst->fFlags,
3055 pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc),
3056 VERR_INTERNAL_ERROR_3);
3057 if (pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3058 break;
3059 }
3060 *pcChunks = cChunks;
3061 }
3062
3063 return (int32_t)idxFirst;
3064}
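/*
 * Note on the handle scheme assumed above: an MMIO2 handle is the 1-based index
 * of the first chunk in pVM->pgm.s.aMmio2Ranges (hMmio2 == idMmio2 == index + 1).
 * A registration too big for a single chunk occupies several consecutive array
 * entries, the first flagged PGMREGMMIO2RANGE_F_FIRST_CHUNK and the last
 * PGMREGMMIO2RANGE_F_LAST_CHUNK, which is what the loop above walks to
 * determine *pcChunks.
 */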
3065
3066
3067/**
3068 * Check if a device has already registered an MMIO2 region.
3069 *
3070 * @returns NULL if not registered, otherwise pointer to the MMIO2.
3071 * @param pVM The cross context VM structure.
3072 * @param pDevIns The device instance owning the region.
3073 * @param iSubDev The sub-device number.
3074 * @param iRegion The region.
3075 */
3076DECLINLINE(PPGMREGMMIO2RANGE) pgmR3PhysMmio2Find(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion)
3077{
3078 /*
3079 * Search the array. There shouldn't be many entries.
3080 */
3081 uint32_t idx = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3082 while (idx-- > 0)
3083 if (RT_LIKELY( pVM->pgm.s.aMmio2Ranges[idx].pDevInsR3 != pDevIns
3084 || pVM->pgm.s.aMmio2Ranges[idx].iRegion != iRegion
3085 || pVM->pgm.s.aMmio2Ranges[idx].iSubDev != iSubDev))
3086 { /* likely */ }
3087 else
3088 return &pVM->pgm.s.aMmio2Ranges[idx];
3089 return NULL;
3090}
3091
3092/**
3093 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Map.
3094 */
3095static int pgmR3PhysMmio2EnableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3096{
3097 int rc = VINF_SUCCESS;
3098 while (cChunks-- > 0)
3099 {
3100 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3101 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3102
3103 Assert(!(pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING));
3104 int rc2 = pgmHandlerPhysicalExRegister(pVM, pMmio2->pPhysHandlerR3, pRamRange->GCPhys, pRamRange->GCPhysLast);
3105 if (RT_SUCCESS(rc2))
3106 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_IS_TRACKING;
3107 else
3108 AssertLogRelMsgFailedStmt(("%#RGp-%#RGp %s failed -> %Rrc\n",
3109 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3110 rc = RT_SUCCESS(rc) ? rc2 : rc);
3111
3112 idx++;
3113 }
3114 return rc;
3115}
3116
3117
3118/**
3119 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Unmap.
3120 */
3121static int pgmR3PhysMmio2DisableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3122{
3123 int rc = VINF_SUCCESS;
3124 while (cChunks-- > 0)
3125 {
3126 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3127 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3128 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING)
3129 {
3130 int rc2 = pgmHandlerPhysicalExDeregister(pVM, pMmio2->pPhysHandlerR3);
3131 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3132 ("%#RGp-%#RGp %s failed -> %Rrc\n",
3133 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3134 rc = RT_SUCCESS(rc) ? rc2 : rc);
3135 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_TRACKING;
3136 }
3137 idx++;
3138 }
3139 return rc;
3140}
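/*
 * The two helpers above toggle dirty-page tracking for a mapped MMIO2 region by
 * registering and deregistering the pre-allocated physical access handler
 * (pPhysHandlerR3) over each chunk's RAM range, mirroring the result in the
 * PGMREGMMIO2RANGE_F_IS_TRACKING flag.  They keep going on failure so that as
 * many chunks as possible end up in the requested state, and return the first
 * error encountered.
 */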
3141
3142#if 0 // temp
3143
3144/**
3145 * Common worker PGMR3PhysMmio2PreRegister & PGMR3PhysMMIO2Register that links a
3146 * complete registration entry into the lists and lookup tables.
3147 *
3148 * @param pVM The cross context VM structure.
3149 * @param pNew The new MMIO / MMIO2 registration to link.
3150 */
3151static void pgmR3PhysMmio2Link(PVM pVM, PPGMREGMMIO2RANGE pNew)
3152{
3153 Assert(pNew->idMmio2 != UINT8_MAX);
3154
3155 /*
3156 * Link it into the list (order doesn't matter, so insert it at the head).
3157 *
3158 * Note! The range we're linking may consist of multiple chunks, so we
3159 * have to find the last one.
3160 */
3161 PPGMREGMMIO2RANGE pLast = pNew;
3162 for (pLast = pNew; ; pLast = pLast->pNextR3)
3163 {
3164 if (pLast->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3165 break;
3166 Assert(pLast->pNextR3);
3167 Assert(pLast->pNextR3->pDevInsR3 == pNew->pDevInsR3);
3168 Assert(pLast->pNextR3->iSubDev == pNew->iSubDev);
3169 Assert(pLast->pNextR3->iRegion == pNew->iRegion);
3170 Assert(pLast->pNextR3->idMmio2 == pLast->idMmio2 + 1);
3171 }
3172
3173 PGM_LOCK_VOID(pVM);
3174
3175 /* Link in the chain of ranges at the head of the list. */
3176 pLast->pNextR3 = pVM->pgm.s.pRegMmioRangesR3;
3177 pVM->pgm.s.pRegMmioRangesR3 = pNew;
3178
3179 /* Insert the MMIO2 range/page IDs. */
3180 uint8_t idMmio2 = pNew->idMmio2;
3181 for (;;)
3182 {
3183 Assert(pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] == NULL);
3184 Assert(pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] == NIL_RTR0PTR);
3185 pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] = pNew;
3186 pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] = pNew->RamRange.pSelfR0 - RT_UOFFSETOF(PGMREGMMIO2RANGE, RamRange);
3187 if (pNew->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3188 break;
3189 pNew = pNew->pNextR3;
3190 idMmio2++;
3191 }
3192
3193 pgmPhysInvalidatePageMapTLB(pVM);
3194 PGM_UNLOCK(pVM);
3195}
3196#endif
3197
3198
3199/**
3200 * Allocate and register an MMIO2 region.
3201 *
3202 * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM
3203 * associated with a device. It is also non-shared memory with a permanent
3204 * ring-3 mapping and page backing (presently).
3205 *
3206 * An MMIO2 range may overlap with base memory if a lot of RAM is configured for
3207 * the VM, in which case we'll drop the base memory pages. Presently we will
3208 * make no attempt to preserve anything that happens to be present in the base
3209 * memory that is replaced; this is of course incorrect, but it's too much
3210 * effort.
3211 *
3212 * @returns VBox status code.
3213 * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the
3214 * memory.
3215 * @retval VERR_ALREADY_EXISTS if the region already exists.
3216 *
3217 * @param pVM The cross context VM structure.
3218 * @param pDevIns The device instance owning the region.
3219 * @param iSubDev The sub-device number.
3220 * @param iRegion The region number. If the MMIO2 memory is a PCI
3221 * I/O region this number has to be the number of that
3222 * region. Otherwise it can be any number save
3223 * UINT8_MAX.
3224 * @param cb The size of the region. Must be page aligned.
3225 * @param fFlags Reserved for future use, must be zero.
3226 * @param pszDesc The description.
3227 * @param ppv Where to store the pointer to the ring-3 mapping of
3228 * the memory.
3229 * @param phRegion Where to return the MMIO2 region handle. Optional.
3230 * @thread EMT(0)
3231 *
3232 * @note Only callable at VM creation time and during VM state loading.
3233 * The latter is for PCNet saved state compatibility with pre 4.3.6
3234 * state.
3235 */
3236VMMR3_INT_DECL(int) PGMR3PhysMmio2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cb,
3237 uint32_t fFlags, const char *pszDesc, void **ppv, PGMMMIO2HANDLE *phRegion)
3238{
3239 /*
3240 * Validate input.
3241 */
3242 AssertPtrReturn(ppv, VERR_INVALID_POINTER);
3243 *ppv = NULL;
3244 if (phRegion)
3245 {
3246 AssertPtrReturn(phRegion, VERR_INVALID_POINTER);
3247 *phRegion = NIL_PGMMMIO2HANDLE;
3248 }
3249 PVMCPU const pVCpu = VMMGetCpu(pVM);
3250 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3251 VMSTATE const enmVMState = VMR3GetState(pVM);
3252 AssertMsgReturn(enmVMState == VMSTATE_CREATING || enmVMState == VMSTATE_LOADING,
3253 ("state %s, expected CREATING or LOADING\n", VMGetStateName(enmVMState)),
3254 VERR_VM_INVALID_VM_STATE);
3255
3256 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3257 AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER);
3258 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
3259
3260 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
3261 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
3262
3263 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3264 AssertReturn(cb, VERR_INVALID_PARAMETER);
3265 AssertReturn(!(fFlags & ~PGMPHYS_MMIO2_FLAGS_VALID_MASK), VERR_INVALID_FLAGS);
3266
3267 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
3268 AssertLogRelReturn(((RTGCPHYS)cGuestPages << GUEST_PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER);
3269 AssertLogRelReturn(cGuestPages <= PGM_MAX_PAGES_PER_MMIO2_REGION, VERR_OUT_OF_RANGE);
3270 AssertLogRelReturn(cGuestPages <= (MM_MMIO_64_MAX >> GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
3271
3272 AssertReturn(pgmR3PhysMmio2Find(pVM, pDevIns, iSubDev, iRegion) == NULL, VERR_ALREADY_EXISTS);
3273
3274 /*
3275 * For the 2nd+ instance, mangle the description string so it's unique.
3276 */
3277 if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */
3278 {
3279 pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance);
3280 if (!pszDesc)
3281 return VERR_NO_MEMORY;
3282 }
3283
3284 /*
3285 * Check that we've got sufficient MMIO2 ID space for this request (the
3286 * allocation will be done later once we've got the backing memory secured,
3287 * but given the EMT0 restriction, that's not going to be a problem).
3288 *
3289 * The zero ID is not used as it could be confused with NIL_GMM_PAGEID, so
3290 * the IDs go from 1 thru PGM_MAX_MMIO2_RANGES.
3291 */
3292 unsigned const cChunks = pgmPhysMmio2CalcChunkCount(cb, NULL);
3293
3294 int rc = PGM_LOCK(pVM);
3295 AssertRCReturn(rc, rc);
3296
3297 AssertCompile(PGM_MAX_MMIO2_RANGES < 255);
3298 uint8_t const idMmio2 = pVM->pgm.s.cMmio2Ranges + 1;
3299 AssertLogRelReturnStmt(idMmio2 + cChunks <= PGM_MAX_MMIO2_RANGES, PGM_UNLOCK(pVM), VERR_PGM_TOO_MANY_MMIO2_RANGES);
3300
3301 /*
3302 * Try reserve and allocate the backing memory first as this is what is
3303 * most likely to fail.
3304 */
3305 rc = MMR3AdjustFixedReservation(pVM, cGuestPages, pszDesc);
3306 if (RT_SUCCESS(rc))
3307 {
3308 /*
3309 * If we're in driverless we'll be doing the work here, otherwise we
3310 * must call ring-0 to do the job as we'll need physical addresses
3311 * and maybe a ring-0 mapping address for it all.
3312 */
3313#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3314 if (!SUPR3IsDriverless())
3315 {
3316 PGMPHYSMMIO2REGISTERREQ Mmio2RegReq;
3317 Mmio2RegReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3318 Mmio2RegReq.Hdr.cbReq = sizeof(Mmio2RegReq);
3319 Mmio2RegReq.cbGuestPage = GUEST_PAGE_SIZE;
3320 Mmio2RegReq.cGuestPages = cGuestPages;
3321 Mmio2RegReq.idMmio2 = idMmio2;
3322 Mmio2RegReq.cChunks = cChunks;
3323 Mmio2RegReq.iSubDev = (uint8_t)iSubDev;
3324 Mmio2RegReq.iRegion = (uint8_t)iRegion;
3325 Mmio2RegReq.fFlags = fFlags;
3326 Mmio2RegReq.pDevIns = pDevIns;
3327 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_REGISTER, 0 /*u64Arg*/, &Mmio2RegReq.Hdr);
3328 }
3329 else
3330#endif
3331 rc = pgmPhysMmio2RegisterWorker(pVM, cGuestPages, idMmio2, cChunks, pDevIns, iSubDev, iRegion, fFlags);
3332 if (RT_SUCCESS(rc))
3333 {
3334 Assert(idMmio2 + cChunks - 1 == pVM->pgm.s.cMmio2Ranges);
3335
3336 /*
3337 * There are two things left to do:
3338 * 1. Add the description to the associated RAM ranges.
3339 * 2. Pre-allocate access handlers for dirty bit tracking if necessary.
3340 */
3341 bool const fNeedHandler = (fFlags & PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES)
3342#ifdef VBOX_WITH_PGM_NEM_MODE
3343 && (!VM_IS_NEM_ENABLED(pVM) || !NEMR3IsMmio2DirtyPageTrackingSupported(pVM))
3344#endif
3345 ;
3346 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
3347 {
3348 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idxChunk + idMmio2 - 1];
3349 Assert(pMmio2->idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
3350 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apRamRanges[pMmio2->idRamRange];
3351 Assert(pRamRange->pbR3 == pMmio2->pbR3);
3352 Assert(pRamRange->cb == pMmio2->cbReal);
3353
3354 pRamRange->pszDesc = pszDesc; /** @todo mangle this if we got more than one chunk */
3355 if (fNeedHandler)
3356 {
3357 rc = pgmHandlerPhysicalExCreate(pVM, pVM->pgm.s.hMmio2DirtyPhysHandlerType, pMmio2->idMmio2,
3358 pszDesc, &pMmio2->pPhysHandlerR3);
3359 AssertLogRelMsgReturnStmt(RT_SUCCESS(rc),
3360 ("idMmio2=%#x idxChunk=%#x rc=%Rc\n", idMmio2, idxChunk, rc),
3361 PGM_UNLOCK(pVM),
3362 rc); /* PGMR3Term will take care of it all. */
3363 }
3364 }
3365
3366 /*
3367 * Done!
3368 */
3369 if (phRegion)
3370 *phRegion = idMmio2;
3371 *ppv = pVM->pgm.s.aMmio2Ranges[idMmio2 - 1].pbR3;
3372
3373 PGM_UNLOCK(pVM);
3374 return VINF_SUCCESS;
3375 }
3376
3377 MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3378 }
3379 if (pDevIns->iInstance > 0)
3380 MMR3HeapFree((void *)pszDesc);
3381 return rc;
3382}
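
/*
 * Usage sketch (illustrative only, hence the #if 0): how a device construction
 * path might register a small MMIO2 region with dirty page tracking.  The
 * function and description names below are hypothetical; real devices normally
 * reach this code via the PDM device helpers rather than calling PGM directly,
 * and the call must be made on EMT(0) during VM creation (see above).
 */
#if 0
static int hypotheticalDevRegisterMmio2(PVM pVM, PPDMDEVINS pDevIns)
{
    void          *pvMmio2 = NULL;
    PGMMMIO2HANDLE hMmio2  = NIL_PGMMMIO2HANDLE;
    int rc = PGMR3PhysMmio2Register(pVM, pDevIns, 0 /*iSubDev*/, 0 /*iRegion*/,
                                    16 * GUEST_PAGE_SIZE /*cb*/,
                                    PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES,
                                    "HypotheticalDev", &pvMmio2, &hMmio2);
    if (RT_SUCCESS(rc))
    {
        /* pvMmio2 is the permanent ring-3 mapping of the backing pages; hMmio2
           identifies the region in later map/unmap and dirty bitmap calls. */
        RT_BZERO(pvMmio2, 16 * GUEST_PAGE_SIZE);
    }
    return rc;
}
#endif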
3383
3384/**
3385 * Deregisters and frees an MMIO2 region.
3386 *
3387 * Any physical access handlers registered for the region must be deregistered
3388 * before calling this function.
3389 *
3390 * @returns VBox status code.
3391 * @param pVM The cross context VM structure.
3392 * @param pDevIns The device instance owning the region.
3393 * @param hMmio2 The MMIO2 handle to deregister, or NIL if all
3394 * regions for the given device are to be deregistered.
3395 * @thread EMT(0)
3396 *
3397 * @note Only callable during VM state loading. This is to jettison an unused
3398 * MMIO2 section present in PCNet saved state prior to VBox v4.3.6.
3399 */
3400VMMR3_INT_DECL(int) PGMR3PhysMmio2Deregister(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
3401{
3402 /*
3403 * Validate input.
3404 */
3405 PVMCPU const pVCpu = VMMGetCpu(pVM);
3406 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3407 VMSTATE const enmVMState = VMR3GetState(pVM);
3408 AssertMsgReturn(enmVMState == VMSTATE_LOADING,
3409 ("state %s, expected LOADING\n", VMGetStateName(enmVMState)),
3410 VERR_VM_INVALID_VM_STATE);
3411
3412 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3413
3414 /*
3415 * Take the PGM lock and scan for registrations matching the requirements.
3416 * We do this backwards to more easily reduce the cMmio2Ranges count when
3417 * stuff is removed.
3418 */
3419 PGM_LOCK_VOID(pVM);
3420
3421 int rc = VINF_SUCCESS;
3422 unsigned cFound = 0;
3423 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3424 uint32_t idx = cMmio2Ranges;
3425 while (idx-- > 0)
3426 {
3427 PPGMREGMMIO2RANGE pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3428 if ( pCur->pDevInsR3 == pDevIns
3429 && ( hMmio2 == NIL_PGMMMIO2HANDLE
3430 || pCur->idMmio2 == hMmio2))
3431 {
3432 cFound++;
3433
3434 /*
3435 * Wind back the first chunk for this registration.
3436 */
3437 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK, ("idx=%u fFlags=%#x\n", idx, pCur->fFlags),
3438 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3439 uint32_t cGuestPages = pCur->cbReal >> GUEST_PAGE_SHIFT;
3440 uint32_t cChunks = 1;
3441 while ( idx > 0
3442 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK))
3443 {
3444 AssertLogRelMsgReturnStmt( pCur[-1].pDevInsR3 == pDevIns
3445 && pCur[-1].iRegion == pCur->iRegion
3446 && pCur[-1].iSubDev == pCur->iSubDev,
3447 ("[%u]: %p/%#x/%#x/fl=%#x; [%u]: %p/%#x/%#x/fl=%#x; cChunks=%#x\n",
3448 idx - 1, pCur[-1].pDevInsR3, pCur[-1].iRegion, pCur[-1].iSubDev, pCur[-1].fFlags,
3449 idx, pCur->pDevInsR3, pCur->iRegion, pCur->iSubDev, pCur->fFlags, cChunks),
3450 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3451 cChunks++;
3452 pCur--;
3453 idx--;
3454 cGuestPages += pCur->cbReal >> GUEST_PAGE_SHIFT;
3455 }
3456 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK,
3457 ("idx=%u fFlags=%#x cChunks=%#x\n", idx, pCur->fFlags, cChunks),
3458 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3459
3460 /*
3461 * Unmap it if it's mapped.
3462 */
3463 if (pCur->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
3464 {
3465 int rc2 = PGMR3PhysMmio2Unmap(pVM, pCur->pDevInsR3, idx + 1, pCur->GCPhys);
3466 AssertRC(rc2);
3467 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
3468 rc = rc2;
3469 }
3470
3471 /*
3472 * Destroy access handlers.
3473 */
3474 for (uint32_t iChunk = 0; iChunk < cChunks; iChunk++)
3475 if (pCur[iChunk].pPhysHandlerR3)
3476 {
3477 pgmHandlerPhysicalExDestroy(pVM, pCur[iChunk].pPhysHandlerR3);
3478 pCur[iChunk].pPhysHandlerR3 = NULL;
3479 }
3480
3481 /*
3482 * Call kernel mode / worker to do the actual deregistration.
3483 */
3484 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idx] ? pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc : NULL;
3485 int rc2;
3486#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3487 if (!SUPR3IsDriverless())
3488 {
3489 PGMPHYSMMIO2DEREGISTERREQ Mmio2DeregReq;
3490 Mmio2DeregReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3491 Mmio2DeregReq.Hdr.cbReq = sizeof(Mmio2DeregReq);
3492 Mmio2DeregReq.idMmio2 = idx + 1;
3493 Mmio2DeregReq.cChunks = cChunks;
3494 Mmio2DeregReq.pDevIns = pDevIns;
3495 rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER, 0 /*u64Arg*/, &Mmio2DeregReq.Hdr);
3496 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3497 ("VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3498 rc2, idx, cChunks, pszDesc),
3499 rc = RT_SUCCESS(rc) ? rc2 : rc);
3500 pgmPhysInvalidRamRangeTlbs(pVM); /* Ensure no stale pointers in the ring-3 RAM range TLB. */
3501 }
3502 else
3503#endif
3504 {
3505 Assert(PGM_IS_IN_NEM_MODE(pVM));
3506 rc2 = pgmPhysMmio2DeregisterWorker(pVM, idx, cChunks, pDevIns);
3507 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3508 ("pgmPhysMmio2DeregisterWorker: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3509 rc2, idx, cChunks, pszDesc),
3510 rc = RT_SUCCESS(rc) ? rc2 : rc);
3511 }
3512 if (RT_FAILURE(rc2))
3513 {
3514 LogRel(("PGMR3PhysMmio2Deregister: Deregistration failed: %Rrc; cChunks=%u %s\n", rc, cChunks, pszDesc));
3515 if (RT_SUCCESS(rc))
3516 rc = rc2;
3517 }
3518
3519 /*
3520 * Adjust the memory reservation.
3521 */
3522 if (!PGM_IS_IN_NEM_MODE(pVM) && RT_SUCCESS(rc2))
3523 {
3524 rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3525 AssertLogRelMsgStmt(RT_SUCCESS(rc2), ("rc=%Rrc cGuestPages=%#x\n", rc2, cGuestPages),
3526 rc = RT_SUCCESS(rc) ? rc2 : rc);
3527 }
3528
3529 /* Are we done? */
3530 if (hMmio2 != NIL_PGMMMIO2HANDLE)
3531 break;
3532 }
3533 }
3534 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3535 PGM_UNLOCK(pVM);
3536 return !cFound && hMmio2 != NIL_PGMMMIO2HANDLE ? VERR_NOT_FOUND : rc;
3537}
3538
3539
3540/**
3541 * Worker for PGMR3PhysMmio2Map.
3542 */
3543static int pgmR3PhysMmio2MapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks,
3544 RTGCPHYS const GCPhys, RTGCPHYS const GCPhysLast)
3545{
3546 /*
3547 * Validate the mapped status now that we've got the lock.
3548 */
3549 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3550 {
3551 AssertReturn( pVM->pgm.s.aMmio2Ranges[idx].GCPhys == NIL_RTGCPHYS
3552 && !(pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED),
3553 VERR_WRONG_ORDER);
3554 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3555 AssertReturn(pRamRange->GCPhys == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3556 AssertReturn(pRamRange->GCPhysLast == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3557 Assert(pRamRange->pbR3 == pVM->pgm.s.aMmio2Ranges[idx].pbR3);
3558 Assert(pRamRange->idRange == pVM->pgm.s.aMmio2Ranges[idx].idRamRange);
3559 }
3560
3561 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc;
3562#ifdef VBOX_WITH_NATIVE_NEM
3563 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3564 | (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3565 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3566#endif
3567
3568 /*
3569 * Now, check if this falls into a regular RAM range or if we should use
3570 * the ad-hoc one.
3571 *
3572 * Note! For reasons of simplicity, we're considering the whole MMIO2 area
3573 * here rather than individual chunks.
3574 */
3575 int rc = VINF_SUCCESS;
3576 uint32_t idxInsert = UINT32_MAX;
3577 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
3578 if (pOverlappingRange)
3579 {
3580 /* Simplification: all within the same range. */
3581 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
3582 && GCPhysLast <= pOverlappingRange->GCPhysLast,
3583 ("%RGp-%RGp (MMIO2/%s) falls partly outside %RGp-%RGp (%s)\n",
3584 GCPhys, GCPhysLast, pszDesc,
3585 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3586 VERR_PGM_RAM_CONFLICT);
3587
3588 /* Check that it isn't an ad hoc range, but a real RAM range. */
3589 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
3590 ("%RGp-%RGp (MMIO2/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
3591 GCPhys, GCPhysLast, pszDesc,
3592 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3593 VERR_PGM_RAM_CONFLICT);
3594
3595 /* There can only be one MMIO2 chunk matching here! */
3596 AssertLogRelMsgReturn(cChunks == 1,
3597 ("%RGp-%RGp (MMIO2/%s) consists of %u chunks whereas the RAM (%s) somehow doesn't!\n",
3598 GCPhys, GCPhysLast, pszDesc, cChunks, pOverlappingRange->pszDesc),
3599 VERR_PGM_PHYS_MMIO_EX_IPE);
3600
3601 /* Check that it's all RAM pages. */
3602 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3603 uint32_t const cMmio2Pages = pVM->pgm.s.apMmio2RamRanges[idxFirst]->cb >> GUEST_PAGE_SHIFT;
3604 uint32_t cPagesLeft = cMmio2Pages;
3605 while (cPagesLeft-- > 0)
3606 {
3607 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
3608 ("%RGp-%RGp (MMIO2/%s): %RGp is not a RAM page - type=%d desc=%s\n", GCPhys, GCPhysLast,
3609 pszDesc, pOverlappingRange->GCPhys, PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
3610 VERR_PGM_RAM_CONFLICT);
3611 pPage++;
3612 }
3613
3614#ifdef VBOX_WITH_PGM_NEM_MODE
3615 /* We cannot mix MMIO2 into a RAM range in simplified memory mode because pOverlappingRange->pbR3 can't point
3616 at both the RAM and the MMIO2 backing, so we'd never actually read from or write to the real MMIO2 memory. */
3617 AssertLogRelMsgReturn(!VM_IS_NEM_ENABLED(pVM),
3618 ("Putting %s at %RGp-%RGp is not possible in NEM mode because existing %RGp-%RGp (%s) mapping\n",
3619 pszDesc, GCPhys, GCPhysLast,
3620 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3621 VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
3622#endif
3623
3624 /*
3625 * Make all the pages in the range MMIO/ZERO pages, freeing any
3626 * RAM pages currently mapped here. This might not be 100% correct,
3627 * but so what, we do the same from MMIO...
3628 */
3629 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
3630 AssertRCReturn(rc, rc);
3631
3632 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - inside %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc,
3633 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc));
3634
3635 /*
3636 * We're all in for mapping it now. Update the MMIO2 range to reflect it.
3637 */
3638 pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys = GCPhys;
3639 pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags |= PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED;
3640
3641 /*
3642 * Replace the pages in the range.
3643 */
3644 PPGMPAGE pPageSrc = &pVM->pgm.s.apMmio2RamRanges[idxFirst]->aPages[0];
3645 PPGMPAGE pPageDst = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3646 cPagesLeft = cMmio2Pages;
3647 while (cPagesLeft-- > 0)
3648 {
3649 Assert(PGM_PAGE_IS_MMIO(pPageDst));
3650
3651 RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc); RT_NOREF_PV(HCPhys);
3652 uint32_t const idPage = PGM_PAGE_GET_PAGEID(pPageSrc);
3653 PGM_PAGE_SET_PAGEID(pVM, pPageDst, idPage);
3654 PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys);
3655 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2);
3656 PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED);
3657 PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE);
3658 PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0);
3659 PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0);
3660 /* NEM state is not relevant, see VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE above. */
3661
3662 pVM->pgm.s.cZeroPages--;
3663 pPageSrc++;
3664 pPageDst++;
3665 }
3666
3667 /* Force a PGM pool flush as guest ram references have been changed. */
3668 /** @todo not entirely SMP safe; assuming for now the guest takes
3669 * care of this internally (not touch mapped mmio while changing the
3670 * mapping). */
3671 PVMCPU pVCpu = VMMGetCpu(pVM);
3672 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3673 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3674 }
3675 else
3676 {
3677 /*
3678 * No RAM range, insert the ones prepared during registration.
3679 */
3680 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - no RAM overlap\n", GCPhys, GCPhysLast, pszDesc));
3681 RTGCPHYS GCPhysCur = GCPhys;
3682 uint32_t iChunk = 0;
3683 uint32_t idx = idxFirst;
3684 for (; iChunk < cChunks; iChunk++, idx++)
3685 {
3686 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3687 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3688 Assert(pRamRange->idRange == pMmio2->idRamRange);
3689 Assert(pMmio2->GCPhys == NIL_RTGCPHYS);
3690
3691#ifdef VBOX_WITH_NATIVE_NEM
3692 /* Tell NEM and get the new NEM state for the pages. */
3693 uint8_t u2NemState = 0;
3694 if (VM_IS_NEM_ENABLED(pVM))
3695 {
3696 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL /*pvRam*/, pRamRange->pbR3,
3697 &u2NemState, &pRamRange->uNemRange);
3698 AssertLogRelMsgBreak(RT_SUCCESS(rc),
3699 ("%RGp LB %RGp fFlags=%#x (%s)\n",
3700 GCPhysCur, pRamRange->cb, pMmio2->fFlags, pRamRange->pszDesc));
3701 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED; /* Set this early to indicate that NEM has been notified. */
3702 }
3703#endif
3704
3705 /* Clear the tracking data of pages we're going to reactivate. */
3706 PPGMPAGE pPageSrc = &pRamRange->aPages[0];
3707 uint32_t cPagesLeft = pRamRange->cb >> GUEST_PAGE_SHIFT;
3708 while (cPagesLeft-- > 0)
3709 {
3710 PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0);
3711 PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0);
3712#ifdef VBOX_WITH_NATIVE_NEM
3713 PGM_PAGE_SET_NEM_STATE(pPageSrc, u2NemState);
3714#endif
3715 pPageSrc++;
3716 }
3717
3718 /* Insert the RAM range into the lookup table. */
3719 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhysCur, &idxInsert);
3720 AssertRCBreak(rc);
3721
3722 /* Mark the range as fully mapped. */
3723 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_OVERLAPPING;
3724 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED;
3725 pMmio2->GCPhys = GCPhysCur;
3726
3727 /* Advance. */
3728 GCPhysCur += pRamRange->cb;
3729 }
3730 if (RT_FAILURE(rc))
3731 {
3732 /*
3733 * Bail out anything we've done so far.
3734 */
3735 idxInsert -= 1;
3736 do
3737 {
3738 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3739 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3740
3741#ifdef VBOX_WITH_NATIVE_NEM
3742 if ( VM_IS_NEM_ENABLED(pVM)
3743 && (pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED))
3744 {
3745 uint8_t u2NemState = UINT8_MAX;
3746 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL, pRamRange->pbR3,
3747 &u2NemState, &pRamRange->uNemRange);
3748 if (u2NemState != UINT8_MAX)
3749 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2NemState);
3750 }
3751#endif
3752 if (pMmio2->GCPhys != NIL_RTGCPHYS)
3753 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
3754
3755 pMmio2->GCPhys = NIL_RTGCPHYS;
3756 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_MAPPED;
3757
3758 idx--;
3759 } while (iChunk-- > 0);
3760 return rc;
3761 }
3762 }
3763
3764 /*
3765 * If the range has dirty page monitoring enabled, enable that.
3766 *
3767 * We ignore failures here for now because if we fail, the whole mapping
3768 * will have to be reversed and we'll end up with nothing at all on the
3769 * screen and a grumpy guest, whereas if we just go on, we'll only have
3770 * visual distortions to gripe about. There will be something in the
3771 * release log.
3772 */
3773 if ( pVM->pgm.s.aMmio2Ranges[idxFirst].pPhysHandlerR3
3774 && (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3775 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
3776
3777 /* Flush physical page map TLB. */
3778 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3779
3780#ifdef VBOX_WITH_NATIVE_NEM
3781 /*
3782 * Late NEM notification (currently unused).
3783 */
3784 if (VM_IS_NEM_ENABLED(pVM))
3785 {
3786 if (pOverlappingRange)
3787 {
3788 uint8_t * const pbRam = pOverlappingRange->pbR3 ? &pOverlappingRange->pbR3[GCPhys - pOverlappingRange->GCPhys] : NULL;
3789 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, GCPhysLast - GCPhys + 1U,
3790 fNemFlags | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE, pbRam,
3791 pVM->pgm.s.aMmio2Ranges[idxFirst].pbR3, NULL /*puNemRange*/);
3792 }
3793 else
3794 {
3795 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3796 {
3797 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3798 Assert(pVM->pgm.s.aMmio2Ranges[idx].GCPhys == pRamRange->GCPhys);
3799
3800 rc = NEMR3NotifyPhysMmioExMapLate(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags, NULL /*pvRam*/,
3801 pRamRange->pbR3, &pRamRange->uNemRange);
3802 AssertRCBreak(rc);
3803 }
3804 }
3805 AssertLogRelRCReturnStmt(rc,
3806 PGMR3PhysMmio2Unmap(pVM, pVM->pgm.s.aMmio2Ranges[idxFirst].pDevInsR3, idxFirst + 1, GCPhys),
3807 rc);
3808 }
3809#endif
3810
3811 return VINF_SUCCESS;
3812}
3813
3814
3815/**
3816 * Maps an MMIO2 region.
3817 *
3818 * This is typically done when a guest / the bios / state loading changes the
3819 * PCI config. The replacing of base memory has the same restrictions as during
3820 * registration, of course.
3821 *
3822 * @returns VBox status code.
3823 *
3824 * @param pVM The cross context VM structure.
3825 * @param pDevIns The device instance owning the region.
3826 * @param hMmio2 The handle of the region to map.
3827 * @param GCPhys The guest-physical address to be remapped.
3828 */
3829VMMR3_INT_DECL(int) PGMR3PhysMmio2Map(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
3830{
3831 /*
3832 * Validate input.
3833 */
3834 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
3835 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3836 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
3837 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
3838 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3839 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
3840
3841 uint32_t cChunks = 0;
3842 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
3843 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
3844
3845 /* Gather the full range size so we can validate the mapping address properly. */
3846 RTGCPHYS cbRange = 0;
3847 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3848 cbRange += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
3849
3850 RTGCPHYS const GCPhysLast = GCPhys + cbRange - 1;
3851 AssertLogRelReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3852
3853 /*
3854 * Take the PGM lock and call worker.
3855 */
3856 int rc = PGM_LOCK(pVM);
3857 AssertRCReturn(rc, rc);
3858
3859 rc = pgmR3PhysMmio2MapLocked(pVM, idxFirst, cChunks, GCPhys, GCPhysLast);
3860#ifdef VBOX_STRICT
3861 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
3862#endif
3863
3864 PGM_UNLOCK(pVM);
3865 return rc;
3866}
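
/*
 * Usage sketch (illustrative only, hence the #if 0): mapping a registered but
 * currently unmapped MMIO2 region at the page aligned guest-physical address a
 * hypothetical PCI BAR was just programmed with.  The function name and the
 * GCPhysBar parameter are assumptions for the example.
 */
#if 0
static int hypotheticalDevMapBar(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhysBar)
{
    /* The address must be non-zero and page aligned, and the region must not already be mapped. */
    Assert(GCPhysBar != 0 && !(GCPhysBar & GUEST_PAGE_OFFSET_MASK));
    return PGMR3PhysMmio2Map(pVM, pDevIns, hMmio2, GCPhysBar);
}
#endif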
3867
3868
3869/**
3870 * Worker for PGMR3PhysMmio2Unmap.
3871 */
3872static int pgmR3PhysMmio2UnmapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks, RTGCPHYS const GCPhysIn)
3873{
3874 /*
3875 * Validate input.
3876 */
3877 RTGCPHYS cbRange = 0;
3878 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3879 {
3880 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3881 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3882 AssertReturn(pMmio2->idRamRange == pRamRange->idRange, VERR_INTERNAL_ERROR_3);
3883 AssertReturn(pMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED, VERR_WRONG_ORDER);
3884 AssertReturn(pMmio2->GCPhys != NIL_RTGCPHYS, VERR_WRONG_ORDER);
3885 cbRange += pRamRange->cb;
3886 }
3887
3888 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3889 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
3890 const char * const pszDesc = pFirstRamRange->pszDesc;
3891 AssertLogRelMsgReturn(GCPhysIn == pFirstMmio2->GCPhys || GCPhysIn == NIL_RTGCPHYS,
3892 ("GCPhys=%RGp, actual address is %RGp\n", GCPhysIn, pFirstMmio2->GCPhys),
3893 VERR_MISMATCH);
3894 RTGCPHYS const GCPhys = pFirstMmio2->GCPhys; /* (never NIL_RTGCPHYS here, see the MAPPED assertions above) */
3895 Log(("PGMR3PhysMmio2Unmap: %RGp-%RGp %s\n", GCPhys, GCPhys + cbRange - 1U, pszDesc));
3896
3897 uint16_t const fOldFlags = pFirstMmio2->fFlags;
3898 Assert(fOldFlags & PGMREGMMIO2RANGE_F_MAPPED);
3899
3900 /* Find the first entry in the lookup table and verify the overlapping flag. */
3901 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhys + pFirstRamRange->cb - 1U);
3902 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
3903 ("MMIO2 range not found at %RGp LB %RGp in the lookup table! (%s)\n",
3904 GCPhys, pFirstRamRange->cb, pszDesc),
3905 VERR_INTERNAL_ERROR_2);
3906
3907 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
3908 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
3909 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
3910 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_3);
3911
3912 AssertLogRelMsgReturn(fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING
3913 ? pLookupRange != pFirstRamRange : pLookupRange == pFirstRamRange,
3914 ("MMIO2 unmap mixup at %RGp LB %RGp fl=%#x (%s) vs %RGp LB %RGp (%s)\n",
3915 GCPhys, cbRange, fOldFlags, pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
3916 VERR_INTERNAL_ERROR_4);
3917
3918 /*
3919 * If monitoring dirty pages, we must deregister the handlers first.
3920 */
3921 if ( pFirstMmio2->pPhysHandlerR3
3922 && (fOldFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3923 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
3924
3925 /*
3926 * Unmap it.
3927 */
3928 int rcRet = VINF_SUCCESS;
3929#ifdef VBOX_WITH_NATIVE_NEM
3930 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3931 | (fOldFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3932 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3933#endif
3934 if (fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING)
3935 {
3936 /*
3937 * We've replaced RAM, replace with zero pages.
3938 *
3939 * Note! This is where we might differ a little from a real system, because
3940 * it's likely to just show the RAM pages as they were before the
3941 * MMIO2 region was mapped here.
3942 */
3943 /* Only one chunk allowed when overlapping! */
3944 Assert(cChunks == 1);
3945 /* No NEM stuff should ever get here, see assertion in the mapping function. */
3946 AssertReturn(!VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
3947
3948 /* Restore the RAM pages we've replaced. */
3949 PPGMPAGE pPageDst = &pLookupRange->aPages[(pFirstRamRange->GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT];
3950 uint32_t cPagesLeft = pFirstRamRange->cb >> GUEST_PAGE_SHIFT;
3951 pVM->pgm.s.cZeroPages += cPagesLeft;
3952 while (cPagesLeft-- > 0)
3953 {
3954 PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM);
3955 pPageDst++;
3956 }
3957
3958 /* Update range state. */
3959 pFirstMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3960 pFirstMmio2->GCPhys = NIL_RTGCPHYS;
3961 Assert(pFirstRamRange->GCPhys == NIL_RTGCPHYS);
3962 Assert(pFirstRamRange->GCPhysLast == NIL_RTGCPHYS);
3963 }
3964 else
3965 {
3966 /*
3967 * Unlink the chunks related to the MMIO/MMIO2 region.
3968 */
3969 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3970 {
3971 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3972 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3973 Assert(pMmio2->idRamRange == pRamRange->idRange);
3974 Assert(pMmio2->GCPhys == pRamRange->GCPhys);
3975
3976#ifdef VBOX_WITH_NATIVE_NEM
3977 if (VM_IS_NEM_ENABLED(pVM)) /* Notify NEM. */
3978 {
3979 uint8_t u2State = UINT8_MAX;
3980 int rc = NEMR3NotifyPhysMmioExUnmap(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags,
3981 NULL, pMmio2->pbR3, &u2State, &pRamRange->uNemRange);
3982 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3983 ("NEMR3NotifyPhysMmioExUnmap failed: %Rrc - GCPhys=RGp LB %RGp fNemFlags=%#x pbR3=%p %s\n",
3984 rc, pRamRange->GCPhys, pRamRange->cb, fNemFlags, pMmio2->pbR3, pRamRange->pszDesc),
3985 rcRet = rc);
3986 if (u2State != UINT8_MAX)
3987 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2State);
3988 }
3989#endif
3990
3991 int rc = pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxLookup);
3992 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3993 ("pgmR3PhysRamRangeRemoveLookup failed: %Rrc - GCPhys=%RGp LB %RGp %s\n",
3994 rc, pRamRange->GCPhys, pRamRange->cb, pRamRange->pszDesc),
3995 rcRet = rc);
3996
3997 pMmio2->GCPhys = NIL_RTGCPHYS;
3998 pMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3999 Assert(pRamRange->GCPhys == NIL_RTGCPHYS);
4000 Assert(pRamRange->GCPhysLast == NIL_RTGCPHYS);
4001 }
4002 }
4003
4004 /* Force a PGM pool flush as guest ram references have been changed. */
4005 /** @todo not entirely SMP safe; assuming for now the guest takes care
4006 * of this internally (not touch mapped mmio while changing the
4007 * mapping). */
4008 PVMCPU pVCpu = VMMGetCpu(pVM);
4009 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
4010 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4011
4012 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4013 /* pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
4014
4015 return rcRet;
4016}
4017
4018
4019/**
4020 * Unmaps an MMIO2 region.
4021 *
4022 * This is typically done when a guest / the bios / state loading changes the
4023 * PCI config. The restoring of base memory has the same restrictions as during
4024 * registration, of course.
4025 *
 * @returns VBox status code.
 *
 * @param pVM The cross context VM structure.
 * @param pDevIns The device instance owning the region.
 * @param hMmio2 The handle of the region to unmap.
 * @param GCPhys The guest-physical address the region is currently mapped
 * at, or NIL_RTGCPHYS to unmap it from wherever it currently is.
 */
4026VMMR3_INT_DECL(int) PGMR3PhysMmio2Unmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
4027{
4028 /*
4029 * Validate input
4030 */
4031 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4032 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4033 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
4034 if (GCPhys != NIL_RTGCPHYS)
4035 {
4036 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
4037 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
4038 }
4039
4040 uint32_t cChunks = 0;
4041 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4042 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4043
4044
4045 /*
4046 * Take the PGM lock and call worker.
4047 */
4048 int rc = PGM_LOCK(pVM);
4049 AssertRCReturn(rc, rc);
4050
4051 rc = pgmR3PhysMmio2UnmapLocked(pVM, idxFirst, cChunks, GCPhys);
4052#ifdef VBOX_STRICT
4053 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
4054#endif
4055
4056 PGM_UNLOCK(pVM);
4057 return rc;
4058}
4059
4060
4061/**
4062 * Reduces the mapping size of an MMIO2 region.
4063 *
4064 * This is mainly for dealing with old saved states after changing the default
4065 * size of a mapping region. See PDMDevHlpMmio2Reduce and
4066 * PDMPCIDEV::pfnRegionLoadChangeHookR3.
4067 *
4068 * The region must not currently be mapped when making this call. The VM state
4069 * must be state restore or VM construction.
4070 *
4071 * @returns VBox status code.
4072 * @param pVM The cross context VM structure.
4073 * @param pDevIns The device instance owning the region.
4074 * @param hMmio2 The handle of the region to reduce.
4075 * @param cbRegion The new mapping size.
4076 */
4077VMMR3_INT_DECL(int) PGMR3PhysMmio2Reduce(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS cbRegion)
4078{
4079 /*
4080 * Validate input
4081 */
4082 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4083 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE && hMmio2 != 0 && hMmio2 <= RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges),
4084 VERR_INVALID_HANDLE);
4085 AssertReturn(cbRegion >= GUEST_PAGE_SIZE, VERR_INVALID_PARAMETER);
4086 AssertReturn(!(cbRegion & GUEST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
4087
4088 PVMCPU const pVCpu = VMMGetCpu(pVM);
4089 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4090
4091 VMSTATE const enmVmState = VMR3GetState(pVM);
4092 AssertLogRelMsgReturn( enmVmState == VMSTATE_CREATING
4093 || enmVmState == VMSTATE_LOADING,
4094 ("enmVmState=%d (%s)\n", enmVmState, VMR3GetStateName(enmVmState)),
4095 VERR_VM_INVALID_VM_STATE);
4096
4097 /*
4098 * Grab the PGM lock and validate the request properly.
4099 */
4100 int rc = PGM_LOCK(pVM);
4101 AssertRCReturn(rc, rc);
4102
4103 uint32_t cChunks = 0;
4104 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4105 if ((int32_t)idxFirst >= 0)
4106 {
4107 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4108 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
4109 if ( !(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4110 && pFirstMmio2->GCPhys == NIL_RTGCPHYS)
4111 {
4112 /*
4113 * NOTE! Current implementation does not support multiple ranges.
4114 * Implement when there is a real world need and thus a testcase.
4115 */
4116 if (cChunks == 1)
4117 {
4118 /*
4119 * The request has to be within the initial size.
4120 */
4121 if (cbRegion <= pFirstMmio2->cbReal)
4122 {
4123 /*
4124 * All we have to do is modify the size stored in the RAM range,
4125 * as it is the one used when mapping it and such.
4126 * The two page counts stored in PGMR0PERVM remain unchanged.
4127 */
4128 Log(("PGMR3PhysMmio2Reduce: %s changes from %#RGp bytes (%#RGp) to %#RGp bytes.\n",
4129 pFirstRamRange->pszDesc, pFirstRamRange->cb, pFirstMmio2->cbReal, cbRegion));
4130 pFirstRamRange->cb = cbRegion;
4131 rc = VINF_SUCCESS;
4132 }
4133 else
4134 {
4135 AssertLogRelMsgFailed(("MMIO2/%s: cbRegion=%#RGp > cbReal=%#RGp\n",
4136 pFirstRamRange->pszDesc, cbRegion, pFirstMmio2->cbReal));
4137 rc = VERR_OUT_OF_RANGE;
4138 }
4139 }
4140 else
4141 {
4142 AssertLogRelMsgFailed(("MMIO2/%s: more than one chunk: %d (flags=%#x)\n",
4143 pFirstRamRange->pszDesc, cChunks, pFirstMmio2->fFlags));
4144 rc = VERR_NOT_SUPPORTED;
4145 }
4146 }
4147 else
4148 {
4149 AssertLogRelMsgFailed(("MMIO2/%s: cannot change size of mapped range: %RGp..%RGp\n", pFirstRamRange->pszDesc,
4150 pFirstMmio2->GCPhys, pFirstMmio2->GCPhys + pFirstRamRange->cb - 1U));
4151 rc = VERR_WRONG_ORDER;
4152 }
4153 }
4154 else
4155 rc = (int32_t)idxFirst;
4156
4157 PGM_UNLOCK(pVM);
4158 return rc;
4159}
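
/*
 * Usage sketch (illustrative only, hence the #if 0): shrinking a region while
 * loading an old saved state that used a smaller default region size.  The
 * function name and cbNew parameter are assumptions; the region must be
 * unmapped and the VM in the CREATING or LOADING state for this to succeed.
 */
#if 0
static int hypotheticalDevShrinkForOldSavedState(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS cbNew)
{
    Assert(!(cbNew & GUEST_PAGE_OFFSET_MASK));
    return PGMR3PhysMmio2Reduce(pVM, pDevIns, hMmio2, cbNew);
}
#endif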
4160
4161
4162/**
4163 * Validates @a hMmio2, making sure it belongs to @a pDevIns.
4164 *
4165 * @returns VBox status code.
4166 * @param pVM The cross context VM structure.
4167 * @param pDevIns The device which allegedly owns @a hMmio2.
4168 * @param hMmio2 The handle to validate.
4169 */
4170VMMR3_INT_DECL(int) PGMR3PhysMmio2ValidateHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4171{
4172 /*
4173 * Validate input
4174 */
4175 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4176 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4177
4178 /*
4179 * Just do this the simple way.
4180 */
4181 int rc = PGM_LOCK_VOID(pVM);
4182 AssertRCReturn(rc, rc);
4183 uint32_t cChunks;
4184 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4185 PGM_UNLOCK(pVM);
4186 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4187 return VINF_SUCCESS;
4188}
4189
4190
4191/**
4192 * Gets the mapping address of an MMIO2 region.
4193 *
4194 * @returns Mapping address, NIL_RTGCPHYS if not mapped or invalid handle.
4195 *
4196 * @param pVM The cross context VM structure.
4197 * @param pDevIns The device owning the MMIO2 handle.
4198 * @param hMmio2 The region handle.
4199 */
4200VMMR3_INT_DECL(RTGCPHYS) PGMR3PhysMmio2GetMappingAddress(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4201{
4202 RTGCPHYS GCPhysRet = NIL_RTGCPHYS;
4203
4204 int rc = PGM_LOCK_VOID(pVM);
4205 AssertRCReturn(rc, NIL_RTGCPHYS);
4206
4207 uint32_t cChunks;
4208 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4209 if ((int32_t)idxFirst >= 0)
4210 GCPhysRet = pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys;
4211
4212 PGM_UNLOCK(pVM);
4213 return GCPhysRet;
4214}
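
/*
 * Usage sketch (illustrative only, hence the #if 0): moving an MMIO2 region
 * when the guest reprograms a hypothetical PCI BAR.  Passing NIL_RTGCPHYS to
 * PGMR3PhysMmio2Unmap unmaps the region from wherever it currently is; the
 * function name and GCPhysNew parameter are assumptions for the example.
 */
#if 0
static int hypotheticalDevMoveBar(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhysNew)
{
    if (PGMR3PhysMmio2GetMappingAddress(pVM, pDevIns, hMmio2) != NIL_RTGCPHYS)
    {
        int rc = PGMR3PhysMmio2Unmap(pVM, pDevIns, hMmio2, NIL_RTGCPHYS);
        AssertRCReturn(rc, rc);
    }
    return PGMR3PhysMmio2Map(pVM, pDevIns, hMmio2, GCPhysNew);
}
#endif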
4215
4216
4217/**
4218 * Worker for PGMR3PhysMmio2QueryAndResetDirtyBitmap.
4219 *
4220 * Called holding the PGM lock.
4221 */
4222static int pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4223 void *pvBitmap, size_t cbBitmap)
4224{
4225 /*
4226 * Continue validation.
4227 */
4228 uint32_t cChunks;
4229 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4230 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4231 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4232 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4233
4234 int rc = VINF_SUCCESS;
4235 if (cbBitmap || pvBitmap)
4236 {
4237 /*
4238 * Check the bitmap size and collect all the dirty flags.
4239 */
4240 RTGCPHYS cbTotal = 0;
4241 uint16_t fTotalDirty = 0;
4242 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4243 {
4244 /* Not using cbReal here, because NEM is not involved in the creation, only the mapping. */
4245 cbTotal += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
4246 fTotalDirty |= pVM->pgm.s.aMmio2Ranges[idx].fFlags;
4247 }
4248 size_t const cbTotalBitmap = RT_ALIGN_T(cbTotal, GUEST_PAGE_SIZE * 64, RTGCPHYS) / GUEST_PAGE_SIZE / 8;
4249
4250 AssertPtrReturn(pvBitmap, VERR_INVALID_POINTER);
4251 AssertReturn(RT_ALIGN_P(pvBitmap, sizeof(uint64_t)) == pvBitmap, VERR_INVALID_POINTER);
4252 AssertReturn(cbBitmap == cbTotalBitmap, VERR_INVALID_PARAMETER);
4253
4254#ifdef VBOX_WITH_PGM_NEM_MODE
4255 /*
4256 * If there is no physical handler we must be in NEM mode and NEM
4257 * taking care of the dirty bit collecting.
4258 */
4259 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4260 {
4261/** @todo This does not integrate at all with --execute-all-in-iem, leaving the
4262 * screen blank when using it together with --driverless. Fixing this won't be
4263 * entirely easy as we take the PGM_PAGE_HNDL_PHYS_STATE_DISABLED page status to
4264 * mean a dirty page. */
4265 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4266 uint8_t *pbBitmap = (uint8_t *)pvBitmap;
4267 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4268 {
4269 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4270 size_t const cbBitmapChunk = (pRamRange->cb / GUEST_PAGE_SIZE + 7) / 8;
4271 Assert((RTGCPHYS)cbBitmapChunk * GUEST_PAGE_SIZE * 8 == pRamRange->cb);
4272 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4273 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4274 pRamRange->uNemRange, pbBitmap, cbBitmapChunk);
4275 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4276 rc = rc2;
4277 pbBitmap += pRamRange->cb / GUEST_PAGE_SIZE / 8;
4278 }
4279 }
4280 else
4281#endif
4282 if (fTotalDirty & PGMREGMMIO2RANGE_F_IS_DIRTY)
4283 {
4284 if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4285 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4286 {
4287 /*
4288 * Reset each chunk, gathering dirty bits.
4289 */
4290 RT_BZERO(pvBitmap, cbBitmap); /* simpler for now. */
4291 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4292 {
4293 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4294 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4295 {
4296 int rc2 = pgmHandlerPhysicalResetMmio2WithBitmap(pVM, pMmio2->GCPhys, pvBitmap, iPageNo);
4297 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4298 rc = rc2;
4299 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4300 }
4301 iPageNo += pVM->pgm.s.apMmio2RamRanges[idx]->cb >> GUEST_PAGE_SHIFT;
4302 }
4303 }
4304 else
4305 {
4306 /*
4307 * If not mapped or tracking is disabled, we return the
4308 * PGMREGMMIO2RANGE_F_IS_DIRTY status for all pages. We cannot
4309 * get more accurate data than that after unmapping or disabling.
4310 */
4311 RT_BZERO(pvBitmap, cbBitmap);
4312 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4313 {
4314 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4315 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4316 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4317 {
4318 ASMBitSetRange(pvBitmap, iPageNo, iPageNo + (pRamRange->cb >> GUEST_PAGE_SHIFT));
4319 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4320 }
4321 iPageNo += pRamRange->cb >> GUEST_PAGE_SHIFT;
4322 }
4323 }
4324 }
4325 /*
4326 * No dirty chunks.
4327 */
4328 else
4329 RT_BZERO(pvBitmap, cbBitmap);
4330 }
4331 /*
4332 * No bitmap. Reset the region if tracking is currently enabled.
4333 */
4334 else if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4335 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4336 {
4337#ifdef VBOX_WITH_PGM_NEM_MODE
4338 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4339 {
4340 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4341 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4342 {
4343 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4344 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4345 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4346 pRamRange->uNemRange, NULL, 0);
4347 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4348 rc = rc2;
4349 }
4350 }
4351 else
4352#endif
4353 {
4354 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4355 {
4356 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4357 int rc2 = PGMHandlerPhysicalReset(pVM, pVM->pgm.s.aMmio2Ranges[idx].GCPhys);
4358 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4359 rc = rc2;
4360 }
4361 }
4362 }
4363
4364 return rc;
4365}
4366
4367
4368/**
4369 * Queries the dirty page bitmap and resets the monitoring.
4370 *
4371 * The PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES flag must be specified when
4372 * creating the range for this to work.
4373 *
4374 * @returns VBox status code.
4375 * @retval VERR_INVALID_FUNCTION if not created using
4376 * PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES.
4377 * @param pVM The cross context VM structure.
4378 * @param pDevIns The device owning the MMIO2 handle.
4379 * @param hMmio2 The region handle.
4380 * @param pvBitmap The output bitmap. Must be 8-byte aligned. Ignored
4381 * when @a cbBitmap is zero.
4382 * @param cbBitmap The size of the bitmap: one bit per guest page of the
4383 * whole MMIO2 range, rounded up to the nearest 8 bytes.
4384 * When zero only a reset is done.
4385 */
4386VMMR3_INT_DECL(int) PGMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4387 void *pvBitmap, size_t cbBitmap)
4388{
4389 /*
4390 * Do some basic validation before grabbing the PGM lock and continuing.
4391 */
4392 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4393 AssertReturn(RT_ALIGN_Z(cbBitmap, sizeof(uint64_t)) == cbBitmap, VERR_INVALID_PARAMETER);
4394 int rc = PGM_LOCK(pVM);
4395 if (RT_SUCCESS(rc))
4396 {
4397 STAM_PROFILE_START(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4398 rc = pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(pVM, pDevIns, hMmio2, pvBitmap, cbBitmap);
4399 STAM_PROFILE_STOP(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4400 PGM_UNLOCK(pVM);
4401 }
4402 return rc;
4403}
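
/*
 * Usage sketch (illustrative only, hence the #if 0): querying the dirty page
 * bitmap for a region created with PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES.  The
 * bitmap has one bit per guest page and its size is rounded up to whole
 * uint64_t words, matching the calculation in the worker above.  The function
 * name and the cbMmio2 parameter (the full registered size) are assumptions.
 */
#if 0
static int hypotheticalDevScanDirtyPages(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS cbMmio2)
{
    size_t const cbBitmap    = (size_t)(RT_ALIGN_T(cbMmio2, GUEST_PAGE_SIZE * 64, RTGCPHYS) / GUEST_PAGE_SIZE / 8);
    uint64_t    *pau64Bitmap = (uint64_t *)RTMemAllocZ(cbBitmap); /* 8-byte aligned as required. */
    if (!pau64Bitmap)
        return VERR_NO_MEMORY;

    int rc = PGMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pDevIns, hMmio2, pau64Bitmap, cbBitmap);
    if (RT_SUCCESS(rc))
        for (uint32_t iPage = 0; iPage < cbMmio2 / GUEST_PAGE_SIZE; iPage++)
            if (ASMBitTest(pau64Bitmap, (int32_t)iPage))
            {
                /* Page iPage was written since the last query/reset - refresh it. */
            }

    RTMemFree(pau64Bitmap);
    return rc;
}
#endif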
4404
4405
4406/**
4407 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking
4408 *
4409 * Called owning the PGM lock.
4410 */
4411static int pgmR3PhysMmio2ControlDirtyPageTrackingLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4412{
4413 /*
4414 * Continue validation.
4415 */
4416 uint32_t cChunks;
4417 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4418 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4419 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4420 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4421
4422#ifdef VBOX_WITH_PGM_NEM_MODE
4423 /*
4424 * This is a nop if NEM is responsible for doing the tracking; we simply
4425 * leave the tracking on all the time there.
4426 */
4427 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4428 {
4429 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4430 return VINF_SUCCESS;
4431 }
4432#endif
4433
4434 /*
4435 * Anything needing doing?
4436 */
4437 if (fEnabled != RT_BOOL(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4438 {
4439 LogFlowFunc(("fEnabled=%RTbool %s\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4440
4441 /*
4442 * Update the PGMREGMMIO2RANGE_F_TRACKING_ENABLED flag.
4443 */
4444 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4445 if (fEnabled)
4446 pVM->pgm.s.aMmio2Ranges[idx].fFlags |= PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4447 else
4448 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4449
4450 /*
4451 * Enable/disable handlers if currently mapped.
4452 *
4453 * We ignore status codes here as we've already changed the flags and
4454 * returning a failure status now would be confusing. Besides, the two
4455 * functions will continue past failures. As argued in the mapping code,
4456 * it's in the release log.
4457 */
4458 if (pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4459 {
4460 if (fEnabled)
4461 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
4462 else
4463 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
4464 }
4465 }
4466 else
4467 LogFlowFunc(("fEnabled=%RTbool %s - no change\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4468
4469 return VINF_SUCCESS;
4470}
4471
4472
4473/**
4474 * Controls the dirty page tracking for an MMIO2 range.
4475 *
4476 * @returns VBox status code.
4477 * @param pVM The cross context VM structure.
4478 * @param pDevIns The device owning the MMIO2 memory.
4479 * @param hMmio2 The handle of the region.
4480 * @param fEnabled The new tracking state.
4481 */
4482VMMR3_INT_DECL(int) PGMR3PhysMmio2ControlDirtyPageTracking(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4483{
4484 /*
4485 * Do some basic validation before grabbing the PGM lock and continuing.
4486 */
4487 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4488 int rc = PGM_LOCK(pVM);
4489 if (RT_SUCCESS(rc))
4490 {
4491 rc = pgmR3PhysMmio2ControlDirtyPageTrackingLocked(pVM, pDevIns, hMmio2, fEnabled);
4492 PGM_UNLOCK(pVM);
4493 }
4494 return rc;
4495}
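
/*
 * Usage sketch (illustrative only, hence the #if 0): a device pausing dirty
 * page tracking while nobody consumes the bitmap (e.g. the display is off) and
 * resuming it later.  The function name and the fViewerActive parameter are
 * assumptions; the region must have been created with
 * PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES for this to do anything.
 */
#if 0
static void hypotheticalDevSetViewerActive(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fViewerActive)
{
    int rc = PGMR3PhysMmio2ControlDirtyPageTracking(pVM, pDevIns, hMmio2, fViewerActive /*fEnabled*/);
    AssertRC(rc);
}
#endif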
4496
4497
4498/**
4499 * Changes the region number of an MMIO2 region.
4500 *
4501 * This is only for dealing with save state issues, nothing else.
4502 *
4503 * @returns VBox status code.
4504 *
4505 * @param pVM The cross context VM structure.
4506 * @param pDevIns The device owning the MMIO2 memory.
4507 * @param hMmio2 The handle of the region.
4508 * @param iNewRegion The new region index.
4509 *
4510 * @thread EMT(0)
4511 * @sa @bugref{9359}
4512 */
4513VMMR3_INT_DECL(int) PGMR3PhysMmio2ChangeRegionNo(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t iNewRegion)
4514{
4515 /*
4516 * Validate input.
4517 */
4518 VM_ASSERT_EMT0_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4519 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_LOADING, VERR_VM_INVALID_VM_STATE);
4520 AssertReturn(iNewRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
4521
4522 int rc = PGM_LOCK(pVM);
4523 AssertRCReturn(rc, rc);
4524
4525 /* Validate and resolve the handle. */
4526 uint32_t cChunks;
4527 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4528 if ((int32_t)idxFirst >= 0)
4529 {
4530 /* Check that the new range number is unused. */
4531 PPGMREGMMIO2RANGE const pConflict = pgmR3PhysMmio2Find(pVM, pDevIns, pVM->pgm.s.aMmio2Ranges[idxFirst].iSubDev,
4532 iNewRegion);
4533 if (!pConflict)
4534 {
4535 /*
4536 * Make the change.
4537 */
4538 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4539 pVM->pgm.s.aMmio2Ranges[idx].iRegion = (uint8_t)iNewRegion;
4540 rc = VINF_SUCCESS;
4541 }
4542 else
4543 {
4544 AssertLogRelMsgFailed(("MMIO2/%s: iNewRegion=%d conflicts with %s\n", pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc,
4545 iNewRegion, pVM->pgm.s.apMmio2RamRanges[pConflict->idRamRange]->pszDesc));
4546 rc = VERR_RESOURCE_IN_USE;
4547 }
4548 }
4549 else
4550 rc = (int32_t)idxFirst;
4551
4552 PGM_UNLOCK(pVM);
4553 return rc;
4554}
4555
4556
4557
4558/*********************************************************************************************************************************
4559* ROM *
4560*********************************************************************************************************************************/
4561
4562/**
4563 * Worker for PGMR3PhysRomRegister.
4564 *
4565 * This is here to simplify lock management, i.e. the caller does all the
4566 * locking and we can simply return without needing to remember to unlock
4567 * anything first.
4568 *
4569 * @returns VBox status code.
4570 * @param pVM The cross context VM structure.
4571 * @param pDevIns The device instance owning the ROM.
4572 * @param GCPhys First physical address in the range.
4573 * Must be page aligned!
4574 * @param cb The size of the range (in bytes).
4575 * Must be page aligned!
4576 * @param pvBinary Pointer to the binary data backing the ROM image.
4577 * @param cbBinary The size of the binary data pvBinary points to.
4578 * This must be less than or equal to @a cb.
4579 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
4580 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
4581 * @param pszDesc Pointer to description string. This must not be freed.
4582 */
4583static int pgmR3PhysRomRegisterLocked(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
4584 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
4585{
4586 /*
4587 * Validate input.
4588 */
4589 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4590 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
4591 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
4592 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
4593 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
4594 AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER);
4595 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
4596 AssertReturn(!(fFlags & ~PGMPHYS_ROM_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
4597
4598 PVMCPU const pVCpu = VMMGetCpu(pVM);
4599 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4600 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
4601
4602 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
4603 AssertReturn(cGuestPages <= PGM_MAX_PAGES_PER_ROM_RANGE, VERR_OUT_OF_RANGE);
4604
4605#ifdef VBOX_WITH_PGM_NEM_MODE
4606 const uint32_t cHostPages = RT_ALIGN_T(cb, HOST_PAGE_SIZE_DYNAMIC, RTGCPHYS) >> HOST_PAGE_SHIFT_DYNAMIC;
4607#endif
4608
4609 /*
4610 * Make sure we've got a free ROM range.
4611 */
4612 uint8_t const idRomRange = pVM->pgm.s.cRomRanges;
4613 AssertLogRelReturn(idRomRange < RT_ELEMENTS(pVM->pgm.s.apRomRanges), VERR_PGM_TOO_MANY_ROM_RANGES);
4614
4615 /*
4616 * Look thru the existing ROM ranges and make sure there aren't any
4617 * overlapping registrations.
4618 */
4619 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
4620 for (uint32_t idx = 0; idx < cRomRanges; idx++)
4621 {
4622 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
4623 AssertLogRelMsgReturn( GCPhys > pRom->GCPhysLast
4624 || GCPhysLast < pRom->GCPhys,
4625 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
4626 GCPhys, GCPhysLast, pszDesc,
4627 pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc),
4628 VERR_PGM_RAM_CONFLICT);
4629 }
4630
4631 /*
4632 * Find the RAM location and check for conflicts.
4633 *
4634 * Conflict detection is a bit different than for RAM registration since a
4635 * ROM can be located within a RAM range. So, what we have to check for is
4636 * other memory types (other than RAM that is) and that we don't span more
4637 * than one RAM range (lazy).
4638 */
4639 uint32_t idxInsert = UINT32_MAX;
4640 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
4641 if (pOverlappingRange)
4642 {
4643 /* completely within? */
4644 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
4645 && GCPhysLast <= pOverlappingRange->GCPhysLast,
4646 ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n",
4647 GCPhys, GCPhysLast, pszDesc,
4648 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4649 VERR_PGM_RAM_CONFLICT);
4650
4651 /* Check that it isn't an ad hoc range, but a real RAM range. */
4652 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
4653 ("%RGp-%RGp (ROM/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
4654 GCPhys, GCPhysLast, pszDesc,
4655 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4656 VERR_PGM_RAM_CONFLICT);
4657
4658 /* All the pages must be RAM pages. */
4659 PPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
4660 uint32_t cPagesLeft = cGuestPages;
4661 while (cPagesLeft-- > 0)
4662 {
4663 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
4664 ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n",
4665 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4666 VERR_PGM_RAM_CONFLICT);
4667 AssertLogRelMsgReturn(PGM_PAGE_IS_ZERO(pPage) || PGM_IS_IN_NEM_MODE(pVM),
4668 ("%RGp (%R[pgmpage]) is not a ZERO page - registering %RGp-%RGp (%s).\n",
4669 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4670 VERR_PGM_UNEXPECTED_PAGE_STATE);
4671 pPage++;
4672 }
4673 }
4674
4675 /*
4676 * Update the base memory reservation if necessary.
4677 */
4678 uint32_t const cExtraBaseCost = (pOverlappingRange ? 0 : cGuestPages)
4679 + (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? cGuestPages : 0);
4680 if (cExtraBaseCost)
4681 {
4682 int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost);
4683 AssertRCReturn(rc, rc);
4684 }
4685
4686#ifdef VBOX_WITH_NATIVE_NEM
4687 /*
4688 * Early NEM notification before we've made any changes or anything.
4689 */
4690 uint32_t const fNemNotify = (pOverlappingRange ? NEM_NOTIFY_PHYS_ROM_F_REPLACE : 0)
4691 | (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? NEM_NOTIFY_PHYS_ROM_F_SHADOW : 0);
4692 uint8_t u2NemState = UINT8_MAX;
4693 uint32_t uNemRange = 0;
4694 if (VM_IS_NEM_ENABLED(pVM))
4695 {
4696 int rc = NEMR3NotifyPhysRomRegisterEarly(pVM, GCPhys, cGuestPages << GUEST_PAGE_SHIFT,
4697 pOverlappingRange
4698 ? PGM_RAMRANGE_CALC_PAGE_R3PTR(pOverlappingRange, GCPhys) : NULL,
4699 fNemNotify, &u2NemState,
4700 pOverlappingRange ? &pOverlappingRange->uNemRange : &uNemRange);
4701 AssertLogRelRCReturn(rc, rc);
4702 }
4703#endif
4704
4705 /*
4706 * Allocate memory for the virgin copy of the RAM. In simplified memory
4707 * mode, we allocate memory for any ad-hoc RAM range and for shadow pages.
4708 */
4709 int rc;
4710 PGMMALLOCATEPAGESREQ pReq = NULL;
4711#ifdef VBOX_WITH_PGM_NEM_MODE
4712 void *pvRam = NULL;
4713 void *pvAlt = NULL;
4714 if (PGM_IS_IN_NEM_MODE(pVM))
4715 {
4716 if (!pOverlappingRange)
4717 {
4718 rc = SUPR3PageAlloc(cHostPages, 0, &pvRam);
4719 if (RT_FAILURE(rc))
4720 return rc;
4721 }
4722 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4723 {
4724 rc = SUPR3PageAlloc(cHostPages, 0, &pvAlt);
4725 if (RT_FAILURE(rc))
4726 {
4727 if (pvRam)
4728 SUPR3PageFree(pvRam, cHostPages);
4729 return rc;
4730 }
4731 }
4732 }
4733 else
4734#endif
4735 {
4736#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4737 rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cGuestPages, GMMACCOUNT_BASE);
4738 AssertRCReturn(rc, rc);
4739
4740 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
4741 {
4742 pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << GUEST_PAGE_SHIFT);
4743 pReq->aPages[iPage].fZeroed = false;
4744 pReq->aPages[iPage].idPage = NIL_GMM_PAGEID;
4745 pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID;
4746 }
4747
4748 rc = GMMR3AllocatePagesPerform(pVM, pReq);
4749 if (RT_FAILURE(rc))
4750 {
4751 GMMR3AllocatePagesCleanup(pReq);
4752 return rc;
4753 }
4754#endif
4755 }
4756
4757 /*
4758 * Allocate a RAM range if required.
4759 * Note! We don't clean up the RAM range here on failure, VM destruction does that.
4760 */
4761 rc = VINF_SUCCESS;
4762 PPGMRAMRANGE pRamRange = NULL;
4763 if (!pOverlappingRange)
4764 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cGuestPages, PGM_RAM_RANGE_FLAGS_AD_HOC_ROM, &pRamRange);
4765 if (RT_SUCCESS(rc))
4766 {
4767 /*
4768 * Allocate a ROM range.
4769 * Note! We don't clean up the ROM range here on failure, VM destruction does that.
4770 */
4771#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
4772 if (!SUPR3IsDriverless())
4773 {
4774 PGMPHYSROMALLOCATERANGEREQ RomRangeReq;
4775 RomRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
4776 RomRangeReq.Hdr.cbReq = sizeof(RomRangeReq);
4777 RomRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
4778 RomRangeReq.cGuestPages = cGuestPages;
4779 RomRangeReq.idRomRange = idRomRange;
4780 RomRangeReq.fFlags = fFlags;
4781 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ROM_ALLOCATE_RANGE, 0 /*u64Arg*/, &RomRangeReq.Hdr);
4782 }
4783 else
4784#endif
4785 rc = pgmPhysRomRangeAllocCommon(pVM, cGuestPages, idRomRange, fFlags);
4786 }
4787 if (RT_SUCCESS(rc))
4788 {
4789 /*
4790 * Initialize and map the RAM range (if required).
4791 */
4792 PPGMROMRANGE const pRomRange = pVM->pgm.s.apRomRanges[idRomRange];
4793 AssertPtr(pRomRange);
4794 uint32_t const idxFirstRamPage = pOverlappingRange ? (GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT : 0;
4795 PPGMROMPAGE pRomPage = &pRomRange->aPages[0];
4796 if (!pOverlappingRange)
4797 {
4798 /* Initialize the new RAM range and insert it into the lookup table. */
4799 pRamRange->pszDesc = pszDesc;
4800#ifdef VBOX_WITH_NATIVE_NEM
4801 pRamRange->uNemRange = uNemRange;
4802#endif
4803
4804 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4805#ifdef VBOX_WITH_PGM_NEM_MODE
4806 if (PGM_IS_IN_NEM_MODE(pVM))
4807 {
4808 AssertPtr(pvRam); Assert(pReq == NULL);
4809 pRamRange->pbR3 = (uint8_t *)pvRam;
4810 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4811 {
4812 PGM_PAGE_INIT(pRamPage, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4813 PGMPAGETYPE_ROM, PGM_PAGE_STATE_ALLOCATED);
4814 pRomPage->Virgin = *pRamPage;
4815 }
4816 }
4817 else
4818#endif
4819 {
4820#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4821 Assert(!pRamRange->pbR3); Assert(!pvRam);
4822 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4823 {
4824 PGM_PAGE_INIT(pRamPage,
4825 pReq->aPages[iPage].HCPhysGCPhys,
4826 pReq->aPages[iPage].idPage,
4827 PGMPAGETYPE_ROM,
4828 PGM_PAGE_STATE_ALLOCATED);
4829
4830 pRomPage->Virgin = *pRamPage;
4831 }
4832#endif
4833 }
4834
4835 pVM->pgm.s.cAllPages += cGuestPages;
4836 pVM->pgm.s.cPrivatePages += cGuestPages;
4837
4838 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhys, &idxInsert);
4839 }
4840 else
4841 {
4842 /* Insert the ROM into an existing RAM range. */
4843 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
4844#ifdef VBOX_WITH_PGM_NEM_MODE
4845 if (PGM_IS_IN_NEM_MODE(pVM))
4846 {
4847 Assert(pvRam == NULL); Assert(pReq == NULL);
4848 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4849 {
4850# ifdef VBOX_WITH_ONLY_PGM_NEM_MODE /* gcc will complain that the first part of the assertion is always false otherwise */
4851 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
4852# else
4853 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
4854# endif
4855 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
4856 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
4857 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4858 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4859 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4860 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4861 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4862
4863 pRomPage->Virgin = *pRamPage;
4864 }
4865 }
4866 else
4867#endif
4868 {
4869#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4870 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4871 {
4872 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4873 PGM_PAGE_SET_HCPHYS(pVM, pRamPage, pReq->aPages[iPage].HCPhysGCPhys);
4874 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4875 PGM_PAGE_SET_PAGEID(pVM, pRamPage, pReq->aPages[iPage].idPage);
4876 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4877 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4878 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4879
4880 pRomPage->Virgin = *pRamPage;
4881 }
4882 pVM->pgm.s.cZeroPages -= cGuestPages;
4883 pVM->pgm.s.cPrivatePages += cGuestPages;
4884#endif
4885 }
4886 pRamRange = pOverlappingRange;
4887 }
4888
4889 if (RT_SUCCESS(rc))
4890 {
4891#ifdef VBOX_WITH_NATIVE_NEM
4892 /* Set the NEM state of the pages if needed. */
4893 if (u2NemState != UINT8_MAX)
4894 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4895#endif
4896
4897 /* Flush physical page map TLB. */
4898 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4899
4900 /*
4901 * Register the ROM access handler.
4902 */
4903 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, pVM->pgm.s.hRomPhysHandlerType, idRomRange, pszDesc);
4904 if (RT_SUCCESS(rc))
4905 {
4906 /*
4907 * Copy the image over to the virgin pages.
4908 * This must be done after linking in the RAM range.
4909 */
4910 size_t cbBinaryLeft = cbBinary;
4911 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4912 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
4913 {
4914 void *pvDstPage;
4915 rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << GUEST_PAGE_SHIFT), &pvDstPage);
4916 if (RT_FAILURE(rc))
4917 {
4918 VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys);
4919 break;
4920 }
4921 if (cbBinaryLeft >= GUEST_PAGE_SIZE)
4922 {
4923 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), GUEST_PAGE_SIZE);
4924 cbBinaryLeft -= GUEST_PAGE_SIZE;
4925 }
4926 else
4927 {
4928 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE); /* (shouldn't be necessary, but can't hurt either) */
4929 if (cbBinaryLeft > 0)
4930 {
4931 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), cbBinaryLeft);
4932 cbBinaryLeft = 0;
4933 }
4934 }
4935 }
4936 if (RT_SUCCESS(rc))
4937 {
4938 /*
4939 * Initialize the ROM range.
4940 * Note that the Virgin member of the pages has already been initialized above.
4941 */
4942 Assert(pRomRange->cb == cb);
4943 Assert(pRomRange->fFlags == fFlags);
4944 Assert(pRomRange->idSavedState == UINT8_MAX);
4945 pRomRange->GCPhys = GCPhys;
4946 pRomRange->GCPhysLast = GCPhysLast;
4947 pRomRange->cbOriginal = cbBinary;
4948 pRomRange->pszDesc = pszDesc;
4949#ifdef VBOX_WITH_PGM_NEM_MODE
4950 pRomRange->pbR3Alternate = (uint8_t *)pvAlt;
4951#endif
4952 pRomRange->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY
4953 ? pvBinary : RTMemDup(pvBinary, cbBinary);
4954 if (pRomRange->pvOriginal)
4955 {
4956 for (unsigned iPage = 0; iPage < cGuestPages; iPage++)
4957 {
4958 PPGMROMPAGE const pPage = &pRomRange->aPages[iPage];
4959 pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE;
4960#ifdef VBOX_WITH_PGM_NEM_MODE
4961 if (PGM_IS_IN_NEM_MODE(pVM))
4962 PGM_PAGE_INIT(&pPage->Shadow, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4963 PGMPAGETYPE_ROM_SHADOW, PGM_PAGE_STATE_ALLOCATED);
4964 else
4965#endif
4966 PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW);
4967 }
4968
4969 /* update the page count stats for the shadow pages. */
4970 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4971 {
4972 if (PGM_IS_IN_NEM_MODE(pVM))
4973 pVM->pgm.s.cPrivatePages += cGuestPages;
4974 else
4975 pVM->pgm.s.cZeroPages += cGuestPages;
4976 pVM->pgm.s.cAllPages += cGuestPages;
4977 }
4978
4979#ifdef VBOX_WITH_NATIVE_NEM
4980 /*
4981 * Notify NEM again.
4982 */
4983 if (VM_IS_NEM_ENABLED(pVM))
4984 {
4985 u2NemState = UINT8_MAX;
4986 rc = NEMR3NotifyPhysRomRegisterLate(pVM, GCPhys, cb, PGM_RAMRANGE_CALC_PAGE_R3PTR(pRamRange, GCPhys),
4987 fNemNotify, &u2NemState, &pRamRange->uNemRange);
4988 if (u2NemState != UINT8_MAX)
4989 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4990 }
4991 else
4992#endif
4993 GMMR3AllocatePagesCleanup(pReq);
4994 if (RT_SUCCESS(rc))
4995 {
4996 /*
4997 * Done!
4998 */
4999#ifdef VBOX_STRICT
5000 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
5001#endif
5002 return rc;
5003 }
5004
5005 /*
5006 * bail out
5007 */
5008#ifdef VBOX_WITH_NATIVE_NEM
5009 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5010 {
5011 Assert(VM_IS_NEM_ENABLED(pVM));
5012 pVM->pgm.s.cPrivatePages -= cGuestPages;
5013 pVM->pgm.s.cAllPages -= cGuestPages;
5014 }
5015#endif
5016 }
5017 else
5018 rc = VERR_NO_MEMORY;
5019 }
5020
5021 int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys);
5022 AssertRC(rc2);
5023 }
5024
5025 idxInsert -= 1;
5026 if (!pOverlappingRange)
5027 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
5028 }
5029 /* else: lookup insertion failed. */
5030
5031 if (pOverlappingRange)
5032 {
5033 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
5034#ifdef VBOX_WITH_PGM_NEM_MODE
5035 if (PGM_IS_IN_NEM_MODE(pVM))
5036 {
5037 Assert(pvRam == NULL); Assert(pReq == NULL);
5038 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
5039 {
5040# ifdef VBOX_WITH_ONLY_PGM_NEM_MODE /* gcc will complain that the first part of the assertion is always false otherwise */
5041 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
5042# else
5043 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
5044# endif
5045 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
5046 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
5047 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_RAM);
5048 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
5049 }
5050 }
5051 else
5052#endif
5053 {
5054#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5055 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
5056 PGM_PAGE_INIT_ZERO(pRamPage, pVM, PGMPAGETYPE_RAM);
5057 pVM->pgm.s.cZeroPages += cGuestPages;
5058 pVM->pgm.s.cPrivatePages -= cGuestPages;
5059#endif
5060 }
5061 }
5062 }
5063 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
5064 pgmPhysInvalidRamRangeTlbs(pVM);
5065
5066#ifdef VBOX_WITH_PGM_NEM_MODE
5067 if (PGM_IS_IN_NEM_MODE(pVM))
5068 {
5069 Assert(!pReq);
5070 if (pvRam)
5071 SUPR3PageFree(pvRam, cHostPages);
5072 if (pvAlt)
5073 SUPR3PageFree(pvAlt, cHostPages);
5074 }
5075 else
5076#endif
5077 {
5078#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5079 GMMR3FreeAllocatedPages(pVM, pReq);
5080 GMMR3AllocatePagesCleanup(pReq);
5081#endif
5082 }
5083
5084 /* We don't bother to actually free either the ROM or the RAM ranges
5085 themselves; as already mentioned above, we'll leave that to the VM
5086 termination cleanup code. */
5087 return rc;
5088}
5089
5090
5091/**
5092 * Registers a ROM image.
5093 *
5094 * Shadowed ROM images require double the amount of backing memory, so
5095 * don't use that unless you have to. Shadowing of ROM images is a process
5096 * whereby we can select where the reads go and where the writes go. On real
5097 * hardware the chipset provides means to configure this. We provide
5098 * PGMR3PhysRomProtect() for this purpose.
5099 *
5100 * A read-only copy of the ROM image will always be kept around while we
5101 * will allocate RAM pages for the changes on demand (unless all memory
5102 * is configured to be preallocated).
5103 *
5104 * @returns VBox status code.
5105 * @param pVM The cross context VM structure.
5106 * @param pDevIns The device instance owning the ROM.
5107 * @param GCPhys First physical address in the range.
5108 * Must be page aligned!
5109 * @param cb The size of the range (in bytes).
5110 * Must be page aligned!
5111 * @param pvBinary Pointer to the binary data backing the ROM image.
5112 * @param cbBinary The size of the binary data pvBinary points to.
5113 * This must be less or equal to @a cb.
5114 * @param fFlags Mask of flags, PGMPHYS_ROM_FLAGS_XXX.
5115 * @param pszDesc Pointer to description string. This must not be freed.
5116 *
5117 * @remark There is no way to remove the ROM yet, either automatically on
5118 * device cleanup or manually from the device. This isn't difficult in
5119 * any way; it's just not something we expect to be necessary for a while.
5120 */
5121VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
5122 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
5123{
5124 Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n",
5125 pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc));
5126 PGM_LOCK_VOID(pVM);
5127
5128 int rc = pgmR3PhysRomRegisterLocked(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc);
5129
5130 PGM_UNLOCK(pVM);
5131 return rc;
5132}
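
/*
 * A minimal usage sketch for the API above, roughly as a device construct
 * callback might use it. The image array, its size and the base address are
 * hypothetical placeholders; real devices typically register ROMs through the
 * PDM device helpers rather than calling PGM directly.
 *
 *     static const uint8_t g_abExampleRom[_64K] = { 0 };     // hypothetical image data
 *
 *     int rc = PGMR3PhysRomRegister(pVM, pDevIns,
 *                                   UINT32_C(0xfffe0000),    // GCPhys, page aligned
 *                                   sizeof(g_abExampleRom),  // cb, page aligned
 *                                   g_abExampleRom, sizeof(g_abExampleRom),
 *                                   PGMPHYS_ROM_FLAGS_PERMANENT_BINARY, "Example ROM");
 *     AssertRCReturn(rc, rc);
 */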
5133
5134
5135/**
5136 * Called by PGMR3MemSetup to reset the shadow, switch to the virgin, and verify
5137 * that the virgin part is untouched.
5138 *
5139 * This is done after the normal memory has been cleared.
5140 *
5141 * ASSUMES that the caller owns the PGM lock.
5142 *
5143 * @param pVM The cross context VM structure.
5144 */
5145int pgmR3PhysRomReset(PVM pVM)
5146{
5147 PGM_LOCK_ASSERT_OWNER(pVM);
5148 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5149 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5150 {
5151 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5152 uint32_t const cGuestPages = pRom->cb >> GUEST_PAGE_SHIFT;
5153
5154 if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5155 {
5156 /*
5157 * Reset the physical handler.
5158 */
5159 int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE);
5160 AssertRCReturn(rc, rc);
5161
5162 /*
5163 * What we do with the shadow pages depends on the memory
5164 * preallocation option. If not enabled, we'll just throw
5165 * out all the dirty pages and replace them by the zero page.
5166 */
5167#ifdef VBOX_WITH_PGM_NEM_MODE
5168 if (PGM_IS_IN_NEM_MODE(pVM))
5169 {
5170 /* Clear all the shadow pages (currently using alternate backing). */
5171 RT_BZERO(pRom->pbR3Alternate, pRom->cb);
5172 }
5173# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5174 else
5175# endif
5176#endif
5177#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5178 if (!pVM->pgm.s.fRamPreAlloc)
5179 {
5180 /* Free the dirty pages. */
5181 uint32_t cPendingPages = 0;
5182 PGMMFREEPAGESREQ pReq;
5183 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5184 AssertRCReturn(rc, rc);
5185
5186 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5187 if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)
5188 && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow))
5189 {
5190 Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED);
5191 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow,
5192 pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT),
5193 (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pRom->aPages[iPage].Shadow));
5194 AssertLogRelRCReturn(rc, rc);
5195 }
5196
5197 if (cPendingPages)
5198 {
5199 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5200 AssertLogRelRCReturn(rc, rc);
5201 }
5202 GMMR3FreePagesCleanup(pReq);
5203 }
5204 else
5205 {
5206 /* clear all the shadow pages. */
5207 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5208 {
5209 if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow))
5210 continue;
5211 Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow));
5212 void *pvDstPage;
5213 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5214 rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage);
5215 if (RT_FAILURE(rc))
5216 break;
5217 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE);
5218 }
5219 AssertRCReturn(rc, rc);
5220 }
5221#endif
5222 }
5223
5224 /*
5225 * Restore the original ROM pages after a saved state load.
5226 * Also, in strict builds check that ROM pages remain unmodified.
5227 */
5228#ifndef VBOX_STRICT
5229 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5230#endif
5231 {
5232 size_t cbSrcLeft = pRom->cbOriginal;
5233 uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal;
5234 uint32_t cRestored = 0;
5235 for (uint32_t iPage = 0; iPage < cGuestPages && cbSrcLeft > 0; iPage++, pbSrcPage += GUEST_PAGE_SIZE)
5236 {
5237 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5238 PPGMPAGE const pPage = pgmPhysGetPage(pVM, GCPhys);
5239 void const *pvDstPage = NULL;
5240 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhys, &pvDstPage);
5241 if (RT_FAILURE(rc))
5242 break;
5243
5244 if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE)))
5245 {
5246 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5247 {
5248 void *pvDstPageW = NULL;
5249 rc = pgmPhysPageMap(pVM, pPage, GCPhys, &pvDstPageW);
5250 AssertLogRelRCReturn(rc, rc);
5251 memcpy(pvDstPageW, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE));
5252 cRestored++;
5253 }
5254 else
5255 LogRel(("pgmR3PhysRomReset: %RGp: ROM page changed (%s)\n", GCPhys, pRom->pszDesc));
5256 }
5257 cbSrcLeft -= RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE);
5258 }
5259 if (cRestored > 0)
5260 LogRel(("PGM: ROM \"%s\": Reloaded %u of %u pages.\n", pRom->pszDesc, cRestored, cGuestPages));
5261 }
5262 }
5263
5264 /* Clear the ROM restore flag now as we only need to do this once after
5265 loading a saved state. */
5266 pVM->pgm.s.fRestoreRomPagesOnReset = false;
5267
5268 return VINF_SUCCESS;
5269}
5270
5271
5272/**
5273 * Called by PGMR3Term to free resources.
5274 *
5275 * ASSUMES that the caller owns the PGM lock.
5276 *
5277 * @param pVM The cross context VM structure.
5278 */
5279void pgmR3PhysRomTerm(PVM pVM)
5280{
5281 /*
5282 * Free the heap copy of the original bits.
5283 */
5284 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5285 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5286 {
5287 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5288 if ( pRom->pvOriginal
5289 && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY))
5290 {
5291 RTMemFree((void *)pRom->pvOriginal);
5292 pRom->pvOriginal = NULL;
5293 }
5294 }
5295}
5296
5297
5298/**
5299 * Change the shadowing of a range of ROM pages.
5300 *
5301 * This is intended for implementing chipset-specific memory registers
5302 * and will not be very strict about the input. It will silently ignore
5303 * any pages that are not part of a shadowed ROM.
5304 *
5305 * @returns VBox status code.
5306 * @retval VINF_PGM_SYNC_CR3
5307 *
5308 * @param pVM The cross context VM structure.
5309 * @param GCPhys Where to start. Page aligned.
5310 * @param cb How much to change. Page aligned.
5311 * @param enmProt The new ROM protection.
5312 */
5313VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt)
5314{
5315 LogFlow(("PGMR3PhysRomProtect: GCPhys=%RGp cb=%RGp enmProt=%d\n", GCPhys, cb, enmProt));
5316
5317 /*
5318 * Check input
5319 */
5320 if (!cb)
5321 return VINF_SUCCESS;
5322 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5323 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5324 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
5325 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
5326 AssertReturn(enmProt >= PGMROMPROT_INVALID && enmProt <= PGMROMPROT_END, VERR_INVALID_PARAMETER);
5327
5328 /*
5329 * Process the request.
5330 */
5331 PGM_LOCK_VOID(pVM);
5332 int rc = VINF_SUCCESS;
5333 bool fFlushTLB = false;
5334 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5335 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5336 {
5337 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5338 if ( GCPhys <= pRom->GCPhysLast
5339 && GCPhysLast >= pRom->GCPhys
5340 && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))
5341 {
5342 /*
5343 * Iterate the relevant pages and make the necessary changes.
5344 */
5345#ifdef VBOX_WITH_NATIVE_NEM
5346 PPGMRAMRANGE const pRam = pgmPhysGetRange(pVM, GCPhys);
5347 AssertPtrReturn(pRam, VERR_INTERNAL_ERROR_3);
5348#endif
5349 bool fChanges = false;
5350 uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast
5351 ? pRom->cb >> GUEST_PAGE_SHIFT
5352 : (GCPhysLast - pRom->GCPhys + 1) >> GUEST_PAGE_SHIFT;
5353 for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> GUEST_PAGE_SHIFT;
5354 iPage < cPages;
5355 iPage++)
5356 {
5357 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
5358 if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt))
5359 {
5360 fChanges = true;
5361
5362 /* flush references to the page. */
5363 RTGCPHYS const GCPhysPage = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5364 PPGMPAGE pRamPage = pgmPhysGetPage(pVM, GCPhysPage);
5365#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5366 int rc2 = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pRamPage, true /*fFlushPTEs*/, &fFlushTLB);
5367 if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2)))
5368 rc = rc2;
5369#endif
5370#ifdef VBOX_WITH_NATIVE_NEM
5371 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pRamPage);
5372#endif
5373
5374 PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow;
5375 PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin;
5376
5377 *pOld = *pRamPage;
5378 *pRamPage = *pNew;
5379 /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */
5380
5381#ifdef VBOX_WITH_NATIVE_NEM
5382# ifdef VBOX_WITH_PGM_NEM_MODE
5383 /* In simplified mode we have to switch the page data around too. */
5384 if (PGM_IS_IN_NEM_MODE(pVM))
5385 {
5386 uint8_t abPage[GUEST_PAGE_SIZE];
5387 uint8_t * const pbRamPage = PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage);
5388 memcpy(abPage, &pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], sizeof(abPage));
5389 memcpy(&pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], pbRamPage, sizeof(abPage));
5390 memcpy(pbRamPage, abPage, sizeof(abPage));
5391 }
5392# endif
5393 /* Tell NEM about the backing and protection change. */
5394 if (VM_IS_NEM_ENABLED(pVM))
5395 {
5396 PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pNew);
5397 NEMHCNotifyPhysPageChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pOld), PGM_PAGE_GET_HCPHYS(pNew),
5398 PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage),
5399 pgmPhysPageCalcNemProtection(pRamPage, enmType), enmType, &u2State);
5400 PGM_PAGE_SET_NEM_STATE(pRamPage, u2State);
5401 }
5402#endif
5403 }
5404 pRomPage->enmProt = enmProt;
5405 }
5406
5407 /*
5408 * Reset the access handler if we made changes, no need to optimize this.
5409 */
5410 if (fChanges)
5411 {
5412 int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys);
5413 if (RT_FAILURE(rc2))
5414 {
5415 PGM_UNLOCK(pVM);
5416 AssertRC(rc);
5417 return rc2;
5418 }
5419
5420 /* Explicitly flush IEM. Not sure if this is really necessary, but better
5421 be on the safe side. This shouldn't be a high volume flush source. */
5422 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ROM_PROTECT);
5423 }
5424
5425 /* Advance - cb isn't updated. */
5426 GCPhys = pRom->GCPhys + (cPages << GUEST_PAGE_SHIFT);
5427 }
5428 }
5429 PGM_UNLOCK(pVM);
5430 if (fFlushTLB)
5431 PGM_INVL_ALL_VCPU_TLBS(pVM);
5432
5433 return rc;
5434}
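
/*
 * A minimal sketch of how a chipset device model might drive this API when the
 * guest reprograms its shadow-RAM control registers. The 0xf0000 base and 64KB
 * size are illustrative; PGMROMPROT_READ_RAM_WRITE_RAM is assumed to be one of
 * the PGMROMPROT modes declared alongside PGMROMPROT_READ_ROM_WRITE_IGNORE.
 *
 *     // Guest enabled reading and writing the shadow copy:
 *     rc = PGMR3PhysRomProtect(pVM, UINT32_C(0x000f0000), _64K, PGMROMPROT_READ_RAM_WRITE_RAM);
 *
 *     // Guest switched back to reading the ROM and ignoring writes:
 *     rc = PGMR3PhysRomProtect(pVM, UINT32_C(0x000f0000), _64K, PGMROMPROT_READ_ROM_WRITE_IGNORE);
 */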
5435
5436
5437
5438/*********************************************************************************************************************************
5439* Ballooning *
5440*********************************************************************************************************************************/
5441
5442#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5443
5444/**
5445 * Rendezvous callback used by PGMR3PhysChangeMemBalloon that changes the memory balloon size
5446 *
5447 * This is only called on one of the EMTs while the other ones are waiting for
5448 * it to complete this function.
5449 *
5450 * @returns VINF_SUCCESS (VBox strict status code).
5451 * @param pVM The cross context VM structure.
5452 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5453 * @param pvUser User parameter
5454 */
5455static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5456{
5457 uintptr_t *paUser = (uintptr_t *)pvUser;
5458 bool fInflate = !!paUser[0];
5459 unsigned cPages = paUser[1];
5460 RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2];
5461 int rc = VINF_SUCCESS;
5462 RT_NOREF(pVCpu);
5463
5464 Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages));
5465 PGM_LOCK_VOID(pVM);
5466
5467 if (fInflate)
5468 {
5469# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5470 /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */
5471 pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL);
5472# endif
5473
5474 /* Replace pages with ZERO pages. */
5475 uint32_t cPendingPages = 0;
5476 PGMMFREEPAGESREQ pReq = NULL;
5477 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5478 if (RT_FAILURE(rc))
5479 {
5480 PGM_UNLOCK(pVM);
5481 AssertLogRelRC(rc);
5482 return rc;
5483 }
5484
5485 /* Iterate the pages. */
5486 for (unsigned i = 0; i < cPages; i++)
5487 {
5488 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5489 if ( pPage == NULL
5490 || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM)
5491 {
5492 Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0));
5493 break;
5494 }
5495
5496 LogFlow(("balloon page: %RGp\n", paPhysPage[i]));
5497
5498# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5499 /* Flush the shadow PT if this page was previously used as a guest page table. */
5500 pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]);
5501# endif
5502
5503 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i], (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage));
5504 if (RT_FAILURE(rc))
5505 {
5506 PGM_UNLOCK(pVM);
5507 AssertLogRelRC(rc);
5508 return rc;
5509 }
5510 Assert(PGM_PAGE_IS_ZERO(pPage));
5511 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED);
5512 }
5513
5514 if (cPendingPages)
5515 {
5516 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5517 if (RT_FAILURE(rc))
5518 {
5519 PGM_UNLOCK(pVM);
5520 AssertLogRelRC(rc);
5521 return rc;
5522 }
5523 }
5524 GMMR3FreePagesCleanup(pReq);
5525 }
5526 else
5527 {
5528 /* Iterate the pages. */
5529 for (unsigned i = 0; i < cPages; i++)
5530 {
5531 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5532 AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM);
5533
5534 LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i]));
5535
5536 Assert(PGM_PAGE_IS_BALLOONED(pPage));
5537
5538 /* Change back to zero page. (NEM does not need to be informed.) */
5539 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
5540 }
5541
5542 /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */
5543 }
5544
5545# if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5546 /* Notify GMM about the balloon change. */
5547 rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages);
5548 if (RT_SUCCESS(rc))
5549 {
5550 if (!fInflate)
5551 {
5552 Assert(pVM->pgm.s.cBalloonedPages >= cPages);
5553 pVM->pgm.s.cBalloonedPages -= cPages;
5554 }
5555 else
5556 pVM->pgm.s.cBalloonedPages += cPages;
5557 }
5558# endif
5559
5560 PGM_UNLOCK(pVM);
5561
5562 /* Flush the recompiler's TLB as well. */
5563 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5564 CPUMSetChangedFlags(pVM->apCpusR3[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5565
5566 AssertLogRelRC(rc);
5567 return rc;
5568}
5569
5570
5571/**
5572 * Inflates or deflates the memory balloon via an EMT rendezvous; helper for PGMR3PhysChangeMemBalloon.
5573 *
5574 * @param pVM The cross context VM structure.
5575 * @param fInflate Inflate or deflate memory balloon
5576 * @param cPages Number of pages to free
5577 * @param paPhysPage Array of guest physical addresses
5578 */
5579static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5580{
5581 uintptr_t paUser[3];
5582
5583 paUser[0] = fInflate;
5584 paUser[1] = cPages;
5585 paUser[2] = (uintptr_t)paPhysPage;
5586 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5587 AssertRC(rc);
5588
5589 /* Made a copy in PGMR3PhysChangeMemBalloon; free it here. */
5590 RTMemFree(paPhysPage);
5591}
5592
5593#endif /* 64-bit host && (Windows || Solaris || Linux || FreeBSD) */
5594
5595/**
5596 * Inflate or deflate a memory balloon
5597 *
5598 * @returns VBox status code.
5599 * @param pVM The cross context VM structure.
5600 * @param fInflate Inflate or deflate memory balloon
5601 * @param cPages Number of pages to free
5602 * @param paPhysPage Array of guest physical addresses
5603 */
5604VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5605{
5606 /* This must match GMMR0Init; currently we only support memory ballooning on all 64-bit hosts except Mac OS X */
5607#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5608 int rc;
5609
5610 /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */
5611 AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER);
5612
5613 /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock.
5614 * In the SMP case we post a request packet to postpone the job.
5615 */
5616 if (pVM->cCpus > 1)
5617 {
5618 unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]);
5619 RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage);
5620 AssertReturn(paPhysPageCopy, VERR_NO_MEMORY);
5621
5622 memcpy(paPhysPageCopy, paPhysPage, cbPhysPage);
5623
5624 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4,
5625 pVM, fInflate, cPages, paPhysPageCopy);
5626 AssertRC(rc);
5627 }
5628 else
5629 {
5630 uintptr_t paUser[3];
5631
5632 paUser[0] = fInflate;
5633 paUser[1] = cPages;
5634 paUser[2] = (uintptr_t)paPhysPage;
5635 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5636 AssertRC(rc);
5637 }
5638 return rc;
5639
5640#else
5641 NOREF(pVM); NOREF(fInflate); NOREF(cPages); NOREF(paPhysPage);
5642 return VERR_NOT_IMPLEMENTED;
5643#endif
5644}
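
/*
 * A minimal sketch of a caller servicing a guest balloon request with this
 * API; how cPages and the page addresses are obtained from the guest is
 * illustrative only.
 *
 *     RTGCPHYS *paPages = (RTGCPHYS *)RTMemAlloc(cPages * sizeof(RTGCPHYS));
 *     AssertReturn(paPages, VERR_NO_MEMORY);
 *     // fill paPages[] with page aligned guest physical addresses reported by the guest
 *     int rc = PGMR3PhysChangeMemBalloon(pVM, true, cPages, paPages); // true = inflate
 *     RTMemFree(paPages);
 */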
5645
5646
5647
5648/*********************************************************************************************************************************
5649* Write Monitoring *
5650*********************************************************************************************************************************/
5651
5652/**
5653 * Rendezvous callback used by PGMR3PhysWriteProtectRAM that write protects all
5654 * physical RAM.
5655 *
5656 * This is only called on one of the EMTs while the other ones are waiting for
5657 * it to complete this function.
5658 *
5659 * @returns VINF_SUCCESS (VBox strict status code).
5660 * @param pVM The cross context VM structure.
5661 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5662 * @param pvUser User parameter, unused.
5663 */
5664static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5665{
5666 int rc = VINF_SUCCESS;
5667 NOREF(pvUser); NOREF(pVCpu);
5668
5669 PGM_LOCK_VOID(pVM);
5670#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5671 pgmPoolResetDirtyPages(pVM);
5672#endif
5673
5674 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
5675 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
5676 {
5677 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
5678 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
5679 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
5680 AssertContinue(pRam);
5681
5682 uint32_t cPages = pRam->cb >> GUEST_PAGE_SHIFT;
5683 for (uint32_t iPage = 0; iPage < cPages; iPage++)
5684 {
5685 PPGMPAGE const pPage = &pRam->aPages[iPage];
5686 PGMPAGETYPE const enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
5687
5688 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
5689 || enmPageType == PGMPAGETYPE_MMIO2)
5690 {
5691 /*
5692 * A RAM page.
5693 */
5694 switch (PGM_PAGE_GET_STATE(pPage))
5695 {
5696 case PGM_PAGE_STATE_ALLOCATED:
5697 /** @todo Optimize this: Don't always re-enable write
5698 * monitoring if the page is known to be very busy. */
5699 if (PGM_PAGE_IS_WRITTEN_TO(pPage))
5700 PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage);
5701
5702 pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
5703 break;
5704
5705 case PGM_PAGE_STATE_SHARED:
5706 AssertFailed();
5707 break;
5708
5709 case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */
5710 default:
5711 break;
5712 }
5713 }
5714 }
5715 }
5716#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5717 pgmR3PoolWriteProtectPages(pVM);
5718#endif
5719 PGM_INVL_ALL_VCPU_TLBS(pVM);
5720 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
5721 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5722
5723 PGM_UNLOCK(pVM);
5724 return rc;
5725}
5726
5727/**
5728 * Protect all physical RAM to monitor writes
5729 *
5730 * @returns VBox status code.
5731 * @param pVM The cross context VM structure.
5732 */
5733VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM)
5734{
5735 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
5736
5737 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL);
5738 AssertRC(rc);
5739 return rc;
5740}
5741
5742
5743/*********************************************************************************************************************************
5744* Stats. *
5745*********************************************************************************************************************************/
5746
5747/**
5748 * Query the amount of free memory inside VMMR0
5749 *
5750 * @returns VBox status code.
5751 * @param pUVM The user mode VM handle.
5752 * @param pcbAllocMem Where to return the amount of memory allocated
5753 * by VMs.
5754 * @param pcbFreeMem Where to return the amount of memory that is
5755 * allocated from the host but not currently used
5756 * by any VMs.
5757 * @param pcbBallonedMem Where to return the sum of memory that is
5758 * currently ballooned by the VMs.
5759 * @param pcbSharedMem Where to return the amount of memory that is
5760 * currently shared.
5761 */
5762VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PUVM pUVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem,
5763 uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem)
5764{
5765 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5766 VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE);
5767
5768 uint64_t cAllocPages = 0;
5769 uint64_t cFreePages = 0;
5770 uint64_t cBalloonPages = 0;
5771 uint64_t cSharedPages = 0;
5772#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5773 if (!SUPR3IsDriverless())
5774 {
5775 int rc = GMMR3QueryHypervisorMemoryStats(pUVM->pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages);
5776 AssertRCReturn(rc, rc);
5777 }
5778#endif
5779
5780 if (pcbAllocMem)
5781 *pcbAllocMem = cAllocPages * _4K;
5782
5783 if (pcbFreeMem)
5784 *pcbFreeMem = cFreePages * _4K;
5785
5786 if (pcbBallonedMem)
5787 *pcbBallonedMem = cBalloonPages * _4K;
5788
5789 if (pcbSharedMem)
5790 *pcbSharedMem = cSharedPages * _4K;
5791
5792 Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n",
5793 cAllocPages, cFreePages, cBalloonPages, cSharedPages));
5794 return VINF_SUCCESS;
5795}
5796
5797
5798/**
5799 * Query memory stats for the VM.
5800 *
5801 * @returns VBox status code.
5802 * @param pUVM The user mode VM handle.
5803 * @param pcbTotalMem Where to return total amount memory the VM may
5804 * possibly use.
5805 * @param pcbPrivateMem Where to return the amount of private memory
5806 * currently allocated.
5807 * @param pcbSharedMem Where to return the amount of actually shared
5808 * memory currently used by the VM.
5809 * @param pcbZeroMem Where to return the amount of memory backed by
5810 * zero pages.
5811 *
5812 * @remarks The total mem is normally larger than the sum of the three
5813 * components. There are two reasons for this: first, the amount of
5814 * shared memory is what we're sure is shared rather than what could
5815 * possibly be shared with someone. Secondly, the total may
5816 * include some pure MMIO pages that don't go into any of the three
5817 * sub-counts.
5818 *
5819 * @todo Why do we return reused shared pages instead of anything that could
5820 * potentially be shared? Doesn't this mean the first VM gets a much
5821 * lower number of shared pages?
5822 */
5823VMMR3DECL(int) PGMR3QueryMemoryStats(PUVM pUVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem,
5824 uint64_t *pcbSharedMem, uint64_t *pcbZeroMem)
5825{
5826 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5827 PVM pVM = pUVM->pVM;
5828 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
5829
5830 if (pcbTotalMem)
5831 *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * GUEST_PAGE_SIZE;
5832
5833 if (pcbPrivateMem)
5834 *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * GUEST_PAGE_SIZE;
5835
5836 if (pcbSharedMem)
5837 *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * GUEST_PAGE_SIZE;
5838
5839 if (pcbZeroMem)
5840 *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * GUEST_PAGE_SIZE;
5841
5842 Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages));
5843 return VINF_SUCCESS;
5844}
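
/*
 * A minimal sketch of querying both the global and the per-VM statistics and
 * reporting them in megabytes; the logging and variable names are illustrative.
 *
 *     uint64_t cbAlloc = 0, cbFree = 0, cbBallooned = 0, cbShared = 0;
 *     int rc = PGMR3QueryGlobalMemoryStats(pUVM, &cbAlloc, &cbFree, &cbBallooned, &cbShared);
 *     if (RT_SUCCESS(rc))
 *         LogRel(("Global: alloc=%RU64 MB free=%RU64 MB\n", cbAlloc / _1M, cbFree / _1M));
 *
 *     uint64_t cbTotal = 0, cbPrivate = 0, cbSharedVM = 0, cbZero = 0;
 *     rc = PGMR3QueryMemoryStats(pUVM, &cbTotal, &cbPrivate, &cbSharedVM, &cbZero);
 *     if (RT_SUCCESS(rc))
 *         LogRel(("VM: total=%RU64 MB private=%RU64 MB zero=%RU64 MB\n",
 *                 cbTotal / _1M, cbPrivate / _1M, cbZero / _1M));
 */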
5845
5846
5847
5848/*********************************************************************************************************************************
5849* Chunk Mappings and Page Allocation *
5850*********************************************************************************************************************************/
5851#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5852
5853/**
5854 * Tree enumeration callback for dealing with age rollover.
5855 * It will perform a simple compression of the current age.
5856 */
5857static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser)
5858{
5859 /* Age compression - ASSUMES iNow == 4. */
5860 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5861 if (pChunk->iLastUsed >= UINT32_C(0xffffff00))
5862 pChunk->iLastUsed = 3;
5863 else if (pChunk->iLastUsed >= UINT32_C(0xfffff000))
5864 pChunk->iLastUsed = 2;
5865 else if (pChunk->iLastUsed)
5866 pChunk->iLastUsed = 1;
5867 else /* iLastUsed = 0 */
5868 pChunk->iLastUsed = 4;
5869
5870 NOREF(pvUser);
5871 return 0;
5872}
5873
5874
5875/**
5876 * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback().
5877 */
5878typedef struct PGMR3PHYSCHUNKUNMAPCB
5879{
5880 PVM pVM; /**< Pointer to the VM. */
5881 PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */
5882} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB;
5883
5884
5885/**
5886 * Callback used to find the mapping that's been unused for
5887 * the longest time.
5888 */
5889static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser)
5890{
5891 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5892 PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser;
5893
5894 /*
5895 * Check for locks and compare when last used.
5896 */
5897 if (pChunk->cRefs)
5898 return 0;
5899 if (pChunk->cPermRefs)
5900 return 0;
5901 if ( pArg->pChunk
5902 && pChunk->iLastUsed >= pArg->pChunk->iLastUsed)
5903 return 0;
5904
5905 /*
5906 * Check that it's not in any of the TLBs.
5907 */
5908 PVM pVM = pArg->pVM;
5909 if ( pVM->pgm.s.ChunkR3Map.Tlb.aEntries[PGM_CHUNKR3MAPTLB_IDX(pChunk->Core.Key)].idChunk
5910 == pChunk->Core.Key)
5911 {
5912 pChunk = NULL;
5913 return 0;
5914 }
5915# ifdef VBOX_STRICT
5916 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
5917 {
5918 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk != pChunk);
5919 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk != pChunk->Core.Key);
5920 }
5921# endif
5922
5923# if 0 /* This is too much work with the PGMCPU::PhysTlb as well. We flush them all instead. */
5924 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbR3.aEntries); i++)
5925 if (pVM->pgm.s.PhysTlbR3.aEntries[i].pMap == pChunk)
5926 return 0;
5927# endif
5928
5929 pArg->pChunk = pChunk;
5930 return 0;
5931}
5932
5933
5934/**
5935 * Finds a good candidate for unmapping when the ring-3 mapping cache is full.
5936 *
5937 * The candidate will not be part of any TLBs, so no need to flush
5938 * anything afterwards.
5939 *
5940 * @returns Chunk id.
5941 * @param pVM The cross context VM structure.
5942 */
5943static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM)
5944{
5945 PGM_LOCK_ASSERT_OWNER(pVM);
5946
5947 /*
5948 * Enumerate the age tree starting with the left most node.
5949 */
5950 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5951 PGMR3PHYSCHUNKUNMAPCB Args;
5952 Args.pVM = pVM;
5953 Args.pChunk = NULL;
5954 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args);
5955 Assert(Args.pChunk);
5956 if (Args.pChunk)
5957 {
5958 Assert(Args.pChunk->cRefs == 0);
5959 Assert(Args.pChunk->cPermRefs == 0);
5960 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5961 return Args.pChunk->Core.Key;
5962 }
5963
5964 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5965 return INT32_MAX;
5966}
5967
5968
5969/**
5970 * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk
5971 *
5972 * This is only called on one of the EMTs while the other ones are waiting for
5973 * it to complete this function.
5974 *
5975 * @returns VINF_SUCCESS (VBox strict status code).
5976 * @param pVM The cross context VM structure.
5977 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5978 * @param pvUser User pointer. Unused
5979 *
5980 */
5981static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5982{
5983 int rc = VINF_SUCCESS;
5984 PGM_LOCK_VOID(pVM);
5985 NOREF(pVCpu); NOREF(pvUser);
5986
5987 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
5988 {
5989# ifdef VBOX_WITH_ONLY_PGM_NEM_MODE
5990# error
5991# endif
5992 /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */
5993 /** @todo also not really efficient to unmap a chunk that contains PD
5994 * or PT pages. */
5995 pgmR3PoolClearAllRendezvous(pVM, pVM->apCpusR3[0], NULL /* no need to flush the REM TLB as we already did that above */);
5996
5997 /*
5998 * Request the ring-0 part to unmap a chunk to make space in the mapping cache.
5999 */
6000 GMMMAPUNMAPCHUNKREQ Req;
6001 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
6002 Req.Hdr.cbReq = sizeof(Req);
6003 Req.pvR3 = NULL;
6004 Req.idChunkMap = NIL_GMM_CHUNKID;
6005 Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM);
6006 if (Req.idChunkUnmap != INT32_MAX)
6007 {
6008 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkUnmap, a);
6009 rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
6010 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkUnmap, a);
6011 if (RT_SUCCESS(rc))
6012 {
6013 /*
6014 * Remove the unmapped one.
6015 */
6016 PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap);
6017 AssertRelease(pUnmappedChunk);
6018 AssertRelease(!pUnmappedChunk->cRefs);
6019 AssertRelease(!pUnmappedChunk->cPermRefs);
6020 pUnmappedChunk->pv = NULL;
6021 pUnmappedChunk->Core.Key = UINT32_MAX;
6022 MMR3HeapFree(pUnmappedChunk);
6023 pVM->pgm.s.ChunkR3Map.c--;
6024 pVM->pgm.s.cUnmappedChunks++;
6025
6026 /*
6027 * Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses).
6028 */
6029 /** @todo We should not flush chunks which include cr3 mappings. */
6030 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
6031 {
6032# ifdef VBOX_VMM_TARGET_X86
6033 PPGMCPU pPGM = &pVM->apCpusR3[idCpu]->pgm.s;
6034
6035 pPGM->pGst32BitPdR3 = NULL;
6036 pPGM->pGstPaePdptR3 = NULL;
6037 pPGM->pGstAmd64Pml4R3 = NULL;
6038 pPGM->pGstEptPml4R3 = NULL;
6039 pPGM->pGst32BitPdR0 = NIL_RTR0PTR;
6040 pPGM->pGstPaePdptR0 = NIL_RTR0PTR;
6041 pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR;
6042 pPGM->pGstEptPml4R0 = NIL_RTR0PTR;
6043 for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++)
6044 {
6045 pPGM->apGstPaePDsR3[i] = NULL;
6046 pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR;
6047 }
6048# endif
6049
6050 /* Flush REM TLBs. */
6051 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
6052 }
6053
6054 pgmR3PhysChunkInvalidateTLB(pVM, true /*fInRendezvous*/); /* includes pgmPhysInvalidatePageMapTLB call */
6055 }
6056 }
6057 }
6058 PGM_UNLOCK(pVM);
6059 return rc;
6060}
6061
6062/**
6063 * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap)
6064 *
6065 * @param pVM The cross context VM structure.
6066 */
6067static DECLCALLBACK(void) pgmR3PhysUnmapChunk(PVM pVM)
6068{
6069 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL);
6070 AssertRC(rc);
6071}
6072
6073
6074/**
6075 * Maps the given chunk into the ring-3 mapping cache.
6076 *
6077 * This will call ring-0.
6078 *
6079 * @returns VBox status code.
6080 * @param pVM The cross context VM structure.
6081 * @param idChunk The chunk in question.
6082 * @param ppChunk Where to store the chunk tracking structure.
6083 *
6084 * @remarks Called from within the PGM critical section.
6085 * @remarks Can be called from any thread!
6086 */
6087int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
6088{
6089 int rc;
6090
6091 PGM_LOCK_ASSERT_OWNER(pVM);
6092
6093 /*
6094 * Move the chunk time forward.
6095 */
6096 pVM->pgm.s.ChunkR3Map.iNow++;
6097 if (pVM->pgm.s.ChunkR3Map.iNow == 0)
6098 {
6099 pVM->pgm.s.ChunkR3Map.iNow = 4;
6100 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, NULL);
6101 }
6102
6103 /*
6104 * Allocate a new tracking structure first.
6105 */
6106 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk));
6107 AssertReturn(pChunk, VERR_NO_MEMORY);
6108 pChunk->Core.Key = idChunk;
6109 pChunk->iLastUsed = pVM->pgm.s.ChunkR3Map.iNow;
6110
6111 /*
6112 * Request the ring-0 part to map the chunk in question.
6113 */
6114 GMMMAPUNMAPCHUNKREQ Req;
6115 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
6116 Req.Hdr.cbReq = sizeof(Req);
6117 Req.pvR3 = NULL;
6118 Req.idChunkMap = idChunk;
6119 Req.idChunkUnmap = NIL_GMM_CHUNKID;
6120
6121 /* Must be callable from any thread, so can't use VMMR3CallR0. */
6122 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkMap, a);
6123 rc = SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
6124 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkMap, a);
6125 if (RT_SUCCESS(rc))
6126 {
6127 pChunk->pv = Req.pvR3;
6128
6129 /*
6130 * If we're running out of virtual address space, then we should
6131 * unmap another chunk.
6132 *
6133 * Currently, an unmap operation requires that all other virtual CPUs
6134 * are idling and not by chance making use of the memory we're
6135 * unmapping. So, we create an async unmap operation here.
6136 *
6137 * Now, when creating or restoring a saved state this won't work very
6138 * well since we may want to restore all guest RAM + a little something.
6139 * So, we have to do the unmap synchronously. Fortunately for us
6140 * though, during these operations the other virtual CPUs are inactive
6141 * and it should be safe to do this.
6142 */
6143 /** @todo Eventually we should lock all memory when used and do
6144 * map+unmap as one kernel call without any rendezvous or
6145 * other precautions. */
6146 if (pVM->pgm.s.ChunkR3Map.c + 1 >= pVM->pgm.s.ChunkR3Map.cMax)
6147 {
6148 switch (VMR3GetState(pVM))
6149 {
6150 case VMSTATE_LOADING:
6151 case VMSTATE_SAVING:
6152 {
6153 PVMCPU pVCpu = VMMGetCpu(pVM);
6154 if ( pVCpu
6155 && pVM->pgm.s.cDeprecatedPageLocks == 0)
6156 {
6157 pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL);
6158 break;
6159 }
6160 }
6161 RT_FALL_THRU();
6162 default:
6163 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
6164 AssertRC(rc);
6165 break;
6166 }
6167 }
6168
6169 /*
6170 * Update the tree. We must do this after any unmapping to make sure
6171 * the chunk we're going to return isn't unmapped by accident.
6172 */
6173 AssertPtr(Req.pvR3);
6174 bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core);
6175 AssertRelease(fRc);
6176 pVM->pgm.s.ChunkR3Map.c++;
6177 pVM->pgm.s.cMappedChunks++;
6178 }
6179 else
6180 {
6181 /** @todo this may fail because of /proc/sys/vm/max_map_count, so we
6182 * should probably restrict ourselves on linux. */
6183 AssertRC(rc);
6184 MMR3HeapFree(pChunk);
6185 pChunk = NULL;
6186 }
6187
6188 *ppChunk = pChunk;
6189 return rc;
6190}
6191
6192
6193/**
6194 * Invalidates the TLB for the ring-3 mapping cache.
6195 *
6196 * @param pVM The cross context VM structure.
6197 * @param fInRendezvous Set if we're in a rendezvous.
6198 */
6199DECLHIDDEN(void) pgmR3PhysChunkInvalidateTLB(PVM pVM, bool fInRendezvous)
6200{
6201 PGM_LOCK_VOID(pVM);
6202 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
6203 {
6204 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID;
6205 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL;
6206 }
6207 /* The page map TLB references chunks, so invalidate that one too. */
6208 pgmPhysInvalidatePageMapTLB(pVM, fInRendezvous);
6209 PGM_UNLOCK(pVM);
6210}
6211
6212
6213/**
6214 * Response to VM_FF_PGM_NEED_HANDY_PAGES and helper for pgmPhysEnsureHandyPage.
6215 *
6216 * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to
6217 * signal and clear the out of memory condition. When called, this API is used
6218 * to try to clear the condition when the user wants to resume.
6219 *
6220 * @returns The following VBox status codes.
6221 * @retval VINF_SUCCESS on success. FFs cleared.
6222 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in
6223 * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY.
6224 *
6225 * @param pVM The cross context VM structure.
6226 *
6227 * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing
6228 * in EM.cpp and shouldn't be propagated outside TRPM, HM, EM and
6229 * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF
6230 * handler.
6231 */
6232VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM)
6233{
6234 PGM_LOCK_VOID(pVM);
6235
6236 /*
6237 * Allocate more pages, noting down the index of the first new page.
6238 */
6239 uint32_t iClear = pVM->pgm.s.cHandyPages;
6240 AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_PGM_HANDY_PAGE_IPE);
6241 Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages)));
6242 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
6243 /** @todo we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */
6244 if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT
6245 && pVM->pgm.s.cHandyPages > 0)
6246 {
6247 /* Still handy pages left, so don't panic. */
6248 rc = VINF_SUCCESS;
6249 }
6250
6251 if (RT_SUCCESS(rc))
6252 {
6253 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
6254 Assert(pVM->pgm.s.cHandyPages > 0);
6255# ifdef VBOX_STRICT
6256 uint32_t i;
6257 for (i = iClear; i < pVM->pgm.s.cHandyPages; i++)
6258 if ( pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID
6259 || pVM->pgm.s.aHandyPages[i].idSharedPage != NIL_GMM_PAGEID
6260 || (pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & GUEST_PAGE_OFFSET_MASK))
6261 break;
6262 if (i != pVM->pgm.s.cHandyPages)
6263 {
6264 RTAssertMsg1Weak(NULL, __LINE__, __FILE__, __FUNCTION__);
6265 RTAssertMsg2Weak("i=%d iClear=%d cHandyPages=%d\n", i, iClear, pVM->pgm.s.cHandyPages);
6266 for (uint32_t j = iClear; j < pVM->pgm.s.cHandyPages; j++)
6267 RTAssertMsg2Add("%03d: idPage=%d HCPhysGCPhys=%RHp idSharedPage=%d%s\n", j,
6268 pVM->pgm.s.aHandyPages[j].idPage,
6269 pVM->pgm.s.aHandyPages[j].HCPhysGCPhys,
6270 pVM->pgm.s.aHandyPages[j].idSharedPage,
6271 j == i ? " <---" : "");
6272 RTAssertPanic();
6273 }
6274# endif
6275 }
6276 else
6277 {
6278 /*
6279 * We should never get here unless there is a genuine shortage of
6280 * memory (or some internal error). Flag the error so the VM can be
6281 * suspended ASAP and the user informed. If we're totally out of
6282 * handy pages we will return failure.
6283 */
6284 /* Report the failure. */
6285 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
6286 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
6287 rc, pVM->pgm.s.cHandyPages,
6288 pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cSharedPages, pVM->pgm.s.cZeroPages));
6289
6290 if ( rc != VERR_NO_MEMORY
6291 && rc != VERR_NO_PHYS_MEMORY
6292 && rc != VERR_LOCK_FAILED)
6293 for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
6294 {
6295 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
6296 i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage,
6297 pVM->pgm.s.aHandyPages[i].idSharedPage));
6298 uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage;
6299 if (idPage != NIL_GMM_PAGEID)
6300 {
6301 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
6302 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
6303 {
6304 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
6305 Assert(pRam || idRamRange == 0);
6306 if (!pRam) continue;
6307 Assert(pRam->idRange == idRamRange);
6308
6309 uint32_t const cPages = pRam->cb >> GUEST_PAGE_SHIFT;
6310 for (uint32_t iPage = 0; iPage < cPages; iPage++)
6311 if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage)
6312 LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n",
6313 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc));
6314 }
6315 }
6316 }
6317
6318 if (rc == VERR_NO_MEMORY)
6319 {
6320 uint64_t cbHostRamAvail = 0;
6321 int rc2 = RTSystemQueryAvailableRam(&cbHostRamAvail);
6322 if (RT_SUCCESS(rc2))
6323 LogRel(("Host RAM: %RU64MB available\n", cbHostRamAvail / _1M));
6324 else
6325 LogRel(("Cannot determine the amount of available host memory\n"));
6326 }
6327
6328 /* Set the FFs and adjust rc. */
6329 VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
6330 VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
6331 if ( rc == VERR_NO_MEMORY
6332 || rc == VERR_NO_PHYS_MEMORY
6333 || rc == VERR_LOCK_FAILED)
6334 rc = VINF_EM_NO_MEMORY;
6335 }
6336
6337 PGM_UNLOCK(pVM);
6338 return rc;
6339}
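
/*
 * A minimal sketch of the caller side when VM_FF_PGM_NEED_HANDY_PAGES is
 * pending, e.g. in a force-flag processing loop; the surrounding control flow
 * is illustrative only.
 *
 *     if (VM_FF_IS_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES))
 *     {
 *         int rc = PGMR3PhysAllocateHandyPages(pVM);
 *         if (rc == VINF_EM_NO_MEMORY)
 *         {
 *             // VM_FF_PGM_NO_MEMORY is still set; suspend the VM and inform the user.
 *         }
 *     }
 */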
6340
6341#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
6342
6343
6344/*********************************************************************************************************************************
6345* Other Stuff *
6346*********************************************************************************************************************************/
6347
6348#ifdef VBOX_VMM_TARGET_X86
6349/**
6350 * Sets the Address Gate 20 state.
6351 *
6352 * @param pVCpu The cross context virtual CPU structure.
6353 * @param fEnable True if the gate should be enabled.
6354 * False if the gate should be disabled.
6355 */
6356VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable)
6357{
6358 LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled));
6359 if (pVCpu->pgm.s.fA20Enabled != fEnable)
6360 {
6361# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6362 PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
6363 if ( CPUMIsGuestInVmxRootMode(pCtx)
6364 && !fEnable)
6365 {
6366 Log(("Cannot enter A20M mode while in VMX root mode\n"));
6367 return;
6368 }
6369# endif
6370 pVCpu->pgm.s.fA20Enabled = fEnable;
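    /* With the gate disabled, !fEnable == 1 and the mask below clears physical
       address bit 20, reproducing the classic A20 aliasing; with it enabled the
       mask is all ones and leaves addresses untouched. */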
6371 pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!fEnable << 20);
6372 if (VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)))
6373 NEMR3NotifySetA20(pVCpu, fEnable);
6374# ifdef PGM_WITH_A20
6375 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
6376# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
6377 pgmR3RefreshShadowModeAfterA20Change(pVCpu);
6378 HMFlushTlb(pVCpu);
6379# endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
6380# endif
6381# if 0 /* PGMGetPage will apply the A20 mask to the GCPhys it returns, so we must invalid both sides of the TLB. */
6382 IEMTlbInvalidateAllPhysical(pVCpu);
6383# else
6384 IEMTlbInvalidateAllGlobal(pVCpu);
6385# endif
6386 STAM_REL_COUNTER_INC(&pVCpu->pgm.s.cA20Changes);
6387 }
6388}
6389#endif /* VBOX_VMM_TARGET_X86 */
6390