VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp@108400

Last change on this file since 108400 was 108132, checked in by vboxsync, 3 months ago

VMM/PGM: Merge and deduplicate code targeting x86 & amd64 in PGM.cpp. Don't bother compiling pool stuff on arm and darwin.amd64. jiraref:VBP-1531

1/* $Id: PGMPhys.cpp 108132 2025-02-10 11:05:23Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Physical Memory Addressing.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_PHYS
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/iem.h>
36#include <VBox/vmm/iom.h>
37#include <VBox/vmm/mm.h>
38#include <VBox/vmm/nem.h>
39#include <VBox/vmm/stam.h>
40#include <VBox/vmm/pdmdev.h>
41#include "PGMInternal.h"
42#include <VBox/vmm/vmcc.h>
43
44#include "PGMInline.h"
45
46#include <VBox/sup.h>
47#include <VBox/param.h>
48#include <VBox/err.h>
49#include <VBox/log.h>
50#include <iprt/assert.h>
51#include <iprt/alloc.h>
52#include <iprt/asm.h>
53#ifdef VBOX_STRICT
54# include <iprt/crc.h>
55#endif
56#include <iprt/thread.h>
57#include <iprt/string.h>
58#include <iprt/system.h>
59
60
61/*********************************************************************************************************************************
62* Defined Constants And Macros *
63*********************************************************************************************************************************/
64/** The number of pages to free in one batch. */
65#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128
66
67
68
69/*********************************************************************************************************************************
70* Reading and Writing Guest Physical Memory *
71*********************************************************************************************************************************/
72
73/*
74 * PGMR3PhysReadU8-64
75 * PGMR3PhysWriteU8-64
76 */
77#define PGMPHYSFN_READNAME PGMR3PhysReadU8
78#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8
79#define PGMPHYS_DATASIZE 1
80#define PGMPHYS_DATATYPE uint8_t
81#include "PGMPhysRWTmpl.h"
82
83#define PGMPHYSFN_READNAME PGMR3PhysReadU16
84#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16
85#define PGMPHYS_DATASIZE 2
86#define PGMPHYS_DATATYPE uint16_t
87#include "PGMPhysRWTmpl.h"
88
89#define PGMPHYSFN_READNAME PGMR3PhysReadU32
90#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32
91#define PGMPHYS_DATASIZE 4
92#define PGMPHYS_DATATYPE uint32_t
93#include "PGMPhysRWTmpl.h"
94
95#define PGMPHYSFN_READNAME PGMR3PhysReadU64
96#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64
97#define PGMPHYS_DATASIZE 8
98#define PGMPHYS_DATATYPE uint64_t
99#include "PGMPhysRWTmpl.h"
100
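/*
 * The block above stamps out PGMR3PhysReadU8/16/32/64 and the matching write
 * functions from a single template: each pass defines the name/size/type
 * macros and then includes "PGMPhysRWTmpl.h", which is expected to undefine
 * them again.  A minimal sketch of the same technique, using hypothetical
 * MYTMPL_* macros and mytmpl.h (illustrative names, not part of this code base):
 *
 *      .. mytmpl.h ..
 *      MYTMPL_TYPE MYTMPL_READNAME(const void *pvSrc)
 *      {
 *          MYTMPL_TYPE uValue;
 *          memcpy(&uValue, pvSrc, sizeof(uValue));
 *          return uValue;
 *      }
 *      #undef MYTMPL_READNAME
 *      #undef MYTMPL_TYPE
 *
 *      .. consumer ..
 *      #define MYTMPL_READNAME myTmplReadU16
 *      #define MYTMPL_TYPE     uint16_t
 *      #include "mytmpl.h"
 */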
101
102/**
103 * EMT worker for PGMR3PhysReadExternal.
104 */
105static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead,
106 PGMACCESSORIGIN enmOrigin)
107{
108 VBOXSTRICTRC rcStrict = PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead, enmOrigin);
109 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
110 return VINF_SUCCESS;
111}
112
113
114/**
115 * Read from physical memory, external users.
116 *
117 * @returns VBox status code.
118 * @retval VINF_SUCCESS.
119 *
120 * @param pVM The cross context VM structure.
121 * @param GCPhys Physical address to read from.
122 * @param pvBuf Where to read into.
123 * @param cbRead How many bytes to read.
124 * @param enmOrigin Who is calling.
125 *
126 * @thread Any but EMTs.
127 */
128VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead, PGMACCESSORIGIN enmOrigin)
129{
130 VM_ASSERT_OTHER_THREAD(pVM);
131
132 AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS);
133 LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead));
134
135 PGM_LOCK_VOID(pVM);
136
137 /*
138 * Copy loop on ram ranges.
139 */
140 for (;;)
141 {
142 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
143
144 /* Inside range or not? */
145 if (pRam && GCPhys >= pRam->GCPhys)
146 {
147 /*
148 * Must work our way thru this page by page.
149 */
150 RTGCPHYS off = GCPhys - pRam->GCPhys;
151 while (off < pRam->cb)
152 {
153 unsigned iPage = off >> GUEST_PAGE_SHIFT;
154 PPGMPAGE pPage = &pRam->aPages[iPage];
155
156 /*
157 * If the page has an ALL access handler, we'll have to
158 * delegate the job to EMT.
159 */
160 if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
161 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
162 {
163 PGM_UNLOCK(pVM);
164
165 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 5,
166 pVM, &GCPhys, pvBuf, cbRead, enmOrigin);
167 }
168 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
169
170 /*
171 * Simple stuff, go ahead.
172 */
173 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
174 if (cb > cbRead)
175 cb = cbRead;
176 PGMPAGEMAPLOCK PgMpLck;
177 const void *pvSrc;
178 int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc, &PgMpLck);
179 if (RT_SUCCESS(rc))
180 {
181 memcpy(pvBuf, pvSrc, cb);
182 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
183 }
184 else
185 {
186 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n",
187 pRam->GCPhys + off, pPage, rc));
188 memset(pvBuf, 0xff, cb);
189 }
190
191 /* next page */
192 if (cb >= cbRead)
193 {
194 PGM_UNLOCK(pVM);
195 return VINF_SUCCESS;
196 }
197 cbRead -= cb;
198 off += cb;
199 GCPhys += cb;
200 pvBuf = (char *)pvBuf + cb;
201 } /* walk pages in ram range. */
202 }
203 else
204 {
205 LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead));
206
207 /*
208 * Unassigned address space.
209 */
210 size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0;
211 if (cb >= cbRead)
212 {
213 memset(pvBuf, 0xff, cbRead);
214 break;
215 }
216 memset(pvBuf, 0xff, cb);
217
218 cbRead -= cb;
219 pvBuf = (char *)pvBuf + cb;
220 GCPhys += cb;
221 }
222 } /* Ram range walk */
223
224 PGM_UNLOCK(pVM);
225
226 return VINF_SUCCESS;
227}
228
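/*
 * Usage sketch for PGMR3PhysReadExternal: a device worker thread (not an EMT)
 * pulling a sector's worth of guest memory.  GCPhysDma and the buffer size are
 * illustrative; PGMACCESSORIGIN_DEVICE is the origin a device caller would
 * typically pass.
 *
 *      uint8_t abSector[512];
 *      int rc = PGMR3PhysReadExternal(pVM, GCPhysDma, abSector, sizeof(abSector),
 *                                     PGMACCESSORIGIN_DEVICE);
 *      AssertRCReturn(rc, rc);
 *
 * Unassigned space reads back as 0xff and pages with ALL access handlers are
 * handed to an EMT internally, so the caller does not need a fallback path.
 */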
229
230/**
231 * EMT worker for PGMR3PhysWriteExternal.
232 */
233static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite,
234 PGMACCESSORIGIN enmOrigin)
235{
236 /** @todo VERR_EM_NO_MEMORY */
237 VBOXSTRICTRC rcStrict = PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite, enmOrigin);
238 AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
239 return VINF_SUCCESS;
240}
241
242
243/**
244 * Write to physical memory, external users.
245 *
246 * @returns VBox status code.
247 * @retval VINF_SUCCESS.
248 * @retval VERR_EM_NO_MEMORY.
249 *
250 * @param pVM The cross context VM structure.
251 * @param GCPhys Physical address to write to.
252 * @param pvBuf What to write.
253 * @param cbWrite How many bytes to write.
254 * @param enmOrigin Who is calling.
255 *
256 * @thread Any but EMTs.
257 */
258VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, PGMACCESSORIGIN enmOrigin)
259{
260 VM_ASSERT_OTHER_THREAD(pVM);
261
262 AssertMsg(!pVM->pgm.s.fNoMorePhysWrites,
263 ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x enmOrigin=%d\n",
264 GCPhys, cbWrite, enmOrigin));
265 AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS);
266 LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite));
267
268 PGM_LOCK_VOID(pVM);
269
270 /*
271 * Copy loop on ram ranges, stop when we hit something difficult.
272 */
273 for (;;)
274 {
275 PPGMRAMRANGE const pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
276
277 /* Inside range or not? */
278 if (pRam && GCPhys >= pRam->GCPhys)
279 {
280 /*
281 * Must work our way thru this page by page.
282 */
283 RTGCPTR off = GCPhys - pRam->GCPhys;
284 while (off < pRam->cb)
285 {
286 RTGCPTR iPage = off >> GUEST_PAGE_SHIFT;
287 PPGMPAGE pPage = &pRam->aPages[iPage];
288
289 /*
290 * If the page is problematic, we have to do the work on the EMT.
291 *
292 * Allocating writable pages and access handlers are
293 * problematic, write monitored pages are simple and can be
294 * dealt with here.
295 */
296 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
297 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
298 || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage))
299 {
300 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
301 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
302 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
303 else
304 {
305 PGM_UNLOCK(pVM);
306
307 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 5,
308 pVM, &GCPhys, pvBuf, cbWrite, enmOrigin);
309 }
310 }
311 Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage));
312
313 /*
314 * Simple stuff, go ahead.
315 */
316 size_t cb = GUEST_PAGE_SIZE - (off & GUEST_PAGE_OFFSET_MASK);
317 if (cb > cbWrite)
318 cb = cbWrite;
319 PGMPAGEMAPLOCK PgMpLck;
320 void *pvDst;
321 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst, &PgMpLck);
322 if (RT_SUCCESS(rc))
323 {
324 memcpy(pvDst, pvBuf, cb);
325 pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck);
326 }
327 else
328 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n",
329 pRam->GCPhys + off, pPage, rc));
330
331 /* next page */
332 if (cb >= cbWrite)
333 {
334 PGM_UNLOCK(pVM);
335 return VINF_SUCCESS;
336 }
337
338 cbWrite -= cb;
339 off += cb;
340 GCPhys += cb;
341 pvBuf = (const char *)pvBuf + cb;
342 } /* walk pages in ram range */
343 }
344 else
345 {
346 /*
347 * Unassigned address space, skip it.
348 */
349 if (!pRam)
350 break;
351 size_t cb = pRam->GCPhys - GCPhys;
352 if (cb >= cbWrite)
353 break;
354 cbWrite -= cb;
355 pvBuf = (const char *)pvBuf + cb;
356 GCPhys += cb;
357 }
358 } /* Ram range walk */
359
360 PGM_UNLOCK(pVM);
361 return VINF_SUCCESS;
362}
363
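/*
 * Usage sketch for PGMR3PhysWriteExternal: completing a DMA transfer into
 * guest RAM from a non-EMT thread (pvData, cbData and GCPhysDma are
 * illustrative):
 *
 *      int rc = PGMR3PhysWriteExternal(pVM, GCPhysDma, pvData, cbData,
 *                                      PGMACCESSORIGIN_DEVICE);
 *      AssertLogRelRC(rc);
 *
 * Writes that hit access handlers or pages that must first be allocated are
 * delegated to an EMT; writes to unassigned space are silently skipped.
 */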
364
365/*********************************************************************************************************************************
366* Mapping Guest Physical Memory *
367*********************************************************************************************************************************/
368
369/**
370 * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable.
371 *
372 * @returns see PGMR3PhysGCPhys2CCPtrExternal
373 * @param pVM The cross context VM structure.
374 * @param pGCPhys Pointer to the guest physical address.
375 * @param ppv Where to store the mapping address.
376 * @param pLock Where to store the lock.
377 */
378static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
379{
380 /*
381 * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with
382 * an access handler after it succeeds.
383 */
384 int rc = PGM_LOCK(pVM);
385 AssertRCReturn(rc, rc);
386
387 rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock);
388 if (RT_SUCCESS(rc))
389 {
390 PPGMPAGEMAPTLBE pTlbe;
391 int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe);
392 AssertFatalRC(rc2);
393 PPGMPAGE pPage = pTlbe->pPage;
394 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
395 {
396 PGMPhysReleasePageMappingLock(pVM, pLock);
397 rc = VERR_PGM_PHYS_PAGE_RESERVED;
398 }
399 else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
400#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
401 || pgmPoolIsDirtyPage(pVM, *pGCPhys)
402#endif
403 )
404 {
405 /* We *must* flush any corresponding pgm pool page here, otherwise we'll
406 * not be informed about writes and keep bogus gst->shw mappings around.
407 */
408#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
409 pgmPoolFlushPageByGCPhys(pVM, *pGCPhys);
410#endif
411 Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage));
412 /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has
413 * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */
414 }
415 }
416
417 PGM_UNLOCK(pVM);
418 return rc;
419}
420
421
422/**
423 * Requests the mapping of a guest page into ring-3, external threads.
424 *
425 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
426 * release it.
427 *
428 * This API will assume your intention is to write to the page, and will
429 * therefore replace shared and zero pages. If you do not intend to modify the
430 * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API.
431 *
432 * @returns VBox status code.
433 * @retval VINF_SUCCESS on success.
434 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
435 * backing or if the page has any active access handlers. The caller
436 * must fall back on using PGMR3PhysWriteExternal.
437 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
438 *
439 * @param pVM The cross context VM structure.
440 * @param GCPhys The guest physical address of the page that should be mapped.
441 * @param ppv Where to store the address corresponding to GCPhys.
442 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
443 *
444 * @remark Avoid calling this API from within critical sections (other than the
445 * PGM one) because of the deadlock risk when we have to delegate the
446 * task to an EMT.
447 * @thread Any.
448 */
449VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
450{
451 AssertPtr(ppv);
452 AssertPtr(pLock);
453
454 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
455
456 int rc = PGM_LOCK(pVM);
457 AssertRCReturn(rc, rc);
458
459 /*
460 * Query the Physical TLB entry for the page (may fail).
461 */
462 PPGMPAGEMAPTLBE pTlbe;
463 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
464 if (RT_SUCCESS(rc))
465 {
466 PPGMPAGE pPage = pTlbe->pPage;
467 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
468 rc = VERR_PGM_PHYS_PAGE_RESERVED;
469 else
470 {
471 /*
472 * If the page is shared, the zero page, or being write monitored
473 * it must be converted to a page that's writable if possible.
474 * We can only deal with write monitored pages here, the rest have
475 * to be on an EMT.
476 */
477 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
478 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
479#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
480 || pgmPoolIsDirtyPage(pVM, GCPhys)
481#endif
482 )
483 {
484 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
485 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
486#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
487 && !pgmPoolIsDirtyPage(pVM, GCPhys) /** @todo we're very likely doing this twice. */
488#endif
489 )
490 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
491 else
492 {
493 PGM_UNLOCK(pVM);
494
495 return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
496 pVM, &GCPhys, ppv, pLock);
497 }
498 }
499
500 /*
501 * Now, just perform the locking and calculate the return address.
502 */
503 PPGMPAGEMAP pMap = pTlbe->pMap;
504 if (pMap)
505 pMap->cRefs++;
506
507 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
508 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
509 {
510 if (cLocks == 0)
511 pVM->pgm.s.cWriteLockedPages++;
512 PGM_PAGE_INC_WRITE_LOCKS(pPage);
513 }
514 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
515 {
516 PGM_PAGE_INC_WRITE_LOCKS(pPage);
517 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
518 if (pMap)
519 pMap->cRefs++; /* Extra ref to prevent it from going away. */
520 }
521
522 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
523 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
524 pLock->pvMap = pMap;
525 }
526 }
527
528 PGM_UNLOCK(pVM);
529 return rc;
530}
531
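/*
 * Usage sketch for PGMR3PhysGCPhys2CCPtrExternal: map a page for writing,
 * poke a value and release the lock again as soon as possible (the offset
 * and value are illustrative):
 *
 *      void           *pv;
 *      PGMPAGEMAPLOCK  Lock;
 *      int rc = PGMR3PhysGCPhys2CCPtrExternal(pVM, GCPhys, &pv, &Lock);
 *      if (RT_SUCCESS(rc))
 *      {
 *          *(uint32_t *)pv = UINT32_C(0xfeedface);
 *          PGMPhysReleasePageMappingLock(pVM, &Lock);
 *      }
 *      else if (rc == VERR_PGM_PHYS_PAGE_RESERVED)
 *          .. fall back on PGMR3PhysWriteExternal as documented above ..
 */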
532
533/**
534 * Requests the mapping of a guest page into ring-3, external threads.
535 *
536 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
537 * release it.
538 *
539 * @returns VBox status code.
540 * @retval VINF_SUCCESS on success.
541 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
542 * backing or if the page has an active ALL access handler. The caller
543 * must fall back on using PGMPhysRead.
544 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
545 *
546 * @param pVM The cross context VM structure.
547 * @param GCPhys The guest physical address of the page that should be mapped.
548 * @param ppv Where to store the address corresponding to GCPhys.
549 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
550 *
551 * @remark Avoid calling this API from within critical sections (other than
552 * the PGM one) because of the deadlock risk.
553 * @thread Any.
554 */
555VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock)
556{
557 int rc = PGM_LOCK(pVM);
558 AssertRCReturn(rc, rc);
559
560 /*
561 * Query the Physical TLB entry for the page (may fail).
562 */
563 PPGMPAGEMAPTLBE pTlbe;
564 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
565 if (RT_SUCCESS(rc))
566 {
567 PPGMPAGE pPage = pTlbe->pPage;
568#if 1
569 /* MMIO pages don't have any readable backing. */
570 if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
571 rc = VERR_PGM_PHYS_PAGE_RESERVED;
572#else
573 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
574 rc = VERR_PGM_PHYS_PAGE_RESERVED;
575#endif
576 else
577 {
578 /*
579 * Now, just perform the locking and calculate the return address.
580 */
581 PPGMPAGEMAP pMap = pTlbe->pMap;
582 if (pMap)
583 pMap->cRefs++;
584
585 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
586 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
587 {
588 if (cLocks == 0)
589 pVM->pgm.s.cReadLockedPages++;
590 PGM_PAGE_INC_READ_LOCKS(pPage);
591 }
592 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
593 {
594 PGM_PAGE_INC_READ_LOCKS(pPage);
595 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage));
596 if (pMap)
597 pMap->cRefs++; /* Extra ref to prevent it from going away. */
598 }
599
600 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
601 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
602 pLock->pvMap = pMap;
603 }
604 }
605
606 PGM_UNLOCK(pVM);
607 return rc;
608}
609
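/*
 * Usage sketch for the read-only variant above (checksumming the rest of the
 * page is illustrative; RTCrc32 comes from iprt/crc.h):
 *
 *      void const     *pv;
 *      PGMPAGEMAPLOCK  Lock;
 *      int rc = PGMR3PhysGCPhys2CCPtrReadOnlyExternal(pVM, GCPhys, &pv, &Lock);
 *      if (RT_SUCCESS(rc))
 *      {
 *          uint32_t uCrc = RTCrc32(pv, GUEST_PAGE_SIZE - (GCPhys & GUEST_PAGE_OFFSET_MASK));
 *          PGMPhysReleasePageMappingLock(pVM, &Lock);
 *          .. use uCrc ..
 *      }
 *      else if (rc == VERR_PGM_PHYS_PAGE_RESERVED)
 *          .. fall back on PGMPhysRead / PGMR3PhysReadExternal ..
 */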
610
611/**
612 * Requests the mapping of multiple guest pages into ring-3, external threads.
613 *
614 * When you're done with the pages, call PGMPhysBulkReleasePageMappingLock()
615 * ASAP to release them.
616 *
617 * This API will assume your intention is to write to the pages, and will
618 * therefore replace shared and zero pages. If you do not intend to modify the
619 * pages, use the PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal() API.
620 *
621 * @returns VBox status code.
622 * @retval VINF_SUCCESS on success.
623 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
624 * backing or if any of the pages has any active access
625 * handlers. The caller must fall back on using PGMR3PhysWriteExternal.
626 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
627 * an invalid physical address.
628 *
629 * @param pVM The cross context VM structure.
630 * @param cPages Number of pages to lock.
631 * @param paGCPhysPages The guest physical address of the pages that
632 * should be mapped (@a cPages entries).
633 * @param papvPages Where to store the ring-3 mapping addresses
634 * corresponding to @a paGCPhysPages.
635 * @param paLocks Where to store the locking information that
636 * pfnPhysBulkReleasePageMappingLock needs (@a cPages
637 * in length).
638 *
639 * @remark Avoid calling this API from within critical sections (other than the
640 * PGM one) because of the deadlock risk when we have to delegate the
641 * task to an EMT.
642 * @thread Any.
643 */
644VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
645 void **papvPages, PPGMPAGEMAPLOCK paLocks)
646{
647 Assert(cPages > 0);
648 AssertPtr(papvPages);
649 AssertPtr(paLocks);
650
651 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
652
653 int rc = PGM_LOCK(pVM);
654 AssertRCReturn(rc, rc);
655
656 /*
657 * Lock the pages one by one.
658 * The loop body is similar to PGMR3PhysGCPhys2CCPtrExternal.
659 */
660 int32_t cNextYield = 128;
661 uint32_t iPage;
662 for (iPage = 0; iPage < cPages; iPage++)
663 {
664 if (--cNextYield > 0)
665 { /* likely */ }
666 else
667 {
668 PGM_UNLOCK(pVM);
669 ASMNopPause();
670 PGM_LOCK_VOID(pVM);
671 cNextYield = 128;
672 }
673
674 /*
675 * Query the Physical TLB entry for the page (may fail).
676 */
677 PPGMPAGEMAPTLBE pTlbe;
678 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
679 if (RT_SUCCESS(rc))
680 { }
681 else
682 break;
683 PPGMPAGE pPage = pTlbe->pPage;
684
685 /*
686 * No MMIO or active access handlers.
687 */
688 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
689 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
690 { }
691 else
692 {
693 rc = VERR_PGM_PHYS_PAGE_RESERVED;
694 break;
695 }
696
697 /*
698 * The page must be in the allocated state and not be a dirty pool page.
699 * We can handle converting a write monitored page to an allocated one, but
700 * anything more complicated must be delegated to an EMT.
701 */
702 bool fDelegateToEmt = false;
703 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
704#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
705 fDelegateToEmt = pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]);
706#else
707 fDelegateToEmt = false;
708#endif
709 else if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
710 {
711#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
712 if (!pgmPoolIsDirtyPage(pVM, paGCPhysPages[iPage]))
713 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, paGCPhysPages[iPage]);
714 else
715 fDelegateToEmt = true;
716#endif
717 }
718 else
719 fDelegateToEmt = true;
720 if (!fDelegateToEmt)
721 { }
722 else
723 {
724 /* We could do this delegation in bulk, but considered too much work vs gain. */
725 PGM_UNLOCK(pVM);
726 rc = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
727 pVM, &paGCPhysPages[iPage], &papvPages[iPage], &paLocks[iPage]);
728 PGM_LOCK_VOID(pVM);
729 if (RT_FAILURE(rc))
730 break;
731 cNextYield = 128;
732 }
733
734 /*
735 * Now, just perform the locking and address calculation.
736 */
737 PPGMPAGEMAP pMap = pTlbe->pMap;
738 if (pMap)
739 pMap->cRefs++;
740
741 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
742 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
743 {
744 if (cLocks == 0)
745 pVM->pgm.s.cWriteLockedPages++;
746 PGM_PAGE_INC_WRITE_LOCKS(pPage);
747 }
748 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
749 {
750 PGM_PAGE_INC_WRITE_LOCKS(pPage);
751 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", paGCPhysPages[iPage], pPage));
752 if (pMap)
753 pMap->cRefs++; /* Extra ref to prevent it from going away. */
754 }
755
756 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
757 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
758 paLocks[iPage].pvMap = pMap;
759 }
760
761 PGM_UNLOCK(pVM);
762
763 /*
764 * On failure we must unlock any pages we managed to get already.
765 */
766 if (RT_FAILURE(rc) && iPage > 0)
767 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
768
769 return rc;
770}
771
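/*
 * Usage sketch for the bulk API above: locking a short scatter list in one
 * call (the two page-aligned addresses are illustrative):
 *
 *      RTGCPHYS const  aGCPhysPages[2] = { GCPhysFirst, GCPhysSecond };
 *      void           *apvPages[2];
 *      PGMPAGEMAPLOCK  aLocks[2];
 *      int rc = PGMR3PhysBulkGCPhys2CCPtrExternal(pVM, 2, aGCPhysPages, apvPages, aLocks);
 *      if (RT_SUCCESS(rc))
 *      {
 *          memset(apvPages[0], 0, GUEST_PAGE_SIZE);
 *          memset(apvPages[1], 0, GUEST_PAGE_SIZE);
 *          PGMPhysBulkReleasePageMappingLocks(pVM, 2, aLocks);
 *      }
 *
 * On failure the function has already released whatever it managed to lock,
 * so the caller has nothing to clean up.
 */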
772
773/**
774 * Requests the mapping of multiple guest pages into ring-3, for reading only,
775 * external threads.
776 *
777 * When you're done with the pages, call PGMPhysReleasePageMappingLock() ASAP
778 * to release them.
779 *
780 * @returns VBox status code.
781 * @retval VINF_SUCCESS on success.
782 * @retval VERR_PGM_PHYS_PAGE_RESERVED if any of the pages has no physical
783 * backing or if any of the pages has an active ALL access
784 * handler. The caller must fall back on using PGMR3PhysReadExternal.
785 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if @a paGCPhysPages contains
786 * an invalid physical address.
787 *
788 * @param pVM The cross context VM structure.
789 * @param cPages Number of pages to lock.
790 * @param paGCPhysPages The guest physical address of the pages that
791 * should be mapped (@a cPages entries).
792 * @param papvPages Where to store the ring-3 mapping addresses
793 * corresponding to @a paGCPhysPages.
794 * @param paLocks Where to store the lock information that
795 * pfnPhysReleasePageMappingLock needs (@a cPages
796 * in length).
797 *
798 * @remark Avoid calling this API from within critical sections (other than
799 * the PGM one) because of the deadlock risk.
800 * @thread Any.
801 */
802VMMR3DECL(int) PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal(PVM pVM, uint32_t cPages, PCRTGCPHYS paGCPhysPages,
803 void const **papvPages, PPGMPAGEMAPLOCK paLocks)
804{
805 Assert(cPages > 0);
806 AssertPtr(papvPages);
807 AssertPtr(paLocks);
808
809 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
810
811 int rc = PGM_LOCK(pVM);
812 AssertRCReturn(rc, rc);
813
814 /*
815 * Lock the pages one by one.
816 * The loop body is similar to PGMR3PhysGCPhys2CCPtrReadOnlyExternal.
817 */
818 int32_t cNextYield = 256;
819 uint32_t iPage;
820 for (iPage = 0; iPage < cPages; iPage++)
821 {
822 if (--cNextYield > 0)
823 { /* likely */ }
824 else
825 {
826 PGM_UNLOCK(pVM);
827 ASMNopPause();
828 PGM_LOCK_VOID(pVM);
829 cNextYield = 256;
830 }
831
832 /*
833 * Query the Physical TLB entry for the page (may fail).
834 */
835 PPGMPAGEMAPTLBE pTlbe;
836 rc = pgmPhysPageQueryTlbe(pVM, paGCPhysPages[iPage], &pTlbe);
837 if (RT_SUCCESS(rc))
838 { }
839 else
840 break;
841 PPGMPAGE pPage = pTlbe->pPage;
842
843 /*
844 * No MMIO or active all access handlers, everything else can be accessed.
845 */
846 if ( !PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)
847 && !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
848 { }
849 else
850 {
851 rc = VERR_PGM_PHYS_PAGE_RESERVED;
852 break;
853 }
854
855 /*
856 * Now, just perform the locking and address calculation.
857 */
858 PPGMPAGEMAP pMap = pTlbe->pMap;
859 if (pMap)
860 pMap->cRefs++;
861
862 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
863 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
864 {
865 if (cLocks == 0)
866 pVM->pgm.s.cReadLockedPages++;
867 PGM_PAGE_INC_READ_LOCKS(pPage);
868 }
869 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
870 {
871 PGM_PAGE_INC_READ_LOCKS(pPage);
872 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", paGCPhysPages[iPage], pPage));
873 if (pMap)
874 pMap->cRefs++; /* Extra ref to prevent it from going away. */
875 }
876
877 papvPages[iPage] = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(paGCPhysPages[iPage] & GUEST_PAGE_OFFSET_MASK));
878 paLocks[iPage].uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
879 paLocks[iPage].pvMap = pMap;
880 }
881
882 PGM_UNLOCK(pVM);
883
884 /*
885 * On failure we must unlock any pages we managed to get already.
886 */
887 if (RT_FAILURE(rc) && iPage > 0)
888 PGMPhysBulkReleasePageMappingLocks(pVM, iPage, paLocks);
889
890 return rc;
891}
892
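/*
 * The read-only bulk variant above follows the same pattern with a const
 * mapping array:
 *
 *      void const     *apvPages[2];
 *      PGMPAGEMAPLOCK  aLocks[2];
 *      int rc = PGMR3PhysBulkGCPhys2CCPtrReadOnlyExternal(pVM, 2, aGCPhysPages, apvPages, aLocks);
 *      if (RT_SUCCESS(rc))
 *      {
 *          .. read via apvPages[0] and apvPages[1] ..
 *          PGMPhysBulkReleasePageMappingLocks(pVM, 2, aLocks);
 *      }
 *
 * Unlike the writable variant, pages with only write handlers are accepted
 * here; just MMIO and ALL access handlers yield VERR_PGM_PHYS_PAGE_RESERVED.
 */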
893
894/**
895 * Converts a GC physical address to a HC ring-3 pointer, with some
896 * additional checks.
897 *
898 * @returns VBox status code.
899 * @retval VINF_SUCCESS on success.
900 * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write
901 * access handler of some kind.
902 * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all
903 * accesses or is odd in any way.
904 * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist.
905 *
906 * @param pVM The cross context VM structure.
907 * @param GCPhys The GC physical address to convert. Since this is only
908 * used for filling the REM TLB, the A20 mask must be
909 * applied before calling this API.
910 * @param fWritable Whether write access is required.
911 * @param ppv Where to store the pointer corresponding to GCPhys on
912 * success.
913 */
914VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv)
915{
916 PGM_LOCK_VOID(pVM);
917 PGM_A20_ASSERT_MASKED(VMMGetCpu(pVM), GCPhys);
918
919 PPGMRAMRANGE pRam;
920 PPGMPAGE pPage;
921 int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam);
922 if (RT_SUCCESS(rc))
923 {
924 if (PGM_PAGE_IS_BALLOONED(pPage))
925 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
926 else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage))
927 rc = VINF_SUCCESS;
928 else
929 {
930 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */
931 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
932 else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
933 {
934 /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work
935 * in -norawr0 mode. */
936 if (fWritable)
937 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
938 }
939 else
940 {
941 /* Temporarily disabled physical handler(s), since the recompiler
942 doesn't get notified when it's reset we'll have to pretend it's
943 operating normally. */
944 if (pgmHandlerPhysicalIsAll(pVM, GCPhys))
945 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
946 else
947 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
948 }
949 }
950 if (RT_SUCCESS(rc))
951 {
952 int rc2;
953
954 /* Make sure what we return is writable. */
955 if (fWritable)
956 switch (PGM_PAGE_GET_STATE(pPage))
957 {
958 case PGM_PAGE_STATE_ALLOCATED:
959 break;
960 case PGM_PAGE_STATE_BALLOONED:
961 AssertFailed();
962 break;
963 case PGM_PAGE_STATE_ZERO:
964 case PGM_PAGE_STATE_SHARED:
965 if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
966 break;
967 RT_FALL_THRU();
968 case PGM_PAGE_STATE_WRITE_MONITORED:
969 rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
970 AssertLogRelRCReturn(rc2, rc2);
971 break;
972 }
973
974 /* Get a ring-3 mapping of the address. */
975 PPGMPAGER3MAPTLBE pTlbe;
976 rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
977 AssertLogRelRCReturn(rc2, rc2);
978 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & GUEST_PAGE_OFFSET_MASK));
979 /** @todo mapping/locking hell; this isn't horribly efficient since
980 * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */
981
982 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv));
983 }
984 else
985 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage));
986
987 /* else: handler catching all access, no pointer returned. */
988 }
989 else
990 rc = VERR_PGM_PHYS_TLB_UNASSIGNED;
991
992 PGM_UNLOCK(pVM);
993 return rc;
994}
995
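/*
 * Caller sketch for the translation above; the returned status decides how
 * much direct access is safe (fWritable is true in this example):
 *
 *      void *pv;
 *      int rc = PGMR3PhysTlbGCPhys2Ptr(pVM, GCPhys, true, &pv);
 *      if (rc == VINF_SUCCESS)
 *          .. read and write directly through pv ..
 *      else if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
 *          .. reads may go through pv, writes must take the handler path ..
 *      else
 *          .. VERR_PGM_PHYS_TLB_CATCH_ALL or VERR_PGM_PHYS_TLB_UNASSIGNED:
 *             no direct access at all ..
 *
 * As noted above, GCPhys must already have the A20 mask applied.
 */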
996
997
998/*********************************************************************************************************************************
999* RAM Range Management *
1000*********************************************************************************************************************************/
1001
1002/**
1003 * Given the range @a GCPhys thru @a GCPhysLast, find overlapping RAM range or
1004 * the correct insertion point.
1005 *
1006 * @returns Pointer to overlapping RAM range if found, NULL if not.
1007 * @param pVM The cross context VM structure.
1008 * @param GCPhys The address of the first byte in the range.
1009 * @param GCPhysLast The address of the last byte in the range.
1010 * @param pidxInsert Where to return the lookup table index to insert the
1011 * range at when returning NULL. Set to UINT32_MAX when
1012 * returning the pointer to an overlapping range.
1013 * @note Caller must own the PGM lock.
1014 */
1015static PPGMRAMRANGE pgmR3PhysRamRangeFindOverlapping(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, uint32_t *pidxInsert)
1016{
1017 PGM_LOCK_ASSERT_OWNER(pVM);
1018 uint32_t iStart = 0;
1019 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1020 for (;;)
1021 {
1022 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1023 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1024 if (GCPhysLast < GCPhysEntryFirst)
1025 {
1026 if (idxLookup > iStart)
1027 iEnd = idxLookup;
1028 else
1029 {
1030 *pidxInsert = idxLookup;
1031 return NULL;
1032 }
1033 }
1034 else
1035 {
1036 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1037 if (GCPhys > GCPhysEntryLast)
1038 {
1039 idxLookup += 1;
1040 if (idxLookup < iEnd)
1041 iStart = idxLookup;
1042 else
1043 {
1044 *pidxInsert = idxLookup;
1045 return NULL;
1046 }
1047 }
1048 else
1049 {
1050 /* overlap */
1051 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1052 *pidxInsert = UINT32_MAX;
1053 return pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])];
1054 }
1055 }
1056 }
1057}
1058
1059
1060/**
1061 * Given the range @a GCPhys thru @a GCPhysLast, find the lookup table entry
1062 * that's overlapping it.
1063 *
1064 * @returns The lookup table index of the overlapping entry, UINT32_MAX if not
1065 * found.
1066 * @param pVM The cross context VM structure.
1067 * @param GCPhys The address of the first byte in the range.
1068 * @param GCPhysLast The address of the last byte in the range.
1069 * @note Caller must own the PGM lock.
1070 */
1071static uint32_t pgmR3PhysRamRangeFindOverlappingIndex(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast)
1072{
1073 PGM_LOCK_ASSERT_OWNER(pVM);
1074 uint32_t iStart = 0;
1075 uint32_t iEnd = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1076 for (;;)
1077 {
1078 uint32_t idxLookup = iStart + (iEnd - iStart) / 2;
1079 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1080 if (GCPhysLast < GCPhysEntryFirst)
1081 {
1082 if (idxLookup > iStart)
1083 iEnd = idxLookup;
1084 else
1085 return UINT32_MAX;
1086 }
1087 else
1088 {
1089 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1090 if (GCPhys > GCPhysEntryLast)
1091 {
1092 idxLookup += 1;
1093 if (idxLookup < iEnd)
1094 iStart = idxLookup;
1095 else
1096 return UINT32_MAX;
1097 }
1098 else
1099 {
1100 /* overlap */
1101 Assert(GCPhysEntryLast > GCPhys && GCPhysEntryFirst < GCPhysLast);
1102 return idxLookup;
1103 }
1104 }
1105 }
1106}
1107
1108
1109/**
1110 * Insert @a pRam into the lookup table.
1111 *
1112 * @returns VBox status code.
1113 * @param pVM The cross context VM structure.
1114 * @param pRam The RAM range to insert into the lookup table.
1115 * @param GCPhys The new mapping address to assign @a pRam on insertion.
1116 * @param pidxLookup Optional lookup table hint. This is updated.
1117 * @note Caller must own PGM lock.
1118 */
1119static int pgmR3PhysRamRangeInsertLookup(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, uint32_t *pidxLookup)
1120{
1121 PGM_LOCK_ASSERT_OWNER(pVM);
1122#ifdef DEBUG_bird
1123 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, true /*fRamRelaxed*/);
1124#endif
1125 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
1126 AssertLogRelMsgReturn( pRam->GCPhys == NIL_RTGCPHYS
1127 && pRam->GCPhysLast == NIL_RTGCPHYS,
1128 ("GCPhys=%RGp; range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1129 GCPhys, pRam->GCPhys, pRam->cb, pRam->GCPhysLast, pRam->pszDesc),
1130 VERR_ALREADY_EXISTS);
1131 uint32_t const idRamRange = pRam->idRange;
1132 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_2);
1133
1134 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1135 RTGCPHYS const GCPhysLast = GCPhys + pRam->cb - 1U;
1136 AssertReturn(GCPhysLast > GCPhys, VERR_INTERNAL_ERROR_4);
1137 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1138
1139 /*
1140 * Find the lookup table location if necessary.
1141 */
1142 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1143 AssertLogRelMsgReturn(cLookupEntries + 1 < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1144 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1145
1146 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1147 if (cLookupEntries == 0)
1148 idxLookup = 0; /* special case: empty table */
1149 else
1150 {
1151 if ( idxLookup > cLookupEntries
1152 || ( idxLookup != 0
1153 && pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast >= GCPhys)
1154 || ( idxLookup < cLookupEntries
1155 && PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]) < GCPhysLast))
1156 {
1157 PPGMRAMRANGE pOverlapping = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxLookup);
1158 AssertLogRelMsgReturn(!pOverlapping,
1159 ("GCPhys=%RGp; GCPhysLast=%RGp %s - overlaps %RGp...%RGp %s\n",
1160 GCPhys, GCPhysLast, pRam->pszDesc,
1161 pOverlapping->GCPhys, pOverlapping->GCPhysLast, pOverlapping->pszDesc),
1162 VERR_PGM_RAM_CONFLICT);
1163 AssertLogRelMsgReturn(idxLookup <= cLookupEntries, ("%#x vs %#x\n", idxLookup, cLookupEntries), VERR_INTERNAL_ERROR_5);
1164 }
1165 /* else we've got a good hint. */
1166 }
1167
1168 /*
1169 * Do the actual job.
1170 *
1171 * The moving of existing table entries is done in a way that allows other
1172 * EMTs to perform concurrent lookups with the updating.
1173 */
1174 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1175 && pVM->cCpus > 1
1176#ifdef RT_ARCH_AMD64
1177 && g_CpumHostFeatures.s.fCmpXchg16b
1178#endif
1179 ;
1180
1181 /* Signal that we're modifying the lookup table: */
1182 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1183 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1184
1185 /* Update the RAM range entry. */
1186 pRam->GCPhys = GCPhys;
1187 pRam->GCPhysLast = GCPhysLast;
1188
1189 /* Do we need to shift any lookup table entries? */
1190 if (idxLookup != cLookupEntries)
1191 {
1192 /* We do. Make a copy of the final entry first. */
1193 uint32_t cToMove = cLookupEntries - idxLookup;
1194 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[cLookupEntries];
1195 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1196 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1197
1198 /* Then increase the table size. This will ensure that anyone starting
1199 a search from here on should have consistent data. */
1200 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.cLookupEntries, cLookupEntries + 1);
1201
1202 /* Transfer the rest of the entries. */
1203 cToMove -= 1;
1204 if (cToMove > 0)
1205 {
1206 if (!fUseAtomic)
1207 do
1208 {
1209 pCur -= 1;
1210 pCur->GCPhysFirstAndId = pCur[-1].GCPhysFirstAndId;
1211 pCur->GCPhysLast = pCur[-1].GCPhysLast;
1212 cToMove -= 1;
1213 } while (cToMove > 0);
1214 else
1215 {
1216#if RTASM_HAVE_WRITE_U128 >= 2
1217 do
1218 {
1219 pCur -= 1;
1220 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[-1].u128Normal);
1221 cToMove -= 1;
1222 } while (cToMove > 0);
1223
1224#else
1225 uint64_t u64PrevLo = pCur[-1].u128Normal.s.Lo;
1226 uint64_t u64PrevHi = pCur[-1].u128Normal.s.Hi;
1227 do
1228 {
1229 pCur -= 1;
1230 uint64_t const u64CurLo = pCur[-1].u128Normal.s.Lo;
1231 uint64_t const u64CurHi = pCur[-1].u128Normal.s.Hi;
1232 uint128_t uOldIgn;
1233 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1234 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1235 u64PrevLo = u64CurLo;
1236 u64PrevHi = u64CurHi;
1237 cToMove -= 1;
1238 } while (cToMove > 0);
1239#endif
1240 }
1241 }
1242 }
1243
1244 /*
1245 * Write the new entry.
1246 */
1247 PGMRAMRANGELOOKUPENTRY *pInsert = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1248 if (!fUseAtomic)
1249 {
1250 pInsert->GCPhysFirstAndId = idRamRange | GCPhys;
1251 pInsert->GCPhysLast = GCPhysLast;
1252 }
1253 else
1254 {
1255 PGMRAMRANGELOOKUPENTRY NewEntry;
1256 NewEntry.GCPhysFirstAndId = idRamRange | GCPhys;
1257 NewEntry.GCPhysLast = GCPhysLast;
1258 ASMAtomicWriteU128v2(&pInsert->u128Volatile.u, NewEntry.u128Normal.s.Hi, NewEntry.u128Normal.s.Lo);
1259 }
1260
1261 /*
1262 * Update the generation and count in one go, signaling the end of the updating.
1263 */
1264 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1265 GenAndCount.cLookupEntries = cLookupEntries + 1;
1266 GenAndCount.idGeneration = idGeneration + 1;
1267 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1268
1269 if (pidxLookup)
1270 *pidxLookup = idxLookup + 1;
1271
1272#ifdef DEBUG_bird
1273 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1274#endif
1275 return VINF_SUCCESS;
1276}
1277
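/*
 * The generation dance above is what makes lockless lookups possible: an
 * update first sets idGeneration to an odd value, shuffles the entries, and
 * finally publishes an even generation together with the new entry count.
 * A sketch of the matching reader pattern (illustrative only; the in-tree
 * lockless readers differ in detail):
 *
 *      uint32_t idGen;
 *      do
 *      {
 *          idGen = ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.idGeneration);
 *          while (idGen & 1)
 *              idGen = ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.idGeneration);
 *          .. binary search aRamRangeLookup[0..cLookupEntries-1] ..
 *      } while (ASMAtomicReadU32(&pVM->pgm.s.RamRangeUnion.idGeneration) != idGen);
 */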
1278
1279/**
1280 * Removes @a pRam from the lookup table.
1281 *
1282 * @returns VBox status code.
1283 * @param pVM The cross context VM structure.
1284 * @param pRam The RAM range to remove from the lookup table.
1285 * @param pidxLookup Optional lookup table hint. This is updated.
1286 * @note Caller must own PGM lock.
1287 */
1288static int pgmR3PhysRamRangeRemoveLookup(PVM pVM, PPGMRAMRANGE pRam, uint32_t *pidxLookup)
1289{
1290 PGM_LOCK_ASSERT_OWNER(pVM);
1291 AssertMsg(pRam->pszDesc, ("%RGp-%RGp\n", pRam->GCPhys, pRam->GCPhysLast));
1292
1293 RTGCPHYS const GCPhys = pRam->GCPhys;
1294 RTGCPHYS const GCPhysLast = pRam->GCPhysLast;
1295 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1296 || GCPhysLast != NIL_RTGCPHYS,
1297 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1298 VERR_NOT_FOUND);
1299 AssertLogRelMsgReturn( GCPhys != NIL_RTGCPHYS
1300 && GCPhysLast == GCPhys + pRam->cb - 1U
1301 && (GCPhys & GUEST_PAGE_OFFSET_MASK) == 0
1302 && (GCPhysLast & GUEST_PAGE_OFFSET_MASK) == GUEST_PAGE_OFFSET_MASK
1303 && GCPhysLast > GCPhys,
1304 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n", GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1305 VERR_INTERNAL_ERROR_5);
1306 uint32_t const idRamRange = pRam->idRange;
1307 AssertReturn(pVM->pgm.s.apRamRanges[idRamRange] == pRam, VERR_INTERNAL_ERROR_4);
1308 LogFlowFunc(("GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n", GCPhys, pRam->cb, GCPhysLast, idRamRange, pRam->pszDesc));
1309
1310 /*
1311 * Find the lookup table location.
1312 */
1313 uint32_t const cLookupEntries = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1314 AssertLogRelMsgReturn( cLookupEntries > 0
1315 && cLookupEntries < RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup), /* id=0 is unused, so < is correct. */
1316 ("%#x\n", cLookupEntries), VERR_INTERNAL_ERROR_3);
1317
1318 uint32_t idxLookup = pidxLookup ? *pidxLookup : UINT32_MAX;
1319 if ( idxLookup >= cLookupEntries
1320 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast != GCPhysLast
1321 || pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysFirstAndId != (GCPhys | idRamRange))
1322 {
1323 uint32_t iStart = 0;
1324 uint32_t iEnd = cLookupEntries;
1325 for (;;)
1326 {
1327 idxLookup = iStart + (iEnd - iStart) / 2;
1328 RTGCPHYS const GCPhysEntryFirst = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1329 if (GCPhysLast < GCPhysEntryFirst)
1330 {
1331 AssertLogRelMsgReturn(idxLookup > iStart,
1332 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1333 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1334 VERR_NOT_FOUND);
1335 iEnd = idxLookup;
1336 }
1337 else
1338 {
1339 RTGCPHYS const GCPhysEntryLast = pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast;
1340 if (GCPhys > GCPhysEntryLast)
1341 {
1342 idxLookup += 1;
1343 AssertLogRelMsgReturn(idxLookup < iEnd,
1344 ("range: GCPhys=%RGp LB %RGp GCPhysLast=%RGp %s\n",
1345 GCPhys, pRam->cb, GCPhysLast, pRam->pszDesc),
1346 VERR_NOT_FOUND);
1347 iStart = idxLookup;
1348 }
1349 else
1350 {
1351 uint32_t const idEntry = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1352 AssertLogRelMsgReturn( GCPhysEntryFirst == GCPhys
1353 && GCPhysEntryLast == GCPhysLast
1354 && idEntry == idRamRange,
1355 ("Found: %RGp..%RGp id=%#x; Wanted: GCPhys=%RGp LB %RGp GCPhysLast=%RGp id=%#x %s\n",
1356 GCPhysEntryFirst, GCPhysEntryLast, idEntry,
1357 GCPhys, pRam->cb, GCPhysLast, pRam->idRange, pRam->pszDesc),
1358 VERR_NOT_FOUND);
1359 break;
1360 }
1361 }
1362 }
1363 }
1364 /* else we've got a good hint. */
1365
1366 /*
1367 * Do the actual job.
1368 *
1369 * The moving of existing table entries is done in a way that allows other
1370 * EMTs to perform concurrent lookups with the updating.
1371 */
1372 bool const fUseAtomic = pVM->enmVMState != VMSTATE_CREATING
1373 && pVM->cCpus > 1
1374#ifdef RT_ARCH_AMD64
1375 && g_CpumHostFeatures.s.fCmpXchg16b
1376#endif
1377 ;
1378
1379 /* Signal that we're modifying the lookup table: */
1380 uint32_t const idGeneration = (pVM->pgm.s.RamRangeUnion.idGeneration + 1) | 1; /* paranoia^3 */
1381 ASMAtomicWriteU32(&pVM->pgm.s.RamRangeUnion.idGeneration, idGeneration);
1382
1383 /* Do we need to shift any lookup table entries? (This is a lot simpler
1384 than insertion.) */
1385 if (idxLookup + 1U < cLookupEntries)
1386 {
1387 uint32_t cToMove = cLookupEntries - idxLookup - 1U;
1388 PGMRAMRANGELOOKUPENTRY *pCur = &pVM->pgm.s.aRamRangeLookup[idxLookup];
1389 if (!fUseAtomic)
1390 do
1391 {
1392 pCur->GCPhysFirstAndId = pCur[1].GCPhysFirstAndId;
1393 pCur->GCPhysLast = pCur[1].GCPhysLast;
1394 pCur += 1;
1395 cToMove -= 1;
1396 } while (cToMove > 0);
1397 else
1398 {
1399#if RTASM_HAVE_WRITE_U128 >= 2
1400 do
1401 {
1402 ASMAtomicWriteU128U(&pCur->u128Volatile, pCur[1].u128Normal);
1403 pCur += 1;
1404 cToMove -= 1;
1405 } while (cToMove > 0);
1406
1407#else
1408 uint64_t u64PrevLo = pCur->u128Normal.s.Lo;
1409 uint64_t u64PrevHi = pCur->u128Normal.s.Hi;
1410 do
1411 {
1412 uint64_t const u64CurLo = pCur[1].u128Normal.s.Lo;
1413 uint64_t const u64CurHi = pCur[1].u128Normal.s.Hi;
1414 uint128_t uOldIgn;
1415 AssertStmt(ASMAtomicCmpXchgU128v2(&pCur->u128Volatile.u, u64CurHi, u64CurLo, u64PrevHi, u64PrevLo, &uOldIgn),
1416 (pCur->u128Volatile.s.Lo = u64CurLo, pCur->u128Volatile.s.Hi = u64CurHi));
1417 u64PrevLo = u64CurLo;
1418 u64PrevHi = u64CurHi;
1419 pCur += 1;
1420 cToMove -= 1;
1421 } while (cToMove > 0);
1422#endif
1423 }
1424 }
1425
1426 /* Update the RAM range entry to indicate that it is no longer mapped.
1427 The GCPhys member is accessed by the lockless TLB lookup code, so update
1428 it last and atomically to be on the safe side. */
1429 pRam->GCPhysLast = NIL_RTGCPHYS;
1430 ASMAtomicWriteU64(&pRam->GCPhys, NIL_RTGCPHYS);
1431
1432 /*
1433 * Update the generation and count in one go, signaling the end of the updating.
1434 */
1435 PGM::PGMRAMRANGEGENANDLOOKUPCOUNT GenAndCount;
1436 GenAndCount.cLookupEntries = cLookupEntries - 1;
1437 GenAndCount.idGeneration = idGeneration + 1;
1438 ASMAtomicWriteU64(&pVM->pgm.s.RamRangeUnion.u64Combined, GenAndCount.u64Combined);
1439
1440 if (pidxLookup)
1441 *pidxLookup = idxLookup + 1;
1442
1443 return VINF_SUCCESS;
1444}
1445
1446
1447/**
1448 * Gets the number of ram ranges.
1449 *
1450 * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid.
1451 * @param pVM The cross context VM structure.
1452 */
1453VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM)
1454{
1455 VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX);
1456
1457 PGM_LOCK_VOID(pVM);
1458 uint32_t const cRamRanges = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1459 PGM_UNLOCK(pVM);
1460 return cRamRanges;
1461}
1462
1463
1464/**
1465 * Get information about a range.
1466 *
1467 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE.
1468 * @param pVM The cross context VM structure.
1469 * @param iRange The ordinal of the range.
1470 * @param pGCPhysStart Where to return the start of the range. Optional.
1471 * @param pGCPhysLast Where to return the address of the last byte in the
1472 * range. Optional.
1473 * @param ppszDesc Where to return the range description. Optional.
1474 * @param pfIsMmio Where to indicate that this is a pure MMIO range.
1475 * Optional.
1476 */
1477VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast,
1478 const char **ppszDesc, bool *pfIsMmio)
1479{
1480 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1481
1482 PGM_LOCK_VOID(pVM);
1483 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1484 if (iRange < cLookupEntries)
1485 {
1486 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[iRange]);
1487 Assert(idRamRange && idRamRange <= pVM->pgm.s.idRamRangeMax);
1488 PGMRAMRANGE const * const pRamRange = pVM->pgm.s.apRamRanges[idRamRange];
1489 AssertPtr(pRamRange);
1490
1491 if (pGCPhysStart)
1492 *pGCPhysStart = pRamRange->GCPhys;
1493 if (pGCPhysLast)
1494 *pGCPhysLast = pRamRange->GCPhysLast;
1495 if (ppszDesc)
1496 *ppszDesc = pRamRange->pszDesc;
1497 if (pfIsMmio)
1498 *pfIsMmio = !!(pRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO);
1499
1500 PGM_UNLOCK(pVM);
1501 return VINF_SUCCESS;
1502 }
1503 PGM_UNLOCK(pVM);
1504 return VERR_OUT_OF_RANGE;
1505}
1506
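/*
 * Together with PGMR3PhysGetRamRangeCount this gives a simple enumeration
 * pattern, e.g. for logging the current memory layout:
 *
 *      uint32_t const cRanges = PGMR3PhysGetRamRangeCount(pVM);
 *      for (uint32_t i = 0; i < cRanges; i++)
 *      {
 *          RTGCPHYS    GCPhysStart, GCPhysLast;
 *          const char *pszDesc;
 *          bool        fIsMmio;
 *          if (RT_SUCCESS(PGMR3PhysGetRange(pVM, i, &GCPhysStart, &GCPhysLast, &pszDesc, &fIsMmio)))
 *              LogRel(("%RGp-%RGp %s%s\n", GCPhysStart, GCPhysLast, pszDesc, fIsMmio ? " (MMIO)" : ""));
 *      }
 */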
1507
1508/**
1509 * Gets RAM ranges that are supposed to be zero'ed at boot.
1510 *
1511 * This function gets all RAM ranges that are not ad hoc (ROM, MMIO, MMIO2) memory.
1512 * The RAM hole (if any) is -NOT- included because we don't return 0s when it is
1513 * read anyway.
1514 *
1515 * @returns VBox status code.
1516 * @param pVM The cross context VM structure.
1517 * @param pRanges Where to store the physical RAM ranges.
1518 * @param cMaxRanges The maximum ranges that can be stored.
1519 */
1520VMMR3_INT_DECL(int) PGMR3PhysGetRamBootZeroedRanges(PVM pVM, PPGMPHYSRANGES pRanges, uint32_t cMaxRanges)
1521{
1522 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1523 AssertPtrReturn(pRanges, VERR_INVALID_PARAMETER);
1524 AssertReturn(cMaxRanges > 0, VERR_INVALID_PARAMETER);
1525
1526 int rc = VINF_SUCCESS;
1527 uint32_t idxRange = 0;
1528 PGM_LOCK_VOID(pVM);
1529
1530 /*
1531 * The primary purpose of this API is the GIM Hyper-V hypercall which recommends (not
1532 * requires) that the largest ranges are reported earlier. Therefore, here we iterate
1533 * the ranges in reverse because in PGM the largest range is generally at the end.
1534 */
1535 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
1536 for (int32_t idxLookup = cLookupEntries - 1; idxLookup >= 0; idxLookup--)
1537 {
1538 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1539 Assert(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1540 PPGMRAMRANGE const pCur = pVM->pgm.s.apRamRanges[idRamRange];
1541 AssertContinue(pCur);
1542
1543 if (!PGM_RAM_RANGE_IS_AD_HOC(pCur))
1544 {
1545 if (idxRange < cMaxRanges)
1546 {
1547 /* Combine with previous range if it is contiguous, otherwise add it as a new range. */
1548 if ( idxRange > 0
1549 && pRanges->aRanges[idxRange - 1].GCPhysStart == pCur->GCPhysLast + 1U)
1550 {
1551 pRanges->aRanges[idxRange - 1].GCPhysStart = pCur->GCPhys;
1552 pRanges->aRanges[idxRange - 1].cPages += (pCur->cb >> GUEST_PAGE_SHIFT);
1553 }
1554 else
1555 {
1556 pRanges->aRanges[idxRange].GCPhysStart = pCur->GCPhys;
1557 pRanges->aRanges[idxRange].cPages = pCur->cb >> GUEST_PAGE_SHIFT;
1558 ++idxRange;
1559 }
1560 }
1561 else
1562 {
1563 rc = VERR_BUFFER_OVERFLOW;
1564 break;
1565 }
1566 }
1567 }
1568 pRanges->cRanges = idxRange;
1569 PGM_UNLOCK(pVM);
1570 return rc;
1571}
1572
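/*
 * Caller sketch for the API above.  The 32-entry buffer size is illustrative,
 * and RT_UOFFSETOF is used so the flexible aRanges array can be sized without
 * naming its element type:
 *
 *      PPGMPHYSRANGES pRanges = (PPGMPHYSRANGES)RTMemAllocZ(RT_UOFFSETOF(PGMPHYSRANGES, aRanges[32]));
 *      if (pRanges)
 *      {
 *          int rc = PGMR3PhysGetRamBootZeroedRanges(pVM, pRanges, 32);
 *          if (RT_SUCCESS(rc))
 *              .. report pRanges->cRanges entries to the interested party ..
 *          RTMemFree(pRanges);
 *      }
 */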
1573
1574/*********************************************************************************************************************************
1575* RAM *
1576*********************************************************************************************************************************/
1577
1578/**
1579 * Frees the specified RAM page and replaces it with the ZERO page.
1580 *
1581 * This is used by ballooning, remapping MMIO2, RAM reset and state loading.
1582 *
1583 * @param pVM The cross context VM structure.
1584 * @param pReq Pointer to the request. This is NULL when doing a
1585 * bulk free in NEM memory mode.
1586 * @param pcPendingPages Where the number of pages waiting to be freed is
1587 * kept. This will normally be incremented. This is
1588 * NULL when doing a bulk free in NEM memory mode.
1589 * @param pPage Pointer to the page structure.
1590 * @param GCPhys The guest physical address of the page, if applicable.
1591 * @param enmNewType New page type for NEM notification, since several
1592 * callers will change the type upon successful return.
1593 *
1594 * @remarks The caller must own the PGM lock.
1595 */
1596int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys,
1597 PGMPAGETYPE enmNewType)
1598{
1599 RT_NOREF(enmNewType, pcPendingPages);
1600
1601 /*
1602 * Assert sanity.
1603 */
1604 PGM_LOCK_ASSERT_OWNER(pVM);
1605 if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM
1606 && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW))
1607 {
1608 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1609 return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage));
1610 }
1611
1612 /** @todo What about ballooning of large pages??! */
1613 Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE
1614 && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED);
1615
1616 if ( PGM_PAGE_IS_ZERO(pPage)
1617 || PGM_PAGE_IS_BALLOONED(pPage))
1618 return VINF_SUCCESS;
1619
1620 const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage);
1621 Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage));
1622 if (RT_UNLIKELY(!PGM_IS_IN_NEM_MODE(pVM)
1623 ? idPage == NIL_GMM_PAGEID
1624 || idPage > GMM_PAGEID_LAST
1625 || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID
1626 : idPage != NIL_GMM_PAGEID))
1627 {
1628 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
1629 return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, pPage);
1630 }
1631#if defined(VBOX_WITH_NATIVE_NEM) && !defined(VBOX_WITH_ONLY_PGM_NEM_MODE)
1632 const RTHCPHYS HCPhysPrev = PGM_PAGE_GET_HCPHYS(pPage);
1633#endif
1634
1635 /* update page count stats. */
1636 if (PGM_PAGE_IS_SHARED(pPage))
1637 pVM->pgm.s.cSharedPages--;
1638 else
1639 pVM->pgm.s.cPrivatePages--;
1640 pVM->pgm.s.cZeroPages++;
1641
1642 /* Deal with write monitored pages. */
1643 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
1644 {
1645 PGM_PAGE_SET_WRITTEN_TO(pVM, pPage);
1646 pVM->pgm.s.cWrittenToPages++;
1647 }
1648 PGM_PAGE_CLEAR_CODE_PAGE(pVM, pPage); /* No callback needed, IEMTlbInvalidateAllPhysicalAllCpus is called below. */
1649
1650 /*
1651 * pPage = ZERO page.
1652 */
1653 PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg);
1654 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1655 PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID);
1656 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
1657 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
1658 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
1659
1660 /* Flush physical page map TLB entry. */
1661 pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys);
1662 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_FREED); /// @todo move to the perform step.
1663
1664#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1665# ifdef VBOX_WITH_PGM_NEM_MODE
1666 /*
1667 * Skip the rest if we're doing a bulk free in NEM memory mode.
1668 */
1669 if (!pReq)
1670 return VINF_SUCCESS;
1671 AssertLogRelReturn(!pVM->pgm.s.fNemMode, VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
1672# endif
1673
1674# ifdef VBOX_WITH_NATIVE_NEM
1675 /* Notify NEM. */
1676 /** @todo Remove this one? */
1677 if (VM_IS_NEM_ENABLED(pVM))
1678 {
1679 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage);
1680 NEMHCNotifyPhysPageChanged(pVM, GCPhys, HCPhysPrev, pVM->pgm.s.HCPhysZeroPg, pVM->pgm.s.abZeroPg,
1681 pgmPhysPageCalcNemProtection(pPage, enmNewType), enmNewType, &u2State);
1682 PGM_PAGE_SET_NEM_STATE(pPage, u2State);
1683 }
1684# endif
1685
1686 /*
1687 * Make sure it's not in the handy page array.
1688 */
1689 for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
1690 {
1691 if (pVM->pgm.s.aHandyPages[i].idPage == idPage)
1692 {
1693 pVM->pgm.s.aHandyPages[i].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
1694 pVM->pgm.s.aHandyPages[i].fZeroed = false;
1695 pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID;
1696 break;
1697 }
1698 if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage)
1699 {
1700 pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID;
1701 break;
1702 }
1703 }
1704
1705 /*
1706 * Push it onto the page array.
1707 */
1708 uint32_t iPage = *pcPendingPages;
1709 Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE);
1710 *pcPendingPages += 1;
1711
1712 pReq->aPages[iPage].idPage = idPage;
1713
1714 if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE)
1715 return VINF_SUCCESS;
1716
1717 /*
1718 * Flush the pages.
1719 */
1720 int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE);
1721 if (RT_SUCCESS(rc))
1722 {
1723 GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1724 *pcPendingPages = 0;
1725 }
1726 return rc;
1727
1728#else /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1729 Assert(!pReq); RT_NOREF(pReq);
1730 return VINF_SUCCESS;
1731#endif /* VBOX_WITH_ONLY_PGM_NEM_MODE */
1732}
1733
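/*
 * Illustrative sketch (not part of the build): the batched free protocol that
 * pgmPhysFreePage above is designed for, as used by pgmR3PhysFreePageRange
 * below and by pgmR3PhysRamZeroAll/pgmR3PhysRamTerm further down.  The pPage
 * and GCPhys values are placeholders supplied by the caller.
 *
 *      uint32_t         cPendingPages = 0;
 *      PGMMFREEPAGESREQ pReq          = NULL;
 *      int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
 *      AssertLogRelRCReturn(rc, rc);
 *
 *      // One call per page to free; a full batch is flushed to GMM from
 *      // inside pgmPhysFreePage, which then re-preps the request and resets
 *      // cPendingPages.
 *      rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, GCPhys, PGMPAGETYPE_RAM);
 *      AssertLogRelRCReturn(rc, rc);
 *
 *      // Flush whatever remains in the batch and clean up the request.
 *      if (cPendingPages)
 *          rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
 *      GMMR3FreePagesCleanup(pReq);
 */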
1734
1735/**
1736 * Frees a range of pages, replacing them with MMIO ZERO pages.
1737 *
1738 * @returns VBox status code.
1739 * @param pVM The cross context VM structure.
1740 * @param pRam The RAM range in which the pages resides.
1741 * @param GCPhys The address of the first page.
1742 * @param GCPhysLast The address of the last page.
1743 * @param pvMmio2 Pointer to the ring-3 mapping of any MMIO2 memory that
1744 * will replace the pages we're freeing up.
1745 */
1746static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, void *pvMmio2)
1747{
1748 PGM_LOCK_ASSERT_OWNER(pVM);
1749 /** @todo pvMmio2 is always NULL. */
1750
1751#ifdef VBOX_WITH_PGM_NEM_MODE
1752 /*
1753 * In simplified memory mode we don't actually free the memory,
1754 * we just unmap it and let NEM do any unlocking of it.
1755 */
1756# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1757 if (pVM->pgm.s.fNemMode)
1758# endif
1759 {
1760 Assert(VM_IS_NEM_ENABLED(pVM) || VM_IS_EXEC_ENGINE_IEM(pVM));
1761 uint8_t u2State = 0; /* (We don't support UINT8_MAX here.) */
1762 if (VM_IS_NEM_ENABLED(pVM))
1763 {
1764 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1765 int rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify,
1766 pRam->pbR3 ? pRam->pbR3 + GCPhys - pRam->GCPhys : NULL,
1767 pvMmio2, &u2State, NULL /*puNemRange*/);
1768 AssertLogRelRCReturn(rc, rc);
1769 }
1770
1771 /* Iterate the pages. */
1772 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1773 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1774 while (cPagesLeft-- > 0)
1775 {
1776 int rc = pgmPhysFreePage(pVM, NULL, NULL, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1777 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1778
1779 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1780 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1781
1782 GCPhys += GUEST_PAGE_SIZE;
1783 pPageDst++;
1784 }
1785 return VINF_SUCCESS;
1786 }
1787#else /* !VBOX_WITH_PGM_NEM_MODE */
1788 RT_NOREF(pvMmio2);
1789#endif /* !VBOX_WITH_PGM_NEM_MODE */
1790#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1791
1792 /*
1793 * Regular mode.
1794 */
1795 /* Prepare. */
1796 uint32_t cPendingPages = 0;
1797 PGMMFREEPAGESREQ pReq;
1798 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1799 AssertLogRelRCReturn(rc, rc);
1800
1801# ifdef VBOX_WITH_NATIVE_NEM
1802 /* Tell NEM up-front. */
1803 uint8_t u2State = UINT8_MAX;
1804 if (VM_IS_NEM_ENABLED(pVM))
1805 {
1806 uint32_t const fNemNotify = (pvMmio2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE;
1807 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, GCPhysLast - GCPhys + 1, fNemNotify, NULL, pvMmio2,
1808 &u2State, NULL /*puNemRange*/);
1809 AssertLogRelRCReturnStmt(rc, GMMR3FreePagesCleanup(pReq), rc);
1810 }
1811# endif
1812
1813 /* Iterate the pages. */
1814 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
1815 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> GUEST_PAGE_SHIFT) + 1;
1816 while (cPagesLeft-- > 0)
1817 {
1818 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys, PGMPAGETYPE_MMIO);
1819 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
1820
1821 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO);
1822# ifdef VBOX_WITH_NATIVE_NEM
1823 if (u2State != UINT8_MAX)
1824 PGM_PAGE_SET_NEM_STATE(pPageDst, u2State);
1825# endif
1826
1827 GCPhys += GUEST_PAGE_SIZE;
1828 pPageDst++;
1829 }
1830
1831 /* Finish pending and cleanup. */
1832 if (cPendingPages)
1833 {
1834 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
1835 AssertLogRelRCReturn(rc, rc);
1836 }
1837 GMMR3FreePagesCleanup(pReq);
1838
1839 return rc;
1840#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
1841}
1842
1843
1844/**
1845 * Wrapper around VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE.
1846 */
1847static int pgmR3PhysAllocateRamRange(PVM pVM, PVMCPU pVCpu, uint32_t cGuestPages, uint32_t fFlags, PPGMRAMRANGE *ppRamRange)
1848{
1849 int rc;
1850 PGMPHYSALLOCATERAMRANGEREQ AllocRangeReq;
1851 AllocRangeReq.idNewRange = UINT32_MAX / 4;
1852#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
1853 if (!SUPR3IsDriverless())
1854 {
1855 AllocRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
1856 AllocRangeReq.Hdr.cbReq = sizeof(AllocRangeReq);
1857 AllocRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
1858 AllocRangeReq.cGuestPages = cGuestPages;
1859 AllocRangeReq.fFlags = fFlags;
1860 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ALLOCATE_RAM_RANGE, 0 /*u64Arg*/, &AllocRangeReq.Hdr);
1861 }
1862 else
1863#endif
1864 rc = pgmPhysRamRangeAllocCommon(pVM, cGuestPages, fFlags, &AllocRangeReq.idNewRange);
1865 if (RT_SUCCESS(rc))
1866 {
1867 Assert(AllocRangeReq.idNewRange != 0);
1868 Assert(AllocRangeReq.idNewRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
1869 AssertPtr(pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange]);
1870 *ppRamRange = pVM->pgm.s.apRamRanges[AllocRangeReq.idNewRange];
1871 return VINF_SUCCESS;
1872 }
1873
1874 RT_NOREF(pVCpu);
1875 *ppRamRange = NULL;
1876 return rc;
1877}
1878
1879
1880/**
1881 * PGMR3PhysRegisterRam worker that initializes and links a RAM range.
1882 *
1883 * In NEM mode, this will allocate the pages backing the RAM range and this may
1884 * fail. NEM registration may also fail. (In regular HM mode it won't fail.)
1885 *
1886 * @returns VBox status code.
1887 * @param pVM The cross context VM structure.
1888 * @param pNew The new RAM range.
1889 * @param GCPhys The address of the RAM range.
1890 * @param GCPhysLast The last address of the RAM range.
1891 * @param pszDesc The description.
1892 * @param pidxLookup The lookup table insertion point.
1893 */
1894static int pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast,
1895 const char *pszDesc, uint32_t *pidxLookup)
1896{
1897 /*
1898 * Initialize the range.
1899 */
1900 Assert(pNew->cb == GCPhysLast - GCPhys + 1U); RT_NOREF(GCPhysLast);
1901 pNew->pszDesc = pszDesc;
1902 pNew->uNemRange = UINT32_MAX;
1903 pNew->pbR3 = NULL;
1904 pNew->paLSPages = NULL;
1905
1906 uint32_t const cPages = pNew->cb >> GUEST_PAGE_SHIFT;
1907#ifdef VBOX_WITH_PGM_NEM_MODE
1908 if (PGM_IS_IN_NEM_MODE(pVM))
1909 {
1910 int rc = SUPR3PageAlloc(RT_ALIGN_Z(pNew->cb, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT,
1911 pVM->pgm.s.fUseLargePages ? SUP_PAGE_ALLOC_F_LARGE_PAGES : 0, (void **)&pNew->pbR3);
1912 if (RT_FAILURE(rc))
1913 return rc;
1914
1915 RTGCPHYS iPage = cPages;
1916 while (iPage-- > 0)
1917 PGM_PAGE_INIT(&pNew->aPages[iPage], UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
1918 PGMPAGETYPE_RAM, PGM_PAGE_STATE_ALLOCATED);
1919
1920 /* Update the page count stats. */
1921 pVM->pgm.s.cPrivatePages += cPages;
1922 pVM->pgm.s.cAllPages += cPages;
1923 }
1924 else
1925#endif
1926 {
1927#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
1928 RTGCPHYS iPage = cPages;
1929 while (iPage-- > 0)
1930 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM);
1931
1932 /* Update the page count stats. */
1933 pVM->pgm.s.cZeroPages += cPages;
1934 pVM->pgm.s.cAllPages += cPages;
1935#endif
1936 }
1937
1938 /*
1939 * Insert it into the lookup table.
1940 */
1941 int rc = pgmR3PhysRamRangeInsertLookup(pVM, pNew, GCPhys, pidxLookup);
1942 AssertRCReturn(rc, rc);
1943
1944#ifdef VBOX_WITH_NATIVE_NEM
1945 /*
1946 * Notify NEM now that it has been linked.
1947 *
1948 * As above, it is assumed that on failure the VM creation will fail, so
1949 * no extra cleanup is needed here.
1950 */
1951 if (VM_IS_NEM_ENABLED(pVM))
1952 {
1953 uint8_t u2State = UINT8_MAX;
1954 rc = NEMR3NotifyPhysRamRegister(pVM, GCPhys, pNew->cb, pNew->pbR3, &u2State, &pNew->uNemRange);
1955 if (RT_SUCCESS(rc) && u2State != UINT8_MAX)
1956 pgmPhysSetNemStateForPages(&pNew->aPages[0], cPages, u2State);
1957 return rc;
1958 }
1959#endif
1960 return VINF_SUCCESS;
1961}
1962
1963
1964/**
1965 * Worker for PGMR3PhysRegisterRam called with the PGM lock.
1966 *
1967 * The caller releases the lock.
1968 */
1969static int pgmR3PhysRegisterRamWorker(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc,
1970 uint32_t const cRamRanges, RTGCPHYS const GCPhysLast)
1971{
1972#ifdef VBOX_STRICT
1973 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
1974#endif
1975
1976 /*
1977 * Check that we've got enough free RAM ranges.
1978 */
1979 AssertLogRelMsgReturn((uint64_t)pVM->pgm.s.idRamRangeMax + cRamRanges + 1 <= RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup),
1980 ("idRamRangeMax=%#RX32 vs GCPhys=%RGp cb=%RGp / %#RX32 ranges (%s)\n",
1981 pVM->pgm.s.idRamRangeMax, GCPhys, cb, cRamRanges, pszDesc),
1982 VERR_PGM_TOO_MANY_RAM_RANGES);
1983
1984 /*
1985 * Check for conflicts via the lookup table. We search it backwards,
1986 * assuming that memory is added in ascending order by address.
1987 */
1988 uint32_t idxLookup = pVM->pgm.s.RamRangeUnion.cLookupEntries;
1989 while (idxLookup)
1990 {
1991 if (GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup - 1].GCPhysLast)
1992 break;
1993 idxLookup--;
1994 RTGCPHYS const GCPhysCur = PGMRAMRANGELOOKUPENTRY_GET_FIRST(pVM->pgm.s.aRamRangeLookup[idxLookup]);
1995 AssertLogRelMsgReturn( GCPhysLast < GCPhysCur
1996 || GCPhys > pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1997 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
1998 GCPhys, GCPhysLast, pszDesc, GCPhysCur, pVM->pgm.s.aRamRangeLookup[idxLookup].GCPhysLast,
1999 pVM->pgm.s.apRamRanges[PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup])]->pszDesc),
2000 VERR_PGM_RAM_CONFLICT);
2001 }
2002
2003 /*
2004 * Register it with GMM (the API complains if we don't).
2005 */
2006 const RTGCPHYS cPages = cb >> GUEST_PAGE_SHIFT;
2007 int rc = MMR3IncreaseBaseReservation(pVM, cPages);
2008 if (RT_FAILURE(rc))
2009 return rc;
2010
2011 /*
2012 * Create the required chunks.
2013 */
2014 RTGCPHYS cPagesLeft = cPages;
2015 RTGCPHYS GCPhysChunk = GCPhys;
2016 uint32_t idxChunk = 0;
2017 while (cPagesLeft > 0)
2018 {
2019 uint32_t cPagesInChunk = cPagesLeft;
2020 if (cPagesInChunk > PGM_MAX_PAGES_PER_RAM_RANGE)
2021 cPagesInChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2022
2023 const char *pszDescChunk = idxChunk == 0
2024 ? pszDesc
2025 : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, idxChunk + 1);
2026 AssertReturn(pszDescChunk, VERR_NO_MEMORY);
2027
2028 /*
2029 * Allocate a RAM range.
2030 */
2031 PPGMRAMRANGE pNew = NULL;
2032 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPagesInChunk, 0 /*fFlags*/, &pNew);
2033 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2034 ("pgmR3PhysAllocateRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2035 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2036 rc);
2037
2038 /*
2039 * Ok, init and link the range.
2040 */
2041 rc = pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhysChunk,
2042 GCPhysChunk + ((RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT) - 1U,
2043 pszDescChunk, &idxLookup);
2044 AssertLogRelMsgReturn(RT_SUCCESS(rc),
2045 ("pgmR3PhysInitAndLinkRamRange failed: GCPhysChunk=%RGp cPagesInChunk=%#RX32 (%s): %Rrc\n",
2046 GCPhysChunk, cPagesInChunk, pszDescChunk, rc),
2047 rc);
2048
2049 /* advance */
2050 GCPhysChunk += (RTGCPHYS)cPagesInChunk << GUEST_PAGE_SHIFT;
2051 cPagesLeft -= cPagesInChunk;
2052 idxChunk++;
2053 }
2054
2055 return rc;
2056}
2057
2058
2059/**
2060 * Sets up a RAM range.
2061 *
2062 * This will check for conflicting registrations, make a resource reservation
2063 * for the memory (with GMM), and set up the per-page tracking structures
2064 * (PGMPAGE).
2065 *
2066 * @returns VBox status code.
2067 * @param pVM The cross context VM structure.
2068 * @param GCPhys The physical address of the RAM.
2069 * @param cb The size of the RAM.
2070 * @param pszDesc The description - not copied, so, don't free or change it.
2071 */
2072VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc)
2073{
2074 /*
2075 * Validate input.
2076 */
2077 Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc));
2078 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
2079 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
2080 AssertReturn(cb > 0, VERR_INVALID_PARAMETER);
2081 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
2082 AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER);
2083 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2084 PVMCPU const pVCpu = VMMGetCpu(pVM);
2085 AssertReturn(pVCpu, VERR_VM_THREAD_NOT_EMT);
2086 AssertReturn(pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2087
2088 /*
2089 * Calculate the number of RAM ranges required.
2090 * See also pgmPhysMmio2CalcChunkCount.
2091 */
2092 uint32_t const cPagesPerChunk = PGM_MAX_PAGES_PER_RAM_RANGE;
2093 uint32_t const cRamRanges = (uint32_t)(((cb >> GUEST_PAGE_SHIFT) + cPagesPerChunk - 1) / cPagesPerChunk);
2094 AssertLogRelMsgReturn(cRamRanges * (RTGCPHYS)cPagesPerChunk * GUEST_PAGE_SIZE >= cb,
2095 ("cb=%RGp cRamRanges=%#RX32 cPagesPerChunk=%#RX32\n", cb, cRamRanges, cPagesPerChunk),
2096 VERR_OUT_OF_RANGE);
2097
2098 PGM_LOCK_VOID(pVM);
2099
2100 int rc = pgmR3PhysRegisterRamWorker(pVM, pVCpu, GCPhys, cb, pszDesc, cRamRanges, GCPhysLast);
2101#ifdef VBOX_STRICT
2102 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2103#endif
2104
2105 PGM_UNLOCK(pVM);
2106 return rc;
2107}
2108
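/*
 * Illustrative sketch (not part of the build): how base RAM might be
 * registered from the VMM memory setup code; addresses, sizes and description
 * strings are hypothetical.  The worker above splits each registration into
 * ceil(cPages / PGM_MAX_PAGES_PER_RAM_RANGE) RAM ranges, the extra chunks
 * getting "(#2)", "(#3)", ... suffixed descriptions.
 *
 *      // 3 GiB below the 32-bit PCI hole and 1 GiB above 4 GiB.
 *      int rc = PGMR3PhysRegisterRam(pVM, 0, UINT64_C(0xc0000000), "Base RAM");
 *      if (RT_SUCCESS(rc))
 *          rc = PGMR3PhysRegisterRam(pVM, _4G, _1G, "Above 4GB Base RAM");
 */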
2109
2110#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2111/**
2112 * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM.
2113 *
2114 * We do this late in the init process so that all the ROM and MMIO ranges have
2115 * been registered already and we don't go wasting memory on them.
2116 *
2117 * @returns VBox status code.
2118 *
2119 * @param pVM The cross context VM structure.
2120 */
2121int pgmR3PhysRamPreAllocate(PVM pVM)
2122{
2123 Assert(pVM->pgm.s.fRamPreAlloc);
2124 Log(("pgmR3PhysRamPreAllocate: enter\n"));
2125# ifdef VBOX_WITH_PGM_NEM_MODE
2126 if (VM_IS_NEM_ENABLED(pVM))
2127 {
2128 LogRel(("PGM: Pre-alloc ignored in NEM mode.\n"));
2129 return VINF_SUCCESS;
2130 }
2131# endif
2132
2133 /*
2134 * Walk the RAM ranges and allocate all RAM pages, halt at
2135 * the first allocation error.
2136 */
2137 uint64_t cPages = 0;
2138 uint64_t NanoTS = RTTimeNanoTS();
2139 PGM_LOCK_VOID(pVM);
2140 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2141 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2142 {
2143 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2144 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2145 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2146 AssertContinue(pRam);
2147
2148 PPGMPAGE pPage = &pRam->aPages[0];
2149 RTGCPHYS GCPhys = pRam->GCPhys;
2150 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
2151 while (cLeft-- > 0)
2152 {
2153 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2154 {
2155 switch (PGM_PAGE_GET_STATE(pPage))
2156 {
2157 case PGM_PAGE_STATE_ZERO:
2158 {
2159 int rc = pgmPhysAllocPage(pVM, pPage, GCPhys);
2160 if (RT_FAILURE(rc))
2161 {
2162 LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc));
2163 PGM_UNLOCK(pVM);
2164 return rc;
2165 }
2166 cPages++;
2167 break;
2168 }
2169
2170 case PGM_PAGE_STATE_BALLOONED:
2171 case PGM_PAGE_STATE_ALLOCATED:
2172 case PGM_PAGE_STATE_WRITE_MONITORED:
2173 case PGM_PAGE_STATE_SHARED:
2174 /* nothing to do here. */
2175 break;
2176 }
2177 }
2178
2179 /* next */
2180 pPage++;
2181 GCPhys += GUEST_PAGE_SIZE;
2182 }
2183 }
2184 PGM_UNLOCK(pVM);
2185 NanoTS = RTTimeNanoTS() - NanoTS;
2186
2187 LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000));
2188 Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n"));
2189 return VINF_SUCCESS;
2190}
2191#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
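/*
 * Note: pgmR3PhysRamPreAllocate above only runs when pVM->pgm.s.fRamPreAlloc is
 * set.  That flag comes from the PGM configuration read elsewhere (believed to
 * be the "RamPreAlloc" CFGM value, i.e. the VBoxInternal/PGM/RamPreAlloc
 * extradata key); treat the exact key name as an assumption.
 */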
2192
2193
2194/**
2195 * Checks shared page checksums.
2196 *
2197 * @param pVM The cross context VM structure.
2198 */
2199void pgmR3PhysAssertSharedPageChecksums(PVM pVM)
2200{
2201#ifdef VBOX_STRICT
2202 PGM_LOCK_VOID(pVM);
2203
2204 if (pVM->pgm.s.cSharedPages > 0)
2205 {
2206 /*
2207 * Walk the ram ranges.
2208 */
2209 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
2210 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
2211 {
2212 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2213 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
2214 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2215 AssertContinue(pRam);
2216
2217 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2218 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb,
2219 ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2220
2221 while (iPage-- > 0)
2222 {
2223 PPGMPAGE pPage = &pRam->aPages[iPage];
2224 if (PGM_PAGE_IS_SHARED(pPage))
2225 {
2226 uint32_t u32Checksum = pPage->s.u2Unused0/* | ((uint32_t)pPage->s.u2Unused1 << 8)*/;
2227 if (!u32Checksum)
2228 {
2229 RTGCPHYS GCPhysPage = pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT);
2230 void const *pvPage;
2231 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhysPage, &pvPage);
2232 if (RT_SUCCESS(rc))
2233 {
2234 uint32_t u32Checksum2 = RTCrc32(pvPage, GUEST_PAGE_SIZE);
2235# if 0
2236 AssertMsg((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum, ("GCPhysPage=%RGp\n", GCPhysPage));
2237# else
2238 if ((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum)
2239 LogFlow(("shpg %#x @ %RGp %#x [OK]\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2240 else
2241 AssertMsgFailed(("shpg %#x @ %RGp %#x\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2));
2242# endif
2243 }
2244 else
2245 AssertRC(rc);
2246 }
2247 }
2248
2249 } /* for each page */
2250
2251 } /* for each ram range */
2252 }
2253
2254 PGM_UNLOCK(pVM);
2255#endif /* VBOX_STRICT */
2256 NOREF(pVM);
2257}
2258
2259
2260/**
2261 * Resets the physical memory state.
2262 *
2263 * ASSUMES that the caller owns the PGM lock.
2264 *
2265 * @returns VBox status code.
2266 * @param pVM The cross context VM structure.
2267 */
2268int pgmR3PhysRamReset(PVM pVM)
2269{
2270 PGM_LOCK_ASSERT_OWNER(pVM);
2271
2272#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2273 /* Reset the memory balloon. */
2274 int rc1 = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2275 AssertRC(rc1);
2276#endif
2277
2278#ifdef VBOX_WITH_PAGE_SHARING
2279 /* Clear all registered shared modules. */
2280 pgmR3PhysAssertSharedPageChecksums(pVM);
2281 int rc2 = GMMR3ResetSharedModules(pVM);
2282 AssertRC(rc2);
2283#endif
2284 /* Reset counters. */
2285 pVM->pgm.s.cReusedSharedPages = 0;
2286 pVM->pgm.s.cBalloonedPages = 0;
2287
2288 return VINF_SUCCESS;
2289}
2290
2291
2292/**
2293 * Resets (zeros) the RAM after all devices and components have been reset.
2294 *
2295 * ASSUMES that the caller owns the PGM lock.
2296 *
2297 * @returns VBox status code.
2298 * @param pVM The cross context VM structure.
2299 */
2300int pgmR3PhysRamZeroAll(PVM pVM)
2301{
2302 PGM_LOCK_ASSERT_OWNER(pVM);
2303
2304 /*
2305 * We batch up pages that should be freed instead of calling GMM for
2306 * each and every one of them.
2307 */
2308 uint32_t cPendingPages = 0;
2309 PGMMFREEPAGESREQ pReq;
2310 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2311 AssertLogRelRCReturn(rc, rc);
2312
2313 /*
2314 * Walk the ram ranges.
2315 */
2316 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2317 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2318 {
2319 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2320 Assert(pRam || idRamRange == 0);
2321 if (!pRam) continue;
2322 Assert(pRam->idRange == idRamRange);
2323
2324 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2325 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2326
2327#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
2328 if ( !pVM->pgm.s.fRamPreAlloc
2329# ifdef VBOX_WITH_PGM_NEM_MODE
2330 && !pVM->pgm.s.fNemMode
2331# endif
2332 && pVM->pgm.s.fZeroRamPagesOnReset)
2333 {
2334 /* Replace all RAM pages by ZERO pages. */
2335 while (iPage-- > 0)
2336 {
2337 PPGMPAGE pPage = &pRam->aPages[iPage];
2338 switch (PGM_PAGE_GET_TYPE(pPage))
2339 {
2340 case PGMPAGETYPE_RAM:
2341 /* Do not replace pages that are part of a 2 MB contiguous
2342 range with zero pages, but zero them instead. */
2343 if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
2344 || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2345 {
2346 void *pvPage;
2347 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2348 AssertLogRelRCReturn(rc, rc);
2349 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2350 }
2351 else if (PGM_PAGE_IS_BALLOONED(pPage))
2352 {
2353 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2354 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2355 }
2356 else if (!PGM_PAGE_IS_ZERO(pPage))
2357 {
2358 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2359 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2360 AssertLogRelRCReturn(rc, rc);
2361 }
2362 break;
2363
2364 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2365 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2366 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2367 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2368 break;
2369
2370 case PGMPAGETYPE_MMIO2:
2371 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2372 case PGMPAGETYPE_ROM:
2373 case PGMPAGETYPE_MMIO:
2374 break;
2375 default:
2376 AssertFailed();
2377 }
2378 } /* for each page */
2379 }
2380 else
2381#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
2382
2383 {
2384 /* Zero the memory. */
2385 while (iPage-- > 0)
2386 {
2387 PPGMPAGE pPage = &pRam->aPages[iPage];
2388 switch (PGM_PAGE_GET_TYPE(pPage))
2389 {
2390 case PGMPAGETYPE_RAM:
2391 switch (PGM_PAGE_GET_STATE(pPage))
2392 {
2393 case PGM_PAGE_STATE_ZERO:
2394 break;
2395
2396 case PGM_PAGE_STATE_BALLOONED:
2397 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
2398 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
2399 break;
2400
2401 case PGM_PAGE_STATE_SHARED:
2402 case PGM_PAGE_STATE_WRITE_MONITORED:
2403 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
2404 AssertLogRelRCReturn(rc, rc);
2405 RT_FALL_THRU();
2406
2407 case PGM_PAGE_STATE_ALLOCATED:
2408 if (pVM->pgm.s.fZeroRamPagesOnReset)
2409 {
2410 void *pvPage;
2411 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pvPage);
2412 AssertLogRelRCReturn(rc, rc);
2413 RT_BZERO(pvPage, GUEST_PAGE_SIZE);
2414 }
2415 break;
2416 }
2417 break;
2418
2419 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2420 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */
2421 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT),
2422 pRam, true /*fDoAccounting*/, false /*fFlushIemTlbs*/);
2423 break;
2424
2425 case PGMPAGETYPE_MMIO2:
2426 case PGMPAGETYPE_ROM_SHADOW:
2427 case PGMPAGETYPE_ROM:
2428 case PGMPAGETYPE_MMIO:
2429 break;
2430 default:
2431 AssertFailed();
2432
2433 }
2434 } /* for each page */
2435 }
2436 }
2437
2438 /*
2439 * Finish off any pages pending freeing.
2440 */
2441 if (cPendingPages)
2442 {
2443 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2444 AssertLogRelRCReturn(rc, rc);
2445 }
2446 GMMR3FreePagesCleanup(pReq);
2447
2448 /*
2449 * Flush the IEM TLB, just to be sure it really is done.
2450 */
2451 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ZERO_ALL);
2452
2453 return VINF_SUCCESS;
2454}
2455
2456
2457/**
2458 * Frees all RAM during VM termination
2459 *
2460 * ASSUMES that the caller owns the PGM lock.
2461 *
2462 * @returns VBox status code.
2463 * @param pVM The cross context VM structure.
2464 */
2465int pgmR3PhysRamTerm(PVM pVM)
2466{
2467 int rc;
2468 PGM_LOCK_ASSERT_OWNER(pVM);
2469
2470#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
2471 /* Reset the memory balloon. */
2472 rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
2473 AssertRC(rc);
2474#endif
2475
2476#ifdef VBOX_WITH_PAGE_SHARING
2477
2478 /*
2479 * Clear all registered shared modules.
2480 */
2481 pgmR3PhysAssertSharedPageChecksums(pVM);
2482 rc = GMMR3ResetSharedModules(pVM);
2483 AssertRC(rc);
2484
2485 /*
2486 * Flush the handy pages updates to make sure no shared pages are hiding
2487 * in there. (Not unlikely if the VM shuts down, apparently.)
2488 */
2489# ifdef VBOX_WITH_PGM_NEM_MODE
2490 if (!pVM->pgm.s.fNemMode)
2491# endif
2492 rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_FLUSH_HANDY_PAGES, 0, NULL);
2493#endif
2494
2495 /*
2496 * We batch up pages that should be freed instead of calling GMM for
2497 * each and every one of them.
2498 */
2499 uint32_t cPendingPages = 0;
2500 PGMMFREEPAGESREQ pReq;
2501 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2502 AssertLogRelRCReturn(rc, rc);
2503
2504 /*
2505 * Walk the ram ranges.
2506 */
2507 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
2508 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
2509 {
2510 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
2511 Assert(pRam || idRamRange == 0);
2512 if (!pRam) continue;
2513 Assert(pRam->idRange == idRamRange);
2514
2515 uint32_t iPage = pRam->cb >> GUEST_PAGE_SHIFT;
2516 AssertMsg(((RTGCPHYS)iPage << GUEST_PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << GUEST_PAGE_SHIFT, pRam->cb));
2517
2518 while (iPage-- > 0)
2519 {
2520 PPGMPAGE pPage = &pRam->aPages[iPage];
2521 switch (PGM_PAGE_GET_TYPE(pPage))
2522 {
2523 case PGMPAGETYPE_RAM:
2524 /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */
2525 /** @todo change this to explicitly free private pages here. */
2526 if (PGM_PAGE_IS_SHARED(pPage))
2527 {
2528 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage,
2529 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), PGMPAGETYPE_RAM);
2530 AssertLogRelRCReturn(rc, rc);
2531 }
2532 break;
2533
2534 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2535 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
2536 case PGMPAGETYPE_MMIO2:
2537 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2538 case PGMPAGETYPE_ROM:
2539 case PGMPAGETYPE_MMIO:
2540 break;
2541 default:
2542 AssertFailed();
2543 }
2544 } /* for each page */
2545 }
2546
2547 /*
2548 * Finish off any pages pending freeing.
2549 */
2550 if (cPendingPages)
2551 {
2552 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2553 AssertLogRelRCReturn(rc, rc);
2554 }
2555 GMMR3FreePagesCleanup(pReq);
2556 return VINF_SUCCESS;
2557}
2558
2559
2560
2561/*********************************************************************************************************************************
2562* MMIO *
2563*********************************************************************************************************************************/
2564
2565/**
2566 * This is the interface IOM is using to register an MMIO region (unmapped).
2567 *
2568 *
2569 * @returns VBox status code.
2570 *
2571 * @param pVM The cross context VM structure.
2572 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2573 * @param cb The size of the MMIO region.
2574 * @param pszDesc The description of the MMIO region.
2575 * @param pidRamRange Where to return the RAM range ID for the MMIO region
2576 * on success.
2577 * @thread EMT(0)
2578 */
2579VMMR3_INT_DECL(int) PGMR3PhysMmioRegister(PVM pVM, PVMCPU pVCpu, RTGCPHYS cb, const char *pszDesc, uint16_t *pidRamRange)
2580{
2581 /*
2582 * Assert assumptions.
2583 */
2584 AssertPtrReturn(pidRamRange, VERR_INVALID_POINTER);
2585 *pidRamRange = UINT16_MAX;
2586 AssertReturn(pVCpu == VMMGetCpu(pVM) && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
2587 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
2588 /// @todo AssertReturn(!pVM->pgm.s.fRamRangesFrozen, VERR_WRONG_ORDER);
2589 AssertReturn(cb <= ((RTGCPHYS)PGM_MAX_PAGES_PER_RAM_RANGE << GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
2590 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2591 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2592 AssertReturn(*pszDesc != '\0', VERR_INVALID_POINTER);
2593
2594 /*
2595 * Take the PGM lock and allocate an ad-hoc MMIO RAM range.
2596 */
2597 int rc = PGM_LOCK(pVM);
2598 AssertRCReturn(rc, rc);
2599
2600 uint32_t const cPages = cb >> GUEST_PAGE_SHIFT;
2601 PPGMRAMRANGE pNew = NULL;
2602 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cPages, PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, &pNew);
2603 AssertLogRelMsg(RT_SUCCESS(rc), ("pgmR3PhysAllocateRamRange failed: cPages=%#RX32 (%s): %Rrc\n", cPages, pszDesc, rc));
2604 if (RT_SUCCESS(rc))
2605 {
2606 /* Initialize the range. */
2607 pNew->pszDesc = pszDesc;
2608 pNew->uNemRange = UINT32_MAX;
2609 pNew->pbR3 = NULL;
2610 pNew->paLSPages = NULL;
2611 Assert(pNew->fFlags == PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO && pNew->cb == cb);
2612
2613 uint32_t iPage = cPages;
2614 while (iPage-- > 0)
2615 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO);
2616 Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO);
2617
2618 /* update the page count stats. */
2619 pVM->pgm.s.cPureMmioPages += cPages;
2620 pVM->pgm.s.cAllPages += cPages;
2621
2622 /*
2623 * Set the return value, release lock and return to IOM.
2624 */
2625 *pidRamRange = pNew->idRange;
2626 }
2627
2628 PGM_UNLOCK(pVM);
2629 return rc;
2630}
2631
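/*
 * Illustrative sketch (not part of the build): IOM-side registration of an
 * unmapped MMIO region during VM creation.  The size and description are
 * hypothetical; the returned idRamRange is what PGMR3PhysMmioMap and
 * PGMR3PhysMmioUnmap below take to identify the ad hoc RAM range.
 *
 *      uint16_t idRamRange = UINT16_MAX;
 *      int rc = PGMR3PhysMmioRegister(pVM, pVCpu, 16 * GUEST_PAGE_SIZE,
 *                                     "Example device MMIO", &idRamRange);
 *      AssertRCReturn(rc, rc);
 *      // ... keep idRamRange around until the guest maps the region ...
 */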
2632
2633/**
2634 * Worker for PGMR3PhysMmioMap that's called with the PGM lock held.
2635 */
2636static int pgmR3PhysMmioMapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb, RTGCPHYS const GCPhysLast,
2637 PPGMRAMRANGE const pMmioRamRange, PGMPHYSHANDLERTYPE const hType, uint64_t const uUser)
2638{
2639 /* Check that the range isn't mapped already. */
2640 AssertLogRelMsgReturn(pMmioRamRange->GCPhys == NIL_RTGCPHYS,
2641 ("desired %RGp mapping for '%s' - already mapped at %RGp!\n",
2642 GCPhys, pMmioRamRange->pszDesc, pMmioRamRange->GCPhys),
2643 VERR_ALREADY_EXISTS);
2644
2645 /*
2646 * Now, check if this falls into a regular RAM range or if we should use
2647 * the ad-hoc one (idRamRange).
2648 */
2649 int rc;
2650 uint32_t idxInsert = UINT32_MAX;
2651 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
2652 if (pOverlappingRange)
2653 {
2654 /* Simplification: all within the same range. */
2655 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
2656 && GCPhysLast <= pOverlappingRange->GCPhysLast,
2657 ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n",
2658 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2659 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2660 VERR_PGM_RAM_CONFLICT);
2661
2662 /* Check that it isn't an ad hoc range, but a real RAM range. */
2663 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
2664 ("%RGp-%RGp (MMIO/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
2665 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2666 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
2667 VERR_PGM_RAM_CONFLICT);
2668
2669 /* Check that it's all RAM or MMIO pages. */
2670 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
2671 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2672 while (cLeft-- > 0)
2673 {
2674 AssertLogRelMsgReturn( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2675 || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO, /** @todo MMIO type isn't right */
2676 ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n",
2677 GCPhys, GCPhysLast, pMmioRamRange->pszDesc, pOverlappingRange->GCPhys,
2678 PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
2679 VERR_PGM_RAM_CONFLICT);
2680 pPage++;
2681 }
2682
2683 /*
2684 * Make all the pages in the range MMIO/ZERO pages, freeing any
2685 * RAM pages currently mapped here. This might not be 100% correct
2686 * for PCI memory, but we're doing the same thing for MMIO2 pages.
2687 */
2688 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
2689 AssertRCReturn(rc, rc);
2690
2691 /* Force a PGM pool flush as guest ram references have been changed. */
2692 /** @todo not entirely SMP safe; assuming for now the guest takes
2693 * care of this internally (not touch mapped mmio while changing the
2694 * mapping). */
2695 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2696 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2697 }
2698 else
2699 {
2700 /*
2701 * No RAM range, use the ad hoc one (idRamRange).
2702 *
2703 * Note that we don't have to tell REM about this range because
2704 * PGMHandlerPhysicalRegisterEx will do that for us.
2705 */
2706 AssertLogRelReturn(idxInsert <= pVM->pgm.s.RamRangeUnion.cLookupEntries, VERR_INTERNAL_ERROR_4);
2707 Log(("PGMR3PhysMmioMap: Inserting ad hoc MMIO range #%x for %RGp-%RGp %s\n",
2708 pMmioRamRange->idRange, GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2709
2710 Assert(PGM_PAGE_GET_TYPE(&pMmioRamRange->aPages[0]) == PGMPAGETYPE_MMIO);
2711
2712 /* We ASSUME that all the pages in the ad-hoc range are in the proper
2713 state and all that and that we don't need to re-initialize them here. */
2714
2715#ifdef VBOX_WITH_NATIVE_NEM
2716 /* Notify NEM. */
2717 if (VM_IS_NEM_ENABLED(pVM))
2718 {
2719 uint8_t u2State = 0; /* (must have valid state as there can't be anything to preserve) */
2720 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhys, cb, 0 /*fFlags*/, NULL, NULL, &u2State, &pMmioRamRange->uNemRange);
2721 AssertLogRelRCReturn(rc, rc);
2722
2723 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2724 while (iPage-- > 0)
2725 PGM_PAGE_SET_NEM_STATE(&pMmioRamRange->aPages[iPage], u2State);
2726 }
2727#endif
2728 /* Insert it into the lookup table (may in theory fail). */
2729 rc = pgmR3PhysRamRangeInsertLookup(pVM, pMmioRamRange, GCPhys, &idxInsert);
2730 }
2731 if (RT_SUCCESS(rc))
2732 {
2733 /*
2734 * Register the access handler.
2735 */
2736 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, hType, uUser, pMmioRamRange->pszDesc);
2737 if (RT_SUCCESS(rc))
2738 {
2739#ifdef VBOX_WITH_NATIVE_NEM
2740 /* Late NEM notification (currently not used by anyone). */
2741 if (VM_IS_NEM_ENABLED(pVM))
2742 {
2743 if (pOverlappingRange)
2744 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2745 pOverlappingRange->pbR3 + (uintptr_t)(GCPhys - pOverlappingRange->GCPhys),
2746 NULL /*pvMmio2*/, NULL /*puNemRange*/);
2747 else
2748 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2749 &pMmioRamRange->uNemRange);
2750 AssertLogRelRC(rc);
2751 }
2752 if (RT_SUCCESS(rc))
2753#endif
2754 {
2755 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2756 return VINF_SUCCESS;
2757 }
2758
2759#ifdef VBOX_WITH_NATIVE_NEM
2760 /*
2761 * Failed, so revert it all as best as we can (the memory content in
2762 * the overlapping case is gone).
2763 */
2764 PGMHandlerPhysicalDeregister(pVM, GCPhys);
2765#endif
2766 }
2767 }
2768
2769 if (!pOverlappingRange)
2770 {
2771#ifdef VBOX_WITH_NATIVE_NEM
2772 /* Notify NEM about the sudden removal of the RAM range we just told it about. */
2773 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, cb, 0 /*fFlags*/, NULL /*pvRam*/, NULL /*pvMmio2*/,
2774 NULL /*pu2State*/, &pMmioRamRange->uNemRange);
2775#endif
2776
2777 /* Remove the ad hoc range from the lookup table. */
2778 idxInsert -= 1;
2779 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxInsert);
2780 }
2781
2782 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2783 return rc;
2784}
2785
2786
2787/**
2788 * This is the interface IOM is using to map an MMIO region.
2789 *
2790 * It will check for conflicts and ensure that a RAM range structure
2791 * is present before calling the PGMR3HandlerPhysicalRegister API to
2792 * register the callbacks.
2793 *
2794 * @returns VBox status code.
2795 *
2796 * @param pVM The cross context VM structure.
2797 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2798 * @param GCPhys The start of the MMIO region.
2799 * @param cb The size of the MMIO region.
2800 * @param idRamRange The RAM range ID for the MMIO region as returned by
2801 * PGMR3PhysMmioRegister().
2802 * @param hType The physical access handler type registration.
2803 * @param uUser The user argument.
2804 * @thread EMT(pVCpu)
2805 */
2806VMMR3_INT_DECL(int) PGMR3PhysMmioMap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange,
2807 PGMPHYSHANDLERTYPE hType, uint64_t uUser)
2808{
2809 /*
2810 * Assert on some assumptions.
2811 */
2812 VMCPU_ASSERT_EMT(pVCpu);
2813 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2814 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2815 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2816 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2817#ifdef VBOX_STRICT
2818 PCPGMPHYSHANDLERTYPEINT pType = pgmHandlerPhysicalTypeHandleToPtr(pVM, hType);
2819 Assert(pType);
2820 Assert(pType->enmKind == PGMPHYSHANDLERKIND_MMIO);
2821#endif
2822 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2823 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2824 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2825 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2826 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2827
2828 /*
2829 * Take the PGM lock and do the work.
2830 */
2831 int rc = PGM_LOCK(pVM);
2832 AssertRCReturn(rc, rc);
2833
2834 rc = pgmR3PhysMmioMapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange, hType, uUser);
2835#ifdef VBOX_STRICT
2836 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
2837#endif
2838
2839 PGM_UNLOCK(pVM);
2840 return rc;
2841}
2842
2843
2844/**
2845 * Worker for PGMR3PhysMmioUnmap that's called with the PGM lock held.
2846 */
2847static int pgmR3PhysMmioUnmapLocked(PVM pVM, PVMCPU pVCpu, RTGCPHYS const GCPhys, RTGCPHYS const cb,
2848 RTGCPHYS const GCPhysLast, PPGMRAMRANGE const pMmioRamRange)
2849{
2850 /*
2851 * Lookup the RAM range containing the region to make sure it is actually mapped.
2852 */
2853 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhysLast);
2854 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
2855 ("MMIO range not found at %RGp LB %RGp! (%s)\n", GCPhys, cb, pMmioRamRange->pszDesc),
2856 VERR_NOT_FOUND);
2857
2858 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
2859 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
2860 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
2861 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_4);
2862
2863 AssertLogRelMsgReturn(pLookupRange == pMmioRamRange || !PGM_RAM_RANGE_IS_AD_HOC(pLookupRange),
2864 ("MMIO unmap mixup at %RGp LB %RGp (%s) vs %RGp LB %RGp (%s)\n",
2865 GCPhys, cb, pMmioRamRange->pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
2866 VERR_NOT_FOUND);
2867
2868 /*
2869 * Deregister the handler. This should reset any aliases, so an ad hoc
2870 * range will only contain MMIO type pages afterwards.
2871 */
2872 int rc = PGMHandlerPhysicalDeregister(pVM, GCPhys);
2873 if (RT_SUCCESS(rc))
2874 {
2875 if (pLookupRange != pMmioRamRange)
2876 {
2877 /*
2878 * Turn the pages back into RAM pages.
2879 */
2880 Log(("pgmR3PhysMmioUnmapLocked: Reverting MMIO range %RGp-%RGp (%s) in %RGp-%RGp (%s) to RAM.\n",
2881 GCPhys, GCPhysLast, pMmioRamRange->pszDesc,
2882 pLookupRange->GCPhys, pLookupRange->GCPhysLast, pLookupRange->pszDesc));
2883
2884 RTGCPHYS const offRange = GCPhys - pLookupRange->GCPhys;
2885 uint32_t iPage = offRange >> GUEST_PAGE_SHIFT;
2886 uint32_t cLeft = cb >> GUEST_PAGE_SHIFT;
2887 while (cLeft--)
2888 {
2889 PPGMPAGE pPage = &pLookupRange->aPages[iPage];
2890 AssertMsg( (PGM_PAGE_IS_MMIO(pPage) && PGM_PAGE_IS_ZERO(pPage))
2891 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO2_ALIAS_MMIO
2892 //|| PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO
2893 , ("%RGp %R[pgmpage]\n", pLookupRange->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), pPage));
2894/** @todo This isn't entirely correct, is it? Aliases ought to be converted
2895 * back to zero pages, which this code doesn't do. However, shouldn't
2896 * PGMHandlerPhysicalDeregister deal with this already? */
2897 if (PGM_PAGE_IS_MMIO_OR_ALIAS(pPage))
2898 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM);
2899 iPage++;
2900 }
2901
2902#ifdef VBOX_WITH_NATIVE_NEM
2903 /* Notify NEM (failure will probably leave things in a non-working state). */
2904 if (VM_IS_NEM_ENABLED(pVM))
2905 {
2906 uint8_t u2State = UINT8_MAX;
2907 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
2908 pLookupRange->pbR3 ? pLookupRange->pbR3 + GCPhys - pLookupRange->GCPhys : NULL,
2909 NULL, &u2State, &pLookupRange->uNemRange);
2910 AssertLogRelRC(rc);
2911 /** @todo status code propagation here... This is likely fatal, right? */
2912 if (u2State != UINT8_MAX)
2913 pgmPhysSetNemStateForPages(&pLookupRange->aPages[(GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT],
2914 cb >> GUEST_PAGE_SHIFT, u2State);
2915 }
2916#endif
2917 }
2918 else
2919 {
2920 /*
2921 * Unlink the ad hoc range.
2922 */
2923#ifdef VBOX_STRICT
2924 uint32_t iPage = cb >> GUEST_PAGE_SHIFT;
2925 while (iPage-- > 0)
2926 {
2927 PPGMPAGE const pPage = &pMmioRamRange->aPages[iPage];
2928 Assert(PGM_PAGE_IS_MMIO(pPage));
2929 }
2930#endif
2931
2932 Log(("pgmR3PhysMmioUnmapLocked: Unmapping ad hoc MMIO range for %RGp-%RGp %s\n",
2933 GCPhys, GCPhysLast, pMmioRamRange->pszDesc));
2934
2935#ifdef VBOX_WITH_NATIVE_NEM
2936 if (VM_IS_NEM_ENABLED(pVM)) /* Notify NEM before we unlink the range. */
2937 {
2938 rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, 0 /*fFlags*/,
2939 NULL, NULL, NULL, &pMmioRamRange->uNemRange);
2940 AssertLogRelRCReturn(rc, rc); /* we're up the creek if this hits. */
2941 }
2942#endif
2943
2944 pgmR3PhysRamRangeRemoveLookup(pVM, pMmioRamRange, &idxLookup);
2945 }
2946 }
2947
2948 /* Force a PGM pool flush as guest ram references have been changed. */
2949 /** @todo Not entirely SMP safe; assuming for now the guest takes care of
2950 * this internally (not touch mapped mmio while changing the mapping). */
2951 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2952 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2953
2954 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
2955 /*pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
2956
2957 return rc;
2958}
2959
2960
2961/**
2962 * This is the interface IOM is using to unmap an MMIO region.
2963 *
2964 * It will take care of calling PGMHandlerPhysicalDeregister and clean up
2965 * any ad hoc PGMRAMRANGE left behind.
2966 *
2967 * @returns VBox status code.
2968 * @param pVM The cross context VM structure.
2969 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2970 * @param GCPhys The start of the MMIO region.
2971 * @param cb The size of the MMIO region.
2972 * @param idRamRange The RAM range ID for the MMIO region as returned by
2973 * PGMR3PhysMmioRegister().
2974 * @thread EMT(pVCpu)
2975 */
2976VMMR3_INT_DECL(int) PGMR3PhysMmioUnmap(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, RTGCPHYS cb, uint16_t idRamRange)
2977{
2978 /*
2979 * Input validation.
2980 */
2981 VMCPU_ASSERT_EMT(pVCpu);
2982 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2983 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2984 RTGCPHYS const GCPhysLast = GCPhys + cb - 1U;
2985 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2986 AssertReturn(idRamRange <= pVM->pgm.s.idRamRangeMax && idRamRange > 0, VERR_INVALID_HANDLE);
2987 PPGMRAMRANGE const pMmioRamRange = pVM->pgm.s.apRamRanges[idRamRange];
2988 AssertReturn(pMmioRamRange, VERR_INVALID_HANDLE);
2989 AssertReturn(pMmioRamRange->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO, VERR_INVALID_HANDLE);
2990 AssertReturn(pMmioRamRange->cb == cb, VERR_OUT_OF_RANGE);
2991
2992 /*
2993 * Take the PGM lock and do what's asked.
2994 */
2995 int rc = PGM_LOCK(pVM);
2996 AssertRCReturn(rc, rc);
2997
2998 rc = pgmR3PhysMmioUnmapLocked(pVM, pVCpu, GCPhys, cb, GCPhysLast, pMmioRamRange);
2999#ifdef VBOX_STRICT
3000 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
3001#endif
3002
3003 PGM_UNLOCK(pVM);
3004 return rc;
3005}
3006
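/*
 * Illustrative sketch (not part of the build): the unmap/map pairing IOM
 * performs when the guest reprograms the base address of a registered MMIO
 * region, e.g. via a PCI BAR.  GCPhysOld, GCPhysNew, cb, hType and uUser are
 * placeholders; hType must be a PGMPHYSHANDLERKIND_MMIO handler type
 * registered elsewhere.
 *
 *      if (GCPhysOld != NIL_RTGCPHYS)
 *      {
 *          rc = PGMR3PhysMmioUnmap(pVM, pVCpu, GCPhysOld, cb, idRamRange);
 *          AssertRCReturn(rc, rc);
 *      }
 *      rc = PGMR3PhysMmioMap(pVM, pVCpu, GCPhysNew, cb, idRamRange, hType, uUser);
 */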
3007
3008
3009/*********************************************************************************************************************************
3010* MMIO2 *
3011*********************************************************************************************************************************/
3012
3013/**
3014 * Validates the claim to an MMIO2 range and returns the pointer to it.
3015 *
3016 * @returns The MMIO2 entry index on success, negative error status on failure.
3017 * @param pVM The cross context VM structure.
3018 * @param pDevIns The device instance owning the region.
3019 * @param hMmio2 Handle to look up.
3020 * @param pcChunks Where to return the number of chunks associated with
3021 * this handle.
3022 */
3023static int32_t pgmR3PhysMmio2ResolveHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t *pcChunks)
3024{
3025 *pcChunks = 0;
3026 uint32_t const idxFirst = hMmio2 - 1U;
3027 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3028 AssertReturn(idxFirst < cMmio2Ranges, VERR_INVALID_HANDLE);
3029
3030 PPGMREGMMIO2RANGE const pFirst = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3031 AssertReturn(pFirst->idMmio2 == hMmio2, VERR_INVALID_HANDLE);
3032 AssertReturn((pFirst->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK), VERR_INVALID_HANDLE);
3033 AssertReturn(pFirst->pDevInsR3 == pDevIns && RT_VALID_PTR(pDevIns), VERR_NOT_OWNER);
3034
3035 /* Figure out how many chunks this handle spans. */
3036 if (pFirst->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3037 *pcChunks = 1;
3038 else
3039 {
3040 uint32_t cChunks = 1;
3041 for (uint32_t idx = idxFirst + 1;; idx++)
3042 {
3043 cChunks++;
3044 AssertReturn(idx < cMmio2Ranges, VERR_INTERNAL_ERROR_2);
3045 PPGMREGMMIO2RANGE const pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3046 AssertLogRelMsgReturn( pCur->pDevInsR3 == pDevIns
3047 && pCur->idMmio2 == idx + 1
3048 && pCur->iSubDev == pFirst->iSubDev
3049 && pCur->iRegion == pFirst->iRegion
3050 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK),
3051 ("cur: %p/%#x/%#x/%#x/%#x/%s; first: %p/%#x/%#x/%#x/%#x/%s\n",
3052 pCur->pDevInsR3, pCur->idMmio2, pCur->iSubDev, pCur->iRegion, pCur->fFlags,
3053 pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc,
3054 pDevIns, idx + 1, pFirst->iSubDev, pFirst->iRegion, pFirst->fFlags,
3055 pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc),
3056 VERR_INTERNAL_ERROR_3);
3057 if (pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3058 break;
3059 }
3060 *pcChunks = cChunks;
3061 }
3062
3063 return (int32_t)idxFirst;
3064}
3065
3066
3067/**
3068 * Check if a device has already registered a MMIO2 region.
3069 *
3070 * @returns NULL if not registered, otherwise pointer to the MMIO2.
3071 * @param pVM The cross context VM structure.
3072 * @param pDevIns The device instance owning the region.
3073 * @param iSubDev The sub-device number.
3074 * @param iRegion The region.
3075 */
3076DECLINLINE(PPGMREGMMIO2RANGE) pgmR3PhysMmio2Find(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion)
3077{
3078 /*
3079 * Search the array. There shouldn't be many entries.
3080 */
3081 uint32_t idx = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3082 while (idx-- > 0)
3083 if (RT_LIKELY( pVM->pgm.s.aMmio2Ranges[idx].pDevInsR3 != pDevIns
3084 || pVM->pgm.s.aMmio2Ranges[idx].iRegion != iRegion
3085 || pVM->pgm.s.aMmio2Ranges[idx].iSubDev != iSubDev))
3086 { /* likely */ }
3087 else
3088 return &pVM->pgm.s.aMmio2Ranges[idx];
3089 return NULL;
3090}
3091
3092/**
3093 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Map.
3094 */
3095static int pgmR3PhysMmio2EnableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3096{
3097 int rc = VINF_SUCCESS;
3098 while (cChunks-- > 0)
3099 {
3100 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3101 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3102
3103 Assert(!(pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING));
3104 int rc2 = pgmHandlerPhysicalExRegister(pVM, pMmio2->pPhysHandlerR3, pRamRange->GCPhys, pRamRange->GCPhysLast);
3105 if (RT_SUCCESS(rc2))
3106 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_IS_TRACKING;
3107 else
3108 AssertLogRelMsgFailedStmt(("%#RGp-%#RGp %s failed -> %Rrc\n",
3109 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3110 rc = RT_SUCCESS(rc) ? rc2 : rc);
3111
3112 idx++;
3113 }
3114 return rc;
3115}
3116
3117
3118/**
3119 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking and PGMR3PhysMmio2Unmap.
3120 */
3121static int pgmR3PhysMmio2DisableDirtyPageTracing(PVM pVM, uint32_t idx, uint32_t cChunks)
3122{
3123 int rc = VINF_SUCCESS;
3124 while (cChunks-- > 0)
3125 {
3126 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3127 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3128 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_TRACKING)
3129 {
3130 int rc2 = pgmHandlerPhysicalExDeregister(pVM, pMmio2->pPhysHandlerR3);
3131 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3132 ("%#RGp-%#RGp %s failed -> %Rrc\n",
3133 pRamRange->GCPhys, pRamRange->GCPhysLast, pRamRange->pszDesc, rc2),
3134 rc = RT_SUCCESS(rc) ? rc2 : rc);
3135 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_TRACKING;
3136 }
3137 idx++;
3138 }
3139 return rc;
3140}
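
/*
 * Illustrative sketch (not part of the build): how the two tracking workers
 * above combine with pgmR3PhysMmio2ResolveHandle inside a control API such as
 * PGMR3PhysMmio2ControlDirtyPageTracking (named in the worker doc comments;
 * its exact signature and the fEnabled flag are assumptions here).
 *
 *      uint32_t      cChunks = 0;
 *      int32_t const idx     = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
 *      AssertReturn(idx >= 0, (int)idx);   // negative values are VBox status codes
 *      int const rc = fEnabled
 *                   ? pgmR3PhysMmio2EnableDirtyPageTracing(pVM, (uint32_t)idx, cChunks)
 *                   : pgmR3PhysMmio2DisableDirtyPageTracing(pVM, (uint32_t)idx, cChunks);
 */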
3141
3142#if 0 // temp
3143
3144/**
3145 * Common worker PGMR3PhysMmio2PreRegister & PGMR3PhysMMIO2Register that links a
3146 * complete registration entry into the lists and lookup tables.
3147 *
3148 * @param pVM The cross context VM structure.
3149 * @param pNew The new MMIO / MMIO2 registration to link.
3150 */
3151static void pgmR3PhysMmio2Link(PVM pVM, PPGMREGMMIO2RANGE pNew)
3152{
3153 Assert(pNew->idMmio2 != UINT8_MAX);
3154
3155 /*
3156 * Link it into the list (order doesn't matter, so insert it at the head).
3157 *
3158 * Note! The range we're linking may consist of multiple chunks, so we
3159 * have to find the last one.
3160 */
3161 PPGMREGMMIO2RANGE pLast = pNew;
3162 for (pLast = pNew; ; pLast = pLast->pNextR3)
3163 {
3164 if (pLast->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3165 break;
3166 Assert(pLast->pNextR3);
3167 Assert(pLast->pNextR3->pDevInsR3 == pNew->pDevInsR3);
3168 Assert(pLast->pNextR3->iSubDev == pNew->iSubDev);
3169 Assert(pLast->pNextR3->iRegion == pNew->iRegion);
3170 Assert(pLast->pNextR3->idMmio2 == pLast->idMmio2 + 1);
3171 }
3172
3173 PGM_LOCK_VOID(pVM);
3174
3175 /* Link in the chain of ranges at the head of the list. */
3176 pLast->pNextR3 = pVM->pgm.s.pRegMmioRangesR3;
3177 pVM->pgm.s.pRegMmioRangesR3 = pNew;
3178
3179 /* Insert the MMIO2 range/page IDs. */
3180 uint8_t idMmio2 = pNew->idMmio2;
3181 for (;;)
3182 {
3183 Assert(pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] == NULL);
3184 Assert(pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] == NIL_RTR0PTR);
3185 pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] = pNew;
3186 pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] = pNew->RamRange.pSelfR0 - RT_UOFFSETOF(PGMREGMMIO2RANGE, RamRange);
3187 if (pNew->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK)
3188 break;
3189 pNew = pNew->pNextR3;
3190 idMmio2++;
3191 }
3192
3193 pgmPhysInvalidatePageMapTLB(pVM);
3194 PGM_UNLOCK(pVM);
3195}
3196#endif
3197
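/*
 * Illustrative sketch (not part of the build): allocating a 16 MiB MMIO2
 * region (framebuffer-like device memory) from a device constructor.  Real
 * devices normally reach this through a PDM device helper rather than calling
 * PGM directly; the numbers and description string are placeholders.
 *
 *      void          *pvR3   = NULL;
 *      PGMMMIO2HANDLE hMmio2 = NIL_PGMMMIO2HANDLE;
 *      int rc = PGMR3PhysMmio2Register(pVM, pDevIns, 0, 0, 16 * _1M, 0,
 *                                      "Example MMIO2", &pvR3, &hMmio2);
 *      // Arguments above: iSubDev=0, iRegion=0, cb=16 MiB, fFlags=0.
 *      AssertRCReturn(rc, rc);
 *      // pvR3 is the permanent ring-3 mapping; hMmio2 identifies the region
 *      // in later PGMR3PhysMmio2* calls.
 */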
3198
3199/**
3200 * Allocate and register an MMIO2 region.
3201 *
3202 * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM
3203 * associated with a device. It is also non-shared memory with a permanent
3204 * ring-3 mapping and page backing (presently).
3205 *
3206 * An MMIO2 range may overlap with base memory if a lot of RAM is configured for
3207 * the VM, in which case we'll drop the base memory pages. Presently we will
3208 * make no attempt to preserve anything that happens to be present in the base
3209 * memory that is replaced; this is of course incorrect, but it's too much
3210 * effort.
3211 *
3212 * @returns VBox status code.
3213 * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the
3214 * memory.
3215 * @retval VERR_ALREADY_EXISTS if the region already exists.
3216 *
3217 * @param pVM The cross context VM structure.
3218 * @param pDevIns The device instance owning the region.
3219 * @param iSubDev The sub-device number.
3220 * @param iRegion The region number. If the MMIO2 memory is a PCI
3221 * I/O region this number has to be the number of that
3222 * region. Otherwise it can be any number save
3223 * UINT8_MAX.
3224 * @param cb The size of the region. Must be page aligned.
3225 * @param fFlags Reserved for future use, must be zero.
3226 * @param pszDesc The description.
3227 * @param ppv Where to store the pointer to the ring-3 mapping of
3228 * the memory.
3229 * @param phRegion Where to return the MMIO2 region handle. Optional.
3230 * @thread EMT(0)
3231 *
3232 * @note Only callable at VM creation time and during VM state loading.
3233 * The latter is for PCNet saved state compatibility with pre 4.3.6
3234 * state.
3235 */
3236VMMR3_INT_DECL(int) PGMR3PhysMmio2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cb,
3237 uint32_t fFlags, const char *pszDesc, void **ppv, PGMMMIO2HANDLE *phRegion)
3238{
3239 /*
3240 * Validate input.
3241 */
3242 AssertPtrReturn(ppv, VERR_INVALID_POINTER);
3243 *ppv = NULL;
3244 if (phRegion)
3245 {
3246 AssertPtrReturn(phRegion, VERR_INVALID_POINTER);
3247 *phRegion = NIL_PGMMMIO2HANDLE;
3248 }
3249 PVMCPU const pVCpu = VMMGetCpu(pVM);
3250 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3251 VMSTATE const enmVMState = VMR3GetState(pVM);
3252 AssertMsgReturn(enmVMState == VMSTATE_CREATING || enmVMState == VMSTATE_LOADING,
3253 ("state %s, expected CREATING or LOADING\n", VMGetStateName(enmVMState)),
3254 VERR_VM_INVALID_VM_STATE);
3255
3256 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3257 AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER);
3258 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
3259
3260 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
3261 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
3262
3263 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3264 AssertReturn(cb, VERR_INVALID_PARAMETER);
3265 AssertReturn(!(fFlags & ~PGMPHYS_MMIO2_FLAGS_VALID_MASK), VERR_INVALID_FLAGS);
3266
3267 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
3268 AssertLogRelReturn(((RTGCPHYS)cGuestPages << GUEST_PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER);
3269 AssertLogRelReturn(cGuestPages <= PGM_MAX_PAGES_PER_MMIO2_REGION, VERR_OUT_OF_RANGE);
3270 AssertLogRelReturn(cGuestPages <= (MM_MMIO_64_MAX >> GUEST_PAGE_SHIFT), VERR_OUT_OF_RANGE);
3271
3272 AssertReturn(pgmR3PhysMmio2Find(pVM, pDevIns, iSubDev, iRegion) == NULL, VERR_ALREADY_EXISTS);
3273
3274 /*
3275 * For the 2nd+ instance, mangle the description string so it's unique.
3276 */
3277 if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */
3278 {
3279 pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance);
3280 if (!pszDesc)
3281 return VERR_NO_MEMORY;
3282 }
3283
3284 /*
3285 * Check that we've got sufficient MMIO2 ID space for this request (the
3286 * allocation will be done later once we've got the backing memory secured,
3287 * but given the EMT0 restriction, that's not going to be a problem).
3288 *
3289 * The zero ID is not used as it could be confused with NIL_GMM_PAGEID, so
3290 * the IDs go from 1 thru PGM_MAX_MMIO2_RANGES.
3291 */
3292 unsigned const cChunks = pgmPhysMmio2CalcChunkCount(cb, NULL);
3293
3294 int rc = PGM_LOCK(pVM);
3295 AssertRCReturn(rc, rc);
3296
3297 AssertCompile(PGM_MAX_MMIO2_RANGES < 255);
3298 uint8_t const idMmio2 = pVM->pgm.s.cMmio2Ranges + 1;
3299 AssertLogRelReturnStmt(idMmio2 + cChunks <= PGM_MAX_MMIO2_RANGES, PGM_UNLOCK(pVM), VERR_PGM_TOO_MANY_MMIO2_RANGES);
3300
3301 /*
3302 * Try reserve and allocate the backing memory first as this is what is
3303 * most likely to fail.
3304 */
3305 rc = MMR3AdjustFixedReservation(pVM, cGuestPages, pszDesc);
3306 if (RT_SUCCESS(rc))
3307 {
3308 /*
3309 * If we're in driverless we'll be doing the work here, otherwise we
3310 * must call ring-0 to do the job as we'll need physical addresses
3311 * and maybe a ring-0 mapping address for it all.
3312 */
3313#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3314 if (!SUPR3IsDriverless())
3315 {
3316 PGMPHYSMMIO2REGISTERREQ Mmio2RegReq;
3317 Mmio2RegReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3318 Mmio2RegReq.Hdr.cbReq = sizeof(Mmio2RegReq);
3319 Mmio2RegReq.cbGuestPage = GUEST_PAGE_SIZE;
3320 Mmio2RegReq.cGuestPages = cGuestPages;
3321 Mmio2RegReq.idMmio2 = idMmio2;
3322 Mmio2RegReq.cChunks = cChunks;
3323 Mmio2RegReq.iSubDev = (uint8_t)iSubDev;
3324 Mmio2RegReq.iRegion = (uint8_t)iRegion;
3325 Mmio2RegReq.fFlags = fFlags;
3326 Mmio2RegReq.pDevIns = pDevIns;
3327 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_REGISTER, 0 /*u64Arg*/, &Mmio2RegReq.Hdr);
3328 }
3329 else
3330#endif
3331 rc = pgmPhysMmio2RegisterWorker(pVM, cGuestPages, idMmio2, cChunks, pDevIns, iSubDev, iRegion, fFlags);
3332 if (RT_SUCCESS(rc))
3333 {
3334 Assert(idMmio2 + cChunks - 1 == pVM->pgm.s.cMmio2Ranges);
3335
3336 /*
3337 * There are two things left to do:
3338 * 1. Add the description to the associated RAM ranges.
3339 * 2. Pre-allocate access handlers for dirty bit tracking if necessary.
3340 */
3341 bool const fNeedHandler = (fFlags & PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES)
3342#ifdef VBOX_WITH_PGM_NEM_MODE
3343 && (!VM_IS_NEM_ENABLED(pVM) || !NEMR3IsMmio2DirtyPageTrackingSupported(pVM))
3344#endif
3345 ;
3346 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
3347 {
3348 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idxChunk + idMmio2 - 1];
3349 Assert(pMmio2->idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
3350 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apRamRanges[pMmio2->idRamRange];
3351 Assert(pRamRange->pbR3 == pMmio2->pbR3);
3352 Assert(pRamRange->cb == pMmio2->cbReal);
3353
3354 pRamRange->pszDesc = pszDesc; /** @todo mangle this if we got more than one chunk */
3355 if (fNeedHandler)
3356 {
3357 rc = pgmHandlerPhysicalExCreate(pVM, pVM->pgm.s.hMmio2DirtyPhysHandlerType, pMmio2->idMmio2,
3358 pszDesc, &pMmio2->pPhysHandlerR3);
3359 AssertLogRelMsgReturnStmt(RT_SUCCESS(rc),
3360 ("idMmio2=%#x idxChunk=%#x rc=%Rc\n", idMmio2, idxChunk, rc),
3361 PGM_UNLOCK(pVM),
3362 rc); /* PGMR3Term will take care of it all. */
3363 }
3364 }
3365
3366 /*
3367 * Done!
3368 */
3369 if (phRegion)
3370 *phRegion = idMmio2;
3371 *ppv = pVM->pgm.s.aMmio2Ranges[idMmio2 - 1].pbR3;
3372
3373 PGM_UNLOCK(pVM);
3374 return VINF_SUCCESS;
3375 }
3376
3377 MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3378 }
3379 if (pDevIns->iInstance > 0)
3380 MMR3HeapFree((void *)pszDesc);
3381 return rc;
3382}
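
/*
 * Illustrative sketch (not part of the build): how a 2 MB MMIO2 region might
 * be registered from EMT(0) during VM construction with the API above. The
 * device instance, sub-device/region numbers, description and size are
 * hypothetical; devices normally reach PGM through the PDM device helpers.
 *
 *      void          *pvMmio2 = NULL;
 *      PGMMMIO2HANDLE hMmio2  = NIL_PGMMMIO2HANDLE;
 *      int rc = PGMR3PhysMmio2Register(pVM, pDevIns, 0, 2, _2M,
 *                                      PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES,
 *                                      "Example-VRAM", &pvMmio2, &hMmio2);
 *
 * On success pvMmio2 points at the permanent ring-3 mapping of the backing
 * pages and hMmio2 can later be handed to PGMR3PhysMmio2Map and friends.
 */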
3383
3384/**
3385 * Deregisters and frees an MMIO2 region.
3386 *
3387 * Any physical access handlers registered for the region must be deregistered
3388 * before calling this function.
3389 *
3390 * @returns VBox status code.
3391 * @param pVM The cross context VM structure.
3392 * @param pDevIns The device instance owning the region.
3393 * @param hMmio2 The MMIO2 handle to deregister, or NIL if all
3394 * regions for the given device are to be deregistered.
3395 * @thread EMT(0)
3396 *
3397 * @note Only callable during VM state loading. This is to jettison an unused
3398 * MMIO2 section present in PCNet saved state prior to VBox v4.3.6.
3399 */
3400VMMR3_INT_DECL(int) PGMR3PhysMmio2Deregister(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
3401{
3402 /*
3403 * Validate input.
3404 */
3405 PVMCPU const pVCpu = VMMGetCpu(pVM);
3406 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
3407 VMSTATE const enmVMState = VMR3GetState(pVM);
3408 AssertMsgReturn(enmVMState == VMSTATE_LOADING,
3409 ("state %s, expected LOADING\n", VMGetStateName(enmVMState)),
3410 VERR_VM_INVALID_VM_STATE);
3411
3412 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3413
3414 /*
3415 * Take the PGM lock and scan for registrations matching the requirements.
3416 * We do this backwards to more easily reduce the cMmio2Ranges count when
3417 * stuff is removed.
3418 */
3419 PGM_LOCK_VOID(pVM);
3420
3421 int rc = VINF_SUCCESS;
3422 unsigned cFound = 0;
3423 uint32_t const cMmio2Ranges = RT_MIN(pVM->pgm.s.cMmio2Ranges, RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges));
3424 uint32_t idx = cMmio2Ranges;
3425 while (idx-- > 0)
3426 {
3427 PPGMREGMMIO2RANGE pCur = &pVM->pgm.s.aMmio2Ranges[idx];
3428 if ( pCur->pDevInsR3 == pDevIns
3429 && ( hMmio2 == NIL_PGMMMIO2HANDLE
3430 || pCur->idMmio2 == hMmio2))
3431 {
3432 cFound++;
3433
3434 /*
3435 * Wind back the first chunk for this registration.
3436 */
3437 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_LAST_CHUNK, ("idx=%u fFlags=%#x\n", idx, pCur->fFlags),
3438 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3439 uint32_t cGuestPages = pCur->cbReal >> GUEST_PAGE_SHIFT;
3440 uint32_t cChunks = 1;
3441 while ( idx > 0
3442 && !(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK))
3443 {
3444 AssertLogRelMsgReturnStmt( pCur[-1].pDevInsR3 == pDevIns
3445 && pCur[-1].iRegion == pCur->iRegion
3446 && pCur[-1].iSubDev == pCur->iSubDev,
3447 ("[%u]: %p/%#x/%#x/fl=%#x; [%u]: %p/%#x/%#x/fl=%#x; cChunks=%#x\n",
3448 idx - 1, pCur[-1].pDevInsR3, pCur[-1].iRegion, pCur[-1].iSubDev, pCur[-1].fFlags,
3449 idx, pCur->pDevInsR3, pCur->iRegion, pCur->iSubDev, pCur->fFlags, cChunks),
3450 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3451 cChunks++;
3452 pCur--;
3453 idx--;
3454 cGuestPages += pCur->cbReal >> GUEST_PAGE_SHIFT;
3455 }
3456 AssertLogRelMsgReturnStmt(pCur->fFlags & PGMREGMMIO2RANGE_F_FIRST_CHUNK,
3457 ("idx=%u fFlags=%#x cChunks=%#x\n", idx, pCur->fFlags, cChunks),
3458 PGM_UNLOCK(pVM), VERR_INTERNAL_ERROR_3);
3459
3460 /*
3461 * Unmap it if it's mapped.
3462 */
3463 if (pCur->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
3464 {
3465 int rc2 = PGMR3PhysMmio2Unmap(pVM, pCur->pDevInsR3, idx + 1, pCur->GCPhys);
3466 AssertRC(rc2);
3467 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
3468 rc = rc2;
3469 }
3470
3471 /*
3472 * Destroy access handlers.
3473 */
3474 for (uint32_t iChunk = 0; iChunk < cChunks; iChunk++)
3475 if (pCur[iChunk].pPhysHandlerR3)
3476 {
3477 pgmHandlerPhysicalExDestroy(pVM, pCur[iChunk].pPhysHandlerR3);
3478 pCur[iChunk].pPhysHandlerR3 = NULL;
3479 }
3480
3481 /*
3482 * Call kernel mode / worker to do the actual deregistration.
3483 */
3484 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idx] ? pVM->pgm.s.apMmio2RamRanges[idx]->pszDesc : NULL;
3485 int rc2;
3486#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
3487 if (!SUPR3IsDriverless())
3488 {
3489 PGMPHYSMMIO2DEREGISTERREQ Mmio2DeregReq;
3490 Mmio2DeregReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3491 Mmio2DeregReq.Hdr.cbReq = sizeof(Mmio2DeregReq);
3492 Mmio2DeregReq.idMmio2 = idx + 1;
3493 Mmio2DeregReq.cChunks = cChunks;
3494 Mmio2DeregReq.pDevIns = pDevIns;
3495 rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER, 0 /*u64Arg*/, &Mmio2DeregReq.Hdr);
3496 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3497 ("VMMR0_DO_PGM_PHYS_MMIO2_DEREGISTER: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3498 rc2, idx, cChunks, pszDesc),
3499 rc = RT_SUCCESS(rc) ? rc2 : rc);
3500 pgmPhysInvalidRamRangeTlbs(pVM); /* Ensure no stale pointers in the ring-3 RAM range TLB. */
3501 }
3502 else
3503#endif
3504 {
3505 Assert(PGM_IS_IN_NEM_MODE(pVM));
3506 rc2 = pgmPhysMmio2DeregisterWorker(pVM, idx, cChunks, pDevIns);
3507 AssertLogRelMsgStmt(RT_SUCCESS(rc2),
3508 ("pgmPhysMmio2DeregisterWorker: rc=%Rrc idx=%#x cChunks=%#x %s\n",
3509 rc2, idx, cChunks, pszDesc),
3510 rc = RT_SUCCESS(rc) ? rc2 : rc);
3511 }
3512 if (RT_FAILURE(rc2))
3513 {
3514 LogRel(("PGMR3PhysMmio2Deregister: Deregistration failed: %Rrc; cChunks=%u %s\n", rc, cChunks, pszDesc));
3515 if (RT_SUCCESS(rc))
3516 rc = rc2;
3517 }
3518
3519 /*
3520 * Adjust the memory reservation.
3521 */
3522 if (!PGM_IS_IN_NEM_MODE(pVM) && RT_SUCCESS(rc2))
3523 {
3524 rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cGuestPages, pszDesc);
3525 AssertLogRelMsgStmt(RT_SUCCESS(rc2), ("rc=%Rrc cGuestPages=%#x\n", rc, cGuestPages),
3526 rc = RT_SUCCESS(rc) ? rc2 : rc);
3527 }
3528
3529 /* Are we done? */
3530 if (hMmio2 != NIL_PGMMMIO2HANDLE)
3531 break;
3532 }
3533 }
3534 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3535 PGM_UNLOCK(pVM);
3536 return !cFound && hMmio2 != NIL_PGMMMIO2HANDLE ? VERR_NOT_FOUND : rc;
3537}
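
/*
 * Illustrative note: passing NIL_PGMMMIO2HANDLE as hMmio2 above deregisters
 * every MMIO2 region owned by pDevIns, which is what the pre-4.3.6 PCNet
 * saved-state fixup relies on; a specific handle removes only that region and
 * yields VERR_NOT_FOUND if no matching registration exists.
 */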
3538
3539
3540/**
3541 * Worker for PGMR3PhysMmio2Map.
3542 */
3543static int pgmR3PhysMmio2MapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks,
3544 RTGCPHYS const GCPhys, RTGCPHYS const GCPhysLast)
3545{
3546 /*
3547 * Validate the mapped status now that we've got the lock.
3548 */
3549 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3550 {
3551 AssertReturn( pVM->pgm.s.aMmio2Ranges[idx].GCPhys == NIL_RTGCPHYS
3552 && !(pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED),
3553 VERR_WRONG_ORDER);
3554 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3555 AssertReturn(pRamRange->GCPhys == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3556 AssertReturn(pRamRange->GCPhysLast == NIL_RTGCPHYS, VERR_INTERNAL_ERROR_3);
3557 Assert(pRamRange->pbR3 == pVM->pgm.s.aMmio2Ranges[idx].pbR3);
3558 Assert(pRamRange->idRange == pVM->pgm.s.aMmio2Ranges[idx].idRamRange);
3559 }
3560
3561 const char * const pszDesc = pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc;
3562#ifdef VBOX_WITH_NATIVE_NEM
3563 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3564 | (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3565 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3566#endif
3567
3568 /*
3569 * Now, check if this falls into a regular RAM range or if we should use
3570 * the ad-hoc one.
3571 *
3572 * Note! For reasons of simplicity, we're considering the whole MMIO2 area
3573 * here rather than individual chunks.
3574 */
3575 int rc = VINF_SUCCESS;
3576 uint32_t idxInsert = UINT32_MAX;
3577 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
3578 if (pOverlappingRange)
3579 {
3580 /* Simplification: all within the same range. */
3581 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
3582 && GCPhysLast <= pOverlappingRange->GCPhysLast,
3583 ("%RGp-%RGp (MMIO2/%s) falls partly outside %RGp-%RGp (%s)\n",
3584 GCPhys, GCPhysLast, pszDesc,
3585 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3586 VERR_PGM_RAM_CONFLICT);
3587
3588 /* Check that it isn't an ad hoc range, but a real RAM range. */
3589 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
3590 ("%RGp-%RGp (MMIO2/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
3591 GCPhys, GCPhysLast, pszDesc,
3592 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3593 VERR_PGM_RAM_CONFLICT);
3594
3595 /* There can only be one MMIO2 chunk matching here! */
3596 AssertLogRelMsgReturn(cChunks == 1,
3597 ("%RGp-%RGp (MMIO2/%s) consists of %u chunks whereas the RAM (%s) somehow doesn't!\n",
3598 GCPhys, GCPhysLast, pszDesc, cChunks, pOverlappingRange->pszDesc),
3599 VERR_PGM_PHYS_MMIO_EX_IPE);
3600
3601 /* Check that it's all RAM pages. */
3602 PCPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3603 uint32_t const cMmio2Pages = pVM->pgm.s.apMmio2RamRanges[idxFirst]->cb >> GUEST_PAGE_SHIFT;
3604 uint32_t cPagesLeft = cMmio2Pages;
3605 while (cPagesLeft-- > 0)
3606 {
3607 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
3608 ("%RGp-%RGp (MMIO2/%s): %RGp is not a RAM page - type=%d desc=%s\n", GCPhys, GCPhysLast,
3609 pszDesc, pOverlappingRange->GCPhys, PGM_PAGE_GET_TYPE(pPage), pOverlappingRange->pszDesc),
3610 VERR_PGM_RAM_CONFLICT);
3611 pPage++;
3612 }
3613
3614#ifdef VBOX_WITH_PGM_NEM_MODE
3615 /* We cannot mix MMIO2 into a RAM range in simplified memory mode because pOverlappingRange->pbR3 can't point
3616 both at the RAM and the MMIO2, so we would never actually read from or write to the MMIO2 memory if we tried. */
3617 AssertLogRelMsgReturn(!VM_IS_NEM_ENABLED(pVM),
3618 ("Putting %s at %RGp-%RGp is not possible in NEM mode because existing %RGp-%RGp (%s) mapping\n",
3619 pszDesc, GCPhys, GCPhysLast,
3620 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
3621 VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE);
3622#endif
3623
3624 /*
3625 * Make all the pages in the range MMIO/ZERO pages, freeing any
3626 * RAM pages currently mapped here. This might not be 100% correct,
3627 * but so what, we do the same from MMIO...
3628 */
3629 rc = pgmR3PhysFreePageRange(pVM, pOverlappingRange, GCPhys, GCPhysLast, NULL);
3630 AssertRCReturn(rc, rc);
3631
3632 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - inside %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc,
3633 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc));
3634
3635 /*
3636 * We're all in for mapping it now. Update the MMIO2 range to reflect it.
3637 */
3638 pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys = GCPhys;
3639 pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags |= PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED;
3640
3641 /*
3642 * Replace the pages in the range.
3643 */
3644 PPGMPAGE pPageSrc = &pVM->pgm.s.apMmio2RamRanges[idxFirst]->aPages[0];
3645 PPGMPAGE pPageDst = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
3646 cPagesLeft = cMmio2Pages;
3647 while (cPagesLeft-- > 0)
3648 {
3649 Assert(PGM_PAGE_IS_MMIO(pPageDst));
3650
3651 RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc); RT_NOREF_PV(HCPhys);
3652 uint32_t const idPage = PGM_PAGE_GET_PAGEID(pPageSrc);
3653 PGM_PAGE_SET_PAGEID(pVM, pPageDst, idPage);
3654 PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys);
3655 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2);
3656 PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED);
3657 PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE);
3658 PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0);
3659 PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0);
3660 /* NEM state is not relevant, see VERR_PGM_NOT_SUPPORTED_FOR_NEM_MODE above. */
3661
3662 pVM->pgm.s.cZeroPages--;
3663 pPageSrc++;
3664 pPageDst++;
3665 }
3666
3667 /* Force a PGM pool flush as guest ram references have been changed. */
3668 /** @todo not entirely SMP safe; assuming for now the guest takes
3669 * care of this internally (not touch mapped mmio while changing the
3670 * mapping). */
3671 PVMCPU pVCpu = VMMGetCpu(pVM);
3672 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3673 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3674 }
3675 else
3676 {
3677 /*
3678 * No RAM range, insert the ones prepared during registration.
3679 */
3680 Log(("PGMR3PhysMmio2Map: %RGp-%RGp %s - no RAM overlap\n", GCPhys, GCPhysLast, pszDesc));
3681 RTGCPHYS GCPhysCur = GCPhys;
3682 uint32_t iChunk = 0;
3683 uint32_t idx = idxFirst;
3684 for (; iChunk < cChunks; iChunk++, idx++)
3685 {
3686 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3687 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3688 Assert(pRamRange->idRange == pMmio2->idRamRange);
3689 Assert(pMmio2->GCPhys == NIL_RTGCPHYS);
3690
3691#ifdef VBOX_WITH_NATIVE_NEM
3692 /* Tell NEM and get the new NEM state for the pages. */
3693 uint8_t u2NemState = 0;
3694 if (VM_IS_NEM_ENABLED(pVM))
3695 {
3696 rc = NEMR3NotifyPhysMmioExMapEarly(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL /*pvRam*/, pRamRange->pbR3,
3697 &u2NemState, &pRamRange->uNemRange);
3698 AssertLogRelMsgBreak(RT_SUCCESS(rc),
3699 ("%RGp LB %RGp fFlags=%#x (%s)\n",
3700 GCPhysCur, pRamRange->cb, pMmio2->fFlags, pRamRange->pszDesc));
3701 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED; /* Set this early to indicate that NEM has been notified. */
3702 }
3703#endif
3704
3705 /* Clear the tracking data of pages we're going to reactivate. */
3706 PPGMPAGE pPageSrc = &pRamRange->aPages[0];
3707 uint32_t cPagesLeft = pRamRange->cb >> GUEST_PAGE_SHIFT;
3708 while (cPagesLeft-- > 0)
3709 {
3710 PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0);
3711 PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0);
3712#ifdef VBOX_WITH_NATIVE_NEM
3713 PGM_PAGE_SET_NEM_STATE(pPageSrc, u2NemState);
3714#endif
3715 pPageSrc++;
3716 }
3717
3718 /* Insert the RAM range into the lookup table. */
3719 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhysCur, &idxInsert);
3720 AssertRCBreak(rc);
3721
3722 /* Mark the range as fully mapped. */
3723 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_OVERLAPPING;
3724 pMmio2->fFlags |= PGMREGMMIO2RANGE_F_MAPPED;
3725 pMmio2->GCPhys = GCPhysCur;
3726
3727 /* Advance. */
3728 GCPhysCur += pRamRange->cb;
3729 }
3730 if (RT_FAILURE(rc))
3731 {
3732 /*
3733 * Bail out anything we've done so far.
3734 */
3735 idxInsert -= 1;
3736 do
3737 {
3738 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3739 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3740
3741#ifdef VBOX_WITH_NATIVE_NEM
3742 if ( VM_IS_NEM_ENABLED(pVM)
3743 && (pVM->pgm.s.aMmio2Ranges[idx].fFlags & PGMREGMMIO2RANGE_F_MAPPED))
3744 {
3745 uint8_t u2NemState = UINT8_MAX;
3746 NEMR3NotifyPhysMmioExUnmap(pVM, GCPhysCur, pRamRange->cb, fNemFlags, NULL, pRamRange->pbR3,
3747 &u2NemState, &pRamRange->uNemRange);
3748 if (u2NemState != UINT8_MAX)
3749 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2NemState);
3750 }
3751#endif
3752 if (pMmio2->GCPhys != NIL_RTGCPHYS)
3753 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
3754
3755 pMmio2->GCPhys = NIL_RTGCPHYS;
3756 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_MAPPED;
3757
3758 idx--;
3759 } while (iChunk-- > 0);
3760 return rc;
3761 }
3762 }
3763
3764 /*
3765 * If the range has dirty page monitoring enabled, enable that.
3766 *
3767 * We ignore failures here for now because if we fail, the whole mapping
3768 * will have to be reversed and we'll end up with nothing at all on the
3769 * screen and a grumpy guest, whereas if we just go on, we'll only have
3770 * visual distortions to gripe about. There will be something in the
3771 * release log.
3772 */
3773 if ( pVM->pgm.s.aMmio2Ranges[idxFirst].pPhysHandlerR3
3774 && (pVM->pgm.s.aMmio2Ranges[idxFirst].fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3775 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
3776
3777 /* Flush physical page map TLB. */
3778 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
3779
3780#ifdef VBOX_WITH_NATIVE_NEM
3781 /*
3782 * Late NEM notification (currently unused).
3783 */
3784 if (VM_IS_NEM_ENABLED(pVM))
3785 {
3786 if (pOverlappingRange)
3787 {
3788 uint8_t * const pbRam = pOverlappingRange->pbR3 ? &pOverlappingRange->pbR3[GCPhys - pOverlappingRange->GCPhys] : NULL;
3789 rc = NEMR3NotifyPhysMmioExMapLate(pVM, GCPhys, GCPhysLast - GCPhys + 1U,
3790 fNemFlags | NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE, pbRam,
3791 pVM->pgm.s.aMmio2Ranges[idxFirst].pbR3, NULL /*puNemRange*/);
3792 }
3793 else
3794 {
3795 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3796 {
3797 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3798 Assert(pVM->pgm.s.aMmio2Ranges[idx].GCPhys == pRamRange->GCPhys);
3799
3800 rc = NEMR3NotifyPhysMmioExMapLate(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags, NULL /*pvRam*/,
3801 pRamRange->pbR3, &pRamRange->uNemRange);
3802 AssertRCBreak(rc);
3803 }
3804 }
3805 AssertLogRelRCReturnStmt(rc,
3806 PGMR3PhysMmio2Unmap(pVM, pVM->pgm.s.aMmio2Ranges[idxFirst].pDevInsR3, idxFirst + 1, GCPhys),
3807 rc);
3808 }
3809#endif
3810
3811 return VINF_SUCCESS;
3812}
3813
3814
3815/**
3816 * Maps a MMIO2 region.
3817 *
3818 * This is typically done when a guest / the bios / state loading changes the
3819 * PCI config. The replacing of base memory has the same restrictions as during
3820 * registration, of course.
3821 *
3822 * @returns VBox status code.
3823 *
3824 * @param pVM The cross context VM structure.
3825 * @param pDevIns The device instance owning the region.
3826 * @param hMmio2 The handle of the region to map.
3827 * @param GCPhys The guest-physical address to be remapped.
3828 */
3829VMMR3_INT_DECL(int) PGMR3PhysMmio2Map(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
3830{
3831 /*
3832 * Validate input.
3833 */
3834 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
3835 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
3836 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
3837 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
3838 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3839 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
3840
3841 uint32_t cChunks = 0;
3842 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
3843 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
3844
3845 /* Gather the full range size so we can validate the mapping address properly. */
3846 RTGCPHYS cbRange = 0;
3847 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3848 cbRange += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
3849
3850 RTGCPHYS const GCPhysLast = GCPhys + cbRange - 1;
3851 AssertLogRelReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3852
3853 /*
3854 * Take the PGM lock and call worker.
3855 */
3856 int rc = PGM_LOCK(pVM);
3857 AssertRCReturn(rc, rc);
3858
3859 rc = pgmR3PhysMmio2MapLocked(pVM, idxFirst, cChunks, GCPhys, GCPhysLast);
3860#ifdef VBOX_STRICT
3861 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
3862#endif
3863
3864 PGM_UNLOCK(pVM);
3865 return rc;
3866}
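
/*
 * Illustrative sketch (not part of the build): mapping the region registered
 * earlier at a hypothetical PCI BAR address. The address must be non-zero and
 * page aligned, and the call has to be made on an EMT.
 *
 *      rc = PGMR3PhysMmio2Map(pVM, pDevIns, hMmio2, 0xe0000000);
 *
 * If the address overlaps existing base RAM, the RAM pages are replaced
 * (single chunk registrations only); otherwise the RAM ranges prepared at
 * registration time are inserted into the lookup table by
 * pgmR3PhysMmio2MapLocked above.
 */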
3867
3868
3869/**
3870 * Worker for PGMR3PhysMmio2Unmap.
3871 */
3872static int pgmR3PhysMmio2UnmapLocked(PVM pVM, uint32_t const idxFirst, uint32_t const cChunks, RTGCPHYS const GCPhysIn)
3873{
3874 /*
3875 * Validate input.
3876 */
3877 RTGCPHYS cbRange = 0;
3878 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3879 {
3880 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3881 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3882 AssertReturn(pMmio2->idRamRange == pRamRange->idRange, VERR_INTERNAL_ERROR_3);
3883 AssertReturn(pMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED, VERR_WRONG_ORDER);
3884 AssertReturn(pMmio2->GCPhys != NIL_RTGCPHYS, VERR_WRONG_ORDER);
3885 cbRange += pRamRange->cb;
3886 }
3887
3888 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
3889 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
3890 const char * const pszDesc = pFirstRamRange->pszDesc;
3891 AssertLogRelMsgReturn(GCPhysIn == pFirstMmio2->GCPhys || GCPhysIn == NIL_RTGCPHYS,
3892 ("GCPhys=%RGp, actual address is %RGp\n", GCPhysIn, pFirstMmio2->GCPhys),
3893 VERR_MISMATCH);
3894 RTGCPHYS const GCPhys = pFirstMmio2->GCPhys; /* (GCPhysIn may be NIL_RTGCPHYS, so use the stored address) */
3895 Log(("PGMR3PhysMmio2Unmap: %RGp-%RGp %s\n", GCPhys, GCPhys + cbRange - 1U, pszDesc));
3896
3897 uint16_t const fOldFlags = pFirstMmio2->fFlags;
3898 Assert(fOldFlags & PGMREGMMIO2RANGE_F_MAPPED);
3899
3900 /* Find the first entry in the lookup table and verify the overlapping flag. */
3901 uint32_t idxLookup = pgmR3PhysRamRangeFindOverlappingIndex(pVM, GCPhys, GCPhys + pFirstRamRange->cb - 1U);
3902 AssertLogRelMsgReturn(idxLookup < pVM->pgm.s.RamRangeUnion.cLookupEntries,
3903 ("MMIO2 range not found at %RGp LB %RGp in the lookup table! (%s)\n",
3904 GCPhys, pFirstRamRange->cb, pszDesc),
3905 VERR_INTERNAL_ERROR_2);
3906
3907 uint32_t const idLookupRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
3908 AssertLogRelReturn(idLookupRange != 0 && idLookupRange <= pVM->pgm.s.idRamRangeMax, VERR_INTERNAL_ERROR_5);
3909 PPGMRAMRANGE const pLookupRange = pVM->pgm.s.apRamRanges[idLookupRange];
3910 AssertLogRelReturn(pLookupRange, VERR_INTERNAL_ERROR_3);
3911
3912 AssertLogRelMsgReturn(fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING
3913 ? pLookupRange != pFirstRamRange : pLookupRange == pFirstRamRange,
3914 ("MMIO2 unmap mixup at %RGp LB %RGp fl=%#x (%s) vs %RGp LB %RGp (%s)\n",
3915 GCPhys, cbRange, fOldFlags, pszDesc, pLookupRange->GCPhys, pLookupRange->cb, pLookupRange->pszDesc),
3916 VERR_INTERNAL_ERROR_4);
3917
3918 /*
3919 * If monitoring dirty pages, we must deregister the handlers first.
3920 */
3921 if ( pFirstMmio2->pPhysHandlerR3
3922 && (fOldFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
3923 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
3924
3925 /*
3926 * Unmap it.
3927 */
3928 int rcRet = VINF_SUCCESS;
3929#ifdef VBOX_WITH_NATIVE_NEM
3930 uint32_t const fNemFlags = NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2
3931 | (fOldFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES
3932 ? NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES : 0);
3933#endif
3934 if (fOldFlags & PGMREGMMIO2RANGE_F_OVERLAPPING)
3935 {
3936 /*
3937 * We've replaced RAM, replace with zero pages.
3938 *
3939 * Note! This is where we might differ a little from a real system, because
3940 * it's likely to just show the RAM pages as they were before the
3941 * MMIO2 region was mapped here.
3942 */
3943 /* Only one chunk allowed when overlapping! */
3944 Assert(cChunks == 1);
3945 /* No NEM stuff should ever get here, see assertion in the mapping function. */
3946 AssertReturn(!VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
3947
3948 /* Restore the RAM pages we've replaced. */
3949 PPGMPAGE pPageDst = &pLookupRange->aPages[(pFirstRamRange->GCPhys - pLookupRange->GCPhys) >> GUEST_PAGE_SHIFT];
3950 uint32_t cPagesLeft = pFirstRamRange->cb >> GUEST_PAGE_SHIFT;
3951 pVM->pgm.s.cZeroPages += cPagesLeft;
3952 while (cPagesLeft-- > 0)
3953 {
3954 PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM);
3955 pPageDst++;
3956 }
3957
3958 /* Update range state. */
3959 pFirstMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3960 pFirstMmio2->GCPhys = NIL_RTGCPHYS;
3961 Assert(pFirstRamRange->GCPhys == NIL_RTGCPHYS);
3962 Assert(pFirstRamRange->GCPhysLast == NIL_RTGCPHYS);
3963 }
3964 else
3965 {
3966 /*
3967 * Unlink the chunks related to the MMIO/MMIO2 region.
3968 */
3969 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
3970 {
3971 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
3972 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
3973 Assert(pMmio2->idRamRange == pRamRange->idRange);
3974 Assert(pMmio2->GCPhys == pRamRange->GCPhys);
3975
3976#ifdef VBOX_WITH_NATIVE_NEM
3977 if (VM_IS_NEM_ENABLED(pVM)) /* Notify NEM. */
3978 {
3979 uint8_t u2State = UINT8_MAX;
3980 int rc = NEMR3NotifyPhysMmioExUnmap(pVM, pRamRange->GCPhys, pRamRange->cb, fNemFlags,
3981 NULL, pMmio2->pbR3, &u2State, &pRamRange->uNemRange);
3982 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3983 ("NEMR3NotifyPhysMmioExUnmap failed: %Rrc - GCPhys=RGp LB %RGp fNemFlags=%#x pbR3=%p %s\n",
3984 rc, pRamRange->GCPhys, pRamRange->cb, fNemFlags, pMmio2->pbR3, pRamRange->pszDesc),
3985 rcRet = rc);
3986 if (u2State != UINT8_MAX)
3987 pgmPhysSetNemStateForPages(pRamRange->aPages, pRamRange->cb >> GUEST_PAGE_SHIFT, u2State);
3988 }
3989#endif
3990
3991 int rc = pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxLookup);
3992 AssertLogRelMsgStmt(RT_SUCCESS(rc),
3993 ("pgmR3PhysRamRangeRemoveLookup failed: %Rrc - GCPhys=%RGp LB %RGp %s\n",
3994 rc, pRamRange->GCPhys, pRamRange->cb, pRamRange->pszDesc),
3995 rcRet = rc);
3996
3997 pMmio2->GCPhys = NIL_RTGCPHYS;
3998 pMmio2->fFlags &= ~(PGMREGMMIO2RANGE_F_OVERLAPPING | PGMREGMMIO2RANGE_F_MAPPED);
3999 Assert(pRamRange->GCPhys == NIL_RTGCPHYS);
4000 Assert(pRamRange->GCPhysLast == NIL_RTGCPHYS);
4001 }
4002 }
4003
4004 /* Force a PGM pool flush as guest ram references have been changed. */
4005 /** @todo not entirely SMP safe; assuming for now the guest takes care
4006 * of this internally (not touch mapped mmio while changing the
4007 * mapping). */
4008 PVMCPU pVCpu = VMMGetCpu(pVM);
4009 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
4010 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4011
4012 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4013 /* pgmPhysInvalidRamRangeTlbs(pVM); - not necessary */
4014
4015 return rcRet;
4016}
4017
4018
4019/**
4020 * Unmaps an MMIO2 region.
4021 *
4022 * This is typically done when a guest / the bios / state loading changes the
4023 * PCI config. The replacing of base memory has the same restrictions as during
4024 * registration, of course.
4025 */
4026VMMR3_INT_DECL(int) PGMR3PhysMmio2Unmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS GCPhys)
4027{
4028 /*
4029 * Validate input
4030 */
4031 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4032 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4033 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE, VERR_INVALID_HANDLE);
4034 if (GCPhys != NIL_RTGCPHYS)
4035 {
4036 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
4037 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
4038 }
4039
4040 uint32_t cChunks = 0;
4041 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4042 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4043
4044
4045 /*
4046 * Take the PGM lock and call worker.
4047 */
4048 int rc = PGM_LOCK(pVM);
4049 AssertRCReturn(rc, rc);
4050
4051 rc = pgmR3PhysMmio2UnmapLocked(pVM, idxFirst, cChunks, GCPhys);
4052#ifdef VBOX_STRICT
4053 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
4054#endif
4055
4056 PGM_UNLOCK(pVM);
4057 return rc;
4058}
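
/*
 * Illustrative note: the GCPhys argument is only used for cross-checking the
 * current mapping address. A caller that does not track where the region is
 * mapped can pass NIL_RTGCPHYS and the region is unmapped from wherever it
 * currently resides, e.g.:
 *
 *      rc = PGMR3PhysMmio2Unmap(pVM, pDevIns, hMmio2, NIL_RTGCPHYS);
 */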
4059
4060
4061/**
4062 * Reduces the mapping size of a MMIO2 region.
4063 *
4064 * This is mainly for dealing with old saved states after changing the default
4065 * size of a mapping region. See PDMDevHlpMmio2Reduce and
4066 * PDMPCIDEV::pfnRegionLoadChangeHookR3.
4067 *
4068 * The region must not currently be mapped when making this call. The VM
4069 * must be in the state-restore (loading) or VM-construction (creating) state.
4070 *
4071 * @returns VBox status code.
4072 * @param pVM The cross context VM structure.
4073 * @param pDevIns The device instance owning the region.
4074 * @param hMmio2 The handle of the region to reduce.
4075 * @param cbRegion The new mapping size.
4076 */
4077VMMR3_INT_DECL(int) PGMR3PhysMmio2Reduce(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, RTGCPHYS cbRegion)
4078{
4079 /*
4080 * Validate input
4081 */
4082 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4083 AssertReturn(hMmio2 != NIL_PGMMMIO2HANDLE && hMmio2 != 0 && hMmio2 <= RT_ELEMENTS(pVM->pgm.s.aMmio2Ranges),
4084 VERR_INVALID_HANDLE);
4085 AssertReturn(cbRegion >= GUEST_PAGE_SIZE, VERR_INVALID_PARAMETER);
4086 AssertReturn(!(cbRegion & GUEST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
4087
4088 PVMCPU const pVCpu = VMMGetCpu(pVM);
4089 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4090
4091 VMSTATE const enmVmState = VMR3GetState(pVM);
4092 AssertLogRelMsgReturn( enmVmState == VMSTATE_CREATING
4093 || enmVmState == VMSTATE_LOADING,
4094 ("enmVmState=%d (%s)\n", enmVmState, VMR3GetStateName(enmVmState)),
4095 VERR_VM_INVALID_VM_STATE);
4096
4097 /*
4098 * Grab the PGM lock and validate the request properly.
4099 */
4100 int rc = PGM_LOCK(pVM);
4101 AssertRCReturn(rc, rc);
4102
4103 uint32_t cChunks = 0;
4104 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4105 if ((int32_t)idxFirst >= 0)
4106 {
4107 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4108 PPGMRAMRANGE const pFirstRamRange = pVM->pgm.s.apMmio2RamRanges[idxFirst];
4109 if ( !(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4110 && pFirstMmio2->GCPhys == NIL_RTGCPHYS)
4111 {
4112 /*
4113 * NOTE! Current implementation does not support multiple ranges.
4114 * Implement when there is a real world need and thus a testcase.
4115 */
4116 if (cChunks == 1)
4117 {
4118 /*
4119 * The request has to be within the initial size.
4120 */
4121 if (cbRegion <= pFirstMmio2->cbReal)
4122 {
4123 /*
4124 * All we have to do is modify the size stored in the RAM range,
4125 * as it is the one used when mapping it and such.
4126 * The two page counts stored in PGMR0PERVM remain unchanged.
4127 */
4128 Log(("PGMR3PhysMmio2Reduce: %s changes from %#RGp bytes (%#RGp) to %#RGp bytes.\n",
4129 pFirstRamRange->pszDesc, pFirstRamRange->cb, pFirstMmio2->cbReal, cbRegion));
4130 pFirstRamRange->cb = cbRegion;
4131 rc = VINF_SUCCESS;
4132 }
4133 else
4134 {
4135 AssertLogRelMsgFailed(("MMIO2/%s: cbRegion=%#RGp > cbReal=%#RGp\n",
4136 pFirstRamRange->pszDesc, cbRegion, pFirstMmio2->cbReal));
4137 rc = VERR_OUT_OF_RANGE;
4138 }
4139 }
4140 else
4141 {
4142 AssertLogRelMsgFailed(("MMIO2/%s: more than one chunk: %d (flags=%#x)\n",
4143 pFirstRamRange->pszDesc, cChunks, pFirstMmio2->fFlags));
4144 rc = VERR_NOT_SUPPORTED;
4145 }
4146 }
4147 else
4148 {
4149 AssertLogRelMsgFailed(("MMIO2/%s: cannot change size of mapped range: %RGp..%RGp\n", pFirstRamRange->pszDesc,
4150 pFirstMmio2->GCPhys, pFirstMmio2->GCPhys + pFirstRamRange->cb - 1U));
4151 rc = VERR_WRONG_ORDER;
4152 }
4153 }
4154 else
4155 rc = (int32_t)idxFirst;
4156
4157 PGM_UNLOCK(pVM);
4158 return rc;
4159}
4160
4161
4162/**
4163 * Validates @a hMmio2, making sure it belongs to @a pDevIns.
4164 *
4165 * @returns VBox status code.
4166 * @param pVM The cross context VM structure.
4167 * @param pDevIns The device which allegedly owns @a hMmio2.
4168 * @param hMmio2 The handle to validate.
4169 */
4170VMMR3_INT_DECL(int) PGMR3PhysMmio2ValidateHandle(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4171{
4172 /*
4173 * Validate input
4174 */
4175 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4176 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4177
4178 /*
4179 * Just do this the simple way.
4180 */
4181 int rc = PGM_LOCK_VOID(pVM);
4182 AssertRCReturn(rc, rc);
4183 uint32_t cChunks;
4184 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4185 PGM_UNLOCK(pVM);
4186 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4187 return VINF_SUCCESS;
4188}
4189
4190
4191/**
4192 * Gets the mapping address of an MMIO2 region.
4193 *
4194 * @returns Mapping address, NIL_RTGCPHYS if not mapped or invalid handle.
4195 *
4196 * @param pVM The cross context VM structure.
4197 * @param pDevIns The device owning the MMIO2 handle.
4198 * @param hMmio2 The region handle.
4199 */
4200VMMR3_INT_DECL(RTGCPHYS) PGMR3PhysMmio2GetMappingAddress(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
4201{
4202 RTGCPHYS GCPhysRet = NIL_RTGCPHYS;
4203
4204 int rc = PGM_LOCK_VOID(pVM);
4205 AssertRCReturn(rc, NIL_RTGCPHYS);
4206
4207 uint32_t cChunks;
4208 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4209 if ((int32_t)idxFirst >= 0)
4210 GCPhysRet = pVM->pgm.s.aMmio2Ranges[idxFirst].GCPhys;
4211
4212 PGM_UNLOCK(pVM);
4213 return GCPhysRet;
4214}
4215
4216
4217/**
4218 * Worker for PGMR3PhysMmio2QueryAndResetDirtyBitmap.
4219 *
4220 * Called holding the PGM lock.
4221 */
4222static int pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4223 void *pvBitmap, size_t cbBitmap)
4224{
4225 /*
4226 * Continue validation.
4227 */
4228 uint32_t cChunks;
4229 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4230 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4231 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4232 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4233
4234 int rc = VINF_SUCCESS;
4235 if (cbBitmap || pvBitmap)
4236 {
4237 /*
4238 * Check the bitmap size and collect all the dirty flags.
4239 */
4240 RTGCPHYS cbTotal = 0;
4241 uint16_t fTotalDirty = 0;
4242 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4243 {
4244 /* Not using cbReal here, because NEM is not involved in the creation, only the mapping. */
4245 cbTotal += pVM->pgm.s.apMmio2RamRanges[idx]->cb;
4246 fTotalDirty |= pVM->pgm.s.aMmio2Ranges[idx].fFlags;
4247 }
4248 size_t const cbTotalBitmap = RT_ALIGN_T(cbTotal, GUEST_PAGE_SIZE * 64, RTGCPHYS) / GUEST_PAGE_SIZE / 8;
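/* Example (illustrative, assuming 4 KiB guest pages): a 16 MB region covers
   4096 pages, so cbTotalBitmap comes out as 4096 bits = 512 bytes; the
   GUEST_PAGE_SIZE * 64 alignment keeps the byte count a multiple of 8. */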
4249
4250 AssertPtrReturn(pvBitmap, VERR_INVALID_POINTER);
4251 AssertReturn(RT_ALIGN_P(pvBitmap, sizeof(uint64_t)) == pvBitmap, VERR_INVALID_POINTER);
4252 AssertReturn(cbBitmap == cbTotalBitmap, VERR_INVALID_PARAMETER);
4253
4254#ifdef VBOX_WITH_PGM_NEM_MODE
4255 /*
4256 * If there is no physical handler we must be in NEM mode and NEM
4257 * taking care of the dirty bit collecting.
4258 */
4259 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4260 {
4261/** @todo This does not integrate at all with --execute-all-in-iem, leaving the
4262 * screen blank when using it together with --driverless. Fixing this won't be
4263 * entirely easy as we take the PGM_PAGE_HNDL_PHYS_STATE_DISABLED page status to
4264 * mean a dirty page. */
4265 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4266 uint8_t *pbBitmap = (uint8_t *)pvBitmap;
4267 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4268 {
4269 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4270 size_t const cbBitmapChunk = (pRamRange->cb / GUEST_PAGE_SIZE + 7) / 8;
4271 Assert((RTGCPHYS)cbBitmapChunk * GUEST_PAGE_SIZE * 8 == pRamRange->cb);
4272 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4273 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4274 pRamRange->uNemRange, pbBitmap, cbBitmapChunk);
4275 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4276 rc = rc2;
4277 pbBitmap += pRamRange->cb / GUEST_PAGE_SIZE / 8;
4278 }
4279 }
4280 else
4281#endif
4282 if (fTotalDirty & PGMREGMMIO2RANGE_F_IS_DIRTY)
4283 {
4284 if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4285 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4286 {
4287 /*
4288 * Reset each chunk, gathering dirty bits.
4289 */
4290 RT_BZERO(pvBitmap, cbBitmap); /* simpler for now. */
4291 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4292 {
4293 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4294 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4295 {
4296 int rc2 = pgmHandlerPhysicalResetMmio2WithBitmap(pVM, pMmio2->GCPhys, pvBitmap, iPageNo);
4297 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4298 rc = rc2;
4299 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4300 }
4301 iPageNo += pVM->pgm.s.apMmio2RamRanges[idx]->cb >> GUEST_PAGE_SHIFT;
4302 }
4303 }
4304 else
4305 {
4306 /*
4307 * If not mapped or tracking is disabled, we return the
4308 * PGMREGMMIO2RANGE_F_IS_DIRTY status for all pages. We cannot
4309 * get more accurate data than that after unmapping or disabling.
4310 */
4311 RT_BZERO(pvBitmap, cbBitmap);
4312 for (uint32_t iChunk = 0, idx = idxFirst, iPageNo = 0; iChunk < cChunks; iChunk++, idx++)
4313 {
4314 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4315 PPGMREGMMIO2RANGE const pMmio2 = &pVM->pgm.s.aMmio2Ranges[idx];
4316 if (pMmio2->fFlags & PGMREGMMIO2RANGE_F_IS_DIRTY)
4317 {
4318 ASMBitSetRange(pvBitmap, iPageNo, iPageNo + (pRamRange->cb >> GUEST_PAGE_SHIFT));
4319 pMmio2->fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4320 }
4321 iPageNo += pRamRange->cb >> GUEST_PAGE_SHIFT;
4322 }
4323 }
4324 }
4325 /*
4326 * No dirty chunks.
4327 */
4328 else
4329 RT_BZERO(pvBitmap, cbBitmap);
4330 }
4331 /*
4332 * No bitmap. Reset the region if tracking is currently enabled.
4333 */
4334 else if ( (pFirstMmio2->fFlags & (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4335 == (PGMREGMMIO2RANGE_F_MAPPED | PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4336 {
4337#ifdef VBOX_WITH_PGM_NEM_MODE
4338 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4339 {
4340 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4341 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4342 {
4343 PPGMRAMRANGE const pRamRange = pVM->pgm.s.apMmio2RamRanges[idx];
4344 Assert(pRamRange->GCPhys == pVM->pgm.s.aMmio2Ranges[idx].GCPhys); /* (No MMIO2 inside RAM in NEM mode!)*/
4345 int rc2 = NEMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pRamRange->GCPhys, pRamRange->cb,
4346 pRamRange->uNemRange, NULL, 0);
4347 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4348 rc = rc2;
4349 }
4350 }
4351 else
4352#endif
4353 {
4354 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4355 {
4356 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_IS_DIRTY;
4357 int rc2 = PGMHandlerPhysicalReset(pVM, pVM->pgm.s.aMmio2Ranges[idx].GCPhys);
4358 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4359 rc = rc2;
4360 }
4361 }
4362 }
4363
4364 return rc;
4365}
4366
4367
4368/**
4369 * Queries the dirty page bitmap and resets the monitoring.
4370 *
4371 * The PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES flag must be specified when
4372 * creating the range for this to work.
4373 *
4374 * @returns VBox status code.
4375 * @retval VERR_INVALID_FUNCTION if not created using
4376 * PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES.
4377 * @param pVM The cross context VM structure.
4378 * @param pDevIns The device owning the MMIO2 handle.
4379 * @param hMmio2 The region handle.
4380 * @param pvBitmap The output bitmap. Must be 8-byte aligned. Ignored
4381 * when @a cbBitmap is zero.
4382 * @param cbBitmap The size of the bitmap. Must be the size of the whole
4383 * MMIO2 range, rounded up to the nearest 8 bytes.
4384 * When zero only a reset is done.
4385 */
4386VMMR3_INT_DECL(int) PGMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
4387 void *pvBitmap, size_t cbBitmap)
4388{
4389 /*
4390 * Do some basic validation before grabbing the PGM lock and continuing.
4391 */
4392 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4393 AssertReturn(RT_ALIGN_Z(cbBitmap, sizeof(uint64_t)) == cbBitmap, VERR_INVALID_PARAMETER);
4394 int rc = PGM_LOCK(pVM);
4395 if (RT_SUCCESS(rc))
4396 {
4397 STAM_PROFILE_START(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4398 rc = pgmR3PhysMmio2QueryAndResetDirtyBitmapLocked(pVM, pDevIns, hMmio2, pvBitmap, cbBitmap);
4399 STAM_PROFILE_STOP(&pVM->pgm.s.StatMmio2QueryAndResetDirtyBitmap, a);
4400 PGM_UNLOCK(pVM);
4401 }
4402 return rc;
4403}
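
/*
 * Illustrative sketch (not part of the build): polling dirty pages for a
 * hypothetical 2 MB region created with PGMPHYS_MMIO2_FLAGS_TRACK_DIRTY_PAGES.
 * The bitmap must be 8-byte aligned and cover the whole range, one bit per
 * guest page rounded up to a multiple of 8 bytes.
 *
 *      uint64_t au64Dirty[_2M / GUEST_PAGE_SIZE / 64];
 *      rc = PGMR3PhysMmio2QueryAndResetDirtyBitmap(pVM, pDevIns, hMmio2,
 *                                                  au64Dirty, sizeof(au64Dirty));
 *
 * Passing NULL and zero performs only a reset of the tracking state.
 */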
4404
4405
4406/**
4407 * Worker for PGMR3PhysMmio2ControlDirtyPageTracking
4408 *
4409 * Called owning the PGM lock.
4410 */
4411static int pgmR3PhysMmio2ControlDirtyPageTrackingLocked(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4412{
4413 /*
4414 * Continue validation.
4415 */
4416 uint32_t cChunks;
4417 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4418 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
4419 PPGMREGMMIO2RANGE const pFirstMmio2 = &pVM->pgm.s.aMmio2Ranges[idxFirst];
4420 AssertReturn(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACK_DIRTY_PAGES, VERR_INVALID_FUNCTION);
4421
4422#ifdef VBOX_WITH_PGM_NEM_MODE
4423 /*
4424 * This is a no-op if NEM is responsible for doing the tracking; we simply
4425 * leave the tracking on all the time there.
4426 */
4427 if (pFirstMmio2->pPhysHandlerR3 == NULL)
4428 {
4429 AssertReturn(VM_IS_NEM_ENABLED(pVM), VERR_INTERNAL_ERROR_4);
4430 return VINF_SUCCESS;
4431 }
4432#endif
4433
4434 /*
4435 * Anything needing doing?
4436 */
4437 if (fEnabled != RT_BOOL(pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_TRACKING_ENABLED))
4438 {
4439 LogFlowFunc(("fEnabled=%RTbool %s\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4440
4441 /*
4442 * Update the PGMREGMMIO2RANGE_F_TRACKING_ENABLED flag.
4443 */
4444 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4445 if (fEnabled)
4446 pVM->pgm.s.aMmio2Ranges[idx].fFlags |= PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4447 else
4448 pVM->pgm.s.aMmio2Ranges[idx].fFlags &= ~PGMREGMMIO2RANGE_F_TRACKING_ENABLED;
4449
4450 /*
4451 * Enable/disable handlers if currently mapped.
4452 *
4453 * We ignore status codes here as we've already changed the flags and
4454 * returning a failure status now would be confusing. Besides, the two
4455 * functions will continue past failures. As argued in the mapping code,
4456 * it's in the release log.
4457 */
4458 if (pFirstMmio2->fFlags & PGMREGMMIO2RANGE_F_MAPPED)
4459 {
4460 if (fEnabled)
4461 pgmR3PhysMmio2EnableDirtyPageTracing(pVM, idxFirst, cChunks);
4462 else
4463 pgmR3PhysMmio2DisableDirtyPageTracing(pVM, idxFirst, cChunks);
4464 }
4465 }
4466 else
4467 LogFlowFunc(("fEnabled=%RTbool %s - no change\n", fEnabled, pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc));
4468
4469 return VINF_SUCCESS;
4470}
4471
4472
4473/**
4474 * Controls the dirty page tracking for an MMIO2 range.
4475 *
4476 * @returns VBox status code.
4477 * @param pVM The cross context VM structure.
4478 * @param pDevIns The device owning the MMIO2 memory.
4479 * @param hMmio2 The handle of the region.
4480 * @param fEnabled The new tracking state.
4481 */
4482VMMR3_INT_DECL(int) PGMR3PhysMmio2ControlDirtyPageTracking(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, bool fEnabled)
4483{
4484 /*
4485 * Do some basic validation before grabbing the PGM lock and continuing.
4486 */
4487 AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
4488 int rc = PGM_LOCK(pVM);
4489 if (RT_SUCCESS(rc))
4490 {
4491 rc = pgmR3PhysMmio2ControlDirtyPageTrackingLocked(pVM, pDevIns, hMmio2, fEnabled);
4492 PGM_UNLOCK(pVM);
4493 }
4494 return rc;
4495}
4496
4497
4498/**
4499 * Changes the region number of an MMIO2 region.
4500 *
4501 * This is only for dealing with saved state issues, nothing else.
4502 *
4503 * @return VBox status code.
4504 *
4505 * @param pVM The cross context VM structure.
4506 * @param pDevIns The device owning the MMIO2 memory.
4507 * @param hMmio2 The handle of the region.
4508 * @param iNewRegion The new region index.
4509 *
4510 * @thread EMT(0)
4511 * @sa @bugref{9359}
4512 */
4513VMMR3_INT_DECL(int) PGMR3PhysMmio2ChangeRegionNo(PVM pVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2, uint32_t iNewRegion)
4514{
4515 /*
4516 * Validate input.
4517 */
4518 VM_ASSERT_EMT0_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
4519 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_LOADING, VERR_VM_INVALID_VM_STATE);
4520 AssertReturn(iNewRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
4521
4522 int rc = PGM_LOCK(pVM);
4523 AssertRCReturn(rc, rc);
4524
4525 /* Validate and resolve the handle. */
4526 uint32_t cChunks;
4527 uint32_t const idxFirst = pgmR3PhysMmio2ResolveHandle(pVM, pDevIns, hMmio2, &cChunks);
4528 if ((int32_t)idxFirst >= 0)
4529 {
4530 /* Check that the new range number is unused. */
4531 PPGMREGMMIO2RANGE const pConflict = pgmR3PhysMmio2Find(pVM, pDevIns, pVM->pgm.s.aMmio2Ranges[idxFirst].iSubDev,
4532 iNewRegion);
4533 if (!pConflict)
4534 {
4535 /*
4536 * Make the change.
4537 */
4538 for (uint32_t iChunk = 0, idx = idxFirst; iChunk < cChunks; iChunk++, idx++)
4539 pVM->pgm.s.aMmio2Ranges[idx].iRegion = (uint8_t)iNewRegion;
4540 rc = VINF_SUCCESS;
4541 }
4542 else
4543 {
4544 AssertLogRelMsgFailed(("MMIO2/%s: iNewRegion=%d conflicts with %s\n", pVM->pgm.s.apMmio2RamRanges[idxFirst]->pszDesc,
4545 iNewRegion, pVM->pgm.s.apMmio2RamRanges[pConflict->idRamRange]->pszDesc));
4546 rc = VERR_RESOURCE_IN_USE;
4547 }
4548 }
4549 else
4550 rc = (int32_t)idxFirst;
4551
4552 PGM_UNLOCK(pVM);
4553 return rc;
4554}
4555
4556
4557
4558/*********************************************************************************************************************************
4559* ROM *
4560*********************************************************************************************************************************/
4561
4562/**
4563 * Worker for PGMR3PhysRomRegister.
4564 *
4565 * This is here to simplify lock management, i.e. the caller does all the
4566 * locking and we can simply return without needing to remember to unlock
4567 * anything first.
4568 *
4569 * @returns VBox status code.
4570 * @param pVM The cross context VM structure.
4571 * @param pDevIns The device instance owning the ROM.
4572 * @param GCPhys First physical address in the range.
4573 * Must be page aligned!
4574 * @param cb The size of the range (in bytes).
4575 * Must be page aligned!
4576 * @param pvBinary Pointer to the binary data backing the ROM image.
4577 * @param cbBinary The size of the binary data pvBinary points to.
4578 * This must be less or equal to @a cb.
4579 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
4580 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
4581 * @param pszDesc Pointer to description string. This must not be freed.
4582 */
4583static int pgmR3PhysRomRegisterLocked(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
4584 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
4585{
4586 /*
4587 * Validate input.
4588 */
4589 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
4590 AssertReturn(RT_ALIGN_T(GCPhys, GUEST_PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
4591 AssertReturn(RT_ALIGN_T(cb, GUEST_PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
4592 RTGCPHYS const GCPhysLast = GCPhys + (cb - 1);
4593 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
4594 AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER);
4595 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
4596 AssertReturn(!(fFlags & ~PGMPHYS_ROM_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
4597
4598 PVMCPU const pVCpu = VMMGetCpu(pVM);
4599 AssertReturn(pVCpu && pVCpu->idCpu == 0, VERR_VM_THREAD_NOT_EMT);
4600 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
4601
4602 const uint32_t cGuestPages = cb >> GUEST_PAGE_SHIFT;
4603 AssertReturn(cGuestPages <= PGM_MAX_PAGES_PER_ROM_RANGE, VERR_OUT_OF_RANGE);
4604
4605#ifdef VBOX_WITH_PGM_NEM_MODE
4606 const uint32_t cHostPages = RT_ALIGN_T(cb, HOST_PAGE_SIZE, RTGCPHYS) >> HOST_PAGE_SHIFT;
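/* Example (illustrative): with 4 KiB guest pages and a 16 KiB host page size
   (e.g. darwin.arm64), a 40 KiB ROM spans 10 guest pages but is rounded up to
   48 KiB, i.e. 3 host pages, for the SUPR3PageAlloc calls below. */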
4607#endif
4608
4609 /*
4610 * Make sure we've got a free ROM range.
4611 */
4612 uint8_t const idRomRange = pVM->pgm.s.cRomRanges;
4613 AssertLogRelReturn(idRomRange < RT_ELEMENTS(pVM->pgm.s.apRomRanges), VERR_PGM_TOO_MANY_ROM_RANGES);
4614
4615 /*
4616 * Look thru the existing ROM ranges and make sure there aren't any
4617 * overlapping registrations.
4618 */
4619 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
4620 for (uint32_t idx = 0; idx < cRomRanges; idx++)
4621 {
4622 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
4623 AssertLogRelMsgReturn( GCPhys > pRom->GCPhysLast
4624 || GCPhysLast < pRom->GCPhys,
4625 ("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
4626 GCPhys, GCPhysLast, pszDesc,
4627 pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc),
4628 VERR_PGM_RAM_CONFLICT);
4629 }
4630
4631 /*
4632 * Find the RAM location and check for conflicts.
4633 *
4634 * Conflict detection is a bit different than for RAM registration since a
4635 * ROM can be located within a RAM range. So, what we have to check for is
4636 * other memory types (other than RAM that is) and that we don't span more
4637 * than one RAM range (lazy).
4638 */
4639 uint32_t idxInsert = UINT32_MAX;
4640 PPGMRAMRANGE const pOverlappingRange = pgmR3PhysRamRangeFindOverlapping(pVM, GCPhys, GCPhysLast, &idxInsert);
4641 if (pOverlappingRange)
4642 {
4643 /* completely within? */
4644 AssertLogRelMsgReturn( GCPhys >= pOverlappingRange->GCPhys
4645 && GCPhysLast <= pOverlappingRange->GCPhysLast,
4646 ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n",
4647 GCPhys, GCPhysLast, pszDesc,
4648 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4649 VERR_PGM_RAM_CONFLICT);
4650
4651 /* Check that it isn't an ad hoc range, but a real RAM range. */
4652 AssertLogRelMsgReturn(!PGM_RAM_RANGE_IS_AD_HOC(pOverlappingRange),
4653 ("%RGp-%RGp (ROM/%s) mapping attempt in non-RAM range: %RGp-%RGp (%s)\n",
4654 GCPhys, GCPhysLast, pszDesc,
4655 pOverlappingRange->GCPhys, pOverlappingRange->GCPhysLast, pOverlappingRange->pszDesc),
4656 VERR_PGM_RAM_CONFLICT);
4657
4658 /* All the pages must be RAM pages. */
4659 PPGMPAGE pPage = &pOverlappingRange->aPages[(GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT];
4660 uint32_t cPagesLeft = cGuestPages;
4661 while (cPagesLeft-- > 0)
4662 {
4663 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
4664 ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n",
4665 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4666 VERR_PGM_RAM_CONFLICT);
4667 AssertLogRelMsgReturn(PGM_PAGE_IS_ZERO(pPage) || PGM_IS_IN_NEM_MODE(pVM),
4668 ("%RGp (%R[pgmpage]) is not a ZERO page - registering %RGp-%RGp (%s).\n",
4669 GCPhys + ((RTGCPHYS)cPagesLeft << GUEST_PAGE_SHIFT), pPage, GCPhys, GCPhysLast, pszDesc),
4670 VERR_PGM_UNEXPECTED_PAGE_STATE);
4671 pPage++;
4672 }
4673 }
4674
4675 /*
4676 * Update the base memory reservation if necessary.
4677 */
4678 uint32_t const cExtraBaseCost = (pOverlappingRange ? 0 : cGuestPages)
4679 + (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? cGuestPages : 0);
4680 if (cExtraBaseCost)
4681 {
4682 int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost);
4683 AssertRCReturn(rc, rc);
4684 }
4685
4686#ifdef VBOX_WITH_NATIVE_NEM
4687 /*
4688 * Early NEM notification before we've made any changes or anything.
4689 */
4690 uint32_t const fNemNotify = (pOverlappingRange ? NEM_NOTIFY_PHYS_ROM_F_REPLACE : 0)
4691 | (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? NEM_NOTIFY_PHYS_ROM_F_SHADOW : 0);
4692 uint8_t u2NemState = UINT8_MAX;
4693 uint32_t uNemRange = 0;
4694 if (VM_IS_NEM_ENABLED(pVM))
4695 {
4696 int rc = NEMR3NotifyPhysRomRegisterEarly(pVM, GCPhys, cGuestPages << GUEST_PAGE_SHIFT,
4697 pOverlappingRange
4698 ? PGM_RAMRANGE_CALC_PAGE_R3PTR(pOverlappingRange, GCPhys) : NULL,
4699 fNemNotify, &u2NemState,
4700 pOverlappingRange ? &pOverlappingRange->uNemRange : &uNemRange);
4701 AssertLogRelRCReturn(rc, rc);
4702 }
4703#endif
4704
4705 /*
4706 * Allocate memory for the virgin copy of the RAM. In simplified memory
4707 * mode, we allocate memory for any ad-hoc RAM range and for shadow pages.
4708 */
4709 int rc;
4710 PGMMALLOCATEPAGESREQ pReq = NULL;
4711#ifdef VBOX_WITH_PGM_NEM_MODE
4712 void *pvRam = NULL;
4713 void *pvAlt = NULL;
4714 if (PGM_IS_IN_NEM_MODE(pVM))
4715 {
4716 if (!pOverlappingRange)
4717 {
4718 rc = SUPR3PageAlloc(cHostPages, 0, &pvRam);
4719 if (RT_FAILURE(rc))
4720 return rc;
4721 }
4722 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4723 {
4724 rc = SUPR3PageAlloc(cHostPages, 0, &pvAlt);
4725 if (RT_FAILURE(rc))
4726 {
4727 if (pvRam)
4728 SUPR3PageFree(pvRam, cHostPages);
4729 return rc;
4730 }
4731 }
4732 }
4733 else
4734#endif
4735 {
4736#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4737 rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cGuestPages, GMMACCOUNT_BASE);
4738 AssertRCReturn(rc, rc);
4739
4740 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
4741 {
4742 pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << GUEST_PAGE_SHIFT);
4743 pReq->aPages[iPage].fZeroed = false;
4744 pReq->aPages[iPage].idPage = NIL_GMM_PAGEID;
4745 pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID;
4746 }
4747
4748 rc = GMMR3AllocatePagesPerform(pVM, pReq);
4749 if (RT_FAILURE(rc))
4750 {
4751 GMMR3AllocatePagesCleanup(pReq);
4752 return rc;
4753 }
4754#endif
4755 }
4756
4757 /*
4758 * Allocate a RAM range if required.
4759 * Note! We don't clean up the RAM range here on failure, VM destruction does that.
4760 */
4761 rc = VINF_SUCCESS;
4762 PPGMRAMRANGE pRamRange = NULL;
4763 if (!pOverlappingRange)
4764 rc = pgmR3PhysAllocateRamRange(pVM, pVCpu, cGuestPages, PGM_RAM_RANGE_FLAGS_AD_HOC_ROM, &pRamRange);
4765 if (RT_SUCCESS(rc))
4766 {
4767 /*
4768 * Allocate a ROM range.
4769 * Note! We don't clean up the ROM range here on failure, VM destruction does that.
4770 */
4771#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
4772 if (!SUPR3IsDriverless())
4773 {
4774 PGMPHYSROMALLOCATERANGEREQ RomRangeReq;
4775 RomRangeReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
4776 RomRangeReq.Hdr.cbReq = sizeof(RomRangeReq);
4777 RomRangeReq.cbGuestPage = GUEST_PAGE_SIZE;
4778 RomRangeReq.cGuestPages = cGuestPages;
4779 RomRangeReq.idRomRange = idRomRange;
4780 RomRangeReq.fFlags = fFlags;
4781 rc = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_PGM_PHYS_ROM_ALLOCATE_RANGE, 0 /*u64Arg*/, &RomRangeReq.Hdr);
4782 }
4783 else
4784#endif
4785 rc = pgmPhysRomRangeAllocCommon(pVM, cGuestPages, idRomRange, fFlags);
4786 }
4787 if (RT_SUCCESS(rc))
4788 {
4789 /*
4790 * Initialize and map the RAM range (if required).
4791 */
4792 PPGMROMRANGE const pRomRange = pVM->pgm.s.apRomRanges[idRomRange];
4793 AssertPtr(pRomRange);
4794 uint32_t const idxFirstRamPage = pOverlappingRange ? (GCPhys - pOverlappingRange->GCPhys) >> GUEST_PAGE_SHIFT : 0;
4795 PPGMROMPAGE pRomPage = &pRomRange->aPages[0];
4796 if (!pOverlappingRange)
4797 {
4798 /* Initialize the new RAM range and insert it into the lookup table. */
4799 pRamRange->pszDesc = pszDesc;
4800#ifdef VBOX_WITH_NATIVE_NEM
4801 pRamRange->uNemRange = uNemRange;
4802#endif
4803
4804 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4805#ifdef VBOX_WITH_PGM_NEM_MODE
4806 if (PGM_IS_IN_NEM_MODE(pVM))
4807 {
4808 AssertPtr(pvRam); Assert(pReq == NULL);
4809 pRamRange->pbR3 = (uint8_t *)pvRam;
4810 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4811 {
4812 PGM_PAGE_INIT(pRamPage, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4813 PGMPAGETYPE_ROM, PGM_PAGE_STATE_ALLOCATED);
4814 pRomPage->Virgin = *pRamPage;
4815 }
4816 }
4817 else
4818#endif
4819 {
4820#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4821 Assert(!pRamRange->pbR3); Assert(!pvRam);
4822 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4823 {
4824 PGM_PAGE_INIT(pRamPage,
4825 pReq->aPages[iPage].HCPhysGCPhys,
4826 pReq->aPages[iPage].idPage,
4827 PGMPAGETYPE_ROM,
4828 PGM_PAGE_STATE_ALLOCATED);
4829
4830 pRomPage->Virgin = *pRamPage;
4831 }
4832#endif
4833 }
4834
4835 pVM->pgm.s.cAllPages += cGuestPages;
4836 pVM->pgm.s.cPrivatePages += cGuestPages;
4837
4838 rc = pgmR3PhysRamRangeInsertLookup(pVM, pRamRange, GCPhys, &idxInsert);
4839 }
4840 else
4841 {
4842 /* Insert the ROM into an existing RAM range. */
4843 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
4844#ifdef VBOX_WITH_PGM_NEM_MODE
4845 if (PGM_IS_IN_NEM_MODE(pVM))
4846 {
4847 Assert(pvRam == NULL); Assert(pReq == NULL);
4848 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4849 {
4850 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
4851 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
4852 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
4853 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4854 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4855 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4856 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4857 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4858
4859 pRomPage->Virgin = *pRamPage;
4860 }
4861 }
4862 else
4863#endif
4864 {
4865#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
4866 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
4867 {
4868 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_ROM);
4869 PGM_PAGE_SET_HCPHYS(pVM, pRamPage, pReq->aPages[iPage].HCPhysGCPhys);
4870 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
4871 PGM_PAGE_SET_PAGEID(pVM, pRamPage, pReq->aPages[iPage].idPage);
4872 PGM_PAGE_SET_PDE_TYPE(pVM, pRamPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4873 PGM_PAGE_SET_PTE_INDEX(pVM, pRamPage, 0);
4874 PGM_PAGE_SET_TRACKING(pVM, pRamPage, 0);
4875
4876 pRomPage->Virgin = *pRamPage;
4877 }
4878 pVM->pgm.s.cZeroPages -= cGuestPages;
4879 pVM->pgm.s.cPrivatePages += cGuestPages;
4880#endif
4881 }
4882 pRamRange = pOverlappingRange;
4883 }
4884
4885 if (RT_SUCCESS(rc))
4886 {
4887#ifdef VBOX_WITH_NATIVE_NEM
4888 /* Set the NEM state of the pages if needed. */
4889 if (u2NemState != UINT8_MAX)
4890 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4891#endif
4892
4893 /* Flush physical page map TLB. */
4894 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
4895
4896 /*
4897 * Register the ROM access handler.
4898 */
4899 rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, pVM->pgm.s.hRomPhysHandlerType, idRomRange, pszDesc);
4900 if (RT_SUCCESS(rc))
4901 {
4902 /*
4903 * Copy the image over to the virgin pages.
4904 * This must be done after linking in the RAM range.
4905 */
4906 size_t cbBinaryLeft = cbBinary;
4907 PPGMPAGE pRamPage = &pRamRange->aPages[idxFirstRamPage];
4908 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
4909 {
4910 void *pvDstPage;
4911 rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << GUEST_PAGE_SHIFT), &pvDstPage);
4912 if (RT_FAILURE(rc))
4913 {
4914 VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys);
4915 break;
4916 }
4917 if (cbBinaryLeft >= GUEST_PAGE_SIZE)
4918 {
4919 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), GUEST_PAGE_SIZE);
4920 cbBinaryLeft -= GUEST_PAGE_SIZE;
4921 }
4922 else
4923 {
4924 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE); /* (shouldn't be necessary, but can't hurt either) */
4925 if (cbBinaryLeft > 0)
4926 {
4927 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << GUEST_PAGE_SHIFT), cbBinaryLeft);
4928 cbBinaryLeft = 0;
4929 }
4930 }
4931 }
4932 if (RT_SUCCESS(rc))
4933 {
4934 /*
4935 * Initialize the ROM range.
4936 * Note that the Virgin member of the pages has already been initialized above.
4937 */
4938 Assert(pRomRange->cb == cb);
4939 Assert(pRomRange->fFlags == fFlags);
4940 Assert(pRomRange->idSavedState == UINT8_MAX);
4941 pRomRange->GCPhys = GCPhys;
4942 pRomRange->GCPhysLast = GCPhysLast;
4943 pRomRange->cbOriginal = cbBinary;
4944 pRomRange->pszDesc = pszDesc;
4945#ifdef VBOX_WITH_PGM_NEM_MODE
4946 pRomRange->pbR3Alternate = (uint8_t *)pvAlt;
4947#endif
4948 pRomRange->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY
4949 ? pvBinary : RTMemDup(pvBinary, cbBinary);
4950 if (pRomRange->pvOriginal)
4951 {
4952 for (unsigned iPage = 0; iPage < cGuestPages; iPage++)
4953 {
4954 PPGMROMPAGE const pPage = &pRomRange->aPages[iPage];
4955 pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE;
4956#ifdef VBOX_WITH_PGM_NEM_MODE
4957 if (PGM_IS_IN_NEM_MODE(pVM))
4958 PGM_PAGE_INIT(&pPage->Shadow, UINT64_C(0x0000fffffffff000), NIL_GMM_PAGEID,
4959 PGMPAGETYPE_ROM_SHADOW, PGM_PAGE_STATE_ALLOCATED);
4960 else
4961#endif
4962 PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW);
4963 }
4964
4965 /* update the page count stats for the shadow pages. */
4966 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
4967 {
4968 if (PGM_IS_IN_NEM_MODE(pVM))
4969 pVM->pgm.s.cPrivatePages += cGuestPages;
4970 else
4971 pVM->pgm.s.cZeroPages += cGuestPages;
4972 pVM->pgm.s.cAllPages += cGuestPages;
4973 }
4974
4975#ifdef VBOX_WITH_NATIVE_NEM
4976 /*
4977 * Notify NEM again.
4978 */
4979 if (VM_IS_NEM_ENABLED(pVM))
4980 {
4981 u2NemState = UINT8_MAX;
4982 rc = NEMR3NotifyPhysRomRegisterLate(pVM, GCPhys, cb, PGM_RAMRANGE_CALC_PAGE_R3PTR(pRamRange, GCPhys),
4983 fNemNotify, &u2NemState, &pRamRange->uNemRange);
4984 if (u2NemState != UINT8_MAX)
4985 pgmPhysSetNemStateForPages(&pRamRange->aPages[idxFirstRamPage], cGuestPages, u2NemState);
4986 }
4987 else
4988#endif
4989 GMMR3AllocatePagesCleanup(pReq);
4990 if (RT_SUCCESS(rc))
4991 {
4992 /*
4993 * Done!
4994 */
4995#ifdef VBOX_STRICT
4996 pgmPhysAssertRamRangesLocked(pVM, false /*fInUpdate*/, false /*fRamRelaxed*/);
4997#endif
4998 return rc;
4999 }
5000
5001 /*
5002 * bail out
5003 */
5004#ifdef VBOX_WITH_NATIVE_NEM
5005 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5006 {
5007 Assert(VM_IS_NEM_ENABLED(pVM));
5008 pVM->pgm.s.cPrivatePages -= cGuestPages;
5009 pVM->pgm.s.cAllPages -= cGuestPages;
5010 }
5011#endif
5012 }
5013 else
5014 rc = VERR_NO_MEMORY;
5015 }
5016
5017 int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys);
5018 AssertRC(rc2);
5019 }
5020
5021 idxInsert -= 1;
5022 if (!pOverlappingRange)
5023 pgmR3PhysRamRangeRemoveLookup(pVM, pRamRange, &idxInsert);
5024 }
5025 /* else: lookup insertion failed. */
5026
5027 if (pOverlappingRange)
5028 {
5029 PPGMPAGE pRamPage = &pOverlappingRange->aPages[idxFirstRamPage];
5030#ifdef VBOX_WITH_PGM_NEM_MODE
5031 if (PGM_IS_IN_NEM_MODE(pVM))
5032 {
5033 Assert(pvRam == NULL); Assert(pReq == NULL);
5034 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++, pRomPage++)
5035 {
5036 Assert(PGM_PAGE_GET_HCPHYS(pRamPage) == UINT64_C(0x0000fffffffff000) || PGM_PAGE_GET_HCPHYS(pRamPage) == 0);
5037 Assert(PGM_PAGE_GET_PAGEID(pRamPage) == NIL_GMM_PAGEID);
5038 Assert(PGM_PAGE_GET_STATE(pRamPage) == PGM_PAGE_STATE_ALLOCATED);
5039 PGM_PAGE_SET_TYPE(pVM, pRamPage, PGMPAGETYPE_RAM);
5040 PGM_PAGE_SET_STATE(pVM, pRamPage, PGM_PAGE_STATE_ALLOCATED);
5041 }
5042 }
5043 else
5044#endif
5045 {
5046#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5047 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++, pRamPage++)
5048 PGM_PAGE_INIT_ZERO(pRamPage, pVM, PGMPAGETYPE_RAM);
5049 pVM->pgm.s.cZeroPages += cGuestPages;
5050 pVM->pgm.s.cPrivatePages -= cGuestPages;
5051#endif
5052 }
5053 }
5054 }
5055 pgmPhysInvalidatePageMapTLB(pVM, false /*fInRendezvous*/);
5056 pgmPhysInvalidRamRangeTlbs(pVM);
5057
5058#ifdef VBOX_WITH_PGM_NEM_MODE
5059 if (PGM_IS_IN_NEM_MODE(pVM))
5060 {
5061 Assert(!pReq);
5062 if (pvRam)
5063 SUPR3PageFree(pvRam, cHostPages);
5064 if (pvAlt)
5065 SUPR3PageFree(pvAlt, cHostPages);
5066 }
5067 else
5068#endif
5069 {
5070#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5071 GMMR3FreeAllocatedPages(pVM, pReq);
5072 GMMR3AllocatePagesCleanup(pReq);
5073#endif
5074 }
5075
5076 /* We don't bother to actually free either the ROM or the RAM ranges
5077 themselves; as already mentioned above, we'll leave that to the VM
5078 termination cleanup code. */
5079 return rc;
5080}
5081
5082
5083/**
5084 * Registers a ROM image.
5085 *
5086 * Shadowed ROM images require double the amount of backing memory, so
5087 * don't use that unless you have to. Shadowing of ROM images is a process
5088 * where we can select where the reads go and where the writes go. On real
5089 * hardware the chipset provides means to configure this. We provide
5090 * PGMR3PhysRomProtect() for this purpose.
5091 *
5092 * A read-only copy of the ROM image will always be kept around while we
5093 * will allocate RAM pages for the changes on demand (unless all memory
5094 * is configured to be preallocated).
5095 *
5096 * @returns VBox status code.
5097 * @param pVM The cross context VM structure.
5098 * @param pDevIns The device instance owning the ROM.
5099 * @param GCPhys First physical address in the range.
5100 * Must be page aligned!
5101 * @param cb The size of the range (in bytes).
5102 * Must be page aligned!
5103 * @param pvBinary Pointer to the binary data backing the ROM image.
5104 * @param cbBinary The size of the binary data pvBinary points to.
5105 * This must be less than or equal to @a cb.
5106 * @param fFlags Mask of flags, PGMPHYS_ROM_FLAGS_XXX.
5107 * @param pszDesc Pointer to description string. This must not be freed.
5108 *
5109 * @remark There is no way to remove the ROM yet, neither automatically on device
5110 * cleanup nor manually from the device. This isn't difficult in any way, it's
5111 * just not something we expect to be necessary for a while.
5112 */
5113VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
5114 const void *pvBinary, uint32_t cbBinary, uint8_t fFlags, const char *pszDesc)
5115{
5116 Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n",
5117 pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc));
5118 PGM_LOCK_VOID(pVM);
5119
5120 int rc = pgmR3PhysRomRegisterLocked(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc);
5121
5122 PGM_UNLOCK(pVM);
5123 return rc;
5124}
5125
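/*
 * Illustrative sketch only (not taken from a real device): registering a
 * 64 KB shadowed ROM at the legacy BIOS address from a device construction
 * context.  The g_abExampleBios array is an assumption made for this example.
 *
 * @code
 *      static const uint8_t g_abExampleBios[_64K] = { 0xcc };
 *      int rc = PGMR3PhysRomRegister(pVM, pDevIns, UINT32_C(0x000f0000), _64K,
 *                                    g_abExampleBios, sizeof(g_abExampleBios),
 *                                    PGMPHYS_ROM_FLAGS_SHADOWED, "Example BIOS");
 *      AssertRCReturn(rc, rc);
 * @endcode
 */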
5126
5127/**
5128 * Called by PGMR3MemSetup to reset the shadow, switch to the virgin, and verify
5129 * that the virgin part is untouched.
5130 *
5131 * This is done after the normal memory has been cleared.
5132 *
5133 * ASSUMES that the caller owns the PGM lock.
5134 *
5135 * @param pVM The cross context VM structure.
5136 */
5137int pgmR3PhysRomReset(PVM pVM)
5138{
5139 PGM_LOCK_ASSERT_OWNER(pVM);
5140 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5141 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5142 {
5143 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5144 uint32_t const cGuestPages = pRom->cb >> GUEST_PAGE_SHIFT;
5145
5146 if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
5147 {
5148 /*
5149 * Reset the physical handler.
5150 */
5151 int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE);
5152 AssertRCReturn(rc, rc);
5153
5154 /*
5155 * What we do with the shadow pages depends on the memory
5156 * preallocation option. If not enabled, we'll just throw
5157 * out all the dirty pages and replace them by the zero page.
5158 */
5159#ifdef VBOX_WITH_PGM_NEM_MODE
5160 if (PGM_IS_IN_NEM_MODE(pVM))
5161 {
5162 /* Clear all the shadow pages (currently using alternate backing). */
5163 RT_BZERO(pRom->pbR3Alternate, pRom->cb);
5164 }
5165# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5166 else
5167# endif
5168#endif
5169#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5170 if (!pVM->pgm.s.fRamPreAlloc)
5171 {
5172 /* Free the dirty pages. */
5173 uint32_t cPendingPages = 0;
5174 PGMMFREEPAGESREQ pReq;
5175 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5176 AssertRCReturn(rc, rc);
5177
5178 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5179 if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)
5180 && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow))
5181 {
5182 Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED);
5183 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow,
5184 pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT),
5185 (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pRom->aPages[iPage].Shadow));
5186 AssertLogRelRCReturn(rc, rc);
5187 }
5188
5189 if (cPendingPages)
5190 {
5191 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5192 AssertLogRelRCReturn(rc, rc);
5193 }
5194 GMMR3FreePagesCleanup(pReq);
5195 }
5196 else
5197 {
5198 /* clear all the shadow pages. */
5199 for (uint32_t iPage = 0; iPage < cGuestPages; iPage++)
5200 {
5201 if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow))
5202 continue;
5203 Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow));
5204 void *pvDstPage;
5205 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5206 rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage);
5207 if (RT_FAILURE(rc))
5208 break;
5209 RT_BZERO(pvDstPage, GUEST_PAGE_SIZE);
5210 }
5211 AssertRCReturn(rc, rc);
5212 }
5213#endif
5214 }
5215
5216 /*
5217 * Restore the original ROM pages after a saved state load.
5218 * Also, in strict builds check that ROM pages remain unmodified.
5219 */
5220#ifndef VBOX_STRICT
5221 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5222#endif
5223 {
5224 size_t cbSrcLeft = pRom->cbOriginal;
5225 uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal;
5226 uint32_t cRestored = 0;
5227 for (uint32_t iPage = 0; iPage < cGuestPages && cbSrcLeft > 0; iPage++, pbSrcPage += GUEST_PAGE_SIZE)
5228 {
5229 RTGCPHYS const GCPhys = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5230 PPGMPAGE const pPage = pgmPhysGetPage(pVM, GCPhys);
5231 void const *pvDstPage = NULL;
5232 int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhys, &pvDstPage);
5233 if (RT_FAILURE(rc))
5234 break;
5235
5236 if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE)))
5237 {
5238 if (pVM->pgm.s.fRestoreRomPagesOnReset)
5239 {
5240 void *pvDstPageW = NULL;
5241 rc = pgmPhysPageMap(pVM, pPage, GCPhys, &pvDstPageW);
5242 AssertLogRelRCReturn(rc, rc);
5243 memcpy(pvDstPageW, pbSrcPage, RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE));
5244 cRestored++;
5245 }
5246 else
5247 LogRel(("pgmR3PhysRomReset: %RGp: ROM page changed (%s)\n", GCPhys, pRom->pszDesc));
5248 }
5249 cbSrcLeft -= RT_MIN(cbSrcLeft, GUEST_PAGE_SIZE);
5250 }
5251 if (cRestored > 0)
5252 LogRel(("PGM: ROM \"%s\": Reloaded %u of %u pages.\n", pRom->pszDesc, cRestored, cGuestPages));
5253 }
5254 }
5255
5256 /* Clear the ROM restore flag now as we only need to do this once after
5257 loading saved state. */
5258 pVM->pgm.s.fRestoreRomPagesOnReset = false;
5259
5260 return VINF_SUCCESS;
5261}
5262
5263
5264/**
5265 * Called by PGMR3Term to free resources.
5266 *
5267 * ASSUMES that the caller owns the PGM lock.
5268 *
5269 * @param pVM The cross context VM structure.
5270 */
5271void pgmR3PhysRomTerm(PVM pVM)
5272{
5273 /*
5274 * Free the heap copy of the original bits.
5275 */
5276 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5277 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5278 {
5279 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5280 if ( pRom->pvOriginal
5281 && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY))
5282 {
5283 RTMemFree((void *)pRom->pvOriginal);
5284 pRom->pvOriginal = NULL;
5285 }
5286 }
5287}
5288
5289
5290/**
5291 * Change the shadowing of a range of ROM pages.
5292 *
5293 * This is intended for implementing chipset specific memory registers
5294 * This is intended for implementing chipset-specific memory registers
5295 * and will not be very strict about the input. It will silently ignore
5296 * any pages that are not part of a shadowed ROM.
5297 * @returns VBox status code.
5298 * @retval VINF_PGM_SYNC_CR3
5299 *
5300 * @param pVM The cross context VM structure.
5301 * @param GCPhys Where to start. Page aligned.
5302 * @param cb How much to change. Page aligned.
5303 * @param enmProt The new ROM protection.
5304 */
5305VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt)
5306{
5307 LogFlow(("PGMR3PhysRomProtect: GCPhys=%RGp cb=%RGp enmProt=%d\n", GCPhys, cb, enmProt));
5308
5309 /*
5310 * Check input
5311 */
5312 if (!cb)
5313 return VINF_SUCCESS;
5314 AssertReturn(!(GCPhys & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5315 AssertReturn(!(cb & GUEST_PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
5316 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
5317 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
5318 AssertReturn(enmProt >= PGMROMPROT_INVALID && enmProt <= PGMROMPROT_END, VERR_INVALID_PARAMETER);
5319
5320 /*
5321 * Process the request.
5322 */
5323 PGM_LOCK_VOID(pVM);
5324 int rc = VINF_SUCCESS;
5325 bool fFlushTLB = false;
5326 uint32_t const cRomRanges = RT_MIN(pVM->pgm.s.cRomRanges, RT_ELEMENTS(pVM->pgm.s.apRomRanges));
5327 for (uint32_t idx = 0; idx < cRomRanges; idx++)
5328 {
5329 PPGMROMRANGE const pRom = pVM->pgm.s.apRomRanges[idx];
5330 if ( GCPhys <= pRom->GCPhysLast
5331 && GCPhysLast >= pRom->GCPhys
5332 && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))
5333 {
5334 /*
5335 * Iterate the relevant pages and make the necessary changes.
5336 */
5337#ifdef VBOX_WITH_NATIVE_NEM
5338 PPGMRAMRANGE const pRam = pgmPhysGetRange(pVM, GCPhys);
5339 AssertPtrReturn(pRam, VERR_INTERNAL_ERROR_3);
5340#endif
5341 bool fChanges = false;
5342 uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast
5343 ? pRom->cb >> GUEST_PAGE_SHIFT
5344 : (GCPhysLast - pRom->GCPhys + 1) >> GUEST_PAGE_SHIFT;
5345 for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> GUEST_PAGE_SHIFT;
5346 iPage < cPages;
5347 iPage++)
5348 {
5349 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
5350 if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt))
5351 {
5352 fChanges = true;
5353
5354 /* flush references to the page. */
5355 RTGCPHYS const GCPhysPage = pRom->GCPhys + (iPage << GUEST_PAGE_SHIFT);
5356 PPGMPAGE pRamPage = pgmPhysGetPage(pVM, GCPhysPage);
5357#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5358 int rc2 = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pRamPage, true /*fFlushPTEs*/, &fFlushTLB);
5359 if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2)))
5360 rc = rc2;
5361#endif
5362#ifdef VBOX_WITH_NATIVE_NEM
5363 uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pRamPage);
5364#endif
5365
5366 PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow;
5367 PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin;
5368
5369 *pOld = *pRamPage;
5370 *pRamPage = *pNew;
5371 /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */
5372
5373#ifdef VBOX_WITH_NATIVE_NEM
5374# ifdef VBOX_WITH_PGM_NEM_MODE
5375 /* In simplified mode we have to switch the page data around too. */
5376 if (PGM_IS_IN_NEM_MODE(pVM))
5377 {
5378 uint8_t abPage[GUEST_PAGE_SIZE];
5379 uint8_t * const pbRamPage = PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage);
5380 memcpy(abPage, &pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], sizeof(abPage));
5381 memcpy(&pRom->pbR3Alternate[(size_t)iPage << GUEST_PAGE_SHIFT], pbRamPage, sizeof(abPage));
5382 memcpy(pbRamPage, abPage, sizeof(abPage));
5383 }
5384# endif
5385 /* Tell NEM about the backing and protection change. */
5386 if (VM_IS_NEM_ENABLED(pVM))
5387 {
5388 PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pNew);
5389 NEMHCNotifyPhysPageChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pOld), PGM_PAGE_GET_HCPHYS(pNew),
5390 PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhysPage),
5391 pgmPhysPageCalcNemProtection(pRamPage, enmType), enmType, &u2State);
5392 PGM_PAGE_SET_NEM_STATE(pRamPage, u2State);
5393 }
5394#endif
5395 }
5396 pRomPage->enmProt = enmProt;
5397 }
5398
5399 /*
5400 * Reset the access handler if we made changes, no need to optimize this.
5401 */
5402 if (fChanges)
5403 {
5404 int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys);
5405 if (RT_FAILURE(rc2))
5406 {
5407 PGM_UNLOCK(pVM);
5408 AssertRC(rc);
5409 return rc2;
5410 }
5411
5412 /* Explicitly flush IEM. Not sure if this is really necessary, but better
5413 be on the safe side. This shouldn't be a high volume flush source. */
5414 IEMTlbInvalidateAllPhysicalAllCpus(pVM, NIL_VMCPUID, IEMTLBPHYSFLUSHREASON_ROM_PROTECT);
5415 }
5416
5417 /* Advance - cb isn't updated. */
5418 GCPhys = pRom->GCPhys + (cPages << GUEST_PAGE_SHIFT);
5419 }
5420 }
5421 PGM_UNLOCK(pVM);
5422 if (fFlushTLB)
5423 PGM_INVL_ALL_VCPU_TLBS(pVM);
5424
5425 return rc;
5426}
5427
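/*
 * Illustrative sketch only: how chipset emulation might react to the guest
 * programming a (hypothetical) shadow-RAM control register for a 64 KB
 * segment.  GCPhysSeg and fShadowEnabled are assumptions for this example;
 * PGMROMPROT_READ_RAM_WRITE_RAM is assumed to come from the PGMROMPROT
 * enumeration in pgm.h.
 *
 * @code
 *      RTGCPHYS const GCPhysSeg = UINT32_C(0x000e0000);
 *      int rc = PGMR3PhysRomProtect(pVM, GCPhysSeg, _64K,
 *                                   fShadowEnabled
 *                                   ? PGMROMPROT_READ_RAM_WRITE_RAM
 *                                   : PGMROMPROT_READ_ROM_WRITE_IGNORE);
 *      AssertRCReturn(rc, rc);
 * @endcode
 */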
5428
5429
5430/*********************************************************************************************************************************
5431* Ballooning *
5432*********************************************************************************************************************************/
5433
5434#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5435
5436/**
5437 * Rendezvous callback used by PGMR3ChangeMemBalloon that changes the memory balloon size
5438 *
5439 * This is only called on one of the EMTs while the other ones are waiting for
5440 * it to complete this function.
5441 *
5442 * @returns VINF_SUCCESS (VBox strict status code).
5443 * @param pVM The cross context VM structure.
5444 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5445 * @param pvUser User parameter
5446 */
5447static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5448{
5449 uintptr_t *paUser = (uintptr_t *)pvUser;
5450 bool fInflate = !!paUser[0];
5451 unsigned cPages = paUser[1];
5452 RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2];
5453 int rc = VINF_SUCCESS;
5454 RT_NOREF(pVCpu);
5455
5456 Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages));
5457 PGM_LOCK_VOID(pVM);
5458
5459 if (fInflate)
5460 {
5461# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5462 /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */
5463 pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL);
5464# endif
5465
5466 /* Replace pages with ZERO pages. */
5467 uint32_t cPendingPages = 0;
5468 PGMMFREEPAGESREQ pReq = NULL;
5469 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
5470 if (RT_FAILURE(rc))
5471 {
5472 PGM_UNLOCK(pVM);
5473 AssertLogRelRC(rc);
5474 return rc;
5475 }
5476
5477 /* Iterate the pages. */
5478 for (unsigned i = 0; i < cPages; i++)
5479 {
5480 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5481 if ( pPage == NULL
5482 || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM)
5483 {
5484 Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0));
5485 break;
5486 }
5487
5488 LogFlow(("balloon page: %RGp\n", paPhysPage[i]));
5489
5490# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5491 /* Flush the shadow PT if this page was previously used as a guest page table. */
5492 pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]);
5493# endif
5494
5495 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i], (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage));
5496 if (RT_FAILURE(rc))
5497 {
5498 PGM_UNLOCK(pVM);
5499 AssertLogRelRC(rc);
5500 return rc;
5501 }
5502 Assert(PGM_PAGE_IS_ZERO(pPage));
5503 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED);
5504 }
5505
5506 if (cPendingPages)
5507 {
5508 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
5509 if (RT_FAILURE(rc))
5510 {
5511 PGM_UNLOCK(pVM);
5512 AssertLogRelRC(rc);
5513 return rc;
5514 }
5515 }
5516 GMMR3FreePagesCleanup(pReq);
5517 }
5518 else
5519 {
5520 /* Iterate the pages. */
5521 for (unsigned i = 0; i < cPages; i++)
5522 {
5523 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
5524 AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM);
5525
5526 LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i]));
5527
5528 Assert(PGM_PAGE_IS_BALLOONED(pPage));
5529
5530 /* Change back to zero page. (NEM does not need to be informed.) */
5531 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
5532 }
5533
5534 /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */
5535 }
5536
5537# if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5538 /* Notify GMM about the balloon change. */
5539 rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages);
5540 if (RT_SUCCESS(rc))
5541 {
5542 if (!fInflate)
5543 {
5544 Assert(pVM->pgm.s.cBalloonedPages >= cPages);
5545 pVM->pgm.s.cBalloonedPages -= cPages;
5546 }
5547 else
5548 pVM->pgm.s.cBalloonedPages += cPages;
5549 }
5550# endif
5551
5552 PGM_UNLOCK(pVM);
5553
5554 /* Flush the recompiler's TLB as well. */
5555 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5556 CPUMSetChangedFlags(pVM->apCpusR3[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5557
5558 AssertLogRelRC(rc);
5559 return rc;
5560}
5561
5562
5563/**
5564 * Helper for PGMR3PhysChangeMemBalloon; performs the requested balloon change via an EMT rendezvous.
5565 *
5566 * @param pVM The cross context VM structure.
5567 * @param fInflate Inflate or deflate memory balloon
5568 * @param cPages Number of pages to free
5569 * @param paPhysPage Array of guest physical addresses
5570 */
5571static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5572{
5573 uintptr_t paUser[3];
5574
5575 paUser[0] = fInflate;
5576 paUser[1] = cPages;
5577 paUser[2] = (uintptr_t)paPhysPage;
5578 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5579 AssertRC(rc);
5580
5581 /* Made a copy in PGMR3PhysChangeMemBalloon; free it here. */
5582 RTMemFree(paPhysPage);
5583}
5584
5585#endif /* 64-bit host && (Windows || Solaris || Linux || FreeBSD) */
5586
5587/**
5588 * Inflate or deflate a memory balloon
5589 *
5590 * @returns VBox status code.
5591 * @param pVM The cross context VM structure.
5592 * @param fInflate Inflate or deflate memory balloon
5593 * @param cPages Number of pages to free
5594 * @param paPhysPage Array of guest physical addresses
5595 */
5596VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
5597{
5598 /* This must match GMMR0Init; currently we only support memory ballooning on 64-bit hosts, except Mac OS X. */
5599#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
5600 int rc;
5601
5602 /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */
5603 AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER);
5604
5605 /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock.
5606 * In the SMP case we post a request packet to postpone the job.
5607 */
5608 if (pVM->cCpus > 1)
5609 {
5610 unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]);
5611 RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage);
5612 AssertReturn(paPhysPageCopy, VERR_NO_MEMORY);
5613
5614 memcpy(paPhysPageCopy, paPhysPage, cbPhysPage);
5615
5616 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4,
5617 pVM, fInflate, cPages, paPhysPageCopy);
5618 AssertRC(rc);
5619 }
5620 else
5621 {
5622 uintptr_t paUser[3];
5623
5624 paUser[0] = fInflate;
5625 paUser[1] = cPages;
5626 paUser[2] = (uintptr_t)paPhysPage;
5627 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
5628 AssertRC(rc);
5629 }
5630 return rc;
5631
5632#else
5633 NOREF(pVM); NOREF(fInflate); NOREF(cPages); NOREF(paPhysPage);
5634 return VERR_NOT_IMPLEMENTED;
5635#endif
5636}
5637
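/*
 * Illustrative sketch only: inflating the balloon with a small batch of
 * page-aligned guest physical addresses.  The address values are made up
 * for the example; real callers receive them from the guest additions.
 *
 * @code
 *      RTGCPHYS aGCPhysPages[2] = { UINT32_C(0x00100000), UINT32_C(0x00101000) };
 *      int rc = PGMR3PhysChangeMemBalloon(pVM, true, RT_ELEMENTS(aGCPhysPages), aGCPhysPages);
 *      AssertRC(rc);
 * @endcode
 */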
5638
5639
5640/*********************************************************************************************************************************
5641* Write Monitoring *
5642*********************************************************************************************************************************/
5643
5644/**
5645 * Rendezvous callback used by PGMR3WriteProtectRAM that write protects all
5646 * physical RAM.
5647 *
5648 * This is only called on one of the EMTs while the other ones are waiting for
5649 * it to complete this function.
5650 *
5651 * @returns VINF_SUCCESS (VBox strict status code).
5652 * @param pVM The cross context VM structure.
5653 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5654 * @param pvUser User parameter, unused.
5655 */
5656static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5657{
5658 int rc = VINF_SUCCESS;
5659 NOREF(pvUser); NOREF(pVCpu);
5660
5661 PGM_LOCK_VOID(pVM);
5662#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5663 pgmPoolResetDirtyPages(pVM);
5664#endif
5665
5666 uint32_t const cLookupEntries = RT_MIN(pVM->pgm.s.RamRangeUnion.cLookupEntries, RT_ELEMENTS(pVM->pgm.s.aRamRangeLookup));
5667 for (uint32_t idxLookup = 0; idxLookup < cLookupEntries; idxLookup++)
5668 {
5669 uint32_t const idRamRange = PGMRAMRANGELOOKUPENTRY_GET_ID(pVM->pgm.s.aRamRangeLookup[idxLookup]);
5670 AssertContinue(idRamRange < RT_ELEMENTS(pVM->pgm.s.apRamRanges));
5671 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
5672 AssertContinue(pRam);
5673
5674 uint32_t cPages = pRam->cb >> GUEST_PAGE_SHIFT;
5675 for (uint32_t iPage = 0; iPage < cPages; iPage++)
5676 {
5677 PPGMPAGE const pPage = &pRam->aPages[iPage];
5678 PGMPAGETYPE const enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
5679
5680 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
5681 || enmPageType == PGMPAGETYPE_MMIO2)
5682 {
5683 /*
5684 * A RAM page.
5685 */
5686 switch (PGM_PAGE_GET_STATE(pPage))
5687 {
5688 case PGM_PAGE_STATE_ALLOCATED:
5689 /** @todo Optimize this: Don't always re-enable write
5690 * monitoring if the page is known to be very busy. */
5691 if (PGM_PAGE_IS_WRITTEN_TO(pPage))
5692 PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage);
5693
5694 pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT));
5695 break;
5696
5697 case PGM_PAGE_STATE_SHARED:
5698 AssertFailed();
5699 break;
5700
5701 case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */
5702 default:
5703 break;
5704 }
5705 }
5706 }
5707 }
5708#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5709 pgmR3PoolWriteProtectPages(pVM);
5710#endif
5711 PGM_INVL_ALL_VCPU_TLBS(pVM);
5712 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
5713 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5714
5715 PGM_UNLOCK(pVM);
5716 return rc;
5717}
5718
5719/**
5720 * Protect all physical RAM to monitor writes
5721 *
5722 * @returns VBox status code.
5723 * @param pVM The cross context VM structure.
5724 */
5725VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM)
5726{
5727 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
5728
5729 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL);
5730 AssertRC(rc);
5731 return rc;
5732}
5733
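/*
 * Illustrative sketch only: enabling write monitoring of all guest RAM,
 * e.g. as a preparation step for dirty-page tracking.  Must be called on
 * an EMT.
 *
 * @code
 *      int rc = PGMR3PhysWriteProtectRAM(pVM);
 *      AssertRCReturn(rc, rc);
 * @endcode
 */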
5734
5735/*********************************************************************************************************************************
5736* Stats. *
5737*********************************************************************************************************************************/
5738
5739/**
5740 * Query the amount of free memory inside VMMR0
5741 *
5742 * @returns VBox status code.
5743 * @param pUVM The user mode VM handle.
5744 * @param pcbAllocMem Where to return the amount of memory allocated
5745 * by VMs.
5746 * @param pcbFreeMem Where to return the amount of memory that is
5747 * allocated from the host but not currently used
5748 * by any VMs.
5749 * @param pcbBallonedMem Where to return the sum of memory that is
5750 * currently ballooned by the VMs.
5751 * @param pcbSharedMem Where to return the amount of memory that is
5752 * currently shared.
5753 */
5754VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PUVM pUVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem,
5755 uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem)
5756{
5757 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5758 VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE);
5759
5760 uint64_t cAllocPages = 0;
5761 uint64_t cFreePages = 0;
5762 uint64_t cBalloonPages = 0;
5763 uint64_t cSharedPages = 0;
5764#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
5765 if (!SUPR3IsDriverless())
5766 {
5767 int rc = GMMR3QueryHypervisorMemoryStats(pUVM->pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages);
5768 AssertRCReturn(rc, rc);
5769 }
5770#endif
5771
5772 if (pcbAllocMem)
5773 *pcbAllocMem = cAllocPages * _4K;
5774
5775 if (pcbFreeMem)
5776 *pcbFreeMem = cFreePages * _4K;
5777
5778 if (pcbBallonedMem)
5779 *pcbBallonedMem = cBalloonPages * _4K;
5780
5781 if (pcbSharedMem)
5782 *pcbSharedMem = cSharedPages * _4K;
5783
5784 Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n",
5785 cAllocPages, cFreePages, cBalloonPages, cSharedPages));
5786 return VINF_SUCCESS;
5787}
5788
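/*
 * Illustrative sketch only: querying the global memory statistics and
 * logging them.  Output pointers for figures that aren't needed may be
 * passed as NULL.
 *
 * @code
 *      uint64_t cbAlloc = 0, cbFree = 0, cbBallooned = 0, cbShared = 0;
 *      int rc = PGMR3QueryGlobalMemoryStats(pUVM, &cbAlloc, &cbFree, &cbBallooned, &cbShared);
 *      if (RT_SUCCESS(rc))
 *          LogRel(("Global mem: alloc=%RU64 free=%RU64 ballooned=%RU64 shared=%RU64 (bytes)\n",
 *                  cbAlloc, cbFree, cbBallooned, cbShared));
 * @endcode
 */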
5789
5790/**
5791 * Query memory stats for the VM.
5792 *
5793 * @returns VBox status code.
5794 * @param pUVM The user mode VM handle.
5795 * @param pcbTotalMem Where to return total amount memory the VM may
5796 * possibly use.
5797 * @param pcbPrivateMem Where to return the amount of private memory
5798 * currently allocated.
5799 * @param pcbSharedMem Where to return the amount of actually shared
5800 * memory currently used by the VM.
5801 * @param pcbZeroMem Where to return the amount of memory backed by
5802 * zero pages.
5803 *
5804 * @remarks The total mem is normally larger than the sum of the three
5805 * components. There are two reasons for this: first, the amount of
5806 * shared memory is what we're sure is shared instead of what could
5807 * possibly be shared with someone. Secondly, the total may
5808 * include some pure MMIO pages that don't go into any of the three
5809 * sub-counts.
5810 *
5811 * @todo Why do we return reused shared pages instead of anything that could
5812 * potentially be shared? Doesn't this mean the first VM gets a much
5813 * lower number of shared pages?
5814 */
5815VMMR3DECL(int) PGMR3QueryMemoryStats(PUVM pUVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem,
5816 uint64_t *pcbSharedMem, uint64_t *pcbZeroMem)
5817{
5818 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
5819 PVM pVM = pUVM->pVM;
5820 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
5821
5822 if (pcbTotalMem)
5823 *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * GUEST_PAGE_SIZE;
5824
5825 if (pcbPrivateMem)
5826 *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * GUEST_PAGE_SIZE;
5827
5828 if (pcbSharedMem)
5829 *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * GUEST_PAGE_SIZE;
5830
5831 if (pcbZeroMem)
5832 *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * GUEST_PAGE_SIZE;
5833
5834 Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages));
5835 return VINF_SUCCESS;
5836}
5837
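/*
 * Illustrative sketch only: querying the per-VM memory statistics; as with
 * the global variant, unneeded output pointers may be passed as NULL.
 *
 * @code
 *      uint64_t cbTotal = 0, cbPrivate = 0, cbShared = 0, cbZero = 0;
 *      int rc = PGMR3QueryMemoryStats(pUVM, &cbTotal, &cbPrivate, &cbShared, &cbZero);
 *      if (RT_SUCCESS(rc))
 *          LogRel(("VM mem: total=%RU64 private=%RU64 shared=%RU64 zero=%RU64 (bytes)\n",
 *                  cbTotal, cbPrivate, cbShared, cbZero));
 * @endcode
 */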
5838
5839
5840/*********************************************************************************************************************************
5841* Chunk Mappings and Page Allocation *
5842*********************************************************************************************************************************/
5843#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5844
5845/**
5846 * Tree enumeration callback for dealing with age rollover.
5847 * It will perform a simple compression of the current age.
5848 */
5849static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser)
5850{
5851 /* Age compression - ASSUMES iNow == 4. */
5852 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5853 if (pChunk->iLastUsed >= UINT32_C(0xffffff00))
5854 pChunk->iLastUsed = 3;
5855 else if (pChunk->iLastUsed >= UINT32_C(0xfffff000))
5856 pChunk->iLastUsed = 2;
5857 else if (pChunk->iLastUsed)
5858 pChunk->iLastUsed = 1;
5859 else /* iLastUsed = 0 */
5860 pChunk->iLastUsed = 4;
5861
5862 NOREF(pvUser);
5863 return 0;
5864}
5865
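/*
 * For reference, the compression above maps the old age values into the
 * small range that the restarted clock (iNow == 4) will hand out next:
 *      iLastUsed >= 0xffffff00  ->  3
 *      iLastUsed >= 0xfffff000  ->  2
 *      other non-zero values    ->  1
 *      iLastUsed == 0           ->  4
 * so the relative order of the most recently used chunks is kept while all
 * older chunks collapse to age 1.
 */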
5866
5867/**
5868 * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback().
5869 */
5870typedef struct PGMR3PHYSCHUNKUNMAPCB
5871{
5872 PVM pVM; /**< Pointer to the VM. */
5873 PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */
5874} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB;
5875
5876
5877/**
5878 * Callback used to find the mapping that's been unused for
5879 * the longest time.
5880 */
5881static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser)
5882{
5883 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
5884 PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser;
5885
5886 /*
5887 * Check for locks and compare when last used.
5888 */
5889 if (pChunk->cRefs)
5890 return 0;
5891 if (pChunk->cPermRefs)
5892 return 0;
5893 if ( pArg->pChunk
5894 && pChunk->iLastUsed >= pArg->pChunk->iLastUsed)
5895 return 0;
5896
5897 /*
5898 * Check that it's not in any of the TLBs.
5899 */
5900 PVM pVM = pArg->pVM;
5901 if ( pVM->pgm.s.ChunkR3Map.Tlb.aEntries[PGM_CHUNKR3MAPTLB_IDX(pChunk->Core.Key)].idChunk
5902 == pChunk->Core.Key)
5903 {
5904 pChunk = NULL;
5905 return 0;
5906 }
5907# ifdef VBOX_STRICT
5908 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
5909 {
5910 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk != pChunk);
5911 Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk != pChunk->Core.Key);
5912 }
5913# endif
5914
5915# if 0 /* This is too much work with the PGMCPU::PhysTlb as well. We flush them all instead. */
5916 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbR3.aEntries); i++)
5917 if (pVM->pgm.s.PhysTlbR3.aEntries[i].pMap == pChunk)
5918 return 0;
5919# endif
5920
5921 pArg->pChunk = pChunk;
5922 return 0;
5923}
5924
5925
5926/**
5927 * Finds a good candidate for unmapping when the ring-3 mapping cache is full.
5928 *
5929 * The candidate will not be part of any TLBs, so no need to flush
5930 * anything afterwards.
5931 *
5932 * @returns Chunk id.
5933 * @param pVM The cross context VM structure.
5934 */
5935static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM)
5936{
5937 PGM_LOCK_ASSERT_OWNER(pVM);
5938
5939 /*
5940 * Enumerate the age tree starting with the left most node.
5941 */
5942 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5943 PGMR3PHYSCHUNKUNMAPCB Args;
5944 Args.pVM = pVM;
5945 Args.pChunk = NULL;
5946 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args);
5947 Assert(Args.pChunk);
5948 if (Args.pChunk)
5949 {
5950 Assert(Args.pChunk->cRefs == 0);
5951 Assert(Args.pChunk->cPermRefs == 0);
5952 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5953 return Args.pChunk->Core.Key;
5954 }
5955
5956 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkFindCandidate, a);
5957 return INT32_MAX;
5958}
5959
5960
5961/**
5962 * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk
5963 *
5964 * This is only called on one of the EMTs while the other ones are waiting for
5965 * it to complete this function.
5966 *
5967 * @returns VINF_SUCCESS (VBox strict status code).
5968 * @param pVM The cross context VM structure.
5969 * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused.
5970 * @param pvUser User pointer. Unused
5971 *
5972 */
5973static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
5974{
5975 int rc = VINF_SUCCESS;
5976 PGM_LOCK_VOID(pVM);
5977 NOREF(pVCpu); NOREF(pvUser);
5978
5979 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
5980 {
5981#ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
5982 /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */
5983 /** @todo also not really efficient to unmap a chunk that contains PD
5984 * or PT pages. */
5985 pgmR3PoolClearAllRendezvous(pVM, pVM->apCpusR3[0], NULL /* no need to flush the REM TLB as we already did that above */);
5986#endif
5987
5988 /*
5989 * Request the ring-0 part to unmap a chunk to make space in the mapping cache.
5990 */
5991 GMMMAPUNMAPCHUNKREQ Req;
5992 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
5993 Req.Hdr.cbReq = sizeof(Req);
5994 Req.pvR3 = NULL;
5995 Req.idChunkMap = NIL_GMM_CHUNKID;
5996 Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM);
5997 if (Req.idChunkUnmap != INT32_MAX)
5998 {
5999 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkUnmap, a);
6000 rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
6001 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkUnmap, a);
6002 if (RT_SUCCESS(rc))
6003 {
6004 /*
6005 * Remove the unmapped one.
6006 */
6007 PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap);
6008 AssertRelease(pUnmappedChunk);
6009 AssertRelease(!pUnmappedChunk->cRefs);
6010 AssertRelease(!pUnmappedChunk->cPermRefs);
6011 pUnmappedChunk->pv = NULL;
6012 pUnmappedChunk->Core.Key = UINT32_MAX;
6013 MMR3HeapFree(pUnmappedChunk);
6014 pVM->pgm.s.ChunkR3Map.c--;
6015 pVM->pgm.s.cUnmappedChunks++;
6016
6017 /*
6018 * Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses).
6019 */
6020 /** @todo We should not flush chunks which include cr3 mappings. */
6021 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
6022 {
6023 PPGMCPU pPGM = &pVM->apCpusR3[idCpu]->pgm.s;
6024
6025 pPGM->pGst32BitPdR3 = NULL;
6026 pPGM->pGstPaePdptR3 = NULL;
6027 pPGM->pGstAmd64Pml4R3 = NULL;
6028 pPGM->pGstEptPml4R3 = NULL;
6029 pPGM->pGst32BitPdR0 = NIL_RTR0PTR;
6030 pPGM->pGstPaePdptR0 = NIL_RTR0PTR;
6031 pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR;
6032 pPGM->pGstEptPml4R0 = NIL_RTR0PTR;
6033 for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++)
6034 {
6035 pPGM->apGstPaePDsR3[i] = NULL;
6036 pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR;
6037 }
6038
6039 /* Flush REM TLBs. */
6040 CPUMSetChangedFlags(pVM->apCpusR3[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
6041 }
6042
6043 pgmR3PhysChunkInvalidateTLB(pVM, true /*fInRendezvous*/); /* includes pgmPhysInvalidatePageMapTLB call */
6044 }
6045 }
6046 }
6047 PGM_UNLOCK(pVM);
6048 return rc;
6049}
6050
6051/**
6052 * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap)
6053 *
6054 * @param pVM The cross context VM structure.
6055 */
6056static DECLCALLBACK(void) pgmR3PhysUnmapChunk(PVM pVM)
6057{
6058 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL);
6059 AssertRC(rc);
6060}
6061
6062
6063/**
6064 * Maps the given chunk into the ring-3 mapping cache.
6065 *
6066 * This will call ring-0.
6067 *
6068 * @returns VBox status code.
6069 * @param pVM The cross context VM structure.
6070 * @param idChunk The chunk in question.
6071 * @param ppChunk Where to store the chunk tracking structure.
6072 *
6073 * @remarks Called from within the PGM critical section.
6074 * @remarks Can be called from any thread!
6075 */
6076int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
6077{
6078 int rc;
6079
6080 PGM_LOCK_ASSERT_OWNER(pVM);
6081
6082 /*
6083 * Move the chunk time forward.
6084 */
6085 pVM->pgm.s.ChunkR3Map.iNow++;
6086 if (pVM->pgm.s.ChunkR3Map.iNow == 0)
6087 {
6088 pVM->pgm.s.ChunkR3Map.iNow = 4;
6089 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, NULL);
6090 }
6091
6092 /*
6093 * Allocate a new tracking structure first.
6094 */
6095 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk));
6096 AssertReturn(pChunk, VERR_NO_MEMORY);
6097 pChunk->Core.Key = idChunk;
6098 pChunk->iLastUsed = pVM->pgm.s.ChunkR3Map.iNow;
6099
6100 /*
6101 * Request the ring-0 part to map the chunk in question.
6102 */
6103 GMMMAPUNMAPCHUNKREQ Req;
6104 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
6105 Req.Hdr.cbReq = sizeof(Req);
6106 Req.pvR3 = NULL;
6107 Req.idChunkMap = idChunk;
6108 Req.idChunkUnmap = NIL_GMM_CHUNKID;
6109
6110 /* Must be callable from any thread, so can't use VMMR3CallR0. */
6111 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatChunkMap, a);
6112 rc = SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
6113 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatChunkMap, a);
6114 if (RT_SUCCESS(rc))
6115 {
6116 pChunk->pv = Req.pvR3;
6117
6118 /*
6119 * If we're running out of virtual address space, then we should
6120 * unmap another chunk.
6121 *
6122 * Currently, an unmap operation requires that all other virtual CPUs
6123 * are idling and not by chance making use of the memory we're
6124 * unmapping. So, we create an async unmap operation here.
6125 *
6126 * Now, when creating or restoring a saved state this won't work very
6127 * well since we may want to restore all guest RAM + a little something.
6128 * So, we have to do the unmap synchronously. Fortunately for us
6129 * though, during these operations the other virtual CPUs are inactive
6130 * and it should be safe to do this.
6131 */
6132 /** @todo Eventually we should lock all memory when used and do
6133 * map+unmap as one kernel call without any rendezvous or
6134 * other precautions. */
6135 if (pVM->pgm.s.ChunkR3Map.c + 1 >= pVM->pgm.s.ChunkR3Map.cMax)
6136 {
6137 switch (VMR3GetState(pVM))
6138 {
6139 case VMSTATE_LOADING:
6140 case VMSTATE_SAVING:
6141 {
6142 PVMCPU pVCpu = VMMGetCpu(pVM);
6143 if ( pVCpu
6144 && pVM->pgm.s.cDeprecatedPageLocks == 0)
6145 {
6146 pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL);
6147 break;
6148 }
6149 }
6150 RT_FALL_THRU();
6151 default:
6152 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
6153 AssertRC(rc);
6154 break;
6155 }
6156 }
6157
6158 /*
6159 * Update the tree. We must do this after any unmapping to make sure
6160 * the chunk we're going to return isn't unmapped by accident.
6161 */
6162 AssertPtr(Req.pvR3);
6163 bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core);
6164 AssertRelease(fRc);
6165 pVM->pgm.s.ChunkR3Map.c++;
6166 pVM->pgm.s.cMappedChunks++;
6167 }
6168 else
6169 {
6170 /** @todo this may fail because of /proc/sys/vm/max_map_count, so we
6171 * should probably restrict ourselves on linux. */
6172 AssertRC(rc);
6173 MMR3HeapFree(pChunk);
6174 pChunk = NULL;
6175 }
6176
6177 *ppChunk = pChunk;
6178 return rc;
6179}
6180
6181
6182/**
6183 * Invalidates the TLB for the ring-3 mapping cache.
6184 *
6185 * @param pVM The cross context VM structure.
6186 * @param fInRendezvous Set if we're in a rendezvous.
6187 */
6188DECLHIDDEN(void) pgmR3PhysChunkInvalidateTLB(PVM pVM, bool fInRendezvous)
6189{
6190 PGM_LOCK_VOID(pVM);
6191 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
6192 {
6193 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID;
6194 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL;
6195 }
6196 /* The page map TLB references chunks, so invalidate that one too. */
6197 pgmPhysInvalidatePageMapTLB(pVM, fInRendezvous);
6198 PGM_UNLOCK(pVM);
6199}
6200
6201
6202/**
6203 * Response to VM_FF_PGM_NEED_HANDY_PAGES and helper for pgmPhysEnsureHandyPage.
6204 *
6205 * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to
6206 * signal and clear the out of memory condition. When called, this API is used
6207 * to try to clear the condition when the user wants to resume.
6208 *
6209 * @returns The following VBox status codes.
6210 * @retval VINF_SUCCESS on success. FFs cleared.
6211 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in
6212 * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY.
6213 *
6214 * @param pVM The cross context VM structure.
6215 *
6216 * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing
6217 * in EM.cpp and shouldn't be propagated outside TRPM, HM, EM and
6218 * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF
6219 * handler.
6220 */
6221VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM)
6222{
6223 PGM_LOCK_VOID(pVM);
6224
6225 /*
6226 * Allocate more pages, noting down the index of the first new page.
6227 */
6228 uint32_t iClear = pVM->pgm.s.cHandyPages;
6229 AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_PGM_HANDY_PAGE_IPE);
6230 Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages)));
6231 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
6232 /** @todo we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */
6233 if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT
6234 && pVM->pgm.s.cHandyPages > 0)
6235 {
6236 /* Still handy pages left, so don't panic. */
6237 rc = VINF_SUCCESS;
6238 }
6239
6240 if (RT_SUCCESS(rc))
6241 {
6242 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
6243 Assert(pVM->pgm.s.cHandyPages > 0);
6244# ifdef VBOX_STRICT
6245 uint32_t i;
6246 for (i = iClear; i < pVM->pgm.s.cHandyPages; i++)
6247 if ( pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID
6248 || pVM->pgm.s.aHandyPages[i].idSharedPage != NIL_GMM_PAGEID
6249 || (pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & GUEST_PAGE_OFFSET_MASK))
6250 break;
6251 if (i != pVM->pgm.s.cHandyPages)
6252 {
6253 RTAssertMsg1Weak(NULL, __LINE__, __FILE__, __FUNCTION__);
6254 RTAssertMsg2Weak("i=%d iClear=%d cHandyPages=%d\n", i, iClear, pVM->pgm.s.cHandyPages);
6255 for (uint32_t j = iClear; j < pVM->pgm.s.cHandyPages; j++)
6256 RTAssertMsg2Add("%03d: idPage=%d HCPhysGCPhys=%RHp idSharedPage=%d%s\n", j,
6257 pVM->pgm.s.aHandyPages[j].idPage,
6258 pVM->pgm.s.aHandyPages[j].HCPhysGCPhys,
6259 pVM->pgm.s.aHandyPages[j].idSharedPage,
6260 j == i ? " <---" : "");
6261 RTAssertPanic();
6262 }
6263# endif
6264 }
6265 else
6266 {
6267 /*
6268 * We should never get here unless there is a genuine shortage of
6269 * memory (or some internal error). Flag the error so the VM can be
6270 * suspended ASAP and the user informed. If we're totally out of
6271 * handy pages we will return failure.
6272 */
6273 /* Report the failure. */
6274 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
6275 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
6276 rc, pVM->pgm.s.cHandyPages,
6277 pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cSharedPages, pVM->pgm.s.cZeroPages));
6278
6279 if ( rc != VERR_NO_MEMORY
6280 && rc != VERR_NO_PHYS_MEMORY
6281 && rc != VERR_LOCK_FAILED)
6282 for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
6283 {
6284 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
6285 i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage,
6286 pVM->pgm.s.aHandyPages[i].idSharedPage));
6287 uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage;
6288 if (idPage != NIL_GMM_PAGEID)
6289 {
6290 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
6291 for (uint32_t idRamRange = 0; idRamRange <= idRamRangeMax; idRamRange++)
6292 {
6293 PPGMRAMRANGE const pRam = pVM->pgm.s.apRamRanges[idRamRange];
6294 Assert(pRam || idRamRange == 0);
6295 if (!pRam) continue;
6296 Assert(pRam->idRange == idRamRange);
6297
6298 uint32_t const cPages = pRam->cb >> GUEST_PAGE_SHIFT;
6299 for (uint32_t iPage = 0; iPage < cPages; iPage++)
6300 if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage)
6301 LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n",
6302 pRam->GCPhys + ((RTGCPHYS)iPage << GUEST_PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc));
6303 }
6304 }
6305 }
6306
6307 if (rc == VERR_NO_MEMORY)
6308 {
6309 uint64_t cbHostRamAvail = 0;
6310 int rc2 = RTSystemQueryAvailableRam(&cbHostRamAvail);
6311 if (RT_SUCCESS(rc2))
6312 LogRel(("Host RAM: %RU64MB available\n", cbHostRamAvail / _1M));
6313 else
6314 LogRel(("Cannot determine the amount of available host memory\n"));
6315 }
6316
6317 /* Set the FFs and adjust rc. */
6318 VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
6319 VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
6320 if ( rc == VERR_NO_MEMORY
6321 || rc == VERR_NO_PHYS_MEMORY
6322 || rc == VERR_LOCK_FAILED)
6323 rc = VINF_EM_NO_MEMORY;
6324 }
6325
6326 PGM_UNLOCK(pVM);
6327 return rc;
6328}
6329
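/*
 * Illustrative sketch only: how a caller reacting to the force action flag
 * might replenish the handy pages (normally this is done by the FF
 * processing in EM.cpp):
 *
 * @code
 *      if (VM_FF_IS_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES))
 *      {
 *          int rc = PGMR3PhysAllocateHandyPages(pVM);
 *          if (rc == VINF_EM_NO_MEMORY)
 *              return VINF_EM_NO_MEMORY;   // VM_FF_PGM_NO_MEMORY is set, let the caller suspend the VM.
 *      }
 * @endcode
 */
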
6330#endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
6331
6332
6333/*********************************************************************************************************************************
6334* Other Stuff *
6335*********************************************************************************************************************************/
6336
6337#ifdef VBOX_VMM_TARGET_X86
6338/**
6339 * Sets the Address Gate 20 state.
6340 *
6341 * @param pVCpu The cross context virtual CPU structure.
6342 * @param fEnable True if the gate should be enabled.
6343 * False if the gate should be disabled.
6344 */
6345VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable)
6346{
6347 LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled));
6348 if (pVCpu->pgm.s.fA20Enabled != fEnable)
6349 {
6350# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6351 PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu);
6352 if ( CPUMIsGuestInVmxRootMode(pCtx)
6353 && !fEnable)
6354 {
6355 Log(("Cannot enter A20M mode while in VMX root mode\n"));
6356 return;
6357 }
6358# endif
6359 pVCpu->pgm.s.fA20Enabled = fEnable;
6360 pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!fEnable << 20);
6361 if (VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)))
6362 NEMR3NotifySetA20(pVCpu, fEnable);
6363# ifdef PGM_WITH_A20
6364 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
6365# ifndef VBOX_WITH_ONLY_PGM_NEM_MODE
6366 pgmR3RefreshShadowModeAfterA20Change(pVCpu);
6367 HMFlushTlb(pVCpu);
6368# endif /* !VBOX_WITH_ONLY_PGM_NEM_MODE */
6369# endif
6370# if 0 /* PGMGetPage will apply the A20 mask to the GCPhys it returns, so we must invalid both sides of the TLB. */
6371 IEMTlbInvalidateAllPhysical(pVCpu);
6372# else
6373 IEMTlbInvalidateAllGlobal(pVCpu);
6374# endif
6375 STAM_REL_COUNTER_INC(&pVCpu->pgm.s.cA20Changes);
6376 }
6377}
6378#endif /* VBOX_VMM_TARGET_X86 */
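
/*
 * Illustrative sketch only: how port 92h / keyboard controller emulation
 * might propagate an A20 gate change for the calling VCPU.  This is a
 * single-VCPU sketch; real device code may need to apply the change to
 * every VCPU.  fA20State is an assumption for the example.
 *
 * @code
 *      PVMCPU pVCpu = VMMGetCpu(pVM);      // must be on an EMT
 *      PGMR3PhysSetA20(pVCpu, fA20State);
 * @endcode
 */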
6379