VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDRVShared.c@7193

Last change on this file since 7193 was 7130, checked in by vboxsync, 17 years ago

Linux kmod: VBOX_REDHAT_KABI

1/* $Revision: 7130 $ */
2/** @file
3 * VirtualBox Support Driver - Shared code.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "SUPDRV.h"
32#ifndef PAGE_SHIFT
33# include <iprt/param.h>
34#endif
35#include <iprt/alloc.h>
36#include <iprt/semaphore.h>
37#include <iprt/spinlock.h>
38#include <iprt/thread.h>
39#include <iprt/process.h>
40#include <iprt/log.h>
41
42/*
43 * Logging assignments:
44 * Log - useful stuff, like failures.
45 * LogFlow - program flow, except the really noisy bits.
46 * Log2 - Cleanup and IDTE
47 * Log3 - Loader flow noise.
48 * Log4 - Call VMMR0 flow noise.
49 * Log5 - Native yet-to-be-defined noise.
50 * Log6 - Native ioctl flow noise.
51 *
52 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
53 * instantiation in log-vbox.c(pp).
54 */
55
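The level split above is purely a compile-time filter: statements above the configured threshold cost nothing in release builds. A minimal userland sketch of that idea, where MY_LOG and the threshold constant are illustrative stand-ins rather than the IPRT logging API:

#include <stdio.h>

/* Hypothetical compile-time threshold, playing the role of BUILD_TYPE=debug. */
#define MY_LOG_LEVEL 2

/* Calls above the threshold are constant-false and discarded by the optimizer. */
#define MY_LOG(lvl, ...) \
    do { if ((lvl) <= MY_LOG_LEVEL) printf(__VA_ARGS__); } while (0)

int main(void)
{
    MY_LOG(1, "useful stuff, like failures\n");    /* like Log */
    MY_LOG(2, "cleanup noise\n");                  /* like Log2 */
    MY_LOG(4, "call flow noise, filtered out\n");  /* like Log4 */
    return 0;
}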
56
57/*******************************************************************************
58* Defined Constants And Macros *
59*******************************************************************************/
60/* from x86.h - clashes with linux thus this duplication */
61#undef X86_CR0_PG
62#define X86_CR0_PG RT_BIT(31)
63#undef X86_CR0_PE
64#define X86_CR0_PE RT_BIT(0)
65#undef X86_CPUID_AMD_FEATURE_EDX_NX
66#define X86_CPUID_AMD_FEATURE_EDX_NX RT_BIT(20)
67#undef MSR_K6_EFER
68#define MSR_K6_EFER 0xc0000080
69#undef MSR_K6_EFER_NXE
70#define MSR_K6_EFER_NXE RT_BIT(11)
71#undef MSR_K6_EFER_LMA
72#define MSR_K6_EFER_LMA RT_BIT(10)
73#undef X86_CR4_PGE
74#define X86_CR4_PGE RT_BIT(7)
75#undef X86_CR4_PAE
76#define X86_CR4_PAE RT_BIT(5)
77#undef X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
78#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE RT_BIT(29)
79
80
81/** The frequency at which we recalculate the u32UpdateHz and
82 * u32UpdateIntervalNS GIP members. The value must be a power of 2. */
83#define GIP_UPDATEHZ_RECALC_FREQ 0x800
84
85/**
86 * Validates a session pointer.
87 *
88 * @returns true/false accordingly.
89 * @param pSession The session.
90 */
91#define SUP_IS_SESSION_VALID(pSession) \
92 ( VALID_PTR(pSession) \
93 && pSession->u32Cookie == BIRD_INV)
94
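SUP_IS_SESSION_VALID pairs a pointer sanity check with a magic cookie so that stale or forged session pointers fail fast before any field is trusted. A self-contained model of the same check; the cookie value here is made up, standing in for BIRD_INV:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MY_COOKIE_LIVE 0x7462696eU  /* hypothetical stand-in for BIRD_INV */

struct MySession { uint32_t u32Cookie; };

/* Non-NULL and carrying the live cookie, or it is not a usable session. */
static int mySessionIsValid(const struct MySession *pSession)
{
    return pSession != NULL && pSession->u32Cookie == MY_COOKIE_LIVE;
}

int main(void)
{
    struct MySession Session = { MY_COOKIE_LIVE };
    printf("%d %d\n", mySessionIsValid(&Session), mySessionIsValid(NULL)); /* 1 0 */
    return 0;
}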
95
96/*******************************************************************************
97* Global Variables *
98*******************************************************************************/
99/**
100 * Array of the R0 SUP API.
101 */
102static SUPFUNC g_aFunctions[] =
103{
104 /* name function */
105 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
106 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
107 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
108 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
109 { "SUPR0LockMem", (void *)SUPR0LockMem },
110 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
111 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
112 { "SUPR0ContFree", (void *)SUPR0ContFree },
113 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
114 { "SUPR0LowFree", (void *)SUPR0LowFree },
115 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
116 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
117 { "SUPR0MemFree", (void *)SUPR0MemFree },
118 { "SUPR0PageAlloc", (void *)SUPR0PageAlloc },
119 { "SUPR0PageFree", (void *)SUPR0PageFree },
120 { "SUPR0Printf", (void *)SUPR0Printf },
121 { "RTMemAlloc", (void *)RTMemAlloc },
122 { "RTMemAllocZ", (void *)RTMemAllocZ },
123 { "RTMemFree", (void *)RTMemFree },
124 /*{ "RTMemDup", (void *)RTMemDup },*/
125 { "RTMemRealloc", (void *)RTMemRealloc },
126 { "RTR0MemObjAllocLow", (void *)RTR0MemObjAllocLow },
127 { "RTR0MemObjAllocPage", (void *)RTR0MemObjAllocPage },
128 { "RTR0MemObjAllocPhys", (void *)RTR0MemObjAllocPhys },
129 { "RTR0MemObjAllocPhysNC", (void *)RTR0MemObjAllocPhysNC },
130 { "RTR0MemObjLockUser", (void *)RTR0MemObjLockUser },
131 { "RTR0MemObjMapKernel", (void *)RTR0MemObjMapKernel },
132 { "RTR0MemObjMapUser", (void *)RTR0MemObjMapUser },
133 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
134 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
135 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
136 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
137 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
138 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
139/* These don't work yet on Linux - use fast mutexes!
140 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
141 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
142 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
143 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
144*/
145 { "RTProcSelf", (void *)RTProcSelf },
146 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
147 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
148 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
149 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
150 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
151 { "RTSemEventCreate", (void *)RTSemEventCreate },
152 { "RTSemEventSignal", (void *)RTSemEventSignal },
153 { "RTSemEventWait", (void *)RTSemEventWait },
154 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
155 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
156 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
157 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
158 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
159 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
160 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
161 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
162 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
163 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
164 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
165 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
166 { "RTSpinlockAcquireNoInts", (void *)RTSpinlockAcquireNoInts },
167 { "RTSpinlockReleaseNoInts", (void *)RTSpinlockReleaseNoInts },
168 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
169 { "RTThreadSleep", (void *)RTThreadSleep },
170 { "RTThreadYield", (void *)RTThreadYield },
171#if 0 /* Thread APIs, Part 2. */
172 { "RTThreadSelf", (void *)RTThreadSelf },
173 { "RTThreadCreate", (void *)RTThreadCreate },
174 { "RTThreadGetNative", (void *)RTThreadGetNative },
175 { "RTThreadWait", (void *)RTThreadWait },
176 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
177 { "RTThreadGetName", (void *)RTThreadGetName },
178 { "RTThreadSelfName", (void *)RTThreadSelfName },
179 { "RTThreadGetType", (void *)RTThreadGetType },
180 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
181 { "RTThreadUserReset", (void *)RTThreadUserReset },
182 { "RTThreadUserWait", (void *)RTThreadUserWait },
183 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
184#endif
185 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
186 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
187 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
188 { "RTLogLogger", (void *)RTLogLogger },
189 { "RTLogLoggerEx", (void *)RTLogLoggerEx },
190 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
191 { "RTLogPrintf", (void *)RTLogPrintf },
192 { "RTLogPrintfV", (void *)RTLogPrintfV },
193 { "AssertMsg1", (void *)AssertMsg1 },
194 { "AssertMsg2", (void *)AssertMsg2 },
195};
196
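g_aFunctions is a classic name-to-address export table: ring-3 retrieves it through SUP_IOCTL_QUERY_FUNCS, and the module loader resolves imports against it by string comparison. A minimal sketch of such a lookup, using hypothetical demo functions in place of the real SUPR0/RT entry points:

#include <stdio.h>
#include <string.h>

typedef struct { const char *pszName; void *pfn; } MYFUNC;

static void demoA(void) { puts("A"); }
static void demoB(void) { puts("B"); }

static MYFUNC g_aDemo[] =
{
    { "demoA", (void *)demoA },
    { "demoB", (void *)demoB },
};

/* Linear scan by name - the same shape a LDR_GET_SYMBOL handler takes. */
static void *myGetSymbol(const char *pszSymbol)
{
    size_t i;
    for (i = 0; i < sizeof(g_aDemo) / sizeof(g_aDemo[0]); i++)
        if (!strcmp(g_aDemo[i].pszName, pszSymbol))
            return g_aDemo[i].pfn;
    return NULL;
}

int main(void)
{
    void (*pfn)(void) = (void (*)(void))myGetSymbol("demoB");
    if (pfn)
        pfn();  /* prints "B" */
    return 0;
}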
197
198/*******************************************************************************
199* Internal Functions *
200*******************************************************************************/
201static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
202static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
203#ifdef VBOX_WITH_IDT_PATCHING
204static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
205static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
206static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
207static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
208static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
209#endif /* VBOX_WITH_IDT_PATCHING */
210static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
211static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
212static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
213static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
214static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
215static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
216static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
217static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
218static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
219static SUPGIPMODE supdrvGipDeterminTscMode(void);
220#ifdef RT_OS_WINDOWS
221static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
222static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
223#endif
224#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
225static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
226static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
227static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser);
228#endif
229
230
231/**
232 * Initializes the device extension structure.
233 *
234 * @returns IPRT status code.
235 * @param pDevExt The device extension to initialize.
236 */
237int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
238{
239 /*
240 * Initialize it.
241 */
242 int rc;
243 memset(pDevExt, 0, sizeof(*pDevExt));
244 rc = RTSpinlockCreate(&pDevExt->Spinlock);
245 if (!rc)
246 {
247 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
248 if (!rc)
249 {
250 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
251 if (!rc)
252 {
253#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
254 rc = supdrvGipCreate(pDevExt);
255 if (RT_SUCCESS(rc))
256 {
257 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
258 return VINF_SUCCESS;
259 }
260#else
261 pDevExt->u32Cookie = BIRD;
262 return VINF_SUCCESS;
263#endif
264 }
265 RTSemFastMutexDestroy(pDevExt->mtxLdr);
266 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
267 }
268 RTSpinlockDestroy(pDevExt->Spinlock);
269 pDevExt->Spinlock = NIL_RTSPINLOCK;
270 }
271 return rc;
272}
273
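supdrvInitDevExt uses the nested "staircase" pattern: each resource is created in turn, and every failure path falls out through teardown of exactly the resources already created, in reverse order, before the error code is returned. A small standalone sketch of the pattern, with malloc standing in for the spinlock and mutex creation:

#include <stdlib.h>

struct MyExt { void *pvA; void *pvB; };

/* Acquire A, then B; on failure release what was acquired, newest first. */
static int myInit(struct MyExt *pExt)
{
    pExt->pvA = malloc(16);
    if (pExt->pvA)
    {
        pExt->pvB = malloc(16);
        if (pExt->pvB)
            return 0;           /* success: both resources live */
        free(pExt->pvA);        /* unwind step for A */
        pExt->pvA = NULL;
    }
    return -1;                  /* morally VERR_NO_MEMORY */
}

int main(void)
{
    struct MyExt Ext;
    if (myInit(&Ext) != 0)
        return 1;
    free(Ext.pvB);              /* teardown also runs in reverse order */
    free(Ext.pvA);
    return 0;
}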
274
275/**
276 * Deletes the device extension (i.e. cleans up its members).
277 *
278 * @param pDevExt The device extension to delete.
279 */
280void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
281{
282#ifdef VBOX_WITH_IDT_PATCHING
283 PSUPDRVPATCH pPatch;
284#endif
285 PSUPDRVOBJ pObj;
286 PSUPDRVUSAGE pUsage;
287
288 /*
289 * Kill mutexes and spinlocks.
290 */
291 RTSemFastMutexDestroy(pDevExt->mtxGip);
292 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
293 RTSemFastMutexDestroy(pDevExt->mtxLdr);
294 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
295 RTSpinlockDestroy(pDevExt->Spinlock);
296 pDevExt->Spinlock = NIL_RTSPINLOCK;
297
298 /*
299 * Free lists.
300 */
301#ifdef VBOX_WITH_IDT_PATCHING
302 /* patches */
303 /** @todo make sure we don't uninstall patches which have been patched by someone else. */
304 pPatch = pDevExt->pIdtPatchesFree;
305 pDevExt->pIdtPatchesFree = NULL;
306 while (pPatch)
307 {
308 void *pvFree = pPatch;
309 pPatch = pPatch->pNext;
310 RTMemExecFree(pvFree);
311 }
312#endif /* VBOX_WITH_IDT_PATCHING */
313
314 /* objects. */
315 pObj = pDevExt->pObjs;
316#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
317 Assert(!pObj); /* (can trigger on forced unloads) */
318#endif
319 pDevExt->pObjs = NULL;
320 while (pObj)
321 {
322 void *pvFree = pObj;
323 pObj = pObj->pNext;
324 RTMemFree(pvFree);
325 }
326
327 /* usage records. */
328 pUsage = pDevExt->pUsageFree;
329 pDevExt->pUsageFree = NULL;
330 while (pUsage)
331 {
332 void *pvFree = pUsage;
333 pUsage = pUsage->pNext;
334 RTMemFree(pvFree);
335 }
336
337#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
338 /* kill the GIP */
339 supdrvGipDestroy(pDevExt);
340#endif
341}
342
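Each list teardown above follows the same detach-and-drain shape: take the whole list off its head pointer first, then walk it, always reading pNext before freeing the node it lives in. As a standalone sketch:

#include <stdlib.h>

struct MyNode { struct MyNode *pNext; };

static void myDrainList(struct MyNode **ppHead)
{
    struct MyNode *pCur = *ppHead;
    *ppHead = NULL;                     /* detach first */
    while (pCur)
    {
        struct MyNode *pvFree = pCur;
        pCur = pCur->pNext;             /* read the link before freeing */
        free(pvFree);
    }
}

int main(void)
{
    struct MyNode *pHead = NULL;
    int i;
    for (i = 0; i < 3; i++)             /* build a little list */
    {
        struct MyNode *pNew = malloc(sizeof(*pNew));
        if (!pNew)
            break;
        pNew->pNext = pHead;
        pHead = pNew;
    }
    myDrainList(&pHead);
    return 0;
}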
343
344/**
345 * Create session.
346 *
347 * @returns IPRT status code.
348 * @param pDevExt Device extension.
349 * @param ppSession Where to store the pointer to the session data.
350 */
351int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
352{
353 /*
354 * Allocate memory for the session data.
355 */
356 int rc = VERR_NO_MEMORY;
357 PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
358 if (pSession)
359 {
360 /* Initialize session data. */
361 rc = RTSpinlockCreate(&pSession->Spinlock);
362 if (!rc)
363 {
364 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
365 pSession->pDevExt = pDevExt;
366 pSession->u32Cookie = BIRD_INV;
367 /*pSession->pLdrUsage = NULL;
368 pSession->pPatchUsage = NULL;
369 pSession->pUsage = NULL;
370 pSession->pGip = NULL;
371 pSession->fGipReferenced = false;
372 pSession->Bundle.cUsed = 0 */
373
374 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
375 return VINF_SUCCESS;
376 }
377
378 RTMemFree(pSession);
379 *ppSession = NULL;
380 Log(("Failed to create spinlock, rc=%d!\n", rc));
381 }
382
383 return rc;
384}
385
386
387/**
388 * Shared code for cleaning up a session.
389 *
390 * @param pDevExt Device extension.
391 * @param pSession Session data.
392 * This data will be freed by this routine.
393 */
394void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
395{
396 /*
397 * Cleanup the session first.
398 */
399 supdrvCleanupSession(pDevExt, pSession);
400
401 /*
402 * Free the rest of the session stuff.
403 */
404 RTSpinlockDestroy(pSession->Spinlock);
405 pSession->Spinlock = NIL_RTSPINLOCK;
406 pSession->pDevExt = NULL;
407 RTMemFree(pSession);
408 LogFlow(("supdrvCloseSession: returns\n"));
409}
410
411
412/**
413 * Shared code for cleaning up a session (but not quite freeing it).
414 *
415 * This is primarily intended for Mac OS X where we have to clean up the memory
416 * stuff before the file handle is closed.
417 *
418 * @param pDevExt Device extension.
419 * @param pSession Session data.
420 * This data will be freed by this routine.
421 */
422void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
423{
424 PSUPDRVBUNDLE pBundle;
425 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
426
427 /*
428 * Remove logger instances related to this session.
429 */
430 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
431
432#ifdef VBOX_WITH_IDT_PATCHING
433 /*
434 * Uninstall any IDT patches installed for this session.
435 */
436 supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
437#endif
438
439 /*
440 * Release object references made in this session.
441 * In theory there should be no one racing us in this session.
442 */
443 Log2(("release objects - start\n"));
444 if (pSession->pUsage)
445 {
446 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
447 PSUPDRVUSAGE pUsage;
448 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
449
450 while ((pUsage = pSession->pUsage) != NULL)
451 {
452 PSUPDRVOBJ pObj = pUsage->pObj;
453 pSession->pUsage = pUsage->pNext;
454
455 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
456 if (pUsage->cUsage < pObj->cUsage)
457 {
458 pObj->cUsage -= pUsage->cUsage;
459 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
460 }
461 else
462 {
463 /* Destroy the object and free the record. */
464 if (pDevExt->pObjs == pObj)
465 pDevExt->pObjs = pObj->pNext;
466 else
467 {
468 PSUPDRVOBJ pObjPrev;
469 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
470 if (pObjPrev->pNext == pObj)
471 {
472 pObjPrev->pNext = pObj->pNext;
473 break;
474 }
475 Assert(pObjPrev);
476 }
477 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
478
479 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
480 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
481 if (pObj->pfnDestructor)
482 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
483 RTMemFree(pObj);
484 }
485
486 /* free it and continue. */
487 RTMemFree(pUsage);
488
489 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
490 }
491
492 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
493 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during destruction!\n"));
494 }
495 Log2(("release objects - done\n"));
496
497 /*
498 * Release memory allocated in the session.
499 *
500 * We do not serialize this as we assume that the application will
501 * not allocate memory while closing the file handle object.
502 */
503 Log2(("freeing memory:\n"));
504 pBundle = &pSession->Bundle;
505 while (pBundle)
506 {
507 PSUPDRVBUNDLE pToFree;
508 unsigned i;
509
510 /*
511 * Check and unlock all entries in the bundle.
512 */
513 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
514 {
515 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
516 {
517 int rc;
518 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
519 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
520 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
521 {
522 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
523 AssertRC(rc); /** @todo figure out how to handle this. */
524 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
525 }
526 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
527 AssertRC(rc); /** @todo figure out how to handle this. */
528 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
529 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
530 }
531 }
532
533 /*
534 * Advance and free previous bundle.
535 */
536 pToFree = pBundle;
537 pBundle = pBundle->pNext;
538
539 pToFree->pNext = NULL;
540 pToFree->cUsed = 0;
541 if (pToFree != &pSession->Bundle)
542 RTMemFree(pToFree);
543 }
544 Log2(("freeing memory - done\n"));
545
546 /*
547 * Loaded images need to be dereferenced and possibly freed.
548 */
549 RTSemFastMutexRequest(pDevExt->mtxLdr);
550 Log2(("freeing images:\n"));
551 if (pSession->pLdrUsage)
552 {
553 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
554 pSession->pLdrUsage = NULL;
555 while (pUsage)
556 {
557 void *pvFree = pUsage;
558 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
559 if (pImage->cUsage > pUsage->cUsage)
560 pImage->cUsage -= pUsage->cUsage;
561 else
562 supdrvLdrFree(pDevExt, pImage);
563 pUsage->pImage = NULL;
564 pUsage = pUsage->pNext;
565 RTMemFree(pvFree);
566 }
567 }
568 RTSemFastMutexRelease(pDevExt->mtxLdr);
569 Log2(("freeing images - done\n"));
570
571 /*
572 * Unmap the GIP.
573 */
574 Log2(("unmapping GIP:\n"));
575#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
576 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
577#else
578 if (pSession->pGip)
579#endif
580 {
581 SUPR0GipUnmap(pSession);
582#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
583 pSession->pGip = NULL;
584#endif
585 pSession->fGipReferenced = 0;
586 }
587 Log2(("unmapping GIP - done\n"));
588}
589
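Note the locking discipline in the object-release loop above: the spinlock guarding the lists is dropped before a destructor callback runs and re-acquired afterwards, because an arbitrary callback must never execute under a spinlock. A compact userland model of that hand-off, with a pthread mutex standing in for the spinlock and made-up type names:

#include <pthread.h>
#include <stdio.h>

struct MyObj { void (*pfnDtor)(struct MyObj *pSelf); };

static pthread_mutex_t g_Lock = PTHREAD_MUTEX_INITIALIZER;

static void myDtor(struct MyObj *pObj) { (void)pObj; puts("dtor"); }

/* Called with g_Lock held; pObj is assumed already unlinked from the
   shared lists, so dropping the lock around the callback is safe. */
static void myDestroyLocked(struct MyObj *pObj)
{
    pthread_mutex_unlock(&g_Lock);
    if (pObj->pfnDtor)
        pObj->pfnDtor(pObj);            /* may sleep or take other locks */
    pthread_mutex_lock(&g_Lock);        /* resume walking under the lock */
}

int main(void)
{
    struct MyObj Obj = { myDtor };
    pthread_mutex_lock(&g_Lock);
    myDestroyLocked(&Obj);
    pthread_mutex_unlock(&g_Lock);
    return 0;
}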
590
591/**
592 * Fast path I/O Control worker.
593 *
594 * @returns VBox status code that should be passed down to ring-3 unchanged.
595 * @param uIOCtl Function number.
596 * @param pDevExt Device extension.
597 * @param pSession Session data.
598 */
599int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
600{
601 int rc;
602
603 /*
604 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
605 */
606 if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
607 {
608 switch (uIOCtl)
609 {
610 case SUP_IOCTL_FAST_DO_RAW_RUN:
611 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
612 break;
613 case SUP_IOCTL_FAST_DO_HWACC_RUN:
614 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
615 break;
616 case SUP_IOCTL_FAST_DO_NOP:
617 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
618 break;
619 default:
620 rc = VERR_INTERNAL_ERROR;
621 break;
622 }
623 }
624 else
625 rc = VERR_INTERNAL_ERROR;
626
627 return rc;
628}
629
630
631/**
632 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
633 * We would use strpbrk here if it were on the RedHat kABI white
634 * list; see http://www.kerneldrivers.org/RHEL5.
635 *
636 * @return 1 if pszStr contains any character of pszChars, 0 otherwise.
637 * @param pszStr String to check
638 * @param pszChars Character set
639 */
640static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
641{
642 int chCur;
643 while ((chCur = *pszStr++) != '\0')
644 {
645 int ch;
646 const char *psz = pszChars;
647 while ((ch = *psz++) != '\0')
648 if (ch == chCur)
649 return 1;
650
651 }
652 return 0;
653}
654
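Outside the kernel-module constraint that motivates this helper, the same predicate is a single C library call; a tiny test sketch of the equivalence:

#include <stdio.h>
#include <string.h>

/* Non-zero iff pszStr contains any character from pszChars. */
static int hasAnyChar(const char *pszStr, const char *pszChars)
{
    return strpbrk(pszStr, pszChars) != NULL;
}

int main(void)
{
    printf("%d\n", hasAnyChar("VMMR0.r0", ";:()[]{}"));  /* 0 */
    printf("%d\n", hasAnyChar("bad;name", ";:()[]{}"));  /* 1 */
    return 0;
}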
655
656/**
657 * I/O Control worker.
658 *
659 * @returns 0 on success.
660 * @returns VERR_INVALID_PARAMETER if the request is invalid.
661 *
662 * @param uIOCtl Function number.
663 * @param pDevExt Device extension.
664 * @param pSession Session data.
665 * @param pReqHdr The request header.
666 */
667int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
668{
669 /*
670 * Validate the request.
671 */
672 /* this first check could probably be omitted as it's also done by the OS-specific code... */
673 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
674 || pReqHdr->cbIn < sizeof(*pReqHdr)
675 || pReqHdr->cbOut < sizeof(*pReqHdr)))
676 {
677 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
678 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
679 return VERR_INVALID_PARAMETER;
680 }
681 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
682 {
683 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
684 {
685 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
686 return VERR_INVALID_PARAMETER;
687 }
688 }
689 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
690 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
691 {
692 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
693 return VERR_INVALID_PARAMETER;
694 }
695
696/*
697 * Validation macros
698 */
699#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
700 do { \
701 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
702 { \
703 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
704 (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
705 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
706 } \
707 } while (0)
708
709#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
710
711#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
712 do { \
713 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
714 { \
715 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
716 (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
717 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
718 } \
719 } while (0)
720
721#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
722 do { \
723 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
724 { \
725 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
726 (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
727 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
728 } \
729 } while (0)
730
731#define REQ_CHECK_EXPR(Name, expr) \
732 do { \
733 if (RT_UNLIKELY(!(expr))) \
734 { \
735 OSDBGPRINT(( #Name ": %s\n", #expr)); \
736 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
737 } \
738 } while (0)
739
740#define REQ_CHECK_EXPR_FMT(expr, fmt) \
741 do { \
742 if (RT_UNLIKELY(!(expr))) \
743 { \
744 OSDBGPRINT( fmt ); \
745 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
746 } \
747 } while (0)
748
749
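Two things carry the weight in these macros: the do { } while (0) wrapper makes each expand to a single statement that stays safe under un-braced if/else, and each check both logs the offending values and fails the request in one place. A self-contained sketch of the same shape (MY_CHECK_SIZES and MyHdr are illustrative, not the driver's types):

#include <stdio.h>

struct MyHdr { unsigned cbIn; unsigned cbOut; int rc; };

#define MY_CHECK_SIZES(pHdr, cbInExp, cbOutExp) \
    do { \
        if ((pHdr)->cbIn != (cbInExp) || (pHdr)->cbOut != (cbOutExp)) \
        { \
            fprintf(stderr, "bad sizes: %u/%u\n", (pHdr)->cbIn, (pHdr)->cbOut); \
            return (pHdr)->rc = -1; \
        } \
    } while (0)

static int myHandler(struct MyHdr *pHdr)
{
    MY_CHECK_SIZES(pHdr, 24u, 24u);     /* single statement, early return */
    return pHdr->rc = 0;
}

int main(void)
{
    struct MyHdr Hdr = { 24u, 16u, 0 };
    printf("%d\n", myHandler(&Hdr));    /* -1: cbOut mismatch */
    return 0;
}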
750 /*
751 * The switch.
752 */
753 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
754 {
755 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
756 {
757 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
758 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
759 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
760 {
761 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
762 pReq->Hdr.rc = VERR_INVALID_MAGIC;
763 return 0;
764 }
765
766#if 0
767 /*
768 * Call out to the OS specific code and let it do permission checks on the
769 * client process.
770 */
771 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
772 {
773 pReq->u.Out.u32Cookie = 0xffffffff;
774 pReq->u.Out.u32SessionCookie = 0xffffffff;
775 pReq->u.Out.u32SessionVersion = 0xffffffff;
776 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
777 pReq->u.Out.pSession = NULL;
778 pReq->u.Out.cFunctions = 0;
779 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
780 return 0;
781 }
782#endif
783
784 /*
785 * Match the version.
786 * The current logic is very simple, match the major interface version.
787 */
788 if ( pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
789 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
790 {
791 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
792 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
793 pReq->u.Out.u32Cookie = 0xffffffff;
794 pReq->u.Out.u32SessionCookie = 0xffffffff;
795 pReq->u.Out.u32SessionVersion = 0xffffffff;
796 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
797 pReq->u.Out.pSession = NULL;
798 pReq->u.Out.cFunctions = 0;
799 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
800 return 0;
801 }
802
803 /*
804 * Fill in return data and be gone.
805 * N.B. The first one to change SUPDRVIOC_VERSION shall make sure that
806 * u32SessionVersion <= u32ReqVersion!
807 */
808 /** @todo Somehow validate the client and negotiate a secure cookie... */
809 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
810 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
811 pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
812 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
813 pReq->u.Out.pSession = pSession;
814 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
815 pReq->Hdr.rc = VINF_SUCCESS;
816 return 0;
817 }
818
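The acceptance rule above reduces to two conditions: the client's minimum version must not exceed the driver's, and both must agree in the major half (top 16 bits) of the version word. As a standalone predicate with a hypothetical version constant:

#include <stdint.h>
#include <stdio.h>

#define MY_DRV_VERSION 0x00070002U  /* hypothetical, not SUPDRVIOC_VERSION */

static int myVersionOk(uint32_t u32MinVersion)
{
    return u32MinVersion <= MY_DRV_VERSION
        && (u32MinVersion & 0xffff0000U) == (MY_DRV_VERSION & 0xffff0000U);
}

int main(void)
{
    printf("%d %d %d\n",
           myVersionOk(0x00070000U),    /* 1: same major, older minor */
           myVersionOk(0x00070003U),    /* 0: minimum newer than driver */
           myVersionOk(0x00060000U));   /* 0: different major generation */
    return 0;
}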
819 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
820 {
821 /* validate */
822 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
823 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
824
825 /* execute */
826 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
827 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
828 pReq->Hdr.rc = VINF_SUCCESS;
829 return 0;
830 }
831
832 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
833 {
834 /* validate */
835 PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
836 REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);
837
838 /* execute */
839#ifdef VBOX_WITH_IDT_PATCHING
840 pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
841#else
842 pReq->u.Out.u8Idt = 3;
843 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
844#endif
845 return 0;
846 }
847
848 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
849 {
850 /* validate */
851 PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
852 REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);
853
854 /* execute */
855#ifdef VBOX_WITH_IDT_PATCHING
856 pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
857#else
858 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
859#endif
860 return 0;
861 }
862
863 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
864 {
865 /* validate */
866 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
867 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
868 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
869 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
870 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
871
872 /* execute */
873 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
874 if (RT_FAILURE(pReq->Hdr.rc))
875 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
876 return 0;
877 }
878
879 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
880 {
881 /* validate */
882 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
883 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
884
885 /* execute */
886 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
887 return 0;
888 }
889
890 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
891 {
892 /* validate */
893 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
894 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
895
896 /* execute */
897 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
898 if (RT_FAILURE(pReq->Hdr.rc))
899 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
900 return 0;
901 }
902
903 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
904 {
905 /* validate */
906 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
907 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
908
909 /* execute */
910 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
911 return 0;
912 }
913
914 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
915 {
916 /* validate */
917 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
918 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
919 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
920 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
921 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
922 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
923 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
924
925 /* execute */
926 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
927 return 0;
928 }
929
930 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
931 {
932 /* validate */
933 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
934 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->Hdr.cbIn >= sizeof(*pReq));
935 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
936 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
937 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
938 || ( pReq->u.In.offSymbols < pReq->u.In.cbImage
939 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
940 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
941 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
942 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
943 || ( pReq->u.In.offStrTab < pReq->u.In.cbImage
944 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
945 && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
946 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
947 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));
948
949 if (pReq->u.In.cSymbols)
950 {
951 uint32_t i;
952 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
953 for (i = 0; i < pReq->u.In.cSymbols; i++)
954 {
955 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
956 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
957 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
958 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbStrTab));
959 REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
960 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
961 }
962 }
963
964 /* execute */
965 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
966 return 0;
967 }
968
969 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
970 {
971 /* validate */
972 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
973 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
974
975 /* execute */
976 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
977 return 0;
978 }
979
980 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
981 {
982 /* validate */
983 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
984 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
985 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));
986
987 /* execute */
988 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
989 return 0;
990 }
991
992 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
993 {
994 /* validate */
995 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
996 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
997 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
998
999 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1000 {
1001 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1002
1003 /* execute */
1004 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1005 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
1006 else
1007 pReq->Hdr.rc = VERR_WRONG_ORDER;
1008 }
1009 else
1010 {
1011 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1012 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1013 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1014 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1015 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1016
1017 /* execute */
1018 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1019 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
1020 else
1021 pReq->Hdr.rc = VERR_WRONG_ORDER;
1022 }
1023
1024 if ( RT_FAILURE(pReq->Hdr.rc)
1025 && pReq->Hdr.rc != VERR_INTERRUPTED
1026 && pReq->Hdr.rc != VERR_TIMEOUT)
1027 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1028 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1029 else
1030 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1031 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1032 return 0;
1033 }
1034
1035 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1036 {
1037 /* validate */
1038 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1039 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1040
1041 /* execute */
1042 pReq->Hdr.rc = VINF_SUCCESS;
1043 pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
1044 return 0;
1045 }
1046
1047 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1048 {
1049 /* validate */
1050 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1051 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1052 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1053
1054 /* execute */
1055 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1056 if (RT_FAILURE(pReq->Hdr.rc))
1057 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1058 return 0;
1059 }
1060
1061 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1062 {
1063 /* validate */
1064 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1065 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1066
1067 /* execute */
1068 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1069 return 0;
1070 }
1071
1072 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1073 {
1074 /* validate */
1075 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1076 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1077
1078 /* execute */
1079 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1080 if (RT_SUCCESS(pReq->Hdr.rc))
1081 pReq->u.Out.pGipR0 = pDevExt->pGip;
1082 return 0;
1083 }
1084
1085 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1086 {
1087 /* validate */
1088 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1089 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1090
1091 /* execute */
1092 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1093 return 0;
1094 }
1095
1096 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1097 {
1098 /* validate */
1099 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1100 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1101 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1102 || ( VALID_PTR(pReq->u.In.pVMR0)
1103 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1104 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1105 /* execute */
1106 pSession->pVM = pReq->u.In.pVMR0;
1107 pReq->Hdr.rc = VINF_SUCCESS;
1108 return 0;
1109 }
1110
1111 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
1112 {
1113 /* validate */
1114 PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
1115 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
1116 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1117
1118 /* execute */
1119 pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1120 if (RT_FAILURE(pReq->Hdr.rc))
1121 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1122 return 0;
1123 }
1124
1125 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1126 {
1127 /* validate */
1128 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1129 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1130
1131 /* execute */
1132 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1133 return 0;
1134 }
1135
1136 default:
1137 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
1138 break;
1139 }
1140 return SUPDRV_ERR_GENERAL_FAILURE;
1141}
1142
1143
1144/**
1145 * Register an object for reference counting.
1146 * The object is registered with one reference in the specified session.
1147 *
1148 * @returns Unique identifier on success (pointer).
1149 * All future references must use this identifier.
1150 * @returns NULL on failure.
1151 * @param pfnDestructor The destructor function which will be called when the reference count reaches 0.
1152 * @param pvUser1 The first user argument.
1153 * @param pvUser2 The second user argument.
1154 */
1155SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
1156{
1157 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1158 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1159 PSUPDRVOBJ pObj;
1160 PSUPDRVUSAGE pUsage;
1161
1162 /*
1163 * Validate the input.
1164 */
1165 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
1166 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
1167 AssertPtrReturn(pfnDestructor, NULL);
1168
1169 /*
1170 * Allocate and initialize the object.
1171 */
1172 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
1173 if (!pObj)
1174 return NULL;
1175 pObj->u32Magic = SUPDRVOBJ_MAGIC;
1176 pObj->enmType = enmType;
1177 pObj->pNext = NULL;
1178 pObj->cUsage = 1;
1179 pObj->pfnDestructor = pfnDestructor;
1180 pObj->pvUser1 = pvUser1;
1181 pObj->pvUser2 = pvUser2;
1182 pObj->CreatorUid = pSession->Uid;
1183 pObj->CreatorGid = pSession->Gid;
1184 pObj->CreatorProcess = pSession->Process;
1185 supdrvOSObjInitCreator(pObj, pSession);
1186
1187 /*
1188 * Allocate the usage record.
1189 * (We keep freed usage records around to simplify SUPR0ObjAddRef().)
1190 */
1191 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1192
1193 pUsage = pDevExt->pUsageFree;
1194 if (pUsage)
1195 pDevExt->pUsageFree = pUsage->pNext;
1196 else
1197 {
1198 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1199 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
1200 if (!pUsage)
1201 {
1202 RTMemFree(pObj);
1203 return NULL;
1204 }
1205 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1206 }
1207
1208 /*
1209 * Insert the object and create the session usage record.
1210 */
1211 /* The object. */
1212 pObj->pNext = pDevExt->pObjs;
1213 pDevExt->pObjs = pObj;
1214
1215 /* The session record. */
1216 pUsage->cUsage = 1;
1217 pUsage->pObj = pObj;
1218 pUsage->pNext = pSession->pUsage;
1219 Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1220 pSession->pUsage = pUsage;
1221
1222 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1223
1224 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser2=%p)\n", pObj, pvUser1, pvUser2));
1225 return pObj;
1226}
1227
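The allocation dance in SUPR0ObjRegister is worth spelling out: the usage record is popped off a free list while the spinlock is held, and only when that list is empty is the lock dropped to call the allocator, which may sleep. A standalone model of the pattern, with a pthread mutex in place of the spinlock:

#include <pthread.h>
#include <stdlib.h>

struct MyRec { struct MyRec *pNext; };

static pthread_mutex_t g_Lock = PTHREAD_MUTEX_INITIALIZER;
static struct MyRec *g_pFree;           /* free list, protected by g_Lock */

/* On success returns a record with g_Lock held; on NULL the lock is not
   held. The sleepable allocation happens only outside the lock. */
static struct MyRec *myGetRecLocked(void)
{
    struct MyRec *pRec;
    pthread_mutex_lock(&g_Lock);
    pRec = g_pFree;
    if (pRec)
        g_pFree = pRec->pNext;          /* fast path: reuse under the lock */
    else
    {
        pthread_mutex_unlock(&g_Lock);
        pRec = malloc(sizeof(*pRec));   /* slow path: allocate unlocked */
        if (!pRec)
            return NULL;
        pthread_mutex_lock(&g_Lock);
    }
    return pRec;
}

int main(void)
{
    struct MyRec *pRec = myGetRecLocked();
    if (pRec)
    {
        pthread_mutex_unlock(&g_Lock);
        free(pRec);
    }
    return 0;
}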
1228
1229/**
1230 * Increment the reference counter for the object associating the reference
1231 * with the specified session.
1232 *
1233 * @returns IPRT status code.
1234 * @param pvObj The identifier returned by SUPR0ObjRegister().
1235 * @param pSession The session which is referencing the object.
1236 */
1237SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
1238{
1239 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1240 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1241 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1242 PSUPDRVUSAGE pUsagePre;
1243 PSUPDRVUSAGE pUsage;
1244
1245 /*
1246 * Validate the input.
1247 */
1248 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1249 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1250 ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1251 VERR_INVALID_PARAMETER);
1252
1253 /*
1254 * Preallocate the usage record.
1255 */
1256 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1257
1258 pUsagePre = pDevExt->pUsageFree;
1259 if (pUsagePre)
1260 pDevExt->pUsageFree = pUsagePre->pNext;
1261 else
1262 {
1263 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1264 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
1265 if (!pUsagePre)
1266 return VERR_NO_MEMORY;
1267 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1268 }
1269
1270 /*
1271 * Reference the object.
1272 */
1273 pObj->cUsage++;
1274
1275 /*
1276 * Look for the session record.
1277 */
1278 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
1279 {
1280 Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1281 if (pUsage->pObj == pObj)
1282 break;
1283 }
1284 if (pUsage)
1285 pUsage->cUsage++;
1286 else
1287 {
1288 /* create a new session record. */
1289 pUsagePre->cUsage = 1;
1290 pUsagePre->pObj = pObj;
1291 pUsagePre->pNext = pSession->pUsage;
1292 pSession->pUsage = pUsagePre;
1293 Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));
1294
1295 pUsagePre = NULL;
1296 }
1297
1298 /*
1299 * Put any unused usage record into the free list.
1300 */
1301 if (pUsagePre)
1302 {
1303 pUsagePre->pNext = pDevExt->pUsageFree;
1304 pDevExt->pUsageFree = pUsagePre;
1305 }
1306
1307 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1308
1309 return VINF_SUCCESS;
1310}
1311
1312
1313/**
1314 * Decrement / destroy a reference counter record for an object.
1315 *
1316 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
1317 *
1318 * @returns IPRT status code.
1319 * @param pvObj The identifier returned by SUPR0ObjRegister().
1320 * @param pSession The session which is referencing the object.
1321 */
1322SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
1323{
1324 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1325 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1326 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1327 bool fDestroy = false;
1328 PSUPDRVUSAGE pUsage;
1329 PSUPDRVUSAGE pUsagePrev;
1330
1331 /*
1332 * Validate the input.
1333 */
1334 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1335 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1336 ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1337 VERR_INVALID_PARAMETER);
1338
1339 /*
1340 * Acquire the spinlock and look for the usage record.
1341 */
1342 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1343
1344 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
1345 pUsage;
1346 pUsagePrev = pUsage, pUsage = pUsage->pNext)
1347 {
1348 Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1349 if (pUsage->pObj == pObj)
1350 {
1351 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
1352 if (pUsage->cUsage > 1)
1353 {
1354 pObj->cUsage--;
1355 pUsage->cUsage--;
1356 }
1357 else
1358 {
1359 /*
1360 * Free the session record.
1361 */
1362 if (pUsagePrev)
1363 pUsagePrev->pNext = pUsage->pNext;
1364 else
1365 pSession->pUsage = pUsage->pNext;
1366 pUsage->pNext = pDevExt->pUsageFree;
1367 pDevExt->pUsageFree = pUsage;
1368
1369 /* What about the object? */
1370 if (pObj->cUsage > 1)
1371 pObj->cUsage--;
1372 else
1373 {
1374 /*
1375 * Object is to be destroyed, unlink it.
1376 */
1377 pObj->u32Magic = SUPDRVOBJ_MAGIC + 1;
1378 fDestroy = true;
1379 if (pDevExt->pObjs == pObj)
1380 pDevExt->pObjs = pObj->pNext;
1381 else
1382 {
1383 PSUPDRVOBJ pObjPrev;
1384 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
1385 if (pObjPrev->pNext == pObj)
1386 {
1387 pObjPrev->pNext = pObj->pNext;
1388 break;
1389 }
1390 Assert(pObjPrev);
1391 }
1392 }
1393 }
1394 break;
1395 }
1396 }
1397
1398 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1399
1400 /*
1401 * Call the destructor and free the object if required.
1402 */
1403 if (fDestroy)
1404 {
1405 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
1406 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
1407 if (pObj->pfnDestructor)
1408 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
1409 RTMemFree(pObj);
1410 }
1411
1412 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
1413 return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
1414}
1415
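The release path above keeps two counters in step: the per-session usage count and the object's global count, both only ever touched under the spinlock, while the destructor is deferred until the lock is dropped. A compact model that collapses the per-session side and shows the hand-off decision:

#include <stdio.h>
#include <stdlib.h>

struct MyObj { unsigned cUsage; };

/* Called under the caller's lock: decrement, and tell the caller whether
   it must unlink now and destroy once the lock is dropped. */
static int myReleaseLocked(struct MyObj *pObj)
{
    if (pObj->cUsage > 1)
    {
        pObj->cUsage--;
        return 0;                       /* still referenced elsewhere */
    }
    return 1;                           /* last reference gone */
}

int main(void)
{
    struct MyObj *pObj = malloc(sizeof(*pObj));
    if (!pObj)
        return 1;
    pObj->cUsage = 2;
    if (!myReleaseLocked(pObj))
        puts("still alive");
    if (myReleaseLocked(pObj))
    {
        puts("destroying");             /* destructor runs outside the lock */
        free(pObj);
    }
    return 0;
}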
1416/**
1417 * Verifies that the current process can access the specified object.
1418 *
1419 * @returns The following IPRT status code:
1420 * @retval VINF_SUCCESS if access was granted.
1421 * @retval VERR_PERMISSION_DENIED if denied access.
1422 * @retval VERR_INVALID_PARAMETER if invalid parameter.
1423 *
1424 * @param pvObj The identifier returned by SUPR0ObjRegister().
1425 * @param pSession The session which wishes to access the object.
1426 * @param pszObjName Object string name. This is optional and depends on the object type.
1427 *
1428 * @remark The caller is responsible for making sure the object isn't removed while
1429 * we're inside this function. If uncertain about this, just call AddRef before calling us.
1430 */
1431SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
1432{
1433 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1434 int rc;
1435
1436 /*
1437 * Validate the input.
1438 */
1439 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1440 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1441 ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1442 VERR_INVALID_PARAMETER);
1443
1444 /*
1445 * Check access. (returns true if a decision has been made.)
1446 */
1447 rc = VERR_INTERNAL_ERROR;
1448 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
1449 return rc;
1450
1451 /*
1452 * Default policy is to allow the user to access his own
1453 * stuff but nothing else.
1454 */
1455 if (pObj->CreatorUid == pSession->Uid)
1456 return VINF_SUCCESS;
1457 return VERR_PERMISSION_DENIED;
1458}
1459
1460
1461/**
1462 * Lock pages.
1463 *
1464 * @returns IPRT status code.
1465 * @param pSession Session to which the locked memory should be associated.
1466 * @param pvR3 Start of the memory range to lock.
1467 * This must be page aligned.
1468 * @param cPages Number of pages to lock.
1469 * @param paPages Where to store the physical addresses of the locked pages.
1470 */
1471SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1472{
1473 int rc;
1474 SUPDRVMEMREF Mem = {0};
1475 const size_t cb = (size_t)cPages << PAGE_SHIFT;
1476 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
1477
1478 /*
1479 * Verify input.
1480 */
1481 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1482 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1483 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
1484 || !pvR3)
1485 {
1486 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
1487 return VERR_INVALID_PARAMETER;
1488 }
1489
1490#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
1491 /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
1492 rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
1493 if (RT_SUCCESS(rc))
1494 return rc;
1495#endif
1496
1497 /*
1498 * Let IPRT do the job.
1499 */
1500 Mem.eType = MEMREF_TYPE_LOCKED;
1501 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
1502 if (RT_SUCCESS(rc))
1503 {
1504 uint32_t iPage = cPages;
1505 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
1506 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
1507
1508 while (iPage-- > 0)
1509 {
1510 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1511 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
1512 {
1513 AssertMsgFailed(("iPage=%d\n", iPage));
1514 rc = VERR_INTERNAL_ERROR;
1515 break;
1516 }
1517 }
1518 if (RT_SUCCESS(rc))
1519 rc = supdrvMemAdd(&Mem, pSession);
1520 if (RT_FAILURE(rc))
1521 {
1522 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
1523 AssertRC(rc2);
1524 }
1525 }
1526
1527 return rc;
1528}
1529
1530
1531/**
1532 * Unlocks the memory pointed to by pv.
1533 *
1534 * @returns IPRT status code.
1535 * @param pSession Session to which the memory was locked.
1536 * @param pvR3 Memory to unlock.
1537 */
1538SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1539{
1540 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1541 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1542#ifdef RT_OS_WINDOWS
1543 /*
1544 * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
1545 * allocations; ignore this call.
1546 */
1547 if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
1548 {
1549 Log(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
1550 return VINF_SUCCESS;
1551 }
1552#endif
1553 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
1554}
1555
1556
1557/**
1558 * Allocates a chunk of page aligned memory with contiguous and fixed physical
1559 * backing.
1560 *
1561 * @returns IPRT status code.
1562 * @param pSession Session data.
1563 * @param cPages Number of pages to allocate.
1564 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
1565 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
1566 * @param pHCPhys Where to put the physical address of allocated memory.
1567 */
1568SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1569{
1570 int rc;
1571 SUPDRVMEMREF Mem = {0};
1572 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
1573
1574 /*
1575 * Validate input.
1576 */
1577 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1578 if (!ppvR3 || !ppvR0 || !pHCPhys)
1579 {
1580 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
1581 pSession, ppvR0, ppvR3, pHCPhys));
1582 return VERR_INVALID_PARAMETER;
1583
1584 }
1585 if (cPages < 1 || cPages >= 256)
1586 {
1587 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
1588 return VERR_INVALID_PARAMETER;
1589 }
1590
1591 /*
1592 * Let IPRT do the job.
1593 */
1594 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
1595 if (RT_SUCCESS(rc))
1596 {
1597 int rc2;
1598 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1599 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1600 if (RT_SUCCESS(rc))
1601 {
1602 Mem.eType = MEMREF_TYPE_CONT;
1603 rc = supdrvMemAdd(&Mem, pSession);
1604 if (!rc)
1605 {
1606 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1607 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1608 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
1609 return 0;
1610 }
1611
1612 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1613 AssertRC(rc2);
1614 }
1615 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1616 AssertRC(rc2);
1617 }
1618
1619 return rc;
1620}
1621
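One convention in the unwind path above deserves a note: cleanup calls get their own status variable (rc2), which is asserted but never allowed to overwrite rc, so the caller always sees the original failure code. A small sketch of that convention; the step names are made up:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

static int myMapStep(void *pv) { (void)pv; return -7; } /* forced failure */
static int myCleanup(void *pv) { free(pv); return 0; }  /* cleanup with status */

static int myAllocLike(void)
{
    int rc;
    int rc2;
    void *pv = malloc(4096);
    if (!pv)
        return -1;
    rc = myMapStep(pv);                 /* e.g. the ring-3 mapping step */
    if (rc == 0)
        return 0;                       /* success: real code would hand pv to a tracking list */
    rc2 = myCleanup(pv);                /* separate status for the unwind */
    assert(rc2 == 0);                   /* like AssertRC(rc2): checked... */
    (void)rc2;                          /* ...but never returned */
    return rc;                          /* the original failure, -7 */
}

int main(void)
{
    printf("rc=%d\n", myAllocLike());
    return 0;
}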
1622
1623/**
1624 * Frees memory allocated using SUPR0ContAlloc().
1625 *
1626 * @returns IPRT status code.
1627 * @param pSession The session to which the memory was allocated.
1628 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1629 */
1630SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1631{
1632 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1633 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1634 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1635}
1636
1637
1638/**
1639 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1640 *
1641 * The memory isn't zeroed.
1642 *
1643 * @returns IPRT status code.
1644 * @param pSession Session data.
1645 * @param cPages Number of pages to allocate.
1646 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1647 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1648 * @param paPages Where to put the physical addresses of allocated memory.
1649 */
1650SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1651{
1652 unsigned iPage;
1653 int rc;
1654 SUPDRVMEMREF Mem = {0};
1655 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1656
1657 /*
1658 * Validate input.
1659 */
1660 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1661 if (!ppvR3 || !ppvR0 || !paPages)
1662 {
1663 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1664 pSession, ppvR3, ppvR0, paPages));
1665 return VERR_INVALID_PARAMETER;
1666
1667 }
1668 if (cPages < 1 || cPages > 256)
1669 {
1670 Log(("Illegal request cPages=%d, must be between 1 and 256.\n", cPages));
1671 return VERR_INVALID_PARAMETER;
1672 }
1673
1674 /*
1675 * Let IPRT do the work.
1676 */
1677 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1678 if (RT_SUCCESS(rc))
1679 {
1680 int rc2;
1681 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1682 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1683 if (RT_SUCCESS(rc))
1684 {
1685 Mem.eType = MEMREF_TYPE_LOW;
1686 rc = supdrvMemAdd(&Mem, pSession);
1687 if (!rc)
1688 {
1689 for (iPage = 0; iPage < cPages; iPage++)
1690 {
1691 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1692 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", iPage, paPages[iPage]));
1693 }
1694 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1695 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1696 return VINF_SUCCESS;
1697 }
1698
1699 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1700 AssertRC(rc2);
1701 }
1702
1703 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1704 AssertRC(rc2);
1705 }
1706
1707 return rc;
1708}
1709
1710
1711/**
1712 * Frees memory allocated using SUPR0LowAlloc().
1713 *
1714 * @returns IPRT status code.
1715 * @param pSession The session to which the memory was allocated.
1716 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1717 */
1718SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1719{
1720 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1721 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1722 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1723}
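
/*
 * Editor's illustrative sketch (not part of the original file): using
 * SUPR0LowAlloc() when per-page physical addresses below 4GB are needed.
 * The 32-page figure and the caller name are made up for the example.
 */
#if 0
static int exampleLowAllocUse(PSUPDRVSESSION pSession)
{
    RTHCPHYS aPhys[32];
    RTR0PTR  pvR0 = NIL_RTR0PTR;
    RTR3PTR  pvR3 = NIL_RTR3PTR;
    int rc = SUPR0LowAlloc(pSession, RT_ELEMENTS(aPhys), &pvR0, &pvR3, &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* aPhys[i] now holds the physical address of page i (each below 4GB). */
        rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif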
1724
1725
1726
1727/**
1728 * Allocates a chunk of memory with both R0 and R3 mappings.
1729 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1730 *
1731 * @returns IPRT status code.
1732 * @param pSession The session to associate the allocation with.
1733 * @param cb Number of bytes to allocate.
1734 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1735 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1736 */
1737SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1738{
1739 int rc;
1740 SUPDRVMEMREF Mem = {0};
1741 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1742
1743 /*
1744 * Validate input.
1745 */
1746 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1747 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1748 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1749 if (cb < 1 || cb >= _4M)
1750 {
1751 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1752 return VERR_INVALID_PARAMETER;
1753 }
1754
1755 /*
1756 * Let IPRT do the work.
1757 */
1758 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1759 if (RT_SUCCESS(rc))
1760 {
1761 int rc2;
1762 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1763 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1764 if (RT_SUCCESS(rc))
1765 {
1766 Mem.eType = MEMREF_TYPE_MEM;
1767 rc = supdrvMemAdd(&Mem, pSession);
1768 if (!rc)
1769 {
1770 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1771 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1772 return VINF_SUCCESS;
1773 }
1774 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1775 AssertRC(rc2);
1776 }
1777
1778 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1779 AssertRC(rc2);
1780 }
1781
1782 return rc;
1783}
1784
1785
1786/**
1787 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1788 *
1789 * @returns IPRT status code.
1790 * @param pSession The session to which the memory was allocated.
1791 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1792 * @param paPages Where to store the physical addresses.
1793 */
1794SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1795{
1796 PSUPDRVBUNDLE pBundle;
1797 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1798 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1799
1800 /*
1801 * Validate input.
1802 */
1803 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1804 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1805 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1806
1807 /*
1808 * Search for the address.
1809 */
1810 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1811 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1812 {
1813 if (pBundle->cUsed > 0)
1814 {
1815 unsigned i;
1816 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1817 {
1818 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1819 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1820 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1821 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1822 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1823 )
1824 )
1825 {
1826 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1827 unsigned iPage;
1828 for (iPage = 0; iPage < cPages; iPage++)
1829 {
1830 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1831 paPages[iPage].uReserved = 0;
1832 }
1833 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1834 return VINF_SUCCESS;
1835 }
1836 }
1837 }
1838 }
1839 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1840 Log(("Failed to find %p!!!\n", (void *)uPtr));
1841 return VERR_INVALID_PARAMETER;
1842}
1843
1844
1845/**
1846 * Free memory allocated by SUPR0MemAlloc().
1847 *
1848 * @returns IPRT status code.
1849 * @param pSession The session owning the allocation.
1850 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1851 */
1852SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1853{
1854 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1855 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1856 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1857}
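
/*
 * Editor's illustrative sketch (not part of the original file): the typical
 * SUPR0MemAlloc() / SUPR0MemGetPhys() / SUPR0MemFree() sequence. The SUPPAGE
 * array size matches the example allocation of two pages.
 */
#if 0
static int exampleMemAllocUse(PSUPDRVSESSION pSession)
{
    SUPPAGE aPages[2];
    RTR0PTR pvR0 = NIL_RTR0PTR;
    RTR3PTR pvR3 = NIL_RTR3PTR;
    int rc = SUPR0MemAlloc(pSession, 2 * PAGE_SIZE, &pvR0, &pvR3);
    if (RT_SUCCESS(rc))
    {
        /* Either mapping address identifies the allocation. */
        rc = SUPR0MemGetPhys(pSession, (RTHCUINTPTR)pvR0, &aPages[0]);
        /* ... use aPages[i].Phys ... */
        SUPR0MemFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif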
1858
1859
1860/**
1861 * Allocates a chunk of memory with only an R3 mapping.
1862 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1863 *
1864 * @returns IPRT status code.
1865 * @param pSession The session to associate the allocation with.
1866 * @param cPages The number of pages to allocate.
1867 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1868 * @param paPages Where to store the addresses of the pages. Optional.
1869 */
1870SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1871{
1872 int rc;
1873 SUPDRVMEMREF Mem = {0};
1874 LogFlow(("SUPR0PageAlloc: pSession=%p cPages=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1875
1876 /*
1877 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
1878 */
1879 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1880 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1881 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1882 {
1883 Log(("SUPR0PageAlloc: Illegal request cPages=%u; must be greater than 0 and no more than 128MB worth of pages.\n", cPages));
1884 return VERR_INVALID_PARAMETER;
1885 }
1886
1887 /*
1888 * Let IPRT do the work.
1889 */
1890 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1891 if (RT_SUCCESS(rc))
1892 {
1893 int rc2;
1894 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1895 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1896 if (RT_SUCCESS(rc))
1897 {
1898 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1899 rc = supdrvMemAdd(&Mem, pSession);
1900 if (!rc)
1901 {
1902 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1903 if (paPages)
1904 {
1905 uint32_t iPage = cPages;
1906 while (iPage-- > 0)
1907 {
1908 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1909 Assert(paPages[iPage] != NIL_RTHCPHYS);
1910 }
1911 }
1912 return VINF_SUCCESS;
1913 }
1914 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1915 AssertRC(rc2);
1916 }
1917
1918 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1919 AssertRC(rc2);
1920 }
1921 return rc;
1922}
1923
1924
1925#ifdef RT_OS_WINDOWS
1926/**
1927 * Check if the pages were locked by SUPR0PageAlloc
1928 *
1929 * This function will be removed along with the lock/unlock hacks when
1930 * we've cleaned up the ring-3 code properly.
1931 *
1932 * @returns boolean
1933 * @param pSession The session to which the memory was allocated.
1934 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1935 */
1936static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1937{
1938 PSUPDRVBUNDLE pBundle;
1939 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1940 LogFlow(("supdrvPageWasLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1941
1942 /*
1943 * Search for the address.
1944 */
1945 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1946 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1947 {
1948 if (pBundle->cUsed > 0)
1949 {
1950 unsigned i;
1951 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1952 {
1953 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1954 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1955 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1956 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1957 {
1958 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1959 return true;
1960 }
1961 }
1962 }
1963 }
1964 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1965 return false;
1966}
1967
1968
1969/**
1970 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
1971 *
1972 * This function will be removed along with the lock/unlock hacks when
1973 * we've cleaned up the ring-3 code properly.
1974 *
1975 * @returns IPRT status code.
1976 * @param pSession The session to which the memory was allocated.
1977 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1978 * @param cPages Number of pages in paPages
1979 * @param paPages Where to store the physical addresses.
1980 */
1981static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1982{
1983 PSUPDRVBUNDLE pBundle;
1984 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1985 LogFlow(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
1986
1987 /*
1988 * Search for the address.
1989 */
1990 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1991 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1992 {
1993 if (pBundle->cUsed > 0)
1994 {
1995 unsigned i;
1996 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1997 {
1998 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1999 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2000 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2001 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2002 {
2003 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
2004 cPages = RT_MIN(iPage, cPages);
2005 for (iPage = 0; iPage < cPages; iPage++)
2006 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
2007 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2008 return VINF_SUCCESS;
2009 }
2010 }
2011 }
2012 }
2013 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2014 return VERR_INVALID_PARAMETER;
2015}
2016#endif /* RT_OS_WINDOWS */
2017
2018
2019/**
2020 * Free memory allocated by SUPR0PageAlloc().
2021 *
2022 * @returns IPRT status code.
2023 * @param pSession The session owning the allocation.
2024 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2025 */
2026SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2027{
2028 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2029 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2030 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
2031}
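
/*
 * Editor's illustrative sketch (not part of the original file): allocating
 * non-contiguous, R3-only memory with SUPR0PageAlloc(). The optional paPages
 * argument is passed here to also fetch the physical addresses.
 */
#if 0
static int examplePageAllocUse(PSUPDRVSESSION pSession)
{
    RTHCPHYS aPhys[8];
    RTR3PTR  pvR3 = NIL_RTR3PTR;
    int rc = SUPR0PageAlloc(pSession, RT_ELEMENTS(aPhys), &pvR3, &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* pvR3 is only mapped into the calling process; there is no ring-0 mapping. */
        rc = SUPR0PageFree(pSession, pvR3);
    }
    return rc;
}
#endif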
2032
2033
2034/**
2035 * Maps the GIP into userspace and/or get the physical address of the GIP.
2036 *
2037 * @returns IPRT status code.
2038 * @param pSession Session to which the GIP mapping should belong.
2039 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
2040 * @param pHCPhysGip Where to store the physical address. (optional)
2041 *
2042 * @remark There is no reference counting on the mapping, so one call to this function
2043 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
2044 * and remove the session as a GIP user.
2045 */
2046SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
2047{
2048 int rc = 0;
2049 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2050 RTR3PTR pGip = NIL_RTR3PTR;
2051 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2052 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
2053
2054 /*
2055 * Validate
2056 */
2057 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2058 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
2059 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
2060
2061 RTSemFastMutexRequest(pDevExt->mtxGip);
2062 if (pDevExt->pGip)
2063 {
2064 /*
2065 * Map it?
2066 */
2067 if (ppGipR3)
2068 {
2069#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2070 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2071 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2072 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2073 if (RT_SUCCESS(rc))
2074 {
2075 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2076 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2077 }
2078#else /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2079 if (!pSession->pGip)
2080 rc = supdrvOSGipMap(pSession->pDevExt, &pSession->pGip);
2081 if (!rc)
2082 pGip = (RTR3PTR)pSession->pGip;
2083#endif /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2084 }
2085
2086 /*
2087 * Get physical address.
2088 */
2089 if (pHCPhysGip && !rc)
2090 HCPhys = pDevExt->HCPhysGip;
2091
2092 /*
2093 * Reference globally.
2094 */
2095 if (!pSession->fGipReferenced && !rc)
2096 {
2097 pSession->fGipReferenced = 1;
2098 pDevExt->cGipUsers++;
2099 if (pDevExt->cGipUsers == 1)
2100 {
2101 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2102 unsigned i;
2103
2104 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
2105
2106 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2107 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2108 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2109
2110#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2111 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2112 AssertRC(rc); rc = VINF_SUCCESS;
2113#else
2114 supdrvOSGipResume(pDevExt);
2115#endif
2116 }
2117 }
2118 }
2119 else
2120 {
2121 rc = SUPDRV_ERR_GENERAL_FAILURE;
2122 Log(("SUPR0GipMap: GIP is not available!\n"));
2123 }
2124 RTSemFastMutexRelease(pDevExt->mtxGip);
2125
2126 /*
2127 * Write returns.
2128 */
2129 if (pHCPhysGip)
2130 *pHCPhysGip = HCPhys;
2131 if (ppGipR3)
2132 *ppGipR3 = pGip;
2133
2134#ifdef DEBUG_DARWIN_GIP
2135 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3=%p\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2136#else
2137 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2138#endif
2139 return rc;
2140}
2141
2142
2143/**
2144 * Unmaps any user mapping of the GIP and terminates all GIP access
2145 * from this session.
2146 *
2147 * @returns IPRT status code.
2148 * @param pSession Session to which the GIP mapping should belong.
2149 */
2150SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2151{
2152 int rc = VINF_SUCCESS;
2153 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2154#ifdef DEBUG_DARWIN_GIP
2155 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2156 pSession,
2157 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2158 pSession->GipMapObjR3));
2159#else
2160 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
2161#endif
2162 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2163
2164 RTSemFastMutexRequest(pDevExt->mtxGip);
2165
2166 /*
2167 * Unmap anything?
2168 */
2169#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2170 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2171 {
2172 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2173 AssertRC(rc);
2174 if (RT_SUCCESS(rc))
2175 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2176 }
2177#else
2178 if (pSession->pGip)
2179 {
2180 rc = supdrvOSGipUnmap(pDevExt, pSession->pGip);
2181 if (!rc)
2182 pSession->pGip = NULL;
2183 }
2184#endif
2185
2186 /*
2187 * Dereference global GIP.
2188 */
2189 if (pSession->fGipReferenced && !rc)
2190 {
2191 pSession->fGipReferenced = 0;
2192 if ( pDevExt->cGipUsers > 0
2193 && !--pDevExt->cGipUsers)
2194 {
2195 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
2196#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2197 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2198#else
2199 supdrvOSGipSuspend(pDevExt);
2200#endif
2201 }
2202 }
2203
2204 RTSemFastMutexRelease(pDevExt->mtxGip);
2205
2206 return rc;
2207}
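
/*
 * Editor's illustrative sketch (not part of the original file): mapping the
 * GIP read-only into the current process and releasing it again. Note the
 * remark on SUPR0GipMap() above: the reference is per session, not per call.
 */
#if 0
static int exampleGipUse(PSUPDRVSESSION pSession)
{
    RTR3PTR  pGipR3    = NIL_RTR3PTR;
    RTHCPHYS HCPhysGip = NIL_RTHCPHYS;
    int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
    if (RT_SUCCESS(rc))
    {
        /* ... ring-3 reads timestamps from the mapping at pGipR3 ... */
        rc = SUPR0GipUnmap(pSession);
    }
    return rc;
}
#endif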
2208
2209
2210/**
2211 * Adds a memory object to the session.
2212 *
2213 * @returns IPRT status code.
2214 * @param pMem Memory tracking structure containing the
2215 * information to track.
2216 * @param pSession The session.
2217 */
2218static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2219{
2220 PSUPDRVBUNDLE pBundle;
2221 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2222
2223 /*
2224 * Find free entry and record the allocation.
2225 */
2226 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2227 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2228 {
2229 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2230 {
2231 unsigned i;
2232 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2233 {
2234 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2235 {
2236 pBundle->cUsed++;
2237 pBundle->aMem[i] = *pMem;
2238 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2239 return VINF_SUCCESS;
2240 }
2241 }
2242 AssertFailed(); /* !!this can't be happening!!! */
2243 }
2244 }
2245 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2246
2247 /*
2248 * Need to allocate a new bundle.
2249 * Insert into the last entry in the bundle.
2250 */
2251 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2252 if (!pBundle)
2253 return VERR_NO_MEMORY;
2254
2255 /* take last entry. */
2256 pBundle->cUsed++;
2257 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2258
2259 /* insert into list. */
2260 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2261 pBundle->pNext = pSession->Bundle.pNext;
2262 pSession->Bundle.pNext = pBundle;
2263 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2264
2265 return VINF_SUCCESS;
2266}
2267
2268
2269/**
2270 * Releases a memory object referenced by pointer and type.
2271 *
2272 * @returns IPRT status code.
2273 * @param pSession Session data.
2274 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2275 * @param eType Memory type.
2276 */
2277static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2278{
2279 PSUPDRVBUNDLE pBundle;
2280 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2281
2282 /*
2283 * Validate input.
2284 */
2285 if (!uPtr)
2286 {
2287 Log(("Illegal address %p\n", (void *)uPtr));
2288 return VERR_INVALID_PARAMETER;
2289 }
2290
2291 /*
2292 * Search for the address.
2293 */
2294 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2295 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2296 {
2297 if (pBundle->cUsed > 0)
2298 {
2299 unsigned i;
2300 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2301 {
2302 if ( pBundle->aMem[i].eType == eType
2303 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2304 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2305 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2306 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2307 )
2308 {
2309 /* Make a copy of it and release it outside the spinlock. */
2310 SUPDRVMEMREF Mem = pBundle->aMem[i];
2311 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2312 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2313 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2314 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2315
2316 if (Mem.MapObjR3)
2317 {
2318 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2319 AssertRC(rc); /** @todo figure out how to handle this. */
2320 }
2321 if (Mem.MemObj)
2322 {
2323 int rc = RTR0MemObjFree(Mem.MemObj, false);
2324 AssertRC(rc); /** @todo figure out how to handle this. */
2325 }
2326 return VINF_SUCCESS;
2327 }
2328 }
2329 }
2330 }
2331 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2332 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2333 return VERR_INVALID_PARAMETER;
2334}
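
/*
 * Editor's note (not part of the original file): the lifecycle the two
 * helpers above implement, sketched for one allocation. All the SUPR0*Alloc
 * functions follow this pattern; only the example caller is hypothetical.
 */
#if 0
static int exampleMemRefLifecycle(PSUPDRVSESSION pSession)
{
    SUPDRVMEMREF Mem = {0};
    int rc = RTR0MemObjAllocPage(&Mem.MemObj, PAGE_SIZE, false /* not executable */);
    if (RT_SUCCESS(rc))
    {
        Mem.eType = MEMREF_TYPE_MEM;
        rc = supdrvMemAdd(&Mem, pSession);                   /* track it in a bundle */
        if (RT_SUCCESS(rc))
            rc = supdrvMemRelease(pSession,                  /* find, untrack and free */
                                  (RTHCUINTPTR)RTR0MemObjAddress(Mem.MemObj),
                                  MEMREF_TYPE_MEM);
        else
            RTR0MemObjFree(Mem.MemObj, false);
    }
    return rc;
}
#endif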
2335
2336
2337#ifdef VBOX_WITH_IDT_PATCHING
2338/**
2339 * Install IDT for the current CPU.
2340 *
2341 * @returns One of the following IPRT status codes:
2342 * @retval VINF_SUCCESS on success.
2343 * @retval VERR_IDT_FAILED.
2344 * @retval VERR_NO_MEMORY.
2345 * @param pDevExt The device extension.
2346 * @param pSession The session data.
2347 * @param pReq The request.
2348 */
2349static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2350{
2351 PSUPDRVPATCHUSAGE pUsagePre;
2352 PSUPDRVPATCH pPatchPre;
2353 RTIDTR Idtr;
2354 PSUPDRVPATCH pPatch;
2355 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2356 LogFlow(("supdrvIOCtl_IdtInstall\n"));
2357
2358 /*
2359 * Preallocate entry for this CPU because we don't want to do
2360 * that inside the spinlock!
2361 */
2362 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2363 if (!pUsagePre)
2364 return VERR_NO_MEMORY;
2365
2366 /*
2367 * Take the spinlock and see what we need to do.
2368 */
2369 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2370
2371 /* check if we already got a free patch. */
2372 if (!pDevExt->pIdtPatchesFree)
2373 {
2374 /*
2375 * Allocate a patch - outside the spinlock of course.
2376 */
2377 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2378
2379 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2380 if (!pPatchPre)
2381 return VERR_NO_MEMORY;
2382
2383 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2384 }
2385 else
2386 {
2387 pPatchPre = pDevExt->pIdtPatchesFree;
2388 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2389 }
2390
2391 /* look for matching patch entry */
2392 ASMGetIDTR(&Idtr);
2393 pPatch = pDevExt->pIdtPatches;
2394 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2395 pPatch = pPatch->pNext;
2396
2397 if (!pPatch)
2398 {
2399 /*
2400 * Create patch.
2401 */
2402 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2403 if (pPatch)
2404 pPatchPre = NULL; /* mark as used. */
2405 }
2406 else
2407 {
2408 /*
2409 * Simply increment patch usage.
2410 */
2411 pPatch->cUsage++;
2412 }
2413
2414 if (pPatch)
2415 {
2416 /*
2417 * Increment and add if need be the session usage record for this patch.
2418 */
2419 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2420 while (pUsage && pUsage->pPatch != pPatch)
2421 pUsage = pUsage->pNext;
2422
2423 if (!pUsage)
2424 {
2425 /*
2426 * Add usage record.
2427 */
2428 pUsagePre->cUsage = 1;
2429 pUsagePre->pPatch = pPatch;
2430 pUsagePre->pNext = pSession->pPatchUsage;
2431 pSession->pPatchUsage = pUsagePre;
2432 pUsagePre = NULL; /* mark as used. */
2433 }
2434 else
2435 {
2436 /*
2437 * Increment usage count.
2438 */
2439 pUsage->cUsage++;
2440 }
2441 }
2442
2443 /* free patch - we accumulate them for paranoid safety reasons. */
2444 if (pPatchPre)
2445 {
2446 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2447 pDevExt->pIdtPatchesFree = pPatchPre;
2448 }
2449
2450 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2451
2452 /*
2453 * Free unused preallocated buffers.
2454 */
2455 if (pUsagePre)
2456 RTMemFree(pUsagePre);
2457
2458 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2459
2460 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2461}
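
/*
 * Editor's note (not part of the original file): a generic restatement of the
 * preallocate-outside-the-lock pattern used above, with hypothetical names.
 * Memory is allocated before taking the spinlock, and any unused
 * preallocation is freed after it has been released, so no allocation ever
 * happens while the lock is held.
 */
#if 0
static int examplePreallocPattern(PSUPDRVDEVEXT pDevExt)
{
    RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    bool          fUsed = false;                    /* hypothetical outcome */
    void         *pvPre = RTMemAlloc(64);           /* preallocate outside the lock */
    if (!pvPre)
        return VERR_NO_MEMORY;

    RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
    /* ... either link pvPre into some list (fUsed = true) or decide it isn't needed ... */
    RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);

    if (!fUsed)
        RTMemFree(pvPre);                           /* free the unused preallocation */
    return VINF_SUCCESS;
}
#endif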
2462
2463
2464/**
2465 * This creates an IDT patch entry.
2466 * If it is the first patch being installed, it will also determine the IDT entry
2467 * to use.
2468 *
2469 * @returns pPatch on success.
2470 * @returns NULL on failure.
2471 * @param pDevExt Pointer to globals.
2472 * @param pPatch Patch entry to use.
2473 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2474 * successful return.
2475 * @remark Caller must own the SUPDRVDEVEXT::Spinlock!
2476 */
2477static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2478{
2479 RTIDTR Idtr;
2480 PSUPDRVIDTE paIdt;
2481 LogFlow(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));
2482
2483 /*
2484 * Get IDT.
2485 */
2486 ASMGetIDTR(&Idtr);
2487 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2488 /*
2489 * Recent Linux kernels can be configured for a 1G user / 3G kernel split.
2490 */
2491 if ((uintptr_t)paIdt < 0x40000000)
2492 {
2493 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2494 return NULL;
2495 }
2496
2497 if (!pDevExt->u8Idt)
2498 {
2499 /*
2500 * Test out the alternatives.
2501 *
2502 * At the moment we do not support chaining, thus we ASSUME that one of
2503 * the entries in the table below is unused (which is not a problem on
2504 * Win32 and Linux to my knowledge).
2505 */
2506 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2507 * combined with gathering info about which guest system call gates we can hook up directly. */
2508 unsigned i;
2509 uint8_t u8Idt = 0;
2510 static uint8_t au8Ints[] =
2511 {
2512#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on Linux (0xef is IPI,
2513 * local APIC timer, or some other frequently firing thing). */
2514 0xef, 0xee, 0xed, 0xec,
2515#endif
2516 0xeb, 0xea, 0xe9, 0xe8,
2517 0xdf, 0xde, 0xdd, 0xdc,
2518 0x7b, 0x7a, 0x79, 0x78,
2519 0xbf, 0xbe, 0xbd, 0xbc,
2520 };
2521#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2522 static int s_iWobble = 0;
2523 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2524 Log2(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2525 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2526 {
2527 Log2(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
2528 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2529 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2530 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2531 }
2532#endif
2533 /* look for entries which are not present or otherwise unused. */
2534 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2535 {
2536 u8Idt = au8Ints[i];
2537 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2538 && ( !paIdt[u8Idt].u1Present
2539 || paIdt[u8Idt].u5Type2 == 0))
2540 break;
2541 u8Idt = 0;
2542 }
2543 if (!u8Idt)
2544 {
2545 /* try again, look for a compatible entry. */
2546 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2547 {
2548 u8Idt = au8Ints[i];
2549 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2550 && paIdt[u8Idt].u1Present
2551 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2552 && !(paIdt[u8Idt].u16SegSel & 3))
2553 break;
2554 u8Idt = 0;
2555 }
2556 if (!u8Idt)
2557 {
2558 Log(("Failed to find an appropriate IDT entry!!\n"));
2559 return NULL;
2560 }
2561 }
2562 pDevExt->u8Idt = u8Idt;
2563 LogFlow(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
2564 }
2565
2566 /*
2567 * Prepare the patch
2568 */
2569 memset(pPatch, 0, sizeof(*pPatch));
2570 pPatch->pvIdt = paIdt;
2571 pPatch->cUsage = 1;
2572 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2573 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2574 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2575 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2576#ifdef RT_ARCH_AMD64
2577 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2578#endif
2579 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2580#ifdef RT_ARCH_AMD64
2581 pPatch->ChangedIdt.u3IST = 0;
2582 pPatch->ChangedIdt.u5Reserved = 0;
2583#else /* x86 */
2584 pPatch->ChangedIdt.u5Reserved = 0;
2585 pPatch->ChangedIdt.u3Type1 = 0;
2586#endif /* x86 */
2587 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2588 pPatch->ChangedIdt.u2DPL = 3;
2589 pPatch->ChangedIdt.u1Present = 1;
2590
2591 /*
2592 * Generate the patch code.
2593 */
2594 {
2595#ifdef RT_ARCH_AMD64
2596 union
2597 {
2598 uint8_t *pb;
2599 uint32_t *pu32;
2600 uint64_t *pu64;
2601 } u, uFixJmp, uFixCall, uNotNested;
2602 u.pb = &pPatch->auCode[0];
2603
2604 /* check the cookie */
2605 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2606 *u.pu32++ = pDevExt->u32Cookie;
2607
2608 *u.pb++ = 0x74; // jz @VBoxCall
2609 *u.pb++ = 2;
2610
2611 /* jump to forwarder code. */
2612 *u.pb++ = 0xeb;
2613 uFixJmp = u;
2614 *u.pb++ = 0xfe;
2615
2616 // @VBoxCall:
2617 *u.pb++ = 0x0f; // swapgs
2618 *u.pb++ = 0x01;
2619 *u.pb++ = 0xf8;
2620
2621 /*
2622 * Call VMMR0Entry
2623 * We don't have to push the arguments here, but we have to
2624 * reserve some stack space for the interrupt forwarding.
2625 */
2626# ifdef RT_OS_WINDOWS
2627 *u.pb++ = 0x50; // push rax ; alignment filler.
2628 *u.pb++ = 0x41; // push r8 ; uArg
2629 *u.pb++ = 0x50;
2630 *u.pb++ = 0x52; // push rdx ; uOperation
2631 *u.pb++ = 0x51; // push rcx ; pVM
2632# else
2633 *u.pb++ = 0x51; // push rcx ; alignment filler.
2634 *u.pb++ = 0x52; // push rdx ; uArg
2635 *u.pb++ = 0x56; // push rsi ; uOperation
2636 *u.pb++ = 0x57; // push rdi ; pVM
2637# endif
2638
2639 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2640 *u.pb++ = 0x15;
2641 uFixCall = u;
2642 *u.pu32++ = 0;
2643
2644 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2645 *u.pb++ = 0x81;
2646 *u.pb++ = 0xc4;
2647 *u.pu32++ = 0x20;
2648
2649 *u.pb++ = 0x0f; // swapgs
2650 *u.pb++ = 0x01;
2651 *u.pb++ = 0xf8;
2652
2653 /* Return to R3. */
2654 uNotNested = u;
2655 *u.pb++ = 0x48; // iretq
2656 *u.pb++ = 0xcf;
2657
2658 while ((uintptr_t)u.pb & 0x7) // align 8
2659 *u.pb++ = 0xcc;
2660
2661 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2662 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2663 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2664 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2665
2666 /* stub entry. */ // StubVMMR0Entry:
2667 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2668 *u.pb++ = 0x33; // xor eax, eax
2669 *u.pb++ = 0xc0;
2670
2671 *u.pb++ = 0x48; // dec rax
2672 *u.pb++ = 0xff;
2673 *u.pb++ = 0xc8;
2674
2675 *u.pb++ = 0xc3; // ret
2676
2677 /* forward to the original handler using a retf. */
2678 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2679
2680 *u.pb++ = 0x68; // push <target cs>
2681 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2682
2683 *u.pb++ = 0x68; // push <low target rip>
2684 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2685 ? (uint32_t)(uintptr_t)uNotNested.pb
2686 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2687 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2688
2689 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2690 *u.pb++ = 0x44;
2691 *u.pb++ = 0x24;
2692 *u.pb++ = 0x04;
2693 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2694 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2695 : pPatch->SavedIdt.u32OffsetTop;
2696
2697 *u.pb++ = 0x48; // retf ; does this require prefix?
2698 *u.pb++ = 0xcb;
2699
2700#else /* RT_ARCH_X86 */
2701
2702 union
2703 {
2704 uint8_t *pb;
2705 uint16_t *pu16;
2706 uint32_t *pu32;
2707 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2708 u.pb = &pPatch->auCode[0];
2709
2710 /* check the cookie */
2711 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2712 *u.pb++ = 0xfe;
2713 *u.pu32++ = pDevExt->u32Cookie;
2714
2715 *u.pb++ = 0x74; // jz VBoxCall
2716 uFixJmp = u;
2717 *u.pb++ = 0;
2718
2719 /* jump (far) to the original handler / not-nested-stub. */
2720 *u.pb++ = 0xea; // jmp far NotNested
2721 uFixJmpNotNested = u;
2722 *u.pu32++ = 0;
2723 *u.pu16++ = 0;
2724
2725 /* save selector registers. */ // VBoxCall:
2726 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2727 *u.pb++ = 0x0f; // push fs
2728 *u.pb++ = 0xa0;
2729
2730 *u.pb++ = 0x1e; // push ds
2731
2732 *u.pb++ = 0x06; // push es
2733
2734 /* call frame */
2735 *u.pb++ = 0x51; // push ecx
2736
2737 *u.pb++ = 0x52; // push edx
2738
2739 *u.pb++ = 0x50; // push eax
2740
2741 /* load ds, es and perhaps fs before call. */
2742 *u.pb++ = 0xb8; // mov eax, KernelDS
2743 *u.pu32++ = ASMGetDS();
2744
2745 *u.pb++ = 0x8e; // mov ds, eax
2746 *u.pb++ = 0xd8;
2747
2748 *u.pb++ = 0x8e; // mov es, eax
2749 *u.pb++ = 0xc0;
2750
2751#ifdef RT_OS_WINDOWS
2752 *u.pb++ = 0xb8; // mov eax, KernelFS
2753 *u.pu32++ = ASMGetFS();
2754
2755 *u.pb++ = 0x8e; // mov fs, eax
2756 *u.pb++ = 0xe0;
2757#endif
2758
2759 /* do the call. */
2760 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2761 uFixCall = u;
2762 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2763 *u.pu32++ = 0xfffffffb;
2764
2765 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2766 *u.pb++ = 0xc4;
2767 *u.pb++ = 0x0c;
2768
2769 /* restore selector registers. */
2770 *u.pb++ = 0x07; // pop es
2771 //
2772 *u.pb++ = 0x1f; // pop ds
2773
2774 *u.pb++ = 0x0f; // pop fs
2775 *u.pb++ = 0xa1;
2776
2777 uNotNested = u; // NotNested:
2778 *u.pb++ = 0xcf; // iretd
2779
2780 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2781 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2782 *u.pb++ = 0x33; // xor eax, eax
2783 *u.pb++ = 0xc0;
2784
2785 *u.pb++ = 0x48; // dec eax
2786
2787 *u.pb++ = 0xc3; // ret
2788
2789 /* Fixup the VMMR0Entry call. */
2790 if (pDevExt->pvVMMR0)
2791 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2792 else
2793 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2794
2795 /* Fixup the forward / nested far jump. */
2796 if (!pPatch->SavedIdt.u5Type2)
2797 {
2798 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2799 *uFixJmpNotNested.pu16++ = ASMGetCS();
2800 }
2801 else
2802 {
2803 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2804 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2805 }
2806#endif /* RT_ARCH_X86 */
2807 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2808#if 0
2809 /* dump the patch code */
2810 Log2(("patch code: %p\n", &pPatch->auCode[0]));
2811 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2812 Log2(("0x%02x,\n", *uFixCall.pb));
2813#endif
2814 }
2815
2816 /*
2817 * Install the patch.
2818 */
2819 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2820 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));
2821
2822 /*
2823 * Link in the patch.
2824 */
2825 pPatch->pNext = pDevExt->pIdtPatches;
2826 pDevExt->pIdtPatches = pPatch;
2827
2828 return pPatch;
2829}
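
/*
 * Editor's note (not part of the original file): roughly, the AMD64 patch
 * code emitted above corresponds to this assembly, reconstructed from the
 * byte writes (labels are informal):
 *
 *          cmp     eax, GLOBALCOOKIE           ; 3d imm32
 *          jz      .VBoxCall
 *          jmp     short .Forward              ; on to the original handler
 * .VBoxCall:
 *          swapgs
 *          push    reg x4                      ; alignment filler + pVM/uOperation/uArg
 *          call    qword [pfnVMMR0EntryInt wrt rip]
 *          add     rsp, 20h                    ; drop the call frame
 *          swapgs
 * .NotNested:
 *          iretq
 * .Forward:
 *          push    <target cs>
 *          push    <low 32 bits of target rip>
 *          mov     dword [rsp + 4], <high 32 bits of target rip>
 *          retf                                ; far return to the saved handler / .NotNested
 */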
2830
2831
2832/**
2833 * Removes the session's IDT references.
2834 * This will uninstall our IDT patch if it is left unreferenced.
2835 *
2836 * @returns VINF_SUCCESS.
2837 * @param pDevExt Device globals.
2838 * @param pSession Session data.
2839 */
2840static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2841{
2842 PSUPDRVPATCHUSAGE pUsage;
2843 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2844 LogFlow(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2845
2846 /*
2847 * Take the spinlock.
2848 */
2849 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2850
2851 /*
2852 * Walk usage list, removing patches as their usage count reaches zero.
2853 */
2854 pUsage = pSession->pPatchUsage;
2855 while (pUsage)
2856 {
2857 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2858 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2859 else
2860 pUsage->pPatch->cUsage -= pUsage->cUsage;
2861
2862 /* next */
2863 pUsage = pUsage->pNext;
2864 }
2865
2866 /*
2867 * Empty the usage chain and we're done inside the spinlock.
2868 */
2869 pUsage = pSession->pPatchUsage;
2870 pSession->pPatchUsage = NULL;
2871
2872 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2873
2874 /*
2875 * Free usage entries.
2876 */
2877 while (pUsage)
2878 {
2879 void *pvToFree = pUsage;
2880 pUsage->cUsage = 0;
2881 pUsage->pPatch = NULL;
2882 pUsage = pUsage->pNext;
2883 RTMemFree(pvToFree);
2884 }
2885
2886 return VINF_SUCCESS;
2887}
2888
2889
2890/**
2891 * Remove one patch.
2892 *
2893 * Worker for supdrvIOCtl_IdtRemoveAll.
2894 *
2895 * @param pDevExt Device globals.
2896 * @param pPatch Patch entry to remove.
2897 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2898 */
2899static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2900{
2901 LogFlow(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2902
2903 pPatch->cUsage = 0;
2904
2905 /*
2906 * If the IDT entry was changed it has to kick around forever!
2907 * We will attempt to free it again; perhaps next time we'll succeed :-)
2908 */
2909 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2910 {
2911 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2912 return;
2913 }
2914
2915 /*
2916 * Unlink it.
2917 */
2918 if (pDevExt->pIdtPatches != pPatch)
2919 {
2920 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2921 while (pPatchPrev)
2922 {
2923 if (pPatchPrev->pNext == pPatch)
2924 {
2925 pPatchPrev->pNext = pPatch->pNext;
2926 break;
2927 }
2928 pPatchPrev = pPatchPrev->pNext;
2929 }
2930 Assert(pPatchPrev); /* we must have found and unlinked the patch */
2931 }
2932 else
2933 pDevExt->pIdtPatches = pPatch->pNext;
2934 pPatch->pNext = NULL;
2935
2936
2937 /*
2938 * Verify and restore the IDT.
2939 */
2940 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2941 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2942 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2943
2944 /*
2945 * Put it in the free list.
2946 * (This free list stuff is to calm my paranoia.)
2947 */
2948 pPatch->pvIdt = NULL;
2949 pPatch->pIdtEntry = NULL;
2950
2951 pPatch->pNext = pDevExt->pIdtPatchesFree;
2952 pDevExt->pIdtPatchesFree = pPatch;
2953}
2954
2955
2956/**
2957 * Write to an IDT entry.
2958 *
2959 * @param pvIdtEntry Where to write.
2960 * @param pNewIDTEntry What to write.
2961 */
2962static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
2963{
2964 RTUINTREG uCR0;
2965 RTUINTREG uFlags;
2966
2967 /*
2968 * On SMP machines (P4 hyperthreading included) we must perform a
2969 * 64-bit locked write when updating the IDT entry.
2970 *
2971 * The F00F bugfix for Linux (and probably other OSes) causes
2972 * the IDT to be pointing to a read-only mapping. We get around that
2973 * by temporarily turning off WP. Since we're inside a spinlock at this
2974 * point, interrupts are disabled and there isn't any way the WP bit
2975 * flipping can cause any trouble.
2976 */
2977
2978 /* Save & Clear interrupt flag; Save & clear WP. */
2979 uFlags = ASMGetFlags();
2980 ASMSetFlags(uFlags & ~(RTUINTREG)(1 << 9)); /*X86_EFL_IF*/
2981 Assert(!(ASMGetFlags() & (1 << 9)));
2982 uCR0 = ASMGetCR0();
2983 ASMSetCR0(uCR0 & ~(RTUINTREG)(1 << 16)); /*X86_CR0_WP*/
2984
2985 /* Update IDT Entry */
2986#ifdef RT_ARCH_AMD64
2987 ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
2988#else
2989 ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
2990#endif
2991
2992 /* Restore CR0 & Flags */
2993 ASMSetCR0(uCR0);
2994 ASMSetFlags(uFlags);
2995}
2996#endif /* VBOX_WITH_IDT_PATCHING */
2997
2998
2999/**
3000 * Opens an image. If it's the first time it's opened the caller must upload
3001 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
3002 *
3003 * This is the 1st step of the loading.
3004 *
3005 * @returns IPRT status code.
3006 * @param pDevExt Device globals.
3007 * @param pSession Session data.
3008 * @param pReq The open request.
3009 */
3010static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
3011{
3012 PSUPDRVLDRIMAGE pImage;
3013 unsigned cb;
3014 void *pv;
3015 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
3016
3017 /*
3018 * Check if we got an instance of the image already.
3019 */
3020 RTSemFastMutexRequest(pDevExt->mtxLdr);
3021 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
3022 {
3023 if (!strcmp(pImage->szName, pReq->u.In.szName))
3024 {
3025 pImage->cUsage++;
3026 pReq->u.Out.pvImageBase = pImage->pvImage;
3027 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
3028 supdrvLdrAddUsage(pSession, pImage);
3029 RTSemFastMutexRelease(pDevExt->mtxLdr);
3030 return VINF_SUCCESS;
3031 }
3032 }
3033 /* (not found - add it!) */
3034
3035 /*
3036 * Allocate memory.
3037 */
3038 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
3039 pv = RTMemExecAlloc(cb);
3040 if (!pv)
3041 {
3042 RTSemFastMutexRelease(pDevExt->mtxLdr);
3043 Log(("supdrvIOCtl_LdrOpen: RTMemExecAlloc(%u) failed\n", cb));
3044 return VERR_NO_MEMORY;
3045 }
3046
3047 /*
3048 * Setup and link in the LDR stuff.
3049 */
3050 pImage = (PSUPDRVLDRIMAGE)pv;
3051 pImage->pvImage = RT_ALIGN_P(pImage + 1, 32);
3052 pImage->cbImage = pReq->u.In.cbImage;
3053 pImage->pfnModuleInit = NULL;
3054 pImage->pfnModuleTerm = NULL;
3055 pImage->uState = SUP_IOCTL_LDR_OPEN;
3056 pImage->cUsage = 1;
3057 strcpy(pImage->szName, pReq->u.In.szName);
3058
3059 pImage->pNext = pDevExt->pLdrImages;
3060 pDevExt->pLdrImages = pImage;
3061
3062 supdrvLdrAddUsage(pSession, pImage);
3063
3064 pReq->u.Out.pvImageBase = pImage->pvImage;
3065 pReq->u.Out.fNeedsLoading = true;
3066 RTSemFastMutexRelease(pDevExt->mtxLdr);
3067 return VINF_SUCCESS;
3068}
3069
3070
3071/**
3072 * Loads the image bits.
3073 *
3074 * This is the 2nd step of the loading.
3075 *
3076 * @returns IPRT status code.
3077 * @param pDevExt Device globals.
3078 * @param pSession Session data.
3079 * @param pReq The request.
3080 */
3081static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
3082{
3083 PSUPDRVLDRUSAGE pUsage;
3084 PSUPDRVLDRIMAGE pImage;
3085 int rc;
3086 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));
3087
3088 /*
3089 * Find the ldr image.
3090 */
3091 RTSemFastMutexRequest(pDevExt->mtxLdr);
3092 pUsage = pSession->pLdrUsage;
3093 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3094 pUsage = pUsage->pNext;
3095 if (!pUsage)
3096 {
3097 RTSemFastMutexRelease(pDevExt->mtxLdr);
3098 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
3099 return VERR_INVALID_HANDLE;
3100 }
3101 pImage = pUsage->pImage;
3102 if (pImage->cbImage != pReq->u.In.cbImage)
3103 {
3104 RTSemFastMutexRelease(pDevExt->mtxLdr);
3105 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
3106 return VERR_INVALID_HANDLE;
3107 }
3108 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
3109 {
3110 unsigned uState = pImage->uState;
3111 RTSemFastMutexRelease(pDevExt->mtxLdr);
3112 if (uState != SUP_IOCTL_LDR_LOAD)
3113 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
3114 return SUPDRV_ERR_ALREADY_LOADED;
3115 }
3116 switch (pReq->u.In.eEPType)
3117 {
3118 case SUPLDRLOADEP_NOTHING:
3119 break;
3120 case SUPLDRLOADEP_VMMR0:
3121 if ( !pReq->u.In.EP.VMMR0.pvVMMR0
3122 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
3123 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
3124 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
3125 {
3126 RTSemFastMutexRelease(pDevExt->mtxLdr);
3127 Log(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
3128 pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3129 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3130 return VERR_INVALID_PARAMETER;
3131 }
3132 if ( (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3133 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3134 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3135 {
3136 RTSemFastMutexRelease(pDevExt->mtxLdr);
3137 Log(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p!\n",
3138 pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3139 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3140 return VERR_INVALID_PARAMETER;
3141 }
3142 break;
3143 default:
3144 RTSemFastMutexRelease(pDevExt->mtxLdr);
3145 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
3146 return VERR_INVALID_PARAMETER;
3147 }
3148 if ( pReq->u.In.pfnModuleInit
3149 && (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3150 {
3151 RTSemFastMutexRelease(pDevExt->mtxLdr);
3152 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
3153 pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
3154 return VERR_INVALID_PARAMETER;
3155 }
3156 if ( pReq->u.In.pfnModuleTerm
3157 && (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3158 {
3159 RTSemFastMutexRelease(pDevExt->mtxLdr);
3160 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
3161 pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
3162 return VERR_INVALID_PARAMETER;
3163 }
3164
3165 /*
3166 * Copy the memory.
3167 */
3168 /* no need to do try/except as this is a buffered request. */
3169 memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
3170 pImage->uState = SUP_IOCTL_LDR_LOAD;
3171 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
3172 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
3173 pImage->offSymbols = pReq->u.In.offSymbols;
3174 pImage->cSymbols = pReq->u.In.cSymbols;
3175 pImage->offStrTab = pReq->u.In.offStrTab;
3176 pImage->cbStrTab = pReq->u.In.cbStrTab;
3177
3178 /*
3179 * Update any entry points.
3180 */
3181 switch (pReq->u.In.eEPType)
3182 {
3183 default:
3184 case SUPLDRLOADEP_NOTHING:
3185 rc = VINF_SUCCESS;
3186 break;
3187 case SUPLDRLOADEP_VMMR0:
3188 rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3189 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
3190 break;
3191 }
3192
3193 /*
3194 * On success call the module initialization.
3195 */
3196 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
3197 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
3198 {
3199 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
3200 rc = pImage->pfnModuleInit();
3201 if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
3202 supdrvLdrUnsetR0EP(pDevExt);
3203 }
3204
3205 if (rc)
3206 pImage->uState = SUP_IOCTL_LDR_OPEN;
3207
3208 RTSemFastMutexRelease(pDevExt->mtxLdr);
3209 return rc;
3210}
3211
3212
3213/**
3214 * Frees a previously loaded (prep'ed) image.
3215 *
3216 * @returns IPRT status code.
3217 * @param pDevExt Device globals.
3218 * @param pSession Session data.
3219 * @param pReq The request.
3220 */
3221static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
3222{
3223 int rc;
3224 PSUPDRVLDRUSAGE pUsagePrev;
3225 PSUPDRVLDRUSAGE pUsage;
3226 PSUPDRVLDRIMAGE pImage;
3227 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
3228
3229 /*
3230 * Find the ldr image.
3231 */
3232 RTSemFastMutexRequest(pDevExt->mtxLdr);
3233 pUsagePrev = NULL;
3234 pUsage = pSession->pLdrUsage;
3235 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3236 {
3237 pUsagePrev = pUsage;
3238 pUsage = pUsage->pNext;
3239 }
3240 if (!pUsage)
3241 {
3242 RTSemFastMutexRelease(pDevExt->mtxLdr);
3243 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
3244 return VERR_INVALID_HANDLE;
3245 }
3246
3247 /*
3248 * Check if we can remove anything.
3249 */
3250 rc = VINF_SUCCESS;
3251 pImage = pUsage->pImage;
3252 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
3253 {
3254 /*
3255 * Check if there are any objects with destructors in the image, if
3256 * so leave it for the session cleanup routine so we get a chance to
3257 * clean things up in the right order and not leave them all dangling.
3258 */
3259 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3260 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3261 if (pImage->cUsage <= 1)
3262 {
3263 PSUPDRVOBJ pObj;
3264 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3265 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3266 {
3267 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3268 break;
3269 }
3270 }
3271 else
3272 {
3273 PSUPDRVUSAGE pGenUsage;
3274 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
3275 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3276 {
3277 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3278 break;
3279 }
3280 }
3281 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3282 if (rc == VINF_SUCCESS)
3283 {
3284 /* unlink it */
3285 if (pUsagePrev)
3286 pUsagePrev->pNext = pUsage->pNext;
3287 else
3288 pSession->pLdrUsage = pUsage->pNext;
3289
3290 /* free it */
3291 pUsage->pImage = NULL;
3292 pUsage->pNext = NULL;
3293 RTMemFree(pUsage);
3294
3295 /*
3296 * Dereference the image.
3297 */
3298 if (pImage->cUsage <= 1)
3299 supdrvLdrFree(pDevExt, pImage);
3300 else
3301 pImage->cUsage--;
3302 }
3303 else
3304 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
3305 }
3306 else
3307 {
3308 /*
3309 * Dereference both image and usage.
3310 */
3311 pImage->cUsage--;
3312 pUsage->cUsage--;
3313 }
3314
3315 RTSemFastMutexRelease(pDevExt->mtxLdr);
3316 return rc; /* may be VERR_SHARING_VIOLATION when dangling objects block the free */
3317}
3318
3319
3320/**
3321 * Gets the address of a symbol in an open image.
3322 *
3323 * @returns VINF_SUCCESS on success.
3324 * @returns VERR_* on failure.
3325 * @param pDevExt Device globals.
3326 * @param pSession Session data.
3327 * @param pReq The request buffer.
3328 */
3329static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3330{
3331 PSUPDRVLDRIMAGE pImage;
3332 PSUPDRVLDRUSAGE pUsage;
3333 uint32_t i;
3334 PSUPLDRSYM paSyms;
3335 const char *pchStrings;
3336 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3337 void *pvSymbol = NULL;
3338 int rc = VERR_GENERAL_FAILURE;
3339 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3340
3341 /*
3342 * Find the ldr image.
3343 */
3344 RTSemFastMutexRequest(pDevExt->mtxLdr);
3345 pUsage = pSession->pLdrUsage;
3346 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3347 pUsage = pUsage->pNext;
3348 if (!pUsage)
3349 {
3350 RTSemFastMutexRelease(pDevExt->mtxLdr);
3351 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3352 return VERR_INVALID_HANDLE;
3353 }
3354 pImage = pUsage->pImage;
3355 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3356 {
3357 unsigned uState = pImage->uState;
3358 RTSemFastMutexRelease(pDevExt->mtxLdr);
3359 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3360 return VERR_ALREADY_LOADED;
3361 }
3362
3363 /*
3364 * Search the symbol string.
3365 */
3366 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3367 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3368 for (i = 0; i < pImage->cSymbols; i++)
3369 {
3370 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3371 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3372 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3373 {
3374 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3375 rc = VINF_SUCCESS;
3376 break;
3377 }
3378 }
3379 RTSemFastMutexRelease(pDevExt->mtxLdr);
3380 pReq->u.Out.pvSymbol = pvSymbol;
3381 return rc;
3382}
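
/*
 * Editor's note (not part of the original file): the ring-3 loader protocol
 * served by the four ioctl workers above, written as the sequence of requests
 * a client issues. Request packing and the ioctl transport are omitted; the
 * field names follow the SUPLDR* request structures used above.
 *
 *   1. SUP_IOCTL_LDR_OPEN        -> supdrvIOCtl_LdrOpen()
 *      in:  szName, cbImage       out: pvImageBase, fNeedsLoading
 *   2. if (fNeedsLoading) SUP_IOCTL_LDR_LOAD -> supdrvIOCtl_LdrLoad()
 *      in:  achImage, entry points, symbol and string tables
 *   3. SUP_IOCTL_LDR_GET_SYMBOL  -> supdrvIOCtl_LdrGetSymbol()
 *      in:  pvImageBase, szSymbol out: pvSymbol
 *   4. SUP_IOCTL_LDR_FREE        -> supdrvIOCtl_LdrFree()
 *      in:  pvImageBase
 */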
3383
3384
3385/**
3386 * Updates the IDT patches to point to the specified VMM R0 entry
3387 * point (i.e. VMMR0Enter()).
3388 *
3389 * @returns IPRT status code.
3390 * @param pDevExt Device globals.
3391 * @param pSession Session data.
3392 * @param pVMMR0 VMMR0 image handle.
3393 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3394 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3395 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3396 * @remark Caller must own the loader mutex.
3397 */
3398static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3399{
3400 int rc = VINF_SUCCESS;
3401 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3402
3403
3404 /*
3405 * Check if not yet set.
3406 */
3407 if (!pDevExt->pvVMMR0)
3408 {
3409#ifdef VBOX_WITH_IDT_PATCHING
3410 PSUPDRVPATCH pPatch;
3411#endif
3412
3413 /*
3414 * Set it and update IDT patch code.
3415 */
3416 pDevExt->pvVMMR0 = pvVMMR0;
3417 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3418 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3419 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3420#ifdef VBOX_WITH_IDT_PATCHING
3421 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3422 {
3423# ifdef RT_ARCH_AMD64
3424 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3425# else /* RT_ARCH_X86 */
3426 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3427 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3428# endif
3429 }
3430#endif /* VBOX_WITH_IDT_PATCHING */
3431 }
3432 else
3433 {
3434 /*
3435 * Return failure or success depending on whether the values match or not.
3436 */
3437 if ( pDevExt->pvVMMR0 != pvVMMR0
3438 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3439 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3440 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3441 {
3442 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3443 rc = VERR_INVALID_PARAMETER;
3444 }
3445 }
3446 return rc;
3447}
3448
3449
3450/**
3451 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3452 *
3453 * @param pDevExt Device globals.
3454 */
3455static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3456{
3457#ifdef VBOX_WITH_IDT_PATCHING
3458 PSUPDRVPATCH pPatch;
3459#endif
3460
3461 pDevExt->pvVMMR0 = NULL;
3462 pDevExt->pfnVMMR0EntryInt = NULL;
3463 pDevExt->pfnVMMR0EntryFast = NULL;
3464 pDevExt->pfnVMMR0EntryEx = NULL;
3465
3466#ifdef VBOX_WITH_IDT_PATCHING
3467 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3468 {
3469# ifdef RT_ARCH_AMD64
3470 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3471 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3472# else /* RT_ARCH_X86 */
3473 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3474 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3475# endif
3476 }
3477#endif /* VBOX_WITH_IDT_PATCHING */
3478}
3479
3480
3481/**
3482 * Adds a usage reference in the specified session of an image.
3483 *
3484 * @param pSession Session in question.
3485 * @param pImage Image which the session is using.
3486 */
3487static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3488{
3489 PSUPDRVLDRUSAGE pUsage;
3490 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3491
3492 /*
3493 * Referenced it already?
3494 */
3495 pUsage = pSession->pLdrUsage;
3496 while (pUsage)
3497 {
3498 if (pUsage->pImage == pImage)
3499 {
3500 pUsage->cUsage++;
3501 return;
3502 }
3503 pUsage = pUsage->pNext;
3504 }
3505
3506 /*
3507 * Allocate new usage record.
3508 */
3509 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3510 Assert(pUsage);
3511 if (pUsage)
3512 {
3513 pUsage->cUsage = 1;
3514 pUsage->pImage = pImage;
3515 pUsage->pNext = pSession->pLdrUsage;
3516 pSession->pLdrUsage = pUsage;
3517 }
3518 /* ignore errors... */
3519}
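
/*
 * Illustration only (not part of the original source): the matching release
 * would walk the same singly linked list, decrement cUsage and unlink the
 * record once it hits zero. A minimal sketch; the helper name is
 * hypothetical.
 */
#if 0 /* example sketch */
static void supdrvLdrReleaseUsageExample(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
{
    PSUPDRVLDRUSAGE pUsagePrev = NULL;
    PSUPDRVLDRUSAGE pUsage     = pSession->pLdrUsage;
    while (pUsage)
    {
        if (pUsage->pImage == pImage)
        {
            if (--pUsage->cUsage > 0)
                return;
            /* Unlink and free the now unused record. */
            if (pUsagePrev)
                pUsagePrev->pNext = pUsage->pNext;
            else
                pSession->pLdrUsage = pUsage->pNext;
            RTMemFree(pUsage);
            return;
        }
        pUsagePrev = pUsage;
        pUsage = pUsage->pNext;
    }
}
#endif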
3520
3521
3522/**
3523 * Frees a load image.
3524 *
3525 * @param pDevExt Pointer to device extension.
3526 * @param pImage Pointer to the image we're going to free.
3527 * This image must exist!
3528 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3529 */
3530static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3531{
3532 PSUPDRVLDRIMAGE pImagePrev;
3533 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
3534
3535 /* Find it - argh, we should've used a doubly linked list. */
3536 Assert(pDevExt->pLdrImages);
3537 pImagePrev = NULL;
3538 if (pDevExt->pLdrImages != pImage)
3539 {
3540 pImagePrev = pDevExt->pLdrImages;
3541 while (pImagePrev->pNext != pImage)
3542 pImagePrev = pImagePrev->pNext;
3543 Assert(pImagePrev->pNext == pImage);
3544 }
3545
3546 /* unlink */
3547 if (pImagePrev)
3548 pImagePrev->pNext = pImage->pNext;
3549 else
3550 pDevExt->pLdrImages = pImage->pNext;
3551
3552 /* Check if this is VMMR0.r0 and fix the IDT patches if it is. */
3553 if (pDevExt->pvVMMR0 == pImage->pvImage)
3554 supdrvLdrUnsetR0EP(pDevExt);
3555
3556 /* check for objects with destructors in this image. (Shouldn't happen.) */
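    /* Note: the unsigned subtraction below lets a single compare do both
       bounds checks; the difference is only < cbImage when pfnDestructor
       points into [pvImage, pvImage + cbImage). */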
3557 if (pDevExt->pObjs)
3558 {
3559 unsigned cObjs = 0;
3560 PSUPDRVOBJ pObj;
3561 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3562 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3563 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3564 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3565 {
3566 pObj->pfnDestructor = NULL;
3567 cObjs++;
3568 }
3569 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3570 if (cObjs)
3571 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3572 }
3573
3574 /* call termination function if fully loaded. */
3575 if ( pImage->pfnModuleTerm
3576 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3577 {
3578 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3579 pImage->pfnModuleTerm();
3580 }
3581
3582 /* free the image */
3583 pImage->cUsage = 0;
3584 pImage->pNext = 0;
3585 pImage->uState = SUP_IOCTL_LDR_FREE;
3586 RTMemExecFree(pImage);
3587}
3588
3589
3590/**
3591 * Gets the current paging mode of the CPU and returns it.
3592 */
3593static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3594{
3595 SUPPAGINGMODE enmMode;
3596
3597 RTUINTREG cr0 = ASMGetCR0();
3598 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3599 enmMode = SUPPAGINGMODE_INVALID;
3600 else
3601 {
3602 RTUINTREG cr4 = ASMGetCR4();
3603 uint32_t fNXEPlusLMA = 0;
3604 if (cr4 & X86_CR4_PAE)
3605 {
3606 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3607 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3608 {
3609 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3610 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3611 fNXEPlusLMA |= RT_BIT(0);
3612 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3613 fNXEPlusLMA |= RT_BIT(1);
3614 }
3615 }
3616
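    /* fNXEPlusLMA: bit 0 = NXE enabled (no-execute), bit 1 = long mode
       active (LMA); merged with the CR4.PAE and CR4.PGE bits for the
       mode dispatch below. */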
3617 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3618 {
3619 case 0:
3620 enmMode = SUPPAGINGMODE_32_BIT;
3621 break;
3622
3623 case X86_CR4_PGE:
3624 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3625 break;
3626
3627 case X86_CR4_PAE:
3628 enmMode = SUPPAGINGMODE_PAE;
3629 break;
3630
3631 case X86_CR4_PAE | RT_BIT(0):
3632 enmMode = SUPPAGINGMODE_PAE_NX;
3633 break;
3634
3635 case X86_CR4_PAE | X86_CR4_PGE:
3636 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3637 break;
3638
3639 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3640 enmMode = SUPPAGINGMODE_PAE_GLOBAL_NX;
3641 break;
3642
3643 case RT_BIT(1) | X86_CR4_PAE:
3644 enmMode = SUPPAGINGMODE_AMD64;
3645 break;
3646
3647 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3648 enmMode = SUPPAGINGMODE_AMD64_NX;
3649 break;
3650
3651 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3652 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3653 break;
3654
3655 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3656 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3657 break;
3658
3659 default:
3660 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3661 enmMode = SUPPAGINGMODE_INVALID;
3662 break;
3663 }
3664 }
3665 return enmMode;
3666}
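
/*
 * Illustration only (not part of the original source): a hypothetical debug
 * helper mapping the SUPPAGINGMODE values decoded above to strings, e.g.
 * for OSDBGPRINT output.
 */
#if 0 /* example sketch */
static const char *supdrvPagingModeNameExample(SUPPAGINGMODE enmMode)
{
    switch (enmMode)
    {
        case SUPPAGINGMODE_32_BIT:          return "32-bit";
        case SUPPAGINGMODE_32_BIT_GLOBAL:   return "32-bit global";
        case SUPPAGINGMODE_PAE:             return "PAE";
        case SUPPAGINGMODE_PAE_NX:          return "PAE NX";
        case SUPPAGINGMODE_PAE_GLOBAL:      return "PAE global";
        case SUPPAGINGMODE_PAE_GLOBAL_NX:   return "PAE global NX";
        case SUPPAGINGMODE_AMD64:           return "AMD64";
        case SUPPAGINGMODE_AMD64_NX:        return "AMD64 NX";
        case SUPPAGINGMODE_AMD64_GLOBAL:    return "AMD64 global";
        case SUPPAGINGMODE_AMD64_GLOBAL_NX: return "AMD64 global NX";
        default:                            return "invalid";
    }
}
#endif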
3667
3668
3669#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
3670/**
3671 * Creates the GIP.
3672 *
3673 * @returns IPRT status code.
3674 * @param pDevExt Instance data. GIP stuff may be updated.
3675 */
3676static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
3677{
3678 PSUPGLOBALINFOPAGE pGip;
3679 RTHCPHYS HCPhysGip;
3680 uint32_t u32SystemResolution;
3681 uint32_t u32Interval;
3682 int rc;
3683
3684 LogFlow(("supdrvGipCreate:\n"));
3685
3686 /* assert order */
3687 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
3688 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
3689 Assert(!pDevExt->pGipTimer);
3690
3691 /*
3692 * Allocate a suitable page with a default kernel mapping.
3693 */
3694 rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
3695 if (RT_FAILURE(rc))
3696 {
3697 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
3698 return rc;
3699 }
3700 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
3701 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
3702
3703 /*
3704 * Try to bump up the system timer resolution.
3705 * The more interrupts the better...
3706 */
3707 if ( RT_SUCCESS(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3708 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3709 || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /* 256 HZ */, &u32SystemResolution))
3710 || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /* 250 HZ */, &u32SystemResolution))
3711 || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /* 128 HZ */, &u32SystemResolution))
3712 || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /* 100 HZ */, &u32SystemResolution))
3713 || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /* 64 HZ */, &u32SystemResolution))
3714 || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /* 32 HZ */, &u32SystemResolution))
3715 )
3716 {
3717 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3718 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3719 }
3720
3721 /*
3722 * Find a reasonable update interval, something close to 10ms would be nice,
3723 * and create a recurring timer.
3724 */
3725 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
3726 while (u32Interval < 10000000 /* 10 ms */)
3727 u32Interval += u32SystemResolution;
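    /* E.g. with a granularity of 4000000 ns (250 Hz) this rounds the
       interval up to 12000000 ns, i.e. roughly 83 updates per second. */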
3728
3729 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipTimer, pDevExt);
3730 if (RT_FAILURE(rc))
3731 {
3732 OSDBGPRINT(("supdrvGipCreate: failed to create the GIP timer at %RU32 ns interval. rc=%d\n", u32Interval, rc));
3733 Assert(!pDevExt->pGipTimer);
3734 supdrvGipDestroy(pDevExt);
3735 return rc;
3736 }
3737
3738 /*
3739 * We're good.
3740 */
3741 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);
3742 return VINF_SUCCESS;
3743}
3744
3745
3746/**
3747 * Terminates the GIP.
3748 *
3749 * @param pDevExt Instance data. GIP stuff may be updated.
3750 */
3751static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3752{
3753 int rc;
3754#ifdef DEBUG_DARWIN_GIP
3755 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3756 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3757 pDevExt->pGipTimer, pDevExt->GipMemObj));
3758#endif
3759
3760 /*
3761 * Invalidate the GIP data.
3762 */
3763 if (pDevExt->pGip)
3764 {
3765 supdrvGipTerm(pDevExt->pGip);
3766 pDevExt->pGip = NULL;
3767 }
3768
3769 /*
3770 * Destroy the timer and free the GIP memory object.
3771 */
3772 if (pDevExt->pGipTimer)
3773 {
3774 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3775 pDevExt->pGipTimer = NULL;
3776 }
3777
3778 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3779 {
3780 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3781 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3782 }
3783
3784 /*
3785 * Finally, release the system timer resolution request if one succeeded.
3786 */
3787 if (pDevExt->u32SystemTimerGranularityGrant)
3788 {
3789 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3790 pDevExt->u32SystemTimerGranularityGrant = 0;
3791 }
3792}
3793
3794
3795/**
3796 * Timer callback function.
3797 * @param pTimer The timer.
3798 * @param pvUser The device extension.
3799 */
3800static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser)
3801{
3802 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3803 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3804}
3805#endif /* USE_NEW_OS_INTERFACE_FOR_GIP */
3806
3807
3808/**
3809 * Initializes the GIP data.
3810 *
3811 * @returns IPRT status code.
3812 * @param pDevExt Pointer to the device instance data.
3813 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3814 * @param HCPhys The physical address of the GIP.
3815 * @param u64NanoTS The current nanosecond timestamp.
3816 * @param uUpdateHz The update frequency.
3817 */
3818int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3819{
3820 unsigned i;
3821#ifdef DEBUG_DARWIN_GIP
3822 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3823#else
3824 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3825#endif
3826
3827 /*
3828 * Initialize the structure.
3829 */
3830 memset(pGip, 0, PAGE_SIZE);
3831 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3832 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3833 pGip->u32Mode = supdrvGipDeterminTscMode();
3834 pGip->u32UpdateHz = uUpdateHz;
3835 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3836 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3837
3838 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3839 {
3840 pGip->aCPUs[i].u32TransactionId = 2;
3841 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3842 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3843
3844 /*
3845 * We don't know the following values until we've executed updates.
3846 * So, we'll just insert very high values.
3847 */
3848 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3849 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3850 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3851 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3852 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3853 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3854 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3855 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3856 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3857 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3858 }
3859
3860 /*
3861 * Link it to the device extension.
3862 */
3863 pDevExt->pGip = pGip;
3864 pDevExt->HCPhysGip = HCPhys;
3865 pDevExt->cGipUsers = 0;
3866
3867 return VINF_SUCCESS;
3868}
3869
3870
3871/**
3872 * Determine the GIP TSC mode.
3873 *
3874 * @returns The most suitable TSC mode.
3875 */
3876static SUPGIPMODE supdrvGipDeterminTscMode(void)
3877{
3878#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
3879 /*
3880 * The problem here is that AMD processors with power management features
3881 * may easily end up with different TSCs because the CPUs or even cores
3882 * on the same physical chip run at different frequencies to save power.
3883 *
3884 * It is rumoured that this will be corrected with Barcelona, and it is
3885 * expected that this will be indicated by the TscInvariant bit in
3886 * cpuid(0x80000007). So, the "difficult" bit here is to correctly
3887 * identify the older CPUs which don't do frequency scaling and
3888 * can be relied upon to have a somewhat uniform TSC across CPUs.
3889 */
3890 if (supdrvOSGetCPUCount() > 1)
3891 {
3892 uint32_t uEAX, uEBX, uECX, uEDX;
3893
3894 /* Permit the user to override. */
3895 if (supdrvOSGetForcedAsyncTscMode())
3896 return SUPGIPMODE_ASYNC_TSC;
3897
3898 /* Check for "AuthenticAMD" */
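    /* (Little-endian ASCII: uEBX = "Auth", uEDX = "enti", uECX = "cAMD".) */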
3899 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
3900 if (uEAX >= 1 && uEBX == 0x68747541 && uECX == 0x444d4163 && uEDX == 0x69746e65)
3901 {
3902 /* Check for APM support and that TscInvariant is cleared. */
3903 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
3904 if (uEAX >= 0x80000007)
3905 {
3906 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
3907 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
3908 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
3909 return SUPGIPMODE_ASYNC_TSC;
3910 }
3911 }
3912 }
3913#endif
3914 return SUPGIPMODE_SYNC_TSC;
3915}
3916
3917
3918/**
3919 * Invalidates the GIP data upon termination.
3920 *
3921 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3922 */
3923void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
3924{
3925 unsigned i;
3926 pGip->u32Magic = 0;
3927 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3928 {
3929 pGip->aCPUs[i].u64NanoTS = 0;
3930 pGip->aCPUs[i].u64TSC = 0;
3931 pGip->aCPUs[i].iTSCHistoryHead = 0;
3932 }
3933}
3934
3935
3936/**
3937 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
3938 * updates all the per cpu data except the transaction id.
3939 *
3940 * @param pGip The GIP.
3941 * @param pGipCpu Pointer to the per cpu data.
3942 * @param u64NanoTS The current time stamp.
3943 */
3944static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3945{
3946 uint64_t u64TSC;
3947 uint64_t u64TSCDelta;
3948 uint32_t u32UpdateIntervalTSC;
3949 uint32_t u32UpdateIntervalTSCSlack;
3950 unsigned iTSCHistoryHead;
3951 uint64_t u64CpuHz;
3952
3953 /*
3954 * Update the NanoTS.
3955 */
3956 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
3957
3958 /*
3959 * Calc TSC delta.
3960 */
3961 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
3962 u64TSC = ASMReadTSC();
3963 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
3964 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
3965
3966 if (u64TSCDelta >> 32)
3967 {
3968 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
3969 pGipCpu->cErrors++;
3970 }
3971
3972 /*
3973 * TSC History.
3974 */
3975 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
3976
3977 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
3978 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
3979 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
3980
3981 /*
3982 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
3983 */
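    /* At >= 1000 Hz the full 8 entry history is averaged (two four entry
       averages which are then averaged again); at >= 90 Hz the current
       delta is averaged with the previous one; below that the raw delta is
       used as is. The slack term added below (>> 14, >> 7 or >> 6 of the
       interval) gives readers a little headroom, presumably to avoid
       underestimating the interval on jittery timers. */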
3984 if (pGip->u32UpdateHz >= 1000)
3985 {
3986 uint32_t u32;
3987 u32 = pGipCpu->au32TSCHistory[0];
3988 u32 += pGipCpu->au32TSCHistory[1];
3989 u32 += pGipCpu->au32TSCHistory[2];
3990 u32 += pGipCpu->au32TSCHistory[3];
3991 u32 >>= 2;
3992 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
3993 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
3994 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
3995 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
3996 u32UpdateIntervalTSC >>= 2;
3997 u32UpdateIntervalTSC += u32;
3998 u32UpdateIntervalTSC >>= 1;
3999
4000 /* Value chosen for a 2GHz Athlon64 running Linux 2.6.10/11. */
4001 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
4002 }
4003 else if (pGip->u32UpdateHz >= 90)
4004 {
4005 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4006 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
4007 u32UpdateIntervalTSC >>= 1;
4008
4009 /* Value chosen on a 2GHz ThinkPad running Windows. */
4010 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
4011 }
4012 else
4013 {
4014 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4015
4016 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers. :-) */
4017 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
4018 }
4019 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
4020
4021 /*
4022 * CpuHz.
4023 */
4024 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
4025 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
4026}
4027
4028
4029/**
4030 * Updates the GIP.
4031 *
4032 * @param pGip Pointer to the GIP.
4033 * @param u64NanoTS The current nanosecond timestamp.
4034 */
4035void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
4036{
4037 /*
4038 * Determine the relevant CPU data.
4039 */
4040 PSUPGIPCPU pGipCpu;
4041 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
4042 pGipCpu = &pGip->aCPUs[0];
4043 else
4044 {
4045 unsigned iCpu = ASMGetApicId();
4046 if (RT_UNLIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
4047 return;
4048 pGipCpu = &pGip->aCPUs[iCpu];
4049 }
4050
4051 /*
4052 * Start update transaction.
4053 */
4054 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4055 {
4056 /* This can happen on win32 if we're taking too long and there are more CPUs around. Shouldn't happen though. */
4057 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4058 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4059 pGipCpu->cErrors++;
4060 return;
4061 }
4062
4063 /*
4064 * Recalc the update frequency every 0x800th time.
4065 */
4066 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
4067 {
4068 if (pGip->u64NanoTSLastUpdateHz)
4069 {
4070#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4071 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4072 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4073 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4074 {
4075 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4076 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4077 }
4078#endif
4079 }
4080 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4081 }
4082
4083 /*
4084 * Update the data.
4085 */
4086 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4087
4088 /*
4089 * Complete transaction.
4090 */
4091 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4092}
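
/*
 * Illustration only (not part of the original source): the even/odd
 * u32TransactionId above forms a sequence lock, so a GIP reader would
 * retry along these lines. A sketch ignoring the compiler/CPU memory
 * barriers a real reader must add; the helper name is hypothetical.
 */
#if 0 /* example sketch */
static uint64_t supReadGipCpuHzExample(PSUPGIPCPU pGipCpu)
{
    for (;;)
    {
        uint32_t u32TransactionId = pGipCpu->u32TransactionId;
        if (!(u32TransactionId & 1))                    /* even: no update in flight. */
        {
            uint64_t u64CpuHz = pGipCpu->u64CpuHz;
            if (pGipCpu->u32TransactionId == u32TransactionId)
                return u64CpuHz;                        /* unchanged, the read is consistent. */
        }
        /* An update was in progress or intervened; retry. */
    }
}
#endif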
4093
4094
4095/**
4096 * Updates the per cpu GIP data for the calling cpu.
4097 *
4098 * @param pGip Pointer to the GIP.
4099 * @param u64NanoTS The current nanosecond timestamp.
4100 * @param iCpu The CPU index.
4101 */
4102void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4103{
4104 PSUPGIPCPU pGipCpu;
4105
4106 if (RT_LIKELY(iCpu < RT_ELEMENTS(pGip->aCPUs)))
4107 {
4108 pGipCpu = &pGip->aCPUs[iCpu];
4109
4110 /*
4111 * Start update transaction.
4112 */
4113 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4114 {
4115 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4116 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4117 pGipCpu->cErrors++;
4118 return;
4119 }
4120
4121 /*
4122 * Update the data.
4123 */
4124 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4125
4126 /*
4127 * Complete transaction.
4128 */
4129 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4130 }
4131}
4132
4133
4134#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
4135/**
4136 * Stub function for non-debug builds.
4137 */
4138RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
4139{
4140 return NULL;
4141}
4142
/** Stub function for non-debug builds. */
4143 RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4144{
4145 return NULL;
4146}
4147
4148/**
4149 * Stub function for non-debug builds.
4150 */
4151RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4152{
4153 return 0;
4154}
4155
4156/**
4157 * Stub function for non-debug builds.
4158 */
4159RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4160{
4161}
4162
4163/**
4164 * Stub function for non-debug builds.
4165 */
4166RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4167{
4168}
4169
4170/**
4171 * Stub function for non-debug builds.
4172 */
4173RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4174{
4175}
4176
4177/**
4178 * Stub function for non-debug builds.
4179 */
4180RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4181{
4182}
4183
4184/**
4185 * Stub function for non-debug builds.
4186 */
4187RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4188{
4189}
4190#endif /* !DEBUG */
4191