VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDRVShared.c@ 5584

Last change on this file since 5584 was 5231, checked in by vboxsync, 17 years ago

Export the multiple release event semaphores.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 139.5 KB
Line 
1/* $Revision: 5231 $ */
2/** @file
3 * VirtualBox Support Driver - Shared code.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "SUPDRV.h"
23#ifndef PAGE_SHIFT
24# include <iprt/param.h>
25#endif
26#include <iprt/alloc.h>
27#include <iprt/semaphore.h>
28#include <iprt/spinlock.h>
29#include <iprt/thread.h>
30#include <iprt/process.h>
31#include <iprt/log.h>
32
33
34/*******************************************************************************
35* Defined Constants And Macros *
36*******************************************************************************/
/*
 * Local copies of a handful of x86.h constants.
 *
 * The real x86.h clashes with the linux headers, so the few definitions
 * needed here are duplicated.  Each one is #undef'ed first so that any
 * previous definition is replaced rather than redefined.
 */

/* Control register 0 bits. */
#undef  X86_CR0_PE
#define X86_CR0_PE                          BIT(0)
#undef  X86_CR0_PG
#define X86_CR0_PG                          BIT(31)

/* Control register 4 bits. */
#undef  X86_CR4_PAE
#define X86_CR4_PAE                         BIT(5)
#undef  X86_CR4_PGE
#define X86_CR4_PGE                         BIT(7)

/* AMD extended CPUID feature bits (EDX). */
#undef  X86_CPUID_AMD_FEATURE_EDX_NX
#define X86_CPUID_AMD_FEATURE_EDX_NX        BIT(20)
#undef  X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE BIT(29)

/* The EFER MSR number and the EFER bits used in this file. */
#undef  MSR_K6_EFER
#define MSR_K6_EFER                         0xc0000080
#undef  MSR_K6_EFER_LMA
#define MSR_K6_EFER_LMA                     BIT(10)
#undef  MSR_K6_EFER_NXE
#define MSR_K6_EFER_NXE                     BIT(11)
56
57
/**
 * How often the u32UpdateHz and u32UpdateIntervalNS GIP members are
 * recalculated.
 *
 * @remark  Must be a power of 2.
 */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800
61
/**
 * Validates a session pointer.
 *
 * The pointer must pass VALID_PTR() and the session it points to must carry
 * the session cookie value (BIRD_INV).
 *
 * @returns true/false accordingly.
 * @param   pSession    The session.  The argument is evaluated twice, so it
 *                      must not have side effects.  It is parenthesized at
 *                      every use so that expression arguments work.
 */
#define SUP_IS_SESSION_VALID(pSession)  \
    (   VALID_PTR(pSession) \
     && (pSession)->u32Cookie == BIRD_INV)
71
72
73/*******************************************************************************
74* Global Variables *
75*******************************************************************************/
76/**
77 * Array of the R0 SUP API.
78 */
79static SUPFUNC g_aFunctions[] =
80{
81 /* name function */
82 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
83 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
84 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
85 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
86 { "SUPR0LockMem", (void *)SUPR0LockMem },
87 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
88 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
89 { "SUPR0ContFree", (void *)SUPR0ContFree },
90 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
91 { "SUPR0LowFree", (void *)SUPR0LowFree },
92 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
93 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
94 { "SUPR0MemFree", (void *)SUPR0MemFree },
95 { "SUPR0PageAlloc", (void *)SUPR0PageAlloc },
96 { "SUPR0PageFree", (void *)SUPR0PageFree },
97 { "SUPR0Printf", (void *)SUPR0Printf },
98 { "RTMemAlloc", (void *)RTMemAlloc },
99 { "RTMemAllocZ", (void *)RTMemAllocZ },
100 { "RTMemFree", (void *)RTMemFree },
101 /*{ "RTMemDup", (void *)RTMemDup },*/
102 { "RTMemRealloc", (void *)RTMemRealloc },
103 { "RTR0MemObjAllocLow", (void *)RTR0MemObjAllocLow },
104 { "RTR0MemObjAllocPage", (void *)RTR0MemObjAllocPage },
105 { "RTR0MemObjAllocPhys", (void *)RTR0MemObjAllocPhys },
106 { "RTR0MemObjAllocPhysNC", (void *)RTR0MemObjAllocPhysNC },
107 { "RTR0MemObjLockUser", (void *)RTR0MemObjLockUser },
108 { "RTR0MemObjMapKernel", (void *)RTR0MemObjMapKernel },
109 { "RTR0MemObjMapUser", (void *)RTR0MemObjMapUser },
110 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
111 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
112 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
113 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
114 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
115 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
116/* These doesn't work yet on linux - use fast mutexes!
117 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
118 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
119 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
120 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
121*/
122 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
123 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
124 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
125 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
126 { "RTSemEventCreate", (void *)RTSemEventCreate },
127 { "RTSemEventSignal", (void *)RTSemEventSignal },
128 { "RTSemEventWait", (void *)RTSemEventWait },
129 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
130 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
131 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
132 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
133 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
134 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
135 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
136 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
137 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
138 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
139 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
140 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
141 { "RTSpinlockAcquireNoInts", (void *)RTSpinlockAcquireNoInts },
142 { "RTSpinlockReleaseNoInts", (void *)RTSpinlockReleaseNoInts },
143 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
144 { "RTThreadSleep", (void *)RTThreadSleep },
145 { "RTThreadYield", (void *)RTThreadYield },
146#if 0 /* Thread APIs, Part 2. */
147 { "RTThreadSelf", (void *)RTThreadSelf },
148 { "RTThreadCreate", (void *)RTThreadCreate },
149 { "RTThreadGetNative", (void *)RTThreadGetNative },
150 { "RTThreadWait", (void *)RTThreadWait },
151 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
152 { "RTThreadGetName", (void *)RTThreadGetName },
153 { "RTThreadSelfName", (void *)RTThreadSelfName },
154 { "RTThreadGetType", (void *)RTThreadGetType },
155 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
156 { "RTThreadUserReset", (void *)RTThreadUserReset },
157 { "RTThreadUserWait", (void *)RTThreadUserWait },
158 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
159#endif
160 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
161 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
162 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
163 { "RTLogLogger", (void *)RTLogLogger },
164 { "RTLogLoggerEx", (void *)RTLogLoggerEx },
165 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
166 { "RTLogPrintf", (void *)RTLogPrintf },
167 { "RTLogPrintfV", (void *)RTLogPrintfV },
168 { "AssertMsg1", (void *)AssertMsg1 },
169 { "AssertMsg2", (void *)AssertMsg2 },
170};
171
172
173/*******************************************************************************
174* Internal Functions *
175*******************************************************************************/
176static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
177static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
178#ifdef VBOX_WITH_IDT_PATCHING
179static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
180static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
181static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
182static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
183static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
184#endif /* VBOX_WITH_IDT_PATCHING */
185static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
186static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
187static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
188static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
189static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
190static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
191static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
192static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
193static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
194static SUPGIPMODE supdrvGipDeterminTscMode(void);
195#ifdef RT_OS_WINDOWS
196static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
197static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
198#endif
199#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
200static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
201static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
202static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser);
203#endif
204
205
206/**
207 * Initializes the device extentsion structure.
208 *
209 * @returns IPRT status code.
210 * @param pDevExt The device extension to initialize.
211 */
212int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
213{
214 /*
215 * Initialize it.
216 */
217 int rc;
218 memset(pDevExt, 0, sizeof(*pDevExt));
219 rc = RTSpinlockCreate(&pDevExt->Spinlock);
220 if (!rc)
221 {
222 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
223 if (!rc)
224 {
225 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
226 if (!rc)
227 {
228#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
229 rc = supdrvGipCreate(pDevExt);
230 if (RT_SUCCESS(rc))
231 {
232 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
233 return VINF_SUCCESS;
234 }
235#else
236 pDevExt->u32Cookie = BIRD;
237 return VINF_SUCCESS;
238#endif
239 }
240 RTSemFastMutexDestroy(pDevExt->mtxLdr);
241 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
242 }
243 RTSpinlockDestroy(pDevExt->Spinlock);
244 pDevExt->Spinlock = NIL_RTSPINLOCK;
245 }
246 return rc;
247}
248
249
250/**
251 * Delete the device extension (e.g. cleanup members).
252 *
253 * @param pDevExt The device extension to delete.
254 */
255void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
256{
257#ifdef VBOX_WITH_IDT_PATCHING
258 PSUPDRVPATCH pPatch;
259#endif
260 PSUPDRVOBJ pObj;
261 PSUPDRVUSAGE pUsage;
262
263 /*
264 * Kill mutexes and spinlocks.
265 */
266 RTSemFastMutexDestroy(pDevExt->mtxGip);
267 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
268 RTSemFastMutexDestroy(pDevExt->mtxLdr);
269 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
270 RTSpinlockDestroy(pDevExt->Spinlock);
271 pDevExt->Spinlock = NIL_RTSPINLOCK;
272
273 /*
274 * Free lists.
275 */
276#ifdef VBOX_WITH_IDT_PATCHING
277 /* patches */
278 /** @todo make sure we don't uninstall patches which has been patched by someone else. */
279 pPatch = pDevExt->pIdtPatchesFree;
280 pDevExt->pIdtPatchesFree = NULL;
281 while (pPatch)
282 {
283 void *pvFree = pPatch;
284 pPatch = pPatch->pNext;
285 RTMemExecFree(pvFree);
286 }
287#endif /* VBOX_WITH_IDT_PATCHING */
288
289 /* objects. */
290 pObj = pDevExt->pObjs;
291#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
292 Assert(!pObj); /* (can trigger on forced unloads) */
293#endif
294 pDevExt->pObjs = NULL;
295 while (pObj)
296 {
297 void *pvFree = pObj;
298 pObj = pObj->pNext;
299 RTMemFree(pvFree);
300 }
301
302 /* usage records. */
303 pUsage = pDevExt->pUsageFree;
304 pDevExt->pUsageFree = NULL;
305 while (pUsage)
306 {
307 void *pvFree = pUsage;
308 pUsage = pUsage->pNext;
309 RTMemFree(pvFree);
310 }
311
312#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
313 /* kill the GIP */
314 supdrvGipDestroy(pDevExt);
315#endif
316}
317
318
319/**
320 * Create session.
321 *
322 * @returns IPRT status code.
323 * @param pDevExt Device extension.
324 * @param ppSession Where to store the pointer to the session data.
325 */
326int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
327{
328 /*
329 * Allocate memory for the session data.
330 */
331 int rc = VERR_NO_MEMORY;
332 PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
333 if (pSession)
334 {
335 /* Initialize session data. */
336 rc = RTSpinlockCreate(&pSession->Spinlock);
337 if (!rc)
338 {
339 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
340 pSession->pDevExt = pDevExt;
341 pSession->u32Cookie = BIRD_INV;
342 /*pSession->pLdrUsage = NULL;
343 pSession->pPatchUsage = NULL;
344 pSession->pUsage = NULL;
345 pSession->pGip = NULL;
346 pSession->fGipReferenced = false;
347 pSession->Bundle.cUsed = 0 */
348
349 dprintf(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
350 return VINF_SUCCESS;
351 }
352
353 RTMemFree(pSession);
354 *ppSession = NULL;
355 }
356
357 dprintf(("Failed to create spinlock, rc=%d!\n", rc));
358 return rc;
359}
360
361
362/**
363 * Shared code for cleaning up a session.
364 *
365 * @param pDevExt Device extension.
366 * @param pSession Session data.
367 * This data will be freed by this routine.
368 */
369void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
370{
371 /*
372 * Cleanup the session first.
373 */
374 supdrvCleanupSession(pDevExt, pSession);
375
376 /*
377 * Free the rest of the session stuff.
378 */
379 RTSpinlockDestroy(pSession->Spinlock);
380 pSession->Spinlock = NIL_RTSPINLOCK;
381 pSession->pDevExt = NULL;
382 RTMemFree(pSession);
383 dprintf2(("supdrvCloseSession: returns\n"));
384}
385
386
387/**
388 * Shared code for cleaning up a session (but not quite freeing it).
389 *
390 * This is primarily intended for MAC OS X where we have to clean up the memory
391 * stuff before the file handle is closed.
392 *
393 * @param pDevExt Device extension.
394 * @param pSession Session data.
395 * This data will be freed by this routine.
396 */
397void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
398{
399 PSUPDRVBUNDLE pBundle;
400 dprintf(("supdrvCleanupSession: pSession=%p\n", pSession));
401
402 /*
403 * Remove logger instances related to this session.
404 * (This assumes the dprintf and dprintf2 macros doesn't use the normal logging.)
405 */
406 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
407
408#ifdef VBOX_WITH_IDT_PATCHING
409 /*
410 * Uninstall any IDT patches installed for this session.
411 */
412 supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
413#endif
414
415 /*
416 * Release object references made in this session.
417 * In theory there should be noone racing us in this session.
418 */
419 dprintf2(("release objects - start\n"));
420 if (pSession->pUsage)
421 {
422 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
423 PSUPDRVUSAGE pUsage;
424 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
425
426 while ((pUsage = pSession->pUsage) != NULL)
427 {
428 PSUPDRVOBJ pObj = pUsage->pObj;
429 pSession->pUsage = pUsage->pNext;
430
431 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
432 if (pUsage->cUsage < pObj->cUsage)
433 {
434 pObj->cUsage -= pUsage->cUsage;
435 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
436 }
437 else
438 {
439 /* Destroy the object and free the record. */
440 if (pDevExt->pObjs == pObj)
441 pDevExt->pObjs = pObj->pNext;
442 else
443 {
444 PSUPDRVOBJ pObjPrev;
445 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
446 if (pObjPrev->pNext == pObj)
447 {
448 pObjPrev->pNext = pObj->pNext;
449 break;
450 }
451 Assert(pObjPrev);
452 }
453 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
454
455 if (pObj->pfnDestructor)
456 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
457 RTMemFree(pObj);
458 }
459
460 /* free it and continue. */
461 RTMemFree(pUsage);
462
463 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
464 }
465
466 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
467 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
468 }
469 dprintf2(("release objects - done\n"));
470
471 /*
472 * Release memory allocated in the session.
473 *
474 * We do not serialize this as we assume that the application will
475 * not allocated memory while closing the file handle object.
476 */
477 dprintf2(("freeing memory:\n"));
478 pBundle = &pSession->Bundle;
479 while (pBundle)
480 {
481 PSUPDRVBUNDLE pToFree;
482 unsigned i;
483
484 /*
485 * Check and unlock all entries in the bundle.
486 */
487 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
488 {
489 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
490 {
491 int rc;
492 dprintf2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
493 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
494 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
495 {
496 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
497 AssertRC(rc); /** @todo figure out how to handle this. */
498 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
499 }
500 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
501 AssertRC(rc); /** @todo figure out how to handle this. */
502 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
503 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
504 }
505 }
506
507 /*
508 * Advance and free previous bundle.
509 */
510 pToFree = pBundle;
511 pBundle = pBundle->pNext;
512
513 pToFree->pNext = NULL;
514 pToFree->cUsed = 0;
515 if (pToFree != &pSession->Bundle)
516 RTMemFree(pToFree);
517 }
518 dprintf2(("freeing memory - done\n"));
519
520 /*
521 * Loaded images needs to be dereferenced and possibly freed up.
522 */
523 RTSemFastMutexRequest(pDevExt->mtxLdr);
524 dprintf2(("freeing images:\n"));
525 if (pSession->pLdrUsage)
526 {
527 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
528 pSession->pLdrUsage = NULL;
529 while (pUsage)
530 {
531 void *pvFree = pUsage;
532 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
533 if (pImage->cUsage > pUsage->cUsage)
534 pImage->cUsage -= pUsage->cUsage;
535 else
536 supdrvLdrFree(pDevExt, pImage);
537 pUsage->pImage = NULL;
538 pUsage = pUsage->pNext;
539 RTMemFree(pvFree);
540 }
541 }
542 RTSemFastMutexRelease(pDevExt->mtxLdr);
543 dprintf2(("freeing images - done\n"));
544
545 /*
546 * Unmap the GIP.
547 */
548 dprintf2(("umapping GIP:\n"));
549#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
550 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
551#else
552 if (pSession->pGip)
553#endif
554 {
555 SUPR0GipUnmap(pSession);
556#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
557 pSession->pGip = NULL;
558#endif
559 pSession->fGipReferenced = 0;
560 }
561 dprintf2(("umapping GIP - done\n"));
562}
563
564
565/**
566 * Fast path I/O Control worker.
567 *
568 * @returns VBox status code that should be passed down to ring-3 unchanged.
569 * @param uIOCtl Function number.
570 * @param pDevExt Device extention.
571 * @param pSession Session data.
572 */
573int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
574{
575 int rc;
576
577 /*
578 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
579 */
580 if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
581 {
582 switch (uIOCtl)
583 {
584 case SUP_IOCTL_FAST_DO_RAW_RUN:
585 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
586 break;
587 case SUP_IOCTL_FAST_DO_HWACC_RUN:
588 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
589 break;
590 case SUP_IOCTL_FAST_DO_NOP:
591 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
592 break;
593 default:
594 rc = VERR_INTERNAL_ERROR;
595 break;
596 }
597 }
598 else
599 rc = VERR_INTERNAL_ERROR;
600
601 return rc;
602}
603
604
605/**
606 * I/O Control worker.
607 *
608 * @returns 0 on success.
609 * @returns VERR_INVALID_PARAMETER if the request is invalid.
610 *
611 * @param uIOCtl Function number.
612 * @param pDevExt Device extension.
613 * @param pSession Session data.
614 * @param pReqHdr The request header.
615 */
616int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
617{
618 /*
619 * Validate the request.
620 */
621 /* this first check could probably be omitted as its also done by the OS specific code... */
622 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
623 || pReqHdr->cbIn < sizeof(*pReqHdr)
624 || pReqHdr->cbOut < sizeof(*pReqHdr)))
625 {
626 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
627 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
628 return VERR_INVALID_PARAMETER;
629 }
630 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
631 {
632 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
633 {
634 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
635 return VERR_INVALID_PARAMETER;
636 }
637 }
638 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
639 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
640 {
641 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
642 return VERR_INVALID_PARAMETER;
643 }
644
645/*
646 * Validation macros
647 */
648#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
649 do { \
650 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
651 { \
652 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
653 (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
654 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
655 } \
656 } while (0)
657
658#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
659
660#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
661 do { \
662 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
663 { \
664 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
665 (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
666 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
667 } \
668 } while (0)
669
670#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
671 do { \
672 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
673 { \
674 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
675 (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
676 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
677 } \
678 } while (0)
679
680#define REQ_CHECK_EXPR(Name, expr) \
681 do { \
682 if (RT_UNLIKELY(!(expr))) \
683 { \
684 OSDBGPRINT(( #Name ": %s\n", #expr)); \
685 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
686 } \
687 } while (0)
688
689#define REQ_CHECK_EXPR_FMT(expr, fmt) \
690 do { \
691 if (RT_UNLIKELY(!(expr))) \
692 { \
693 OSDBGPRINT( fmt ); \
694 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
695 } \
696 } while (0)
697
698
699 /*
700 * The switch.
701 */
702 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
703 {
704 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
705 {
706 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
707 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
708 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
709 {
710 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
711 pReq->Hdr.rc = VERR_INVALID_MAGIC;
712 return 0;
713 }
714
715#if 0
716 /*
717 * Call out to the OS specific code and let it do permission checks on the
718 * client process.
719 */
720 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
721 {
722 pReq->u.Out.u32Cookie = 0xffffffff;
723 pReq->u.Out.u32SessionCookie = 0xffffffff;
724 pReq->u.Out.u32SessionVersion = 0xffffffff;
725 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
726 pReq->u.Out.pSession = NULL;
727 pReq->u.Out.cFunctions = 0;
728 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
729 return 0;
730 }
731#endif
732
733 /*
734 * Match the version.
735 * The current logic is very simple, match the major interface version.
736 */
737 if ( pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
738 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
739 {
740 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
741 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
742 pReq->u.Out.u32Cookie = 0xffffffff;
743 pReq->u.Out.u32SessionCookie = 0xffffffff;
744 pReq->u.Out.u32SessionVersion = 0xffffffff;
745 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
746 pReq->u.Out.pSession = NULL;
747 pReq->u.Out.cFunctions = 0;
748 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
749 return 0;
750 }
751
752 /*
753 * Fill in return data and be gone.
754 * N.B. The first one to change SUPDRVIOC_VERSION shall makes sure that
755 * u32SessionVersion <= u32ReqVersion!
756 */
757 /** @todo Somehow validate the client and negotiate a secure cookie... */
758 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
759 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
760 pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
761 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
762 pReq->u.Out.pSession = pSession;
763 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
764 pReq->Hdr.rc = VINF_SUCCESS;
765 return 0;
766 }
767
768 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
769 {
770 /* validate */
771 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
772 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
773
774 /* execute */
775 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
776 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
777 pReq->Hdr.rc = VINF_SUCCESS;
778 return 0;
779 }
780
781 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
782 {
783 /* validate */
784 PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
785 REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);
786
787 /* execute */
788#ifdef VBOX_WITH_IDT_PATCHING
789 pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
790#else
791 pReq->u.Out.u8Idt = 3;
792 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
793#endif
794 return 0;
795 }
796
797 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
798 {
799 /* validate */
800 PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
801 REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);
802
803 /* execute */
804#ifdef VBOX_WITH_IDT_PATCHING
805 pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
806#else
807 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
808#endif
809 return 0;
810 }
811
812 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
813 {
814 /* validate */
815 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
816 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
817 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
818 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
819 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
820
821 /* execute */
822 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
823 if (RT_FAILURE(pReq->Hdr.rc))
824 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
825 return 0;
826 }
827
828 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
829 {
830 /* validate */
831 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
832 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
833
834 /* execute */
835 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
836 return 0;
837 }
838
839 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
840 {
841 /* validate */
842 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
843 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
844
845 /* execute */
846 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
847 if (RT_FAILURE(pReq->Hdr.rc))
848 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
849 return 0;
850 }
851
852 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
853 {
854 /* validate */
855 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
856 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
857
858 /* execute */
859 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
860 return 0;
861 }
862
863 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
864 {
865 /* validate */
866 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
867 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
868 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
869 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
870 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
871 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
872 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !strpbrk(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
873
874 /* execute */
875 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
876 return 0;
877 }
878
879 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
880 {
881 /* validate */
882 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
883 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
884 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
885 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
886 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
887 || ( pReq->u.In.offSymbols < pReq->u.In.cbImage
888 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
889 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
890 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
891 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
892 || ( pReq->u.In.offStrTab < pReq->u.In.cbImage
893 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
894 && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
895 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
896 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));
897
898 if (pReq->u.In.cSymbols)
899 {
900 uint32_t i;
901 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
902 for (i = 0; i < pReq->u.In.cSymbols; i++)
903 {
904 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
905 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
906 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
907 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
908 REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
909 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
910 }
911 }
912
913 /* execute */
914 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
915 return 0;
916 }
917
918 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
919 {
920 /* validate */
921 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
922 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
923
924 /* execute */
925 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
926 return 0;
927 }
928
929 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
930 {
931 /* validate */
932 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
933 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
934 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));
935
936 /* execute */
937 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
938 return 0;
939 }
940
941 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
942 {
943 /* validate */
944 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
945 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
946 {
947 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
948
949 /* execute */
950 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
951 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
952 else
953 pReq->Hdr.rc = VERR_WRONG_ORDER;
954 }
955 else
956 {
957 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
958 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
959 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
960 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
961 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
962
963 /* execute */
964 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
965 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
966 else
967 pReq->Hdr.rc = VERR_WRONG_ORDER;
968 }
969 return 0;
970 }
971
972 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
973 {
974 /* validate */
975 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
976 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
977
978 /* execute */
979 pReq->Hdr.rc = VINF_SUCCESS;
980 pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
981 return 0;
982 }
983
984 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
985 {
986 /* validate */
987 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
988 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
989 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
990
991 /* execute */
992 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
993 if (RT_FAILURE(pReq->Hdr.rc))
994 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
995 return 0;
996 }
997
998 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
999 {
1000 /* validate */
1001 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1002 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1003
1004 /* execute */
1005 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1006 return 0;
1007 }
1008
1009 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1010 {
1011 /* validate */
1012 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1013 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1014
1015 /* execute */
1016 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1017 if (RT_SUCCESS(pReq->Hdr.rc))
1018 pReq->u.Out.pGipR0 = pDevExt->pGip;
1019 return 0;
1020 }
1021
1022 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1023 {
1024 /* validate */
1025 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1026 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1027
1028 /* execute */
1029 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1030 return 0;
1031 }
1032
1033 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1034 {
1035 /* validate */
1036 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1037 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1038 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1039 || ( VALID_PTR(pReq->u.In.pVMR0)
1040 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1041 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1042 /* execute */
1043 pSession->pVM = pReq->u.In.pVMR0;
1044 pReq->Hdr.rc = VINF_SUCCESS;
1045 return 0;
1046 }
1047
1048 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
1049 {
1050 /* validate */
1051 PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
1052 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
1053 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1054
1055 /* execute */
1056 pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1057 if (RT_FAILURE(pReq->Hdr.rc))
1058 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1059 return 0;
1060 }
1061
1062 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1063 {
1064 /* validate */
1065 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1066 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1067
1068 /* execute */
1069 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1070 return 0;
1071 }
1072
1073 default:
1074 dprintf(("Unknown IOCTL %#lx\n", (long)uIOCtl));
1075 break;
1076 }
1077 return SUPDRV_ERR_GENERAL_FAILURE;
1078}
1079
1080
1081/**
1082 * Register a object for reference counting.
1083 * The object is registered with one reference in the specified session.
1084 *
1085 * @returns Unique identifier on success (pointer).
1086 * All future reference must use this identifier.
1087 * @returns NULL on failure.
1088 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
1089 * @param pvUser1 The first user argument.
1090 * @param pvUser2 The second user argument.
1091 */
1092SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
1093{
1094 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1095 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1096 PSUPDRVOBJ pObj;
1097 PSUPDRVUSAGE pUsage;
1098
1099 /*
1100 * Validate the input.
1101 */
1102 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
1103 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
1104 AssertPtrReturn(pfnDestructor, NULL);
1105
1106 /*
1107 * Allocate and initialize the object.
1108 */
1109 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
1110 if (!pObj)
1111 return NULL;
1112 pObj->u32Magic = SUPDRVOBJ_MAGIC;
1113 pObj->enmType = enmType;
1114 pObj->pNext = NULL;
1115 pObj->cUsage = 1;
1116 pObj->pfnDestructor = pfnDestructor;
1117 pObj->pvUser1 = pvUser1;
1118 pObj->pvUser2 = pvUser2;
1119 pObj->CreatorUid = pSession->Uid;
1120 pObj->CreatorGid = pSession->Gid;
1121 pObj->CreatorProcess= pSession->Process;
1122 supdrvOSObjInitCreator(pObj, pSession);
1123
1124 /*
1125 * Allocate the usage record.
1126 * (We keep freed usage records around to simplity SUPR0ObjAddRef().)
1127 */
1128 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1129
1130 pUsage = pDevExt->pUsageFree;
1131 if (pUsage)
1132 pDevExt->pUsageFree = pUsage->pNext;
1133 else
1134 {
1135 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1136 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
1137 if (!pUsage)
1138 {
1139 RTMemFree(pObj);
1140 return NULL;
1141 }
1142 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1143 }
1144
1145 /*
1146 * Insert the object and create the session usage record.
1147 */
1148 /* The object. */
1149 pObj->pNext = pDevExt->pObjs;
1150 pDevExt->pObjs = pObj;
1151
1152 /* The session record. */
1153 pUsage->cUsage = 1;
1154 pUsage->pObj = pObj;
1155 pUsage->pNext = pSession->pUsage;
1156 dprintf(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1157 pSession->pUsage = pUsage;
1158
1159 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1160
1161 dprintf(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
1162 return pObj;
1163}
1164
1165
1166/**
1167 * Increment the reference counter for the object associating the reference
1168 * with the specified session.
1169 *
1170 * @returns IPRT status code.
1171 * @param pvObj The identifier returned by SUPR0ObjRegister().
1172 * @param pSession The session which is referencing the object.
1173 */
1174SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
1175{
1176 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1177 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1178 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1179 PSUPDRVUSAGE pUsagePre;
1180 PSUPDRVUSAGE pUsage;
1181
1182 /*
1183 * Validate the input.
1184 */
1185 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1186 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1187 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1188 VERR_INVALID_PARAMETER);
1189
1190 /*
1191 * Preallocate the usage record.
1192 */
1193 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1194
1195 pUsagePre = pDevExt->pUsageFree;
1196 if (pUsagePre)
1197 pDevExt->pUsageFree = pUsagePre->pNext;
1198 else
1199 {
1200 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1201 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
1202 if (!pUsagePre)
1203 return VERR_NO_MEMORY;
1204 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1205 }
1206
1207 /*
1208 * Reference the object.
1209 */
1210 pObj->cUsage++;
1211
1212 /*
1213 * Look for the session record.
1214 */
1215 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
1216 {
1217 dprintf(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1218 if (pUsage->pObj == pObj)
1219 break;
1220 }
1221 if (pUsage)
1222 pUsage->cUsage++;
1223 else
1224 {
1225 /* create a new session record. */
1226 pUsagePre->cUsage = 1;
1227 pUsagePre->pObj = pObj;
1228 pUsagePre->pNext = pSession->pUsage;
1229 pSession->pUsage = pUsagePre;
1230 dprintf(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));
1231
1232 pUsagePre = NULL;
1233 }
1234
1235 /*
1236 * Put any unused usage record into the free list..
1237 */
1238 if (pUsagePre)
1239 {
1240 pUsagePre->pNext = pDevExt->pUsageFree;
1241 pDevExt->pUsageFree = pUsagePre;
1242 }
1243
1244 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1245
1246 return VINF_SUCCESS;
1247}
1248
1249
1250/**
1251 * Decrement / destroy a reference counter record for an object.
1252 *
1253 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
1254 *
1255 * @returns IPRT status code.
1256 * @param pvObj The identifier returned by SUPR0ObjRegister().
1257 * @param pSession The session which is referencing the object.
1258 */
1259SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
1260{
1261 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1262 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1263 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1264 bool fDestroy = false;
1265 PSUPDRVUSAGE pUsage;
1266 PSUPDRVUSAGE pUsagePrev;
1267
1268 /*
1269 * Validate the input.
1270 */
1271 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1272 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1273 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1274 VERR_INVALID_PARAMETER);
1275
1276 /*
1277 * Acquire the spinlock and look for the usage record.
1278 */
1279 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1280
1281 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
1282 pUsage;
1283 pUsagePrev = pUsage, pUsage = pUsage->pNext)
1284 {
1285 dprintf(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1286 if (pUsage->pObj == pObj)
1287 {
1288 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
1289 if (pUsage->cUsage > 1)
1290 {
1291 pObj->cUsage--;
1292 pUsage->cUsage--;
1293 }
1294 else
1295 {
1296 /*
1297 * Free the session record.
1298 */
1299 if (pUsagePrev)
1300 pUsagePrev->pNext = pUsage->pNext;
1301 else
1302 pSession->pUsage = pUsage->pNext;
1303 pUsage->pNext = pDevExt->pUsageFree;
1304 pDevExt->pUsageFree = pUsage;
1305
1306 /* What about the object? */
1307 if (pObj->cUsage > 1)
1308 pObj->cUsage--;
1309 else
1310 {
1311 /*
1312 * Object is to be destroyed, unlink it.
1313 */
1314 fDestroy = true;
1315 if (pDevExt->pObjs == pObj)
1316 pDevExt->pObjs = pObj->pNext;
1317 else
1318 {
1319 PSUPDRVOBJ pObjPrev;
1320 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
1321 if (pObjPrev->pNext == pObj)
1322 {
1323 pObjPrev->pNext = pObj->pNext;
1324 break;
1325 }
1326 Assert(pObjPrev);
1327 }
1328 }
1329 }
1330 break;
1331 }
1332 }
1333
1334 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1335
1336 /*
1337 * Call the destructor and free the object if required.
1338 */
1339 if (fDestroy)
1340 {
1341 pObj->u32Magic++;
1342 if (pObj->pfnDestructor)
1343 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
1344 RTMemFree(pObj);
1345 }
1346
1347 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
1348 return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
1349}
1350
1351/**
1352 * Verifies that the current process can access the specified object.
1353 *
1354 * @returns The following IPRT status code:
1355 * @retval VINF_SUCCESS if access was granted.
1356 * @retval VERR_PERMISSION_DENIED if denied access.
1357 * @retval VERR_INVALID_PARAMETER if invalid parameter.
1358 *
1359 * @param pvObj The identifier returned by SUPR0ObjRegister().
1360 * @param pSession The session which wishes to access the object.
1361 * @param pszObjName Object string name. This is optional and depends on the object type.
1362 *
1363 * @remark The caller is responsible for making sure the object isn't removed while
1364 * we're inside this function. If uncertain about this, just call AddRef before calling us.
1365 */
1366SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
1367{
1368 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1369 int rc;
1370
1371 /*
1372 * Validate the input.
1373 */
1374 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1375 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1376 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1377 VERR_INVALID_PARAMETER);
1378
1379 /*
1380 * Check access. (returns true if a decision has been made.)
1381 */
1382 rc = VERR_INTERNAL_ERROR;
1383 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
1384 return rc;
1385
1386 /*
1387 * Default policy is to allow the user to access his own
1388 * stuff but nothing else.
1389 */
1390 if (pObj->CreatorUid == pSession->Uid)
1391 return VINF_SUCCESS;
1392 return VERR_PERMISSION_DENIED;
1393}
1394
1395
1396/**
1397 * Lock pages.
1398 *
1399 * @returns IPRT status code.
1400 * @param pSession Session to which the locked memory should be associated.
1401 * @param pvR3 Start of the memory range to lock.
1402 * This must be page aligned.
1403 * @param cb Size of the memory range to lock.
1404 * This must be page aligned.
1405 */
1406SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1407{
1408 int rc;
1409 SUPDRVMEMREF Mem = {0};
1410 const size_t cb = (size_t)cPages << PAGE_SHIFT;
1411 dprintf(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
1412
1413 /*
1414 * Verify input.
1415 */
1416 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1417 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1418 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
1419 || !pvR3)
1420 {
1421 dprintf(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
1422 return VERR_INVALID_PARAMETER;
1423 }
1424
1425#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
1426 /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
1427 rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
1428 if (RT_SUCCESS(rc))
1429 return rc;
1430#endif
1431
1432 /*
1433 * Let IPRT do the job.
1434 */
1435 Mem.eType = MEMREF_TYPE_LOCKED;
1436 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
1437 if (RT_SUCCESS(rc))
1438 {
1439 uint32_t iPage = cPages;
1440 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
1441 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
1442
1443 while (iPage-- > 0)
1444 {
1445 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1446 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
1447 {
1448 AssertMsgFailed(("iPage=%d\n", iPage));
1449 rc = VERR_INTERNAL_ERROR;
1450 break;
1451 }
1452 }
1453 if (RT_SUCCESS(rc))
1454 rc = supdrvMemAdd(&Mem, pSession);
1455 if (RT_FAILURE(rc))
1456 {
1457 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
1458 AssertRC(rc2);
1459 }
1460 }
1461
1462 return rc;
1463}
1464
1465
1466/**
1467 * Unlocks the memory pointed to by pv.
1468 *
1469 * @returns IPRT status code.
1470 * @param pSession Session to which the memory was locked.
1471 * @param pvR3 Memory to unlock.
1472 */
1473SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1474{
1475 dprintf(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1476 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1477#ifdef RT_OS_WINDOWS
1478 /*
1479 * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
1480 * allocations; ignore this call.
1481 */
1482 if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
1483 {
1484 dprintf(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
1485 return VINF_SUCCESS;
1486 }
1487#endif
1488 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
1489}
1490
1491
1492/**
1493 * Allocates a chunk of page aligned memory with contiguous and fixed physical
1494 * backing.
1495 *
1496 * @returns IPRT status code.
1497 * @param pSession Session data.
1498 * @param cb Number of bytes to allocate.
1499 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
1500 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
1501 * @param pHCPhys Where to put the physical address of allocated memory.
1502 */
1503SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1504{
1505 int rc;
1506 SUPDRVMEMREF Mem = {0};
1507 dprintf(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
1508
1509 /*
1510 * Validate input.
1511 */
1512 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1513 if (!ppvR3 || !ppvR0 || !pHCPhys)
1514 {
1515 dprintf(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
1516 pSession, ppvR0, ppvR3, pHCPhys));
1517 return VERR_INVALID_PARAMETER;
1518
1519 }
1520 if (cPages < 1 || cPages >= 256)
1521 {
1522 dprintf(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
1523 return VERR_INVALID_PARAMETER;
1524 }
1525
1526 /*
1527 * Let IPRT do the job.
1528 */
1529 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
1530 if (RT_SUCCESS(rc))
1531 {
1532 int rc2;
1533 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1534 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1535 if (RT_SUCCESS(rc))
1536 {
1537 Mem.eType = MEMREF_TYPE_CONT;
1538 rc = supdrvMemAdd(&Mem, pSession);
1539 if (!rc)
1540 {
1541 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1542 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1543 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
1544 return 0;
1545 }
1546
1547 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1548 AssertRC(rc2);
1549 }
1550 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1551 AssertRC(rc2);
1552 }
1553
1554 return rc;
1555}
1556
1557
1558/**
1559 * Frees memory allocated using SUPR0ContAlloc().
1560 *
1561 * @returns IPRT status code.
1562 * @param pSession The session to which the memory was allocated.
1563 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1564 */
1565SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1566{
1567 dprintf(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1568 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1569 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1570}
1571
1572
1573/**
1574 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1575 *
1576 * @returns IPRT status code.
1577 * @param pSession Session data.
1578 * @param cPages Number of pages to allocate.
1579 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1580 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1581 * @param paPages Where to put the physical addresses of allocated memory.
1582 */
1583SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1584{
1585 unsigned iPage;
1586 int rc;
1587 SUPDRVMEMREF Mem = {0};
1588 dprintf(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1589
1590 /*
1591 * Validate input.
1592 */
1593 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1594 if (!ppvR3 || !ppvR0 || !paPages)
1595 {
1596 dprintf(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1597 pSession, ppvR3, ppvR0, paPages));
1598 return VERR_INVALID_PARAMETER;
1599
1600 }
1601 if (cPages < 1 || cPages > 256)
1602 {
1603 dprintf(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
1604 return VERR_INVALID_PARAMETER;
1605 }
1606
1607 /*
1608 * Let IPRT do the work.
1609 */
1610 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1611 if (RT_SUCCESS(rc))
1612 {
1613 int rc2;
1614 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1615 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1616 if (RT_SUCCESS(rc))
1617 {
1618 Mem.eType = MEMREF_TYPE_LOW;
1619 rc = supdrvMemAdd(&Mem, pSession);
1620 if (!rc)
1621 {
1622 for (iPage = 0; iPage < cPages; iPage++)
1623 {
1624 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1625 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", paPages[iPage]));
1626 }
1627 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1628 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1629 return 0;
1630 }
1631
1632 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1633 AssertRC(rc2);
1634 }
1635
1636 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1637 AssertRC(rc2);
1638 }
1639
1640 return rc;
1641}
1642
1643
1644/**
1645 * Frees memory allocated using SUPR0LowAlloc().
1646 *
1647 * @returns IPRT status code.
1648 * @param pSession The session to which the memory was allocated.
1649 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1650 */
1651SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1652{
1653 dprintf(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1654 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1655 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1656}
1657
1658
1659
1660/**
1661 * Allocates a chunk of memory with both R0 and R3 mappings.
1662 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1663 *
1664 * @returns IPRT status code.
1665 * @param pSession The session to associated the allocation with.
1666 * @param cb Number of bytes to allocate.
1667 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1668 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1669 */
1670SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1671{
1672 int rc;
1673 SUPDRVMEMREF Mem = {0};
1674 dprintf(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1675
1676 /*
1677 * Validate input.
1678 */
1679 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1680 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1681 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1682 if (cb < 1 || cb >= _4M)
1683 {
1684 dprintf(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1685 return VERR_INVALID_PARAMETER;
1686 }
1687
1688 /*
1689 * Let IPRT do the work.
1690 */
1691 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1692 if (RT_SUCCESS(rc))
1693 {
1694 int rc2;
1695 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1696 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1697 if (RT_SUCCESS(rc))
1698 {
1699 Mem.eType = MEMREF_TYPE_MEM;
1700 rc = supdrvMemAdd(&Mem, pSession);
1701 if (!rc)
1702 {
1703 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1704 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1705 return VINF_SUCCESS;
1706 }
1707 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1708 AssertRC(rc2);
1709 }
1710
1711 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1712 AssertRC(rc2);
1713 }
1714
1715 return rc;
1716}
1717
1718
1719/**
1720 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1721 *
1722 * @returns IPRT status code.
1723 * @param pSession The session to which the memory was allocated.
1724 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1725 * @param paPages Where to store the physical addresses.
1726 */
1727SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1728{
1729 PSUPDRVBUNDLE pBundle;
1730 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1731 dprintf(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1732
1733 /*
1734 * Validate input.
1735 */
1736 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1737 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1738 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1739
1740 /*
1741 * Search for the address.
1742 */
1743 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1744 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1745 {
1746 if (pBundle->cUsed > 0)
1747 {
1748 unsigned i;
1749 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1750 {
1751 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1752 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1753 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1754 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1755 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1756 )
1757 )
1758 {
1759 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1760 unsigned iPage;
1761 for (iPage = 0; iPage < cPages; iPage++)
1762 {
1763 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1764 paPages[iPage].uReserved = 0;
1765 }
1766 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1767 return VINF_SUCCESS;
1768 }
1769 }
1770 }
1771 }
1772 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1773 dprintf(("Failed to find %p!!!\n", (void *)uPtr));
1774 return VERR_INVALID_PARAMETER;
1775}
1776
1777
1778/**
1779 * Free memory allocated by SUPR0MemAlloc().
1780 *
1781 * @returns IPRT status code.
1782 * @param pSession The session owning the allocation.
1783 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1784 */
1785SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1786{
1787 dprintf(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1788 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1789 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1790}
1791
1792
1793/**
1794 * Allocates a chunk of memory with only a R3 mappings.
1795 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1796 *
1797 * @returns IPRT status code.
1798 * @param pSession The session to associated the allocation with.
1799 * @param cPages The number of pages to allocate.
1800 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1801 * @param paPages Where to store the addresses of the pages. Optional.
1802 */
1803SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1804{
1805 int rc;
1806 SUPDRVMEMREF Mem = {0};
1807 dprintf(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1808
1809 /*
1810 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
1811 */
1812 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1813 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1814 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1815 {
1816 dprintf(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than 128MB.\n", cPages));
1817 return VERR_INVALID_PARAMETER;
1818 }
1819
1820 /*
1821 * Let IPRT do the work.
1822 */
1823 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1824 if (RT_SUCCESS(rc))
1825 {
1826 int rc2;
1827 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1828 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1829 if (RT_SUCCESS(rc))
1830 {
1831 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1832 rc = supdrvMemAdd(&Mem, pSession);
1833 if (!rc)
1834 {
1835 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1836 if (paPages)
1837 {
1838 uint32_t iPage = cPages;
1839 while (iPage-- > 0)
1840 {
1841 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1842 Assert(paPages[iPage] != NIL_RTHCPHYS);
1843 }
1844 }
1845 return VINF_SUCCESS;
1846 }
1847 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1848 AssertRC(rc2);
1849 }
1850
1851 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1852 AssertRC(rc2);
1853 }
1854 return rc;
1855}
1856
1857
1858#ifdef RT_OS_WINDOWS
1859/**
1860 * Check if the pages were locked by SUPR0PageAlloc
1861 *
1862 * This function will be removed along with the lock/unlock hacks when
1863 * we've cleaned up the ring-3 code properly.
1864 *
1865 * @returns boolean
1866 * @param pSession The session to which the memory was allocated.
1867 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1868 */
1869static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1870{
1871 PSUPDRVBUNDLE pBundle;
1872 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1873 dprintf(("SUPR0PageIsLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1874
1875 /*
1876 * Search for the address.
1877 */
1878 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1879 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1880 {
1881 if (pBundle->cUsed > 0)
1882 {
1883 unsigned i;
1884 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1885 {
1886 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1887 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1888 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1889 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1890 {
1891 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1892 return true;
1893 }
1894 }
1895 }
1896 }
1897 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1898 return false;
1899}
1900
1901
1902/**
1903 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
1904 *
1905 * This function will be removed along with the lock/unlock hacks when
1906 * we've cleaned up the ring-3 code properly.
1907 *
1908 * @returns IPRT status code.
1909 * @param pSession The session to which the memory was allocated.
1910 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1911 * @param cPages Number of pages in paPages
1912 * @param paPages Where to store the physical addresses.
1913 */
1914static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1915{
1916 PSUPDRVBUNDLE pBundle;
1917 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1918 dprintf(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
1919
1920 /*
1921 * Search for the address.
1922 */
1923 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1924 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1925 {
1926 if (pBundle->cUsed > 0)
1927 {
1928 unsigned i;
1929 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1930 {
1931 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1932 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1933 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1934 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1935 {
1936 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1937 cPages = RT_MIN(iPage, cPages);
1938 for (iPage = 0; iPage < cPages; iPage++)
1939 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1940 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1941 return VINF_SUCCESS;
1942 }
1943 }
1944 }
1945 }
1946 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1947 return VERR_INVALID_PARAMETER;
1948}
1949#endif /* RT_OS_WINDOWS */
1950
1951
1952/**
1953 * Free memory allocated by SUPR0PageAlloc().
1954 *
1955 * @returns IPRT status code.
1956 * @param pSession The session owning the allocation.
1957 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1958 */
1959SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1960{
1961 dprintf(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1962 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1963 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
1964}
1965
1966
1967/**
1968 * Maps the GIP into userspace and/or get the physical address of the GIP.
1969 *
1970 * @returns IPRT status code.
1971 * @param pSession Session to which the GIP mapping should belong.
1972 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
1973 * @param pHCPhysGip Where to store the physical address. (optional)
1974 *
1975 * @remark There is no reference counting on the mapping, so one call to this function
1976 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
1977 * and remove the session as a GIP user.
1978 */
1979SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
1980{
1981 int rc = 0;
1982 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1983 RTR3PTR pGip = NIL_RTR3PTR;
1984 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1985 dprintf(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
1986
1987 /*
1988 * Validate
1989 */
1990 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1991 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
1992 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
1993
1994 RTSemFastMutexRequest(pDevExt->mtxGip);
1995 if (pDevExt->pGip)
1996 {
1997 /*
1998 * Map it?
1999 */
2000 if (ppGipR3)
2001 {
2002#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2003 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2004 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2005 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2006 if (RT_SUCCESS(rc))
2007 {
2008 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2009 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2010 }
2011#else /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2012 if (!pSession->pGip)
2013 rc = supdrvOSGipMap(pSession->pDevExt, &pSession->pGip);
2014 if (!rc)
2015 pGip = (RTR3PTR)pSession->pGip;
2016#endif /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2017 }
2018
2019 /*
2020 * Get physical address.
2021 */
2022 if (pHCPhysGip && !rc)
2023 HCPhys = pDevExt->HCPhysGip;
2024
2025 /*
2026 * Reference globally.
2027 */
2028 if (!pSession->fGipReferenced && !rc)
2029 {
2030 pSession->fGipReferenced = 1;
2031 pDevExt->cGipUsers++;
2032 if (pDevExt->cGipUsers == 1)
2033 {
2034 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2035 unsigned i;
2036
2037 dprintf(("SUPR0GipMap: Resumes GIP updating\n"));
2038
2039 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2040 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2041 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2042
2043#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2044 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2045 AssertRC(rc); rc = VINF_SUCCESS;
2046#else
2047 supdrvOSGipResume(pDevExt);
2048#endif
2049 }
2050 }
2051 }
2052 else
2053 {
2054 rc = SUPDRV_ERR_GENERAL_FAILURE;
2055 dprintf(("SUPR0GipMap: GIP is not available!\n"));
2056 }
2057 RTSemFastMutexRelease(pDevExt->mtxGip);
2058
2059 /*
2060 * Write returns.
2061 */
2062 if (pHCPhysGip)
2063 *pHCPhysGip = HCPhys;
2064 if (ppGipR3)
2065 *ppGipR3 = pGip;
2066
2067#ifdef DEBUG_DARWIN_GIP
2068 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2069#else
2070 dprintf(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2071#endif
2072 return rc;
2073}
2074
2075
2076/**
2077 * Unmaps any user mapping of the GIP and terminates all GIP access
2078 * from this session.
2079 *
2080 * @returns IPRT status code.
2081 * @param pSession Session to which the GIP mapping should belong.
2082 */
2083SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2084{
2085 int rc = VINF_SUCCESS;
2086 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2087#ifdef DEBUG_DARWIN_GIP
2088 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2089 pSession,
2090 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2091 pSession->GipMapObjR3));
2092#else
2093 dprintf(("SUPR0GipUnmap: pSession=%p\n", pSession));
2094#endif
2095 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2096
2097 RTSemFastMutexRequest(pDevExt->mtxGip);
2098
2099 /*
2100 * Unmap anything?
2101 */
2102#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2103 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2104 {
2105 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2106 AssertRC(rc);
2107 if (RT_SUCCESS(rc))
2108 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2109 }
2110#else
2111 if (pSession->pGip)
2112 {
2113 rc = supdrvOSGipUnmap(pDevExt, pSession->pGip);
2114 if (!rc)
2115 pSession->pGip = NULL;
2116 }
2117#endif
2118
2119 /*
2120 * Dereference global GIP.
2121 */
2122 if (pSession->fGipReferenced && !rc)
2123 {
2124 pSession->fGipReferenced = 0;
2125 if ( pDevExt->cGipUsers > 0
2126 && !--pDevExt->cGipUsers)
2127 {
2128 dprintf(("SUPR0GipUnmap: Suspends GIP updating\n"));
2129#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2130 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2131#else
2132 supdrvOSGipSuspend(pDevExt);
2133#endif
2134 }
2135 }
2136
2137 RTSemFastMutexRelease(pDevExt->mtxGip);
2138
2139 return rc;
2140}
2141
2142
2143/**
2144 * Adds a memory object to the session.
2145 *
2146 * @returns IPRT status code.
2147 * @param pMem Memory tracking structure containing the
2148 * information to track.
2149 * @param pSession The session.
2150 */
2151static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2152{
2153 PSUPDRVBUNDLE pBundle;
2154 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2155
2156 /*
2157 * Find free entry and record the allocation.
2158 */
2159 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2160 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2161 {
2162 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2163 {
2164 unsigned i;
2165 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2166 {
2167 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2168 {
2169 pBundle->cUsed++;
2170 pBundle->aMem[i] = *pMem;
2171 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2172 return VINF_SUCCESS;
2173 }
2174 }
2175 AssertFailed(); /* !!this can't be happening!!! */
2176 }
2177 }
2178 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2179
2180 /*
2181 * Need to allocate a new bundle.
2182 * Insert into the last entry in the bundle.
2183 */
2184 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2185 if (!pBundle)
2186 return VERR_NO_MEMORY;
2187
2188 /* take last entry. */
2189 pBundle->cUsed++;
2190 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2191
2192 /* insert into list. */
2193 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2194 pBundle->pNext = pSession->Bundle.pNext;
2195 pSession->Bundle.pNext = pBundle;
2196 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2197
2198 return VINF_SUCCESS;
2199}
2200
2201
2202/**
2203 * Releases a memory object referenced by pointer and type.
2204 *
2205 * @returns IPRT status code.
2206 * @param pSession Session data.
2207 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2208 * @param eType Memory type.
2209 */
2210static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2211{
2212 PSUPDRVBUNDLE pBundle;
2213 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2214
2215 /*
2216 * Validate input.
2217 */
2218 if (!uPtr)
2219 {
2220 dprintf(("Illegal address %p\n", (void *)uPtr));
2221 return VERR_INVALID_PARAMETER;
2222 }
2223
2224 /*
2225 * Search for the address.
2226 */
2227 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2228 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2229 {
2230 if (pBundle->cUsed > 0)
2231 {
2232 unsigned i;
2233 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2234 {
2235 if ( pBundle->aMem[i].eType == eType
2236 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2237 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2238 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2239 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2240 )
2241 {
2242 /* Make a copy of it and release it outside the spinlock. */
2243 SUPDRVMEMREF Mem = pBundle->aMem[i];
2244 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2245 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2246 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2247 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2248
2249 if (Mem.MapObjR3)
2250 {
2251 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2252 AssertRC(rc); /** @todo figure out how to handle this. */
2253 }
2254 if (Mem.MemObj)
2255 {
2256 int rc = RTR0MemObjFree(Mem.MemObj, false);
2257 AssertRC(rc); /** @todo figure out how to handle this. */
2258 }
2259 return VINF_SUCCESS;
2260 }
2261 }
2262 }
2263 }
2264 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2265 dprintf(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2266 return VERR_INVALID_PARAMETER;
2267}
2268
2269
2270#ifdef VBOX_WITH_IDT_PATCHING
2271/**
2272 * Install IDT for the current CPU.
2273 *
2274 * @returns One of the following IPRT status codes:
2275 * @retval VINF_SUCCESS on success.
2276 * @retval VERR_IDT_FAILED.
2277 * @retval VERR_NO_MEMORY.
2278 * @param pDevExt The device extension.
2279 * @param pSession The session data.
2280 * @param pReq The request.
2281 */
2282static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2283{
2284 PSUPDRVPATCHUSAGE pUsagePre;
2285 PSUPDRVPATCH pPatchPre;
2286 RTIDTR Idtr;
2287 PSUPDRVPATCH pPatch;
2288 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2289 dprintf(("supdrvIOCtl_IdtInstall\n"));
2290
2291 /*
2292 * Preallocate entry for this CPU cause we don't wanna do
2293 * that inside the spinlock!
2294 */
2295 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2296 if (!pUsagePre)
2297 return VERR_NO_MEMORY;
2298
2299 /*
2300 * Take the spinlock and see what we need to do.
2301 */
2302 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2303
2304 /* check if we already got a free patch. */
2305 if (!pDevExt->pIdtPatchesFree)
2306 {
2307 /*
2308 * Allocate a patch - outside the spinlock of course.
2309 */
2310 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2311
2312 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2313 if (!pPatchPre)
2314 return VERR_NO_MEMORY;
2315
2316 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2317 }
2318 else
2319 {
2320 pPatchPre = pDevExt->pIdtPatchesFree;
2321 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2322 }
2323
2324 /* look for matching patch entry */
2325 ASMGetIDTR(&Idtr);
2326 pPatch = pDevExt->pIdtPatches;
2327 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2328 pPatch = pPatch->pNext;
2329
2330 if (!pPatch)
2331 {
2332 /*
2333 * Create patch.
2334 */
2335 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2336 if (pPatch)
2337 pPatchPre = NULL; /* mark as used. */
2338 }
2339 else
2340 {
2341 /*
2342 * Simply increment patch usage.
2343 */
2344 pPatch->cUsage++;
2345 }
2346
2347 if (pPatch)
2348 {
2349 /*
2350 * Increment and add if need be the session usage record for this patch.
2351 */
2352 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2353 while (pUsage && pUsage->pPatch != pPatch)
2354 pUsage = pUsage->pNext;
2355
2356 if (!pUsage)
2357 {
2358 /*
2359 * Add usage record.
2360 */
2361 pUsagePre->cUsage = 1;
2362 pUsagePre->pPatch = pPatch;
2363 pUsagePre->pNext = pSession->pPatchUsage;
2364 pSession->pPatchUsage = pUsagePre;
2365 pUsagePre = NULL; /* mark as used. */
2366 }
2367 else
2368 {
2369 /*
2370 * Increment usage count.
2371 */
2372 pUsage->cUsage++;
2373 }
2374 }
2375
2376 /* free patch - we accumulate them for paranoid saftly reasons. */
2377 if (pPatchPre)
2378 {
2379 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2380 pDevExt->pIdtPatchesFree = pPatchPre;
2381 }
2382
2383 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2384
2385 /*
2386 * Free unused preallocated buffers.
2387 */
2388 if (pUsagePre)
2389 RTMemFree(pUsagePre);
2390
2391 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2392
2393 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2394}
2395
2396
/**
 * This creates an IDT patch entry.
 * If this is the first patch being installed it will also determine the IDT
 * entry (vector) to use and store it in SUPDRVDEVEXT::u8Idt.
 *
 * The function fills in the replacement gate descriptor (ChangedIdt), saves
 * the current descriptor (SavedIdt), emits the handler machine code into
 * pPatch->auCode, writes the new descriptor into the IDT and links the
 * patch into the device extension's patch list.
 *
 * @returns pPatch on success.
 * @returns NULL on failure (IDT address below 0x40000000, or no usable
 *          IDT entry could be found).
 * @param   pDevExt     Pointer to globals.
 * @param   pPatch      Patch entry to use. It is zeroed before use.
 *                      This will be linked into SUPDRVDEVEXT::pIdtPatches on
 *                      successful return.
 * @remark  Caller must own the SUPDRVDEVEXT::Spinlock!
 */
static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
{
    RTIDTR Idtr;
    PSUPDRVIDTE paIdt;
    dprintf(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));

    /*
     * Get IDT.
     */
    ASMGetIDTR(&Idtr);
    paIdt = (PSUPDRVIDTE)Idtr.pIdt;
    /*
     * Recent Linux kernels can be configured to 1G user /3G kernel.
     * Anything below the 1G mark is rejected as a bad IDT address.
     */
    if ((uintptr_t)paIdt < 0x40000000)
    {
        AssertMsgFailed(("bad paIdt=%p\n", paIdt));
        return NULL;
    }

    /* The vector is only selected once; a non-zero u8Idt is reused as is. */
    if (!pDevExt->u8Idt)
    {
        /*
         * Test out the alternatives.
         *
         * At the moment we do not support chaining thus we ASSUME that one of
         * these 48 entries is unused (which is not a problem on Win32 and
         * Linux to my knowledge).
         */
        /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
         * combined with gathering info about which guest system call gates we can hook up directly. */
        unsigned i;
        uint8_t u8Idt = 0;
        /* Candidate vectors, tried in this order. */
        static uint8_t au8Ints[] =
        {
#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on linux (ef is IPI,
                      * local apic timer, or some other frequently firing thing). */
            0xef, 0xee, 0xed, 0xec,
#endif
            0xeb, 0xea, 0xe9, 0xe8,
            0xdf, 0xde, 0xdd, 0xdc,
            0x7b, 0x7a, 0x79, 0x78,
            0xbf, 0xbe, 0xbd, 0xbc,
        };
#if defined(RT_ARCH_AMD64) && defined(DEBUG)
        /* Debug aid: dump one half of the IDT (0x80 entries), alternating
           between the lower and upper half on each invocation. */
        static int s_iWobble = 0;
        unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
        dprintf(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
        for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
        {
            dprintf(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
                     i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
                     paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
                     paIdt[i].u32Reserved, paIdt[i].u5Reserved));
        }
#endif
        /* look for entries which are not present or otherwise unused. */
        for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
        {
            u8Idt = au8Ints[i];
            if (    u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
                &&  (   !paIdt[u8Idt].u1Present
                     || paIdt[u8Idt].u5Type2 == 0))
                break;
            u8Idt = 0; /* zero means "nothing found" once the loop ends. */
        }
        if (!u8Idt)
        {
            /* try again, look for a compatible entry: a present interrupt
               gate whose selector has the low two (RPL) bits clear. */
            for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
            {
                u8Idt = au8Ints[i];
                if (    u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
                    &&  paIdt[u8Idt].u1Present
                    &&  paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
                    &&  !(paIdt[u8Idt].u16SegSel & 3))
                    break;
                u8Idt = 0;
            }
            if (!u8Idt)
            {
                dprintf(("Failed to find appropirate IDT entry!!\n"));
                return NULL;
            }
        }
        pDevExt->u8Idt = u8Idt;
        dprintf(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
    }

    /*
     * Prepare the patch: remember the original descriptor and build the
     * replacement, a present DPL 3 interrupt gate pointing at auCode.
     */
    memset(pPatch, 0, sizeof(*pPatch));
    pPatch->pvIdt = paIdt;
    pPatch->cUsage = 1;
    pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
    pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
    pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
    pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
#ifdef RT_ARCH_AMD64
    pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
#endif
    pPatch->ChangedIdt.u16SegSel = ASMGetCS();
#ifdef RT_ARCH_AMD64
    pPatch->ChangedIdt.u3IST = 0;
    pPatch->ChangedIdt.u5Reserved = 0;
#else /* x86 */
    pPatch->ChangedIdt.u5Reserved = 0;
    pPatch->ChangedIdt.u3Type1 = 0;
#endif /* x86 */
    pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
    pPatch->ChangedIdt.u2DPL = 3;
    pPatch->ChangedIdt.u1Present = 1;

    /*
     * Generate the patch code.
     * The bytes are emitted sequentially through the cursor union 'u';
     * copies of the cursor (uFix*) remember spots that are patched up once
     * their targets are known.
     */
    {
#ifdef RT_ARCH_AMD64
        union
        {
            uint8_t *pb;
            uint32_t *pu32;
            uint64_t *pu64;
        } u, uFixJmp, uFixCall, uNotNested;
        u.pb = &pPatch->auCode[0];

        /* check the cookie */
        *u.pb++ = 0x3d;                 //      cmp     eax, GLOBALCOOKIE
        *u.pu32++ = pDevExt->u32Cookie;

        *u.pb++ = 0x74;                 //      jz      @VBoxCall
        *u.pb++ = 2;                    // (skips the 2 byte short jump below)

        /* jump to forwarder code. */
        *u.pb++ = 0xeb;
        uFixJmp = u;                    // displacement byte is fixed up further down.
        *u.pb++ = 0xfe;

                                        //  @VBoxCall:
        *u.pb++ = 0x0f;                 //      swapgs
        *u.pb++ = 0x01;
        *u.pb++ = 0xf8;

        /*
         * Call VMMR0Entry
         *      We don't have to push the arguments here, but we have to
         *      reserve some stack space for the interrupt forwarding.
         */
# ifdef RT_OS_WINDOWS
        *u.pb++ = 0x50;                 //      push    rax                             ; alignment filler.
        *u.pb++ = 0x41;                 //      push    r8                              ; uArg
        *u.pb++ = 0x50;
        *u.pb++ = 0x52;                 //      push    rdx                             ; uOperation
        *u.pb++ = 0x51;                 //      push    rcx                             ; pVM
# else
        *u.pb++ = 0x51;                 //      push    rcx                             ; alignment filler.
        *u.pb++ = 0x52;                 //      push    rdx                             ; uArg
        *u.pb++ = 0x56;                 //      push    rsi                             ; uOperation
        *u.pb++ = 0x57;                 //      push    rdi                             ; pVM
# endif

        *u.pb++ = 0xff;                 //      call    qword [pfnVMMR0EntryInt wrt rip]
        *u.pb++ = 0x15;
        uFixCall = u;                   // rip-relative displacement, fixed up below.
        *u.pu32++ = 0;

        *u.pb++ = 0x48;                 //      add     rsp, 20h                        ; remove call frame.
        *u.pb++ = 0x81;
        *u.pb++ = 0xc4;
        *u.pu32++ = 0x20;

        *u.pb++ = 0x0f;                 //      swapgs
        *u.pb++ = 0x01;
        *u.pb++ = 0xf8;

        /* Return to R3. */
        uNotNested = u;
        *u.pb++ = 0x48;                 //      iretq
        *u.pb++ = 0xcf;

        while ((uintptr_t)u.pb & 0x7)   //      align 8
            *u.pb++ = 0xcc;

        /* Pointer to the VMMR0Entry. */ //  pfnVMMR0EntryInt dq StubVMMR0Entry
        *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
        pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
        /* Without a loaded VMMR0 the pointer targets the stub that follows
           directly after this 8 byte slot. */
        *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;

        /* stub entry. */               //  StubVMMR0Entry:
        pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
        *u.pb++ = 0x33;                 //      xor     eax, eax
        *u.pb++ = 0xc0;

        *u.pb++ = 0x48;                 //      dec     rax
        *u.pb++ = 0xff;
        *u.pb++ = 0xc8;

        *u.pb++ = 0xc3;                 //      ret

        /* forward to the original handler using a retf. */
        *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;

        /* If the saved entry had a zero type, the target is our own iretq
           (uNotNested) instead of the saved handler. */
        *u.pb++ = 0x68;                 //      push    <target cs>
        *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;

        *u.pb++ = 0x68;                 //      push    <low target rip>
        *u.pu32++ = !pPatch->SavedIdt.u5Type2
                  ? (uint32_t)(uintptr_t)uNotNested.pb
                  : (uint32_t)pPatch->SavedIdt.u16OffsetLow
                  | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;

        *u.pb++ = 0xc7;                 //      mov     dword [rsp + 4], <high target rip>
        *u.pb++ = 0x44;
        *u.pb++ = 0x24;
        *u.pb++ = 0x04;
        *u.pu32++ = !pPatch->SavedIdt.u5Type2
                  ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
                  : pPatch->SavedIdt.u32OffsetTop;

        *u.pb++ = 0x48;                 //      retf        ; does this require prefix?
        *u.pb++ = 0xcb;

#else /* RT_ARCH_X86 */

        union
        {
            uint8_t *pb;
            uint16_t *pu16;
            uint32_t *pu32;
        } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
        u.pb = &pPatch->auCode[0];

        /* check the cookie */
        *u.pb++ = 0x81;                 //      cmp     esi, GLOBALCOOKIE
        *u.pb++ = 0xfe;
        *u.pu32++ = pDevExt->u32Cookie;

        *u.pb++ = 0x74;                 //      jz      VBoxCall
        uFixJmp = u;                    // displacement byte is fixed up at VBoxCall.
        *u.pb++ = 0;

        /* jump (far) to the original handler / not-nested-stub. */
        *u.pb++ = 0xea;                 //      jmp far NotNested
        uFixJmpNotNested = u;           // offset:selector operand, fixed up at the end.
        *u.pu32++ = 0;
        *u.pu16++ = 0;

        /* save selector registers. */  // VBoxCall:
        *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
        *u.pb++ = 0x0f;                 //      push    fs
        *u.pb++ = 0xa0;

        *u.pb++ = 0x1e;                 //      push    ds

        *u.pb++ = 0x06;                 //      push    es

        /* call frame */
        *u.pb++ = 0x51;                 //      push    ecx

        *u.pb++ = 0x52;                 //      push    edx

        *u.pb++ = 0x50;                 //      push    eax

        /* load ds, es and perhaps fs before call. */
        *u.pb++ = 0xb8;                 //      mov     eax, KernelDS
        *u.pu32++ = ASMGetDS();

        *u.pb++ = 0x8e;                 //      mov     ds, eax
        *u.pb++ = 0xd8;

        *u.pb++ = 0x8e;                 //      mov     es, eax
        *u.pb++ = 0xc0;

#ifdef RT_OS_WINDOWS
        *u.pb++ = 0xb8;                 //      mov     eax, KernelFS
        *u.pu32++ = ASMGetFS();

        *u.pb++ = 0x8e;                 //      mov     fs, eax
        *u.pb++ = 0xe0;
#endif

        /* do the call. */
        *u.pb++ = 0xe8;                 //      call    _VMMR0Entry / StubVMMR0Entry
        uFixCall = u;                   // rel32 operand, fixed up below.
        pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
        *u.pu32++ = 0xfffffffb;

        *u.pb++ = 0x83;                 //      add     esp, 0ch   ; cdecl
        *u.pb++ = 0xc4;
        *u.pb++ = 0x0c;

        /* restore selector registers. */
        *u.pb++ = 0x07;                 //      pop     es
                                        //
        *u.pb++ = 0x1f;                 //      pop     ds

        *u.pb++ = 0x0f;                 //      pop     fs
        *u.pb++ = 0xa1;

        uNotNested = u;                 // NotNested:
        *u.pb++ = 0xcf;                 //      iretd

        /* the stub VMMR0Entry. */      // StubVMMR0Entry:
        pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
        *u.pb++ = 0x33;                 //      xor     eax, eax
        *u.pb++ = 0xc0;

        *u.pb++ = 0x48;                 //      dec     eax

        *u.pb++ = 0xc3;                 //      ret

        /* Fixup the VMMR0Entry call: relative to the end of the rel32
           operand; targets the stub if no VMMR0 is loaded. */
        if (pDevExt->pvVMMR0)
            *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
        else
            *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);

        /* Fixup the forward / nested far jump: our own iretd if the saved
           entry had a zero type, otherwise the saved handler. */
        if (!pPatch->SavedIdt.u5Type2)
        {
            *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
            *uFixJmpNotNested.pu16++ = ASMGetCS();
        }
        else
        {
            *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
            *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
        }
#endif /* RT_ARCH_X86 */
        /* The generated code must fit inside the auCode buffer. */
        Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
#if 0
        /* dump the patch code */
        dprintf(("patch code: %p\n", &pPatch->auCode[0]));
        for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
            dprintf(("0x%02x,\n", *uFixCall.pb));
#endif
    }

    /*
     * Install the patch and verify that the IDT entry now matches it.
     */
    supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
    AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));

    /*
     * Link in the patch.
     */
    pPatch->pNext = pDevExt->pIdtPatches;
    pDevExt->pIdtPatches = pPatch;

    return pPatch;
}
2763
2764
2765/**
2766 * Removes the sessions IDT references.
2767 * This will uninstall our IDT patch if we left unreferenced.
2768 *
2769 * @returns VINF_SUCCESS.
2770 * @param pDevExt Device globals.
2771 * @param pSession Session data.
2772 */
2773static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2774{
2775 PSUPDRVPATCHUSAGE pUsage;
2776 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2777 dprintf(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2778
2779 /*
2780 * Take the spinlock.
2781 */
2782 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2783
2784 /*
2785 * Walk usage list, removing patches as their usage count reaches zero.
2786 */
2787 pUsage = pSession->pPatchUsage;
2788 while (pUsage)
2789 {
2790 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2791 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2792 else
2793 pUsage->pPatch->cUsage -= pUsage->cUsage;
2794
2795 /* next */
2796 pUsage = pUsage->pNext;
2797 }
2798
2799 /*
2800 * Empty the usage chain and we're done inside the spinlock.
2801 */
2802 pUsage = pSession->pPatchUsage;
2803 pSession->pPatchUsage = NULL;
2804
2805 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2806
2807 /*
2808 * Free usage entries.
2809 */
2810 while (pUsage)
2811 {
2812 void *pvToFree = pUsage;
2813 pUsage->cUsage = 0;
2814 pUsage->pPatch = NULL;
2815 pUsage = pUsage->pNext;
2816 RTMemFree(pvToFree);
2817 }
2818
2819 return VINF_SUCCESS;
2820}
2821
2822
2823/**
2824 * Remove one patch.
2825 *
2826 * Worker for supdrvIOCtl_IdtRemoveAll.
2827 *
2828 * @param pDevExt Device globals.
2829 * @param pPatch Patch entry to remove.
2830 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2831 */
2832static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2833{
2834 dprintf(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2835
2836 pPatch->cUsage = 0;
2837
2838 /*
2839 * If the IDT entry was changed it have to kick around for ever!
2840 * This will be attempted freed again, perhaps next time we'll succeed :-)
2841 */
2842 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2843 {
2844 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2845 return;
2846 }
2847
2848 /*
2849 * Unlink it.
2850 */
2851 if (pDevExt->pIdtPatches != pPatch)
2852 {
2853 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2854 while (pPatchPrev)
2855 {
2856 if (pPatchPrev->pNext == pPatch)
2857 {
2858 pPatchPrev->pNext = pPatch->pNext;
2859 break;
2860 }
2861 pPatchPrev = pPatchPrev->pNext;
2862 }
2863 Assert(!pPatchPrev);
2864 }
2865 else
2866 pDevExt->pIdtPatches = pPatch->pNext;
2867 pPatch->pNext = NULL;
2868
2869
2870 /*
2871 * Verify and restore the IDT.
2872 */
2873 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2874 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2875 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2876
2877 /*
2878 * Put it in the free list.
2879 * (This free list stuff is to calm my paranoia.)
2880 */
2881 pPatch->pvIdt = NULL;
2882 pPatch->pIdtEntry = NULL;
2883
2884 pPatch->pNext = pDevExt->pIdtPatchesFree;
2885 pDevExt->pIdtPatchesFree = pPatch;
2886}
2887
2888
2889/**
2890 * Write to an IDT entry.
2891 *
2892 * @param pvIdtEntry Where to write.
2893 * @param pNewIDTEntry What to write.
2894 */
2895static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
2896{
2897 RTUINTREG uCR0;
2898 RTUINTREG uFlags;
2899
2900 /*
2901 * On SMP machines (P4 hyperthreading included) we must preform a
2902 * 64-bit locked write when updating the IDT entry.
2903 *
2904 * The F00F bugfix for linux (and probably other OSes) causes
2905 * the IDT to be pointing to an readonly mapping. We get around that
2906 * by temporarily turning of WP. Since we're inside a spinlock at this
2907 * point, interrupts are disabled and there isn't any way the WP bit
2908 * flipping can cause any trouble.
2909 */
2910
2911 /* Save & Clear interrupt flag; Save & clear WP. */
2912 uFlags = ASMGetFlags();
2913 ASMSetFlags(uFlags & ~(RTUINTREG)(1 << 9)); /*X86_EFL_IF*/
2914 Assert(!(ASMGetFlags() & (1 << 9)));
2915 uCR0 = ASMGetCR0();
2916 ASMSetCR0(uCR0 & ~(RTUINTREG)(1 << 16)); /*X86_CR0_WP*/
2917
2918 /* Update IDT Entry */
2919#ifdef RT_ARCH_AMD64
2920 ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
2921#else
2922 ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
2923#endif
2924
2925 /* Restore CR0 & Flags */
2926 ASMSetCR0(uCR0);
2927 ASMSetFlags(uFlags);
2928}
2929#endif /* VBOX_WITH_IDT_PATCHING */
2930
2931
2932/**
2933 * Opens an image. If it's the first time it's opened the call must upload
2934 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
2935 *
2936 * This is the 1st step of the loading.
2937 *
2938 * @returns IPRT status code.
2939 * @param pDevExt Device globals.
2940 * @param pSession Session data.
2941 * @param pReq The open request.
2942 */
2943static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
2944{
2945 PSUPDRVLDRIMAGE pImage;
2946 unsigned cb;
2947 void *pv;
2948 dprintf(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
2949
2950 /*
2951 * Check if we got an instance of the image already.
2952 */
2953 RTSemFastMutexRequest(pDevExt->mtxLdr);
2954 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
2955 {
2956 if (!strcmp(pImage->szName, pReq->u.In.szName))
2957 {
2958 pImage->cUsage++;
2959 pReq->u.Out.pvImageBase = pImage->pvImage;
2960 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
2961 supdrvLdrAddUsage(pSession, pImage);
2962 RTSemFastMutexRelease(pDevExt->mtxLdr);
2963 return VINF_SUCCESS;
2964 }
2965 }
2966 /* (not found - add it!) */
2967
2968 /*
2969 * Allocate memory.
2970 */
2971 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
2972 pv = RTMemExecAlloc(cb);
2973 if (!pv)
2974 {
2975 RTSemFastMutexRelease(pDevExt->mtxLdr);
2976 return VERR_NO_MEMORY;
2977 }
2978
2979 /*
2980 * Setup and link in the LDR stuff.
2981 */
2982 pImage = (PSUPDRVLDRIMAGE)pv;
2983 pImage->pvImage = ALIGNP(pImage + 1, 32);
2984 pImage->cbImage = pReq->u.In.cbImage;
2985 pImage->pfnModuleInit = NULL;
2986 pImage->pfnModuleTerm = NULL;
2987 pImage->uState = SUP_IOCTL_LDR_OPEN;
2988 pImage->cUsage = 1;
2989 strcpy(pImage->szName, pReq->u.In.szName);
2990
2991 pImage->pNext = pDevExt->pLdrImages;
2992 pDevExt->pLdrImages = pImage;
2993
2994 supdrvLdrAddUsage(pSession, pImage);
2995
2996 pReq->u.Out.pvImageBase = pImage->pvImage;
2997 pReq->u.Out.fNeedsLoading = true;
2998 RTSemFastMutexRelease(pDevExt->mtxLdr);
2999 return VINF_SUCCESS;
3000}
3001
3002
3003/**
3004 * Loads the image bits.
3005 *
3006 * This is the 2nd step of the loading.
3007 *
3008 * @returns IPRT status code.
3009 * @param pDevExt Device globals.
3010 * @param pSession Session data.
3011 * @param pReq The request.
3012 */
3013static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
3014{
3015 PSUPDRVLDRUSAGE pUsage;
3016 PSUPDRVLDRIMAGE pImage;
3017 int rc;
3018 dprintf(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));
3019
3020 /*
3021 * Find the ldr image.
3022 */
3023 RTSemFastMutexRequest(pDevExt->mtxLdr);
3024 pUsage = pSession->pLdrUsage;
3025 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3026 pUsage = pUsage->pNext;
3027 if (!pUsage)
3028 {
3029 RTSemFastMutexRelease(pDevExt->mtxLdr);
3030 dprintf(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
3031 return VERR_INVALID_HANDLE;
3032 }
3033 pImage = pUsage->pImage;
3034 if (pImage->cbImage != pReq->u.In.cbImage)
3035 {
3036 RTSemFastMutexRelease(pDevExt->mtxLdr);
3037 dprintf(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
3038 return VERR_INVALID_HANDLE;
3039 }
3040 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
3041 {
3042 unsigned uState = pImage->uState;
3043 RTSemFastMutexRelease(pDevExt->mtxLdr);
3044 if (uState != SUP_IOCTL_LDR_LOAD)
3045 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
3046 return SUPDRV_ERR_ALREADY_LOADED;
3047 }
3048 switch (pReq->u.In.eEPType)
3049 {
3050 case SUPLDRLOADEP_NOTHING:
3051 break;
3052 case SUPLDRLOADEP_VMMR0:
3053 if ( !pReq->u.In.EP.VMMR0.pvVMMR0
3054 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
3055 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
3056 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
3057 {
3058 RTSemFastMutexRelease(pDevExt->mtxLdr);
3059 dprintf(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
3060 pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3061 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3062 return VERR_INVALID_PARAMETER;
3063 }
3064 if ( (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3065 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3066 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3067 {
3068 RTSemFastMutexRelease(pDevExt->mtxLdr);
3069 dprintf(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p is NULL!\n",
3070 pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3071 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3072 return VERR_INVALID_PARAMETER;
3073 }
3074 break;
3075 default:
3076 RTSemFastMutexRelease(pDevExt->mtxLdr);
3077 dprintf(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
3078 return VERR_INVALID_PARAMETER;
3079 }
3080 if ( pReq->u.In.pfnModuleInit
3081 && (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3082 {
3083 RTSemFastMutexRelease(pDevExt->mtxLdr);
3084 dprintf(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
3085 pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
3086 return VERR_INVALID_PARAMETER;
3087 }
3088 if ( pReq->u.In.pfnModuleTerm
3089 && (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3090 {
3091 RTSemFastMutexRelease(pDevExt->mtxLdr);
3092 dprintf(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
3093 pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
3094 return VERR_INVALID_PARAMETER;
3095 }
3096
3097 /*
3098 * Copy the memory.
3099 */
3100 /* no need to do try/except as this is a buffered request. */
3101 memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
3102 pImage->uState = SUP_IOCTL_LDR_LOAD;
3103 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
3104 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
3105 pImage->offSymbols = pReq->u.In.offSymbols;
3106 pImage->cSymbols = pReq->u.In.cSymbols;
3107 pImage->offStrTab = pReq->u.In.offStrTab;
3108 pImage->cbStrTab = pReq->u.In.cbStrTab;
3109
3110 /*
3111 * Update any entry points.
3112 */
3113 switch (pReq->u.In.eEPType)
3114 {
3115 default:
3116 case SUPLDRLOADEP_NOTHING:
3117 rc = VINF_SUCCESS;
3118 break;
3119 case SUPLDRLOADEP_VMMR0:
3120 rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3121 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
3122 break;
3123 }
3124
3125 /*
3126 * On success call the module initialization.
3127 */
3128 dprintf(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
3129 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
3130 {
3131 dprintf(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
3132 rc = pImage->pfnModuleInit();
3133 if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
3134 supdrvLdrUnsetR0EP(pDevExt);
3135 }
3136
3137 if (rc)
3138 pImage->uState = SUP_IOCTL_LDR_OPEN;
3139
3140 RTSemFastMutexRelease(pDevExt->mtxLdr);
3141 return rc;
3142}
3143
3144
3145/**
3146 * Frees a previously loaded (prep'ed) image.
3147 *
3148 * @returns IPRT status code.
3149 * @param pDevExt Device globals.
3150 * @param pSession Session data.
3151 * @param pReq The request.
3152 */
3153static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
3154{
3155 int rc;
3156 PSUPDRVLDRUSAGE pUsagePrev;
3157 PSUPDRVLDRUSAGE pUsage;
3158 PSUPDRVLDRIMAGE pImage;
3159 dprintf(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
3160
3161 /*
3162 * Find the ldr image.
3163 */
3164 RTSemFastMutexRequest(pDevExt->mtxLdr);
3165 pUsagePrev = NULL;
3166 pUsage = pSession->pLdrUsage;
3167 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3168 {
3169 pUsagePrev = pUsage;
3170 pUsage = pUsage->pNext;
3171 }
3172 if (!pUsage)
3173 {
3174 RTSemFastMutexRelease(pDevExt->mtxLdr);
3175 dprintf(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
3176 return VERR_INVALID_HANDLE;
3177 }
3178
3179 /*
3180 * Check if we can remove anything.
3181 */
3182 rc = VINF_SUCCESS;
3183 pImage = pUsage->pImage;
3184 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
3185 {
3186 /*
3187 * Check if there are any objects with destructors in the image, if
3188 * so leave it for the session cleanup routine so we get a chance to
3189 * clean things up in the right order and not leave them all dangling.
3190 */
3191 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3192 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3193 if (pImage->cUsage <= 1)
3194 {
3195 PSUPDRVOBJ pObj;
3196 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3197 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3198 {
3199 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3200 break;
3201 }
3202 }
3203 else
3204 {
3205 PSUPDRVUSAGE pGenUsage;
3206 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
3207 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3208 {
3209 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3210 break;
3211 }
3212 }
3213 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3214 if (rc == VINF_SUCCESS)
3215 {
3216 /* unlink it */
3217 if (pUsagePrev)
3218 pUsagePrev->pNext = pUsage->pNext;
3219 else
3220 pSession->pLdrUsage = pUsage->pNext;
3221
3222 /* free it */
3223 pUsage->pImage = NULL;
3224 pUsage->pNext = NULL;
3225 RTMemFree(pUsage);
3226
3227 /*
3228 * Derefrence the image.
3229 */
3230 if (pImage->cUsage <= 1)
3231 supdrvLdrFree(pDevExt, pImage);
3232 else
3233 pImage->cUsage--;
3234 }
3235 }
3236 else
3237 {
3238 /*
3239 * Dereference both image and usage.
3240 */
3241 pImage->cUsage--;
3242 pUsage->cUsage--;
3243 }
3244
3245 RTSemFastMutexRelease(pDevExt->mtxLdr);
3246 return VINF_SUCCESS;
3247}
3248
3249
3250/**
3251 * Gets the address of a symbol in an open image.
3252 *
3253 * @returns 0 on success.
3254 * @returns SUPDRV_ERR_* on failure.
3255 * @param pDevExt Device globals.
3256 * @param pSession Session data.
3257 * @param pReq The request buffer.
3258 */
3259static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3260{
3261 PSUPDRVLDRIMAGE pImage;
3262 PSUPDRVLDRUSAGE pUsage;
3263 uint32_t i;
3264 PSUPLDRSYM paSyms;
3265 const char *pchStrings;
3266 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3267 void *pvSymbol = NULL;
3268 int rc = VERR_GENERAL_FAILURE;
3269 dprintf2(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3270
3271 /*
3272 * Find the ldr image.
3273 */
3274 RTSemFastMutexRequest(pDevExt->mtxLdr);
3275 pUsage = pSession->pLdrUsage;
3276 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3277 pUsage = pUsage->pNext;
3278 if (!pUsage)
3279 {
3280 RTSemFastMutexRelease(pDevExt->mtxLdr);
3281 dprintf(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3282 return VERR_INVALID_HANDLE;
3283 }
3284 pImage = pUsage->pImage;
3285 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3286 {
3287 unsigned uState = pImage->uState;
3288 RTSemFastMutexRelease(pDevExt->mtxLdr);
3289 dprintf(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3290 return VERR_ALREADY_LOADED;
3291 }
3292
3293 /*
3294 * Search the symbol string.
3295 */
3296 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3297 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3298 for (i = 0; i < pImage->cSymbols; i++)
3299 {
3300 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3301 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3302 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3303 {
3304 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3305 rc = VINF_SUCCESS;
3306 break;
3307 }
3308 }
3309 RTSemFastMutexRelease(pDevExt->mtxLdr);
3310 pReq->u.Out.pvSymbol = pvSymbol;
3311 return rc;
3312}
3313
3314
3315/**
3316 * Updates the IDT patches to point to the specified VMM R0 entry
3317 * point (i.e. VMMR0Enter()).
3318 *
3319 * @returns IPRT status code.
3320 * @param pDevExt Device globals.
3321 * @param pSession Session data.
3322 * @param pVMMR0 VMMR0 image handle.
3323 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3324 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3325 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3326 * @remark Caller must own the loader mutex.
3327 */
3328static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3329{
3330 int rc = VINF_SUCCESS;
3331 dprintf(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3332
3333
3334 /*
3335 * Check if not yet set.
3336 */
3337 if (!pDevExt->pvVMMR0)
3338 {
3339#ifdef VBOX_WITH_IDT_PATCHING
3340 PSUPDRVPATCH pPatch;
3341#endif
3342
3343 /*
3344 * Set it and update IDT patch code.
3345 */
3346 pDevExt->pvVMMR0 = pvVMMR0;
3347 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3348 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3349 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3350#ifdef VBOX_WITH_IDT_PATCHING
3351 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3352 {
3353# ifdef RT_ARCH_AMD64
3354 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3355# else /* RT_ARCH_X86 */
3356 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3357 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3358# endif
3359 }
3360#endif /* VBOX_WITH_IDT_PATCHING */
3361 }
3362 else
3363 {
3364 /*
3365 * Return failure or success depending on whether the values match or not.
3366 */
3367 if ( pDevExt->pvVMMR0 != pvVMMR0
3368 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3369 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3370 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3371 {
3372 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3373 rc = VERR_INVALID_PARAMETER;
3374 }
3375 }
3376 return rc;
3377}
3378
3379
3380/**
3381 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3382 *
3383 * @param pDevExt Device globals.
3384 */
3385static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3386{
3387#ifdef VBOX_WITH_IDT_PATCHING
3388 PSUPDRVPATCH pPatch;
3389#endif
3390
3391 pDevExt->pvVMMR0 = NULL;
3392 pDevExt->pfnVMMR0EntryInt = NULL;
3393 pDevExt->pfnVMMR0EntryFast = NULL;
3394 pDevExt->pfnVMMR0EntryEx = NULL;
3395
3396#ifdef VBOX_WITH_IDT_PATCHING
3397 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3398 {
3399# ifdef RT_ARCH_AMD64
3400 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3401 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3402# else /* RT_ARCH_X86 */
3403 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3404 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3405# endif
3406 }
3407#endif /* VBOX_WITH_IDT_PATCHING */
3408}
3409
3410
3411/**
3412 * Adds a usage reference in the specified session of an image.
3413 *
3414 * @param pSession Session in question.
3415 * @param pImage Image which the session is using.
3416 */
3417static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3418{
3419 PSUPDRVLDRUSAGE pUsage;
3420 dprintf(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3421
3422 /*
3423 * Referenced it already?
3424 */
3425 pUsage = pSession->pLdrUsage;
3426 while (pUsage)
3427 {
3428 if (pUsage->pImage == pImage)
3429 {
3430 pUsage->cUsage++;
3431 return;
3432 }
3433 pUsage = pUsage->pNext;
3434 }
3435
3436 /*
3437 * Allocate new usage record.
3438 */
3439 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3440 Assert(pUsage);
3441 if (pUsage)
3442 {
3443 pUsage->cUsage = 1;
3444 pUsage->pImage = pImage;
3445 pUsage->pNext = pSession->pLdrUsage;
3446 pSession->pLdrUsage = pUsage;
3447 }
3448 /* ignore errors... */
3449}
3450
3451
3452/**
3453 * Frees a load image.
3454 *
3455 * @param pDevExt Pointer to device extension.
3456 * @param pImage Pointer to the image we're gonna free.
3457 * This image must exit!
3458 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3459 */
3460static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3461{
3462 PSUPDRVLDRIMAGE pImagePrev;
3463 dprintf(("supdrvLdrFree: pImage=%p\n", pImage));
3464
3465 /* find it - arg. should've used doubly linked list. */
3466 Assert(pDevExt->pLdrImages);
3467 pImagePrev = NULL;
3468 if (pDevExt->pLdrImages != pImage)
3469 {
3470 pImagePrev = pDevExt->pLdrImages;
3471 while (pImagePrev->pNext != pImage)
3472 pImagePrev = pImagePrev->pNext;
3473 Assert(pImagePrev->pNext == pImage);
3474 }
3475
3476 /* unlink */
3477 if (pImagePrev)
3478 pImagePrev->pNext = pImage->pNext;
3479 else
3480 pDevExt->pLdrImages = pImage->pNext;
3481
3482 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3483 if (pDevExt->pvVMMR0 == pImage->pvImage)
3484 supdrvLdrUnsetR0EP(pDevExt);
3485
3486 /* check for objects with destructors in this image. (Shouldn't happen.) */
3487 if (pDevExt->pObjs)
3488 {
3489 unsigned cObjs = 0;
3490 PSUPDRVOBJ pObj;
3491 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3492 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3493 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3494 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3495 {
3496 pObj->pfnDestructor = NULL;
3497 cObjs++;
3498 }
3499 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3500 if (cObjs)
3501 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3502 }
3503
3504 /* call termination function if fully loaded. */
3505 if ( pImage->pfnModuleTerm
3506 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3507 {
3508 dprintf(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3509 pImage->pfnModuleTerm();
3510 }
3511
3512 /* free the image */
3513 pImage->cUsage = 0;
3514 pImage->pNext = 0;
3515 pImage->uState = SUP_IOCTL_LDR_FREE;
3516 RTMemExecFree(pImage);
3517}
3518
3519
3520/**
3521 * Gets the current paging mode of the CPU and stores in in pOut.
3522 */
3523static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3524{
3525 SUPPAGINGMODE enmMode;
3526
3527 RTUINTREG cr0 = ASMGetCR0();
3528 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3529 enmMode = SUPPAGINGMODE_INVALID;
3530 else
3531 {
3532 RTUINTREG cr4 = ASMGetCR4();
3533 uint32_t fNXEPlusLMA = 0;
3534 if (cr4 & X86_CR4_PAE)
3535 {
3536 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3537 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3538 {
3539 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3540 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3541 fNXEPlusLMA |= BIT(0);
3542 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3543 fNXEPlusLMA |= BIT(1);
3544 }
3545 }
3546
3547 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3548 {
3549 case 0:
3550 enmMode = SUPPAGINGMODE_32_BIT;
3551 break;
3552
3553 case X86_CR4_PGE:
3554 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3555 break;
3556
3557 case X86_CR4_PAE:
3558 enmMode = SUPPAGINGMODE_PAE;
3559 break;
3560
3561 case X86_CR4_PAE | BIT(0):
3562 enmMode = SUPPAGINGMODE_PAE_NX;
3563 break;
3564
3565 case X86_CR4_PAE | X86_CR4_PGE:
3566 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3567 break;
3568
3569 case X86_CR4_PAE | X86_CR4_PGE | BIT(0):
3570 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3571 break;
3572
3573 case BIT(1) | X86_CR4_PAE:
3574 enmMode = SUPPAGINGMODE_AMD64;
3575 break;
3576
3577 case BIT(1) | X86_CR4_PAE | BIT(0):
3578 enmMode = SUPPAGINGMODE_AMD64_NX;
3579 break;
3580
3581 case BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3582 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3583 break;
3584
3585 case BIT(1) | X86_CR4_PAE | X86_CR4_PGE | BIT(0):
3586 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3587 break;
3588
3589 default:
3590 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3591 enmMode = SUPPAGINGMODE_INVALID;
3592 break;
3593 }
3594 }
3595 return enmMode;
3596}
3597
3598
3599#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
3600/**
3601 * Creates the GIP.
3602 *
3603 * @returns negative errno.
3604 * @param pDevExt Instance data. GIP stuff may be updated.
3605 */
3606static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
3607{
3608 PSUPGLOBALINFOPAGE pGip;
3609 RTHCPHYS HCPhysGip;
3610 uint32_t u32SystemResolution;
3611 uint32_t u32Interval;
3612 int rc;
3613
3614 dprintf(("supdrvGipCreate:\n"));
3615
3616 /* assert order */
3617 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
3618 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
3619 Assert(!pDevExt->pGipTimer);
3620
3621 /*
3622 * Allocate a suitable page with a default kernel mapping.
3623 */
3624 rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
3625 if (RT_FAILURE(rc))
3626 {
3627 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
3628 return rc;
3629 }
3630 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
3631 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
3632
3633 /*
3634 * Try bump up the system timer resolution.
3635 * The more interrupts the better...
3636 */
3637 if ( RT_SUCCESS(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3638 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3639 || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /* 256 HZ */, &u32SystemResolution))
3640 || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /* 250 HZ */, &u32SystemResolution))
3641 || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /* 128 HZ */, &u32SystemResolution))
3642 || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /* 100 HZ */, &u32SystemResolution))
3643 || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /* 64 HZ */, &u32SystemResolution))
3644 || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /* 32 HZ */, &u32SystemResolution))
3645 )
3646 {
3647 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3648 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3649 }
3650
3651 /*
3652 * Find a reasonable update interval, something close to 10ms would be nice,
3653 * and create a recurring timer.
3654 */
3655 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
3656 while (u32Interval < 10000000 /* 10 ms */)
3657 u32Interval += u32SystemResolution;
3658
3659 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipTimer, pDevExt);
3660 if (RT_FAILURE(rc))
3661 {
3662 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %RU32 ns interval. rc=%d\n", u32Interval, rc));
3663 Assert(!pDevExt->pGipTimer);
3664 supdrvGipDestroy(pDevExt);
3665 return rc;
3666 }
3667
3668 /*
3669 * We're good.
3670 */
3671 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);
3672 return VINF_SUCCESS;
3673}
3674
3675
3676/**
3677 * Terminates the GIP.
3678 *
3679 * @param pDevExt Instance data. GIP stuff may be updated.
3680 */
3681static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3682{
3683 int rc;
3684#ifdef DEBUG_DARWIN_GIP
3685 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3686 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3687 pDevExt->pGipTimer, pDevExt->GipMemObj));
3688#endif
3689
3690 /*
3691 * Invalid the GIP data.
3692 */
3693 if (pDevExt->pGip)
3694 {
3695 supdrvGipTerm(pDevExt->pGip);
3696 pDevExt->pGip = NULL;
3697 }
3698
3699 /*
3700 * Destroy the timer and free the GIP memory object.
3701 */
3702 if (pDevExt->pGipTimer)
3703 {
3704 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3705 pDevExt->pGipTimer = NULL;
3706 }
3707
3708 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3709 {
3710 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3711 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3712 }
3713
3714 /*
3715 * Finally, release the system timer resolution request if one succeeded.
3716 */
3717 if (pDevExt->u32SystemTimerGranularityGrant)
3718 {
3719 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3720 pDevExt->u32SystemTimerGranularityGrant = 0;
3721 }
3722}
3723
3724
3725/**
3726 * Timer callback function.
3727 * @param pTimer The timer.
3728 * @param pvUser The device extension.
3729 */
3730static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser)
3731{
3732 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3733 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3734}
3735#endif /* USE_NEW_OS_INTERFACE_FOR_GIP */
3736
3737
3738/**
3739 * Initializes the GIP data.
3740 *
3741 * @returns IPRT status code.
3742 * @param pDevExt Pointer to the device instance data.
3743 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3744 * @param HCPhys The physical address of the GIP.
3745 * @param u64NanoTS The current nanosecond timestamp.
3746 * @param uUpdateHz The update freqence.
3747 */
3748int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3749{
3750 unsigned i;
3751#ifdef DEBUG_DARWIN_GIP
3752 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3753#else
3754 dprintf(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3755#endif
3756
3757 /*
3758 * Initialize the structure.
3759 */
3760 memset(pGip, 0, PAGE_SIZE);
3761 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3762 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3763 pGip->u32Mode = supdrvGipDeterminTscMode();
3764 pGip->u32UpdateHz = uUpdateHz;
3765 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3766 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3767
3768 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3769 {
3770 pGip->aCPUs[i].u32TransactionId = 2;
3771 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3772 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3773
3774 /*
3775 * We don't know the following values until we've executed updates.
3776 * So, we'll just insert very high values.
3777 */
3778 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3779 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3780 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3781 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3782 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3783 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3784 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3785 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3786 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3787 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3788 }
3789
3790 /*
3791 * Link it to the device extension.
3792 */
3793 pDevExt->pGip = pGip;
3794 pDevExt->HCPhysGip = HCPhys;
3795 pDevExt->cGipUsers = 0;
3796
3797 return VINF_SUCCESS;
3798}
3799
3800
3801/**
3802 * Determin the GIP TSC mode.
3803 *
3804 * @returns The most suitable TSC mode.
3805 */
3806static SUPGIPMODE supdrvGipDeterminTscMode(void)
3807{
3808#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
3809 /*
3810 * The problem here is that AMD processors with power management features
3811 * may easily end up with different TSCs because the CPUs or even cores
3812 * on the same physical chip run at different frequencies to save power.
3813 *
3814 * It is rumoured that this will be corrected with Barcelona and it's
3815 * expected that this will be indicated by the TscInvariant bit in
3816 * cpuid(0x80000007). So, the "difficult" bit here is to correctly
3817 * identify the older CPUs which don't do different frequency and
3818 * can be relied upon to have somewhat uniform TSC between the cpus.
3819 */
3820 if (supdrvOSGetCPUCount() > 1)
3821 {
3822 uint32_t uEAX, uEBX, uECX, uEDX;
3823
3824 /* Permit user users override. */
3825 if (supdrvOSGetForcedAsyncTscMode())
3826 return SUPGIPMODE_ASYNC_TSC;
3827
3828 /* Check for "AuthenticAMD" */
3829 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
3830 if (uEAX >= 1 && uEBX == 0x68747541 && uECX == 0x444d4163 && uEDX == 0x69746e65)
3831 {
3832 /* Check for APM support and that TscInvariant is cleared. */
3833 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
3834 if (uEAX >= 0x80000007)
3835 {
3836 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
3837 if ( !(uEDX & BIT(8))/* TscInvariant */
3838 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
3839 return SUPGIPMODE_ASYNC_TSC;
3840 }
3841 }
3842 }
3843#endif
3844 return SUPGIPMODE_SYNC_TSC;
3845}
3846
3847
3848/**
3849 * Invalidates the GIP data upon termination.
3850 *
3851 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3852 */
3853void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
3854{
3855 unsigned i;
3856 pGip->u32Magic = 0;
3857 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3858 {
3859 pGip->aCPUs[i].u64NanoTS = 0;
3860 pGip->aCPUs[i].u64TSC = 0;
3861 pGip->aCPUs[i].iTSCHistoryHead = 0;
3862 }
3863}
3864
3865
3866/**
3867 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
3868 * updates all the per cpu data except the transaction id.
3869 *
3870 * @param pGip The GIP.
3871 * @param pGipCpu Pointer to the per cpu data.
3872 * @param u64NanoTS The current time stamp.
3873 */
3874static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3875{
3876 uint64_t u64TSC;
3877 uint64_t u64TSCDelta;
3878 uint32_t u32UpdateIntervalTSC;
3879 uint32_t u32UpdateIntervalTSCSlack;
3880 unsigned iTSCHistoryHead;
3881 uint64_t u64CpuHz;
3882
3883 /*
3884 * Update the NanoTS.
3885 */
3886 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
3887
3888 /*
3889 * Calc TSC delta.
3890 */
3891 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
3892 u64TSC = ASMReadTSC();
3893 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
3894 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
3895
3896 if (u64TSCDelta >> 32)
3897 {
3898 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
3899 pGipCpu->cErrors++;
3900 }
3901
3902 /*
3903 * TSC History.
3904 */
3905 Assert(ELEMENTS(pGipCpu->au32TSCHistory) == 8);
3906
3907 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
3908 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
3909 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
3910
3911 /*
3912 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
3913 */
3914 if (pGip->u32UpdateHz >= 1000)
3915 {
3916 uint32_t u32;
3917 u32 = pGipCpu->au32TSCHistory[0];
3918 u32 += pGipCpu->au32TSCHistory[1];
3919 u32 += pGipCpu->au32TSCHistory[2];
3920 u32 += pGipCpu->au32TSCHistory[3];
3921 u32 >>= 2;
3922 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
3923 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
3924 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
3925 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
3926 u32UpdateIntervalTSC >>= 2;
3927 u32UpdateIntervalTSC += u32;
3928 u32UpdateIntervalTSC >>= 1;
3929
3930 /* Value choosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
3931 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
3932 }
3933 else if (pGip->u32UpdateHz >= 90)
3934 {
3935 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
3936 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
3937 u32UpdateIntervalTSC >>= 1;
3938
3939 /* value choosen on a 2GHz thinkpad running windows */
3940 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
3941 }
3942 else
3943 {
3944 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
3945
3946 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
3947 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
3948 }
3949 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
3950
3951 /*
3952 * CpuHz.
3953 */
3954 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
3955 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
3956}
3957
3958
3959/**
3960 * Updates the GIP.
3961 *
3962 * @param pGip Pointer to the GIP.
3963 * @param u64NanoTS The current nanosecond timesamp.
3964 */
3965void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
3966{
3967 /*
3968 * Determin the relevant CPU data.
3969 */
3970 PSUPGIPCPU pGipCpu;
3971 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
3972 pGipCpu = &pGip->aCPUs[0];
3973 else
3974 {
3975 unsigned iCpu = ASMGetApicId();
3976 if (RT_LIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
3977 return;
3978 pGipCpu = &pGip->aCPUs[iCpu];
3979 }
3980
3981 /*
3982 * Start update transaction.
3983 */
3984 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
3985 {
3986 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
3987 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
3988 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3989 pGipCpu->cErrors++;
3990 return;
3991 }
3992
3993 /*
3994 * Recalc the update frequency every 0x800th time.
3995 */
3996 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
3997 {
3998 if (pGip->u64NanoTSLastUpdateHz)
3999 {
4000#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4001 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4002 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4003 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4004 {
4005 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4006 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4007 }
4008#endif
4009 }
4010 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4011 }
4012
4013 /*
4014 * Update the data.
4015 */
4016 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4017
4018 /*
4019 * Complete transaction.
4020 */
4021 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4022}
4023
4024
4025/**
4026 * Updates the per cpu GIP data for the calling cpu.
4027 *
4028 * @param pGip Pointer to the GIP.
4029 * @param u64NanoTS The current nanosecond timesamp.
4030 * @param iCpu The CPU index.
4031 */
4032void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4033{
4034 PSUPGIPCPU pGipCpu;
4035
4036 if (RT_LIKELY(iCpu <= RT_ELEMENTS(pGip->aCPUs)))
4037 {
4038 pGipCpu = &pGip->aCPUs[iCpu];
4039
4040 /*
4041 * Start update transaction.
4042 */
4043 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4044 {
4045 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4046 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4047 pGipCpu->cErrors++;
4048 return;
4049 }
4050
4051 /*
4052 * Update the data.
4053 */
4054 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4055
4056 /*
4057 * Complete transaction.
4058 */
4059 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4060 }
4061}
4062
4063
4064#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
4065/**
4066 * Stub function for non-debug builds.
4067 */
4068RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
4069{
4070 return NULL;
4071}
4072
4073RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4074{
4075 return NULL;
4076}
4077
4078/**
4079 * Stub function for non-debug builds.
4080 */
4081RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4082{
4083 return 0;
4084}
4085
4086/**
4087 * Stub function for non-debug builds.
4088 */
4089RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4090{
4091}
4092
4093/**
4094 * Stub function for non-debug builds.
4095 */
4096RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4097{
4098}
4099
4100/**
4101 * Stub function for non-debug builds.
4102 */
4103RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4104{
4105}
4106
4107/**
4108 * Stub function for non-debug builds.
4109 */
4110RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4111{
4112}
4113
4114/**
4115 * Stub function for non-debug builds.
4116 */
4117RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4118{
4119}
4120#endif /* !DEBUG */
4121
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette