VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDRVShared.c@ 4833

Last change on this file since 4833 was 4831, checked in by vboxsync, 17 years ago

Removed the old MM code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 135.6 KB
Line 
1/* $Revision: 4831 $ */
2/** @file
3 * VirtualBox Support Driver - Shared code.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "SUPDRV.h"
23#ifndef PAGE_SHIFT
24# include <iprt/param.h>
25#endif
26#include <iprt/alloc.h>
27#include <iprt/semaphore.h>
28#include <iprt/spinlock.h>
29#include <iprt/thread.h>
30#include <iprt/process.h>
31#include <iprt/log.h>
32
33
34/*******************************************************************************
35* Defined Constants And Macros *
36*******************************************************************************/
/*
 * Bit and register definitions duplicated from x86.h, because that header
 * clashes with the Linux headers. Each name is #undef'ed first so that the
 * definition below replaces any definition that is already in scope.
 */
#undef X86_CR0_PG
#define X86_CR0_PG                          BIT(31)
#undef X86_CR0_PE
#define X86_CR0_PE                          BIT(0)
#undef X86_CPUID_AMD_FEATURE_EDX_NX
#define X86_CPUID_AMD_FEATURE_EDX_NX        BIT(20)
#undef MSR_K6_EFER
#define MSR_K6_EFER                         0xc0000080
#undef MSR_K6_EFER_NXE
#define MSR_K6_EFER_NXE                     BIT(11)
#undef MSR_K6_EFER_LMA
#define MSR_K6_EFER_LMA                     BIT(10)
#undef X86_CR4_PGE
#define X86_CR4_PGE                         BIT(7)
#undef X86_CR4_PAE
#define X86_CR4_PAE                         BIT(5)
#undef X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE BIT(29)
56
57
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members.
 * The value must be a power of 2 (0x800 = 2048). */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800
61
/**
 * Validates a session pointer.
 *
 * The pointer must pass VALID_PTR() and the session cookie must equal
 * BIRD_INV. The argument is expanded twice, so it must be free of side
 * effects; it is parenthesized so that any pointer expression can be passed.
 *
 * @returns true/false accordingly.
 * @param   pSession    The session.
 */
#define SUP_IS_SESSION_VALID(pSession)  \
    (   VALID_PTR(pSession) \
     && (pSession)->u32Cookie == BIRD_INV)
71
72
73/*******************************************************************************
74* Global Variables *
75*******************************************************************************/
/**
 * Array of the R0 SUP API.
 *
 * Each entry pairs an exported symbol name with the address of the function
 * implementing it. The table is copied out verbatim by the
 * SUP_IOCTL_QUERY_FUNCS request and its element count is reported by
 * SUP_IOCTL_COOKIE.
 */
static SUPFUNC g_aFunctions[] =
{
    /* name                                     function */
    { "SUPR0ObjRegister",                       (void *)SUPR0ObjRegister },
    { "SUPR0ObjAddRef",                         (void *)SUPR0ObjAddRef },
    { "SUPR0ObjRelease",                        (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess",                   (void *)SUPR0ObjVerifyAccess },
    { "SUPR0LockMem",                           (void *)SUPR0LockMem },
    { "SUPR0UnlockMem",                         (void *)SUPR0UnlockMem },
    { "SUPR0ContAlloc",                         (void *)SUPR0ContAlloc },
    { "SUPR0ContFree",                          (void *)SUPR0ContFree },
    { "SUPR0MemAlloc",                          (void *)SUPR0MemAlloc },
    { "SUPR0MemGetPhys",                        (void *)SUPR0MemGetPhys },
    { "SUPR0MemFree",                           (void *)SUPR0MemFree },
    { "SUPR0PageAlloc",                         (void *)SUPR0PageAlloc },
    { "SUPR0PageFree",                          (void *)SUPR0PageFree },
    { "SUPR0Printf",                            (void *)SUPR0Printf },
    { "RTMemAlloc",                             (void *)RTMemAlloc },
    { "RTMemAllocZ",                            (void *)RTMemAllocZ },
    { "RTMemFree",                              (void *)RTMemFree },
/* These don't work yet on linux - use fast mutexes!
    { "RTSemMutexCreate",                       (void *)RTSemMutexCreate },
    { "RTSemMutexRequest",                      (void *)RTSemMutexRequest },
    { "RTSemMutexRelease",                      (void *)RTSemMutexRelease },
    { "RTSemMutexDestroy",                      (void *)RTSemMutexDestroy },
*/
    { "RTSemFastMutexCreate",                   (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy",                  (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRequest",                  (void *)RTSemFastMutexRequest },
    { "RTSemFastMutexRelease",                  (void *)RTSemFastMutexRelease },
    { "RTSemEventCreate",                       (void *)RTSemEventCreate },
    { "RTSemEventSignal",                       (void *)RTSemEventSignal },
    { "RTSemEventWait",                         (void *)RTSemEventWait },
    { "RTSemEventDestroy",                      (void *)RTSemEventDestroy },
    { "RTSpinlockCreate",                       (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy",                      (void *)RTSpinlockDestroy },
    { "RTSpinlockAcquire",                      (void *)RTSpinlockAcquire },
    { "RTSpinlockRelease",                      (void *)RTSpinlockRelease },
    { "RTSpinlockAcquireNoInts",                (void *)RTSpinlockAcquireNoInts },
    { "RTSpinlockReleaseNoInts",                (void *)RTSpinlockReleaseNoInts },
    { "RTThreadNativeSelf",                     (void *)RTThreadNativeSelf },
    { "RTThreadSleep",                          (void *)RTThreadSleep },
    { "RTThreadYield",                          (void *)RTThreadYield },
#if 0 /* Thread APIs, Part 2. (Compiled out, i.e. not exported.) */
    { "RTThreadSelf",                           (void *)RTThreadSelf },
    { "RTThreadCreate",                         (void *)RTThreadCreate },
    { "RTThreadGetNative",                      (void *)RTThreadGetNative },
    { "RTThreadWait",                           (void *)RTThreadWait },
    { "RTThreadWaitNoResume",                   (void *)RTThreadWaitNoResume },
    { "RTThreadGetName",                        (void *)RTThreadGetName },
    { "RTThreadSelfName",                       (void *)RTThreadSelfName },
    { "RTThreadGetType",                        (void *)RTThreadGetType },
    { "RTThreadUserSignal",                     (void *)RTThreadUserSignal },
    { "RTThreadUserReset",                      (void *)RTThreadUserReset },
    { "RTThreadUserWait",                       (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume",               (void *)RTThreadUserWaitNoResume },
#endif
    { "RTLogDefaultInstance",                   (void *)RTLogDefaultInstance },
    { "RTLogRelDefaultInstance",                (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread",          (void *)RTLogSetDefaultInstanceThread },
    { "RTLogLogger",                            (void *)RTLogLogger },
    { "RTLogLoggerEx",                          (void *)RTLogLoggerEx },
    { "RTLogLoggerExV",                         (void *)RTLogLoggerExV },
    { "RTLogPrintf",                            (void *)RTLogPrintf },
    { "RTLogPrintfV",                           (void *)RTLogPrintfV },
    { "AssertMsg1",                             (void *)AssertMsg1 },
    { "AssertMsg2",                             (void *)AssertMsg2 },
};
147
148
149/*******************************************************************************
150* Internal Functions *
151*******************************************************************************/
/* Session memory reference tracking. */
static int      supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
static int      supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
/* IDT patching workers, only built when VBOX_WITH_IDT_PATCHING is defined. */
#ifdef VBOX_WITH_IDT_PATCHING
static int      supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static int      supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
static void     supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static void     supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
#endif /* VBOX_WITH_IDT_PATCHING */
/* Image loader workers. */
static int      supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
static int      supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
static int      supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
static int      supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
static int      supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
static void     supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
static void     supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
static void     supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
/* Paging mode and GIP mode queries. */
static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
static SUPGIPMODE supdrvGipDeterminTscMode(void);
/* Page helpers that are only declared for Windows hosts. */
#ifdef RT_OS_WINDOWS
static int      supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
static bool     supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
#endif
/* GIP creation, destruction and timer callback; only with USE_NEW_OS_INTERFACE_FOR_GIP. */
#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
static int      supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
static void     supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser);
#endif
180
181
182/**
183 * Initializes the device extentsion structure.
184 *
185 * @returns IPRT status code.
186 * @param pDevExt The device extension to initialize.
187 */
188int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
189{
190 /*
191 * Initialize it.
192 */
193 int rc;
194 memset(pDevExt, 0, sizeof(*pDevExt));
195 rc = RTSpinlockCreate(&pDevExt->Spinlock);
196 if (!rc)
197 {
198 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
199 if (!rc)
200 {
201 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
202 if (!rc)
203 {
204#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
205 rc = supdrvGipCreate(pDevExt);
206 if (RT_SUCCESS(rc))
207 {
208 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
209 return VINF_SUCCESS;
210 }
211#else
212 pDevExt->u32Cookie = BIRD;
213 return VINF_SUCCESS;
214#endif
215 }
216 RTSemFastMutexDestroy(pDevExt->mtxLdr);
217 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
218 }
219 RTSpinlockDestroy(pDevExt->Spinlock);
220 pDevExt->Spinlock = NIL_RTSPINLOCK;
221 }
222 return rc;
223}
224
225
226/**
227 * Delete the device extension (e.g. cleanup members).
228 *
229 * @param pDevExt The device extension to delete.
230 */
231void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
232{
233#ifdef VBOX_WITH_IDT_PATCHING
234 PSUPDRVPATCH pPatch;
235#endif
236 PSUPDRVOBJ pObj;
237 PSUPDRVUSAGE pUsage;
238
239 /*
240 * Kill mutexes and spinlocks.
241 */
242 RTSemFastMutexDestroy(pDevExt->mtxGip);
243 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
244 RTSemFastMutexDestroy(pDevExt->mtxLdr);
245 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
246 RTSpinlockDestroy(pDevExt->Spinlock);
247 pDevExt->Spinlock = NIL_RTSPINLOCK;
248
249 /*
250 * Free lists.
251 */
252#ifdef VBOX_WITH_IDT_PATCHING
253 /* patches */
254 /** @todo make sure we don't uninstall patches which has been patched by someone else. */
255 pPatch = pDevExt->pIdtPatchesFree;
256 pDevExt->pIdtPatchesFree = NULL;
257 while (pPatch)
258 {
259 void *pvFree = pPatch;
260 pPatch = pPatch->pNext;
261 RTMemExecFree(pvFree);
262 }
263#endif /* VBOX_WITH_IDT_PATCHING */
264
265 /* objects. */
266 pObj = pDevExt->pObjs;
267#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
268 Assert(!pObj); /* (can trigger on forced unloads) */
269#endif
270 pDevExt->pObjs = NULL;
271 while (pObj)
272 {
273 void *pvFree = pObj;
274 pObj = pObj->pNext;
275 RTMemFree(pvFree);
276 }
277
278 /* usage records. */
279 pUsage = pDevExt->pUsageFree;
280 pDevExt->pUsageFree = NULL;
281 while (pUsage)
282 {
283 void *pvFree = pUsage;
284 pUsage = pUsage->pNext;
285 RTMemFree(pvFree);
286 }
287
288#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
289 /* kill the GIP */
290 supdrvGipDestroy(pDevExt);
291#endif
292}
293
294
295/**
296 * Create session.
297 *
298 * @returns IPRT status code.
299 * @param pDevExt Device extension.
300 * @param ppSession Where to store the pointer to the session data.
301 */
302int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
303{
304 /*
305 * Allocate memory for the session data.
306 */
307 int rc = VERR_NO_MEMORY;
308 PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
309 if (pSession)
310 {
311 /* Initialize session data. */
312 rc = RTSpinlockCreate(&pSession->Spinlock);
313 if (!rc)
314 {
315 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
316 pSession->pDevExt = pDevExt;
317 pSession->u32Cookie = BIRD_INV;
318 /*pSession->pLdrUsage = NULL;
319 pSession->pPatchUsage = NULL;
320 pSession->pUsage = NULL;
321 pSession->pGip = NULL;
322 pSession->fGipReferenced = false;
323 pSession->Bundle.cUsed = 0 */
324
325 dprintf(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
326 return VINF_SUCCESS;
327 }
328
329 RTMemFree(pSession);
330 *ppSession = NULL;
331 }
332
333 dprintf(("Failed to create spinlock, rc=%d!\n", rc));
334 return rc;
335}
336
337
338/**
339 * Shared code for cleaning up a session.
340 *
341 * @param pDevExt Device extension.
342 * @param pSession Session data.
343 * This data will be freed by this routine.
344 */
345void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
346{
347 /*
348 * Cleanup the session first.
349 */
350 supdrvCleanupSession(pDevExt, pSession);
351
352 /*
353 * Free the rest of the session stuff.
354 */
355 RTSpinlockDestroy(pSession->Spinlock);
356 pSession->Spinlock = NIL_RTSPINLOCK;
357 pSession->pDevExt = NULL;
358 RTMemFree(pSession);
359 dprintf2(("supdrvCloseSession: returns\n"));
360}
361
362
/**
 * Shared code for cleaning up a session (but not quite freeing it).
 *
 * This is primarily intended for MAC OS X where we have to clean up the memory
 * stuff before the file handle is closed.
 *
 * In order, this removes the session's logger instances, uninstalls its IDT
 * patches (VBOX_WITH_IDT_PATCHING only), drops its object references, frees
 * the memory objects tracked in its bundles, dereferences its loaded images
 * and unmaps the GIP.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      The session structure itself is not freed here;
 *                      supdrvCloseSession does that after calling this routine.
 */
void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE   pBundle;
    dprintf(("supdrvCleanupSession: pSession=%p\n", pSession));

    /*
     * Remove logger instances related to this session.
     * (This assumes the dprintf and dprintf2 macros don't use the normal logging.)
     */
    RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);

#ifdef VBOX_WITH_IDT_PATCHING
    /*
     * Uninstall any IDT patches installed for this session.
     */
    supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#endif

    /*
     * Release object references made in this session.
     * In theory there should be no one racing us in this session.
     */
    dprintf2(("release objects - start\n"));
    if (pSession->pUsage)
    {
        RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        PSUPDRVUSAGE    pUsage;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

        /* The spinlock is held at the top of every iteration. */
        while ((pUsage = pSession->pUsage) != NULL)
        {
            PSUPDRVOBJ  pObj = pUsage->pObj;
            pSession->pUsage = pUsage->pNext;

            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage < pObj->cUsage)
            {
                /* Other references remain: just subtract this session's share. */
                pObj->cUsage -= pUsage->cUsage;
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
            }
            else
            {
                /* Destroy the object and free the record. */
                /* First unlink it from the device extension's object list. */
                if (pDevExt->pObjs == pObj)
                    pDevExt->pObjs = pObj->pNext;
                else
                {
                    PSUPDRVOBJ pObjPrev;
                    for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                        if (pObjPrev->pNext == pObj)
                        {
                            pObjPrev->pNext = pObj->pNext;
                            break;
                        }
                    Assert(pObjPrev);
                }
                /* The destructor callback is invoked without the spinlock held. */
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

                pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
                RTMemFree(pObj);
            }

            /* free it and continue. */
            RTMemFree(pUsage);

            /* Retake the lock before looking at the list head again. */
            RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        }

        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
    }
    dprintf2(("release objects - done\n"));

    /*
     * Release memory allocated in the session.
     *
     * We do not serialize this as we assume that the application will
     * not allocate memory while closing the file handle object.
     */
    dprintf2(("freeing memory:\n"));
    pBundle = &pSession->Bundle;
    while (pBundle)
    {
        PSUPDRVBUNDLE   pToFree;
        unsigned        i;

        /*
         * Check and unlock all entries in the bundle.
         */
        for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
        {
            if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
            {
                int rc;
                dprintf2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
                          (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
                /* The ring-3 mapping object, if any, is freed before the memory object. */
                if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
                {
                    rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
                    AssertRC(rc); /** @todo figure out how to handle this. */
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                }
                rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
                AssertRC(rc); /** @todo figure out how to handle this. */
                pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
            }
        }

        /*
         * Advance and free previous bundle.
         */
        pToFree = pBundle;
        pBundle = pBundle->pNext;

        pToFree->pNext = NULL;
        pToFree->cUsed = 0;
        /* The first bundle is embedded in the session structure and must not be freed. */
        if (pToFree != &pSession->Bundle)
            RTMemFree(pToFree);
    }
    dprintf2(("freeing memory - done\n"));

    /*
     * Loaded images need to be dereferenced and possibly freed up.
     * The whole walk is done while holding the loader mutex.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    dprintf2(("freeing images:\n"));
    if (pSession->pLdrUsage)
    {
        PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
        pSession->pLdrUsage = NULL;
        while (pUsage)
        {
            void           *pvFree = pUsage;
            PSUPDRVLDRIMAGE pImage = pUsage->pImage;
            /* Drop this session's share; free the image when no references remain. */
            if (pImage->cUsage > pUsage->cUsage)
                pImage->cUsage -= pUsage->cUsage;
            else
                supdrvLdrFree(pDevExt, pImage);
            pUsage->pImage = NULL;
            pUsage = pUsage->pNext;
            RTMemFree(pvFree);
        }
    }
    RTSemFastMutexRelease(pDevExt->mtxLdr);
    dprintf2(("freeing images - done\n"));

    /*
     * Unmap the GIP.
     */
    dprintf2(("umapping GIP:\n"));
#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
#else
    if (pSession->pGip)
#endif
    {
        SUPR0GipUnmap(pSession);
#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
        pSession->pGip = NULL;
#endif
        pSession->fGipReferenced = 0;
    }
    dprintf2(("umapping GIP - done\n"));
}
538
539
540/**
541 * Fast path I/O Control worker.
542 *
543 * @returns VBox status code that should be passed down to ring-3 unchanged.
544 * @param uIOCtl Function number.
545 * @param pDevExt Device extention.
546 * @param pSession Session data.
547 */
548int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
549{
550 /*
551 * Disable interrupts before invoking VMMR0Entry() because it ASSUMES
552 * that interrupts are disabled. (We check the two prereqs after doing
553 * this only to allow the compiler to optimize things better.)
554 */
555 int rc;
556 RTCCUINTREG uFlags = ASMGetFlags();
557 ASMIntDisable();
558
559 if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
560 {
561 switch (uIOCtl)
562 {
563 case SUP_IOCTL_FAST_DO_RAW_RUN:
564 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
565 break;
566 case SUP_IOCTL_FAST_DO_HWACC_RUN:
567 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
568 break;
569 case SUP_IOCTL_FAST_DO_NOP:
570 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
571 break;
572 default:
573 rc = VERR_INTERNAL_ERROR;
574 break;
575 }
576 }
577 else
578 rc = VERR_INTERNAL_ERROR;
579
580 ASMSetFlags(uFlags);
581 return rc;
582}
583
584
585/**
586 * I/O Control worker.
587 *
588 * @returns 0 on success.
589 * @returns VERR_INVALID_PARAMETER if the request is invalid.
590 *
591 * @param uIOCtl Function number.
592 * @param pDevExt Device extention.
593 * @param pSession Session data.
594 * @param pReqHdr The request header.
595 */
596int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
597{
598 /*
599 * Validate the request.
600 */
601 /* this first check could probably be omitted as its also done by the OS specific code... */
602 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
603 || pReqHdr->cbIn < sizeof(*pReqHdr)
604 || pReqHdr->cbOut < sizeof(*pReqHdr)))
605 {
606 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
607 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
608 return VERR_INVALID_PARAMETER;
609 }
610 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
611 {
612 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
613 {
614 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
615 return VERR_INVALID_PARAMETER;
616 }
617 }
618 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
619 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
620 {
621 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
622 return VERR_INVALID_PARAMETER;
623 }
624
625/*
626 * Validation macros
627 */
628#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
629 do { \
630 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
631 { \
632 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
633 (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
634 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
635 } \
636 } while (0)
637
638#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
639
640#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
641 do { \
642 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
643 { \
644 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
645 (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
646 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
647 } \
648 } while (0)
649
650#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
651 do { \
652 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
653 { \
654 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
655 (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
656 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
657 } \
658 } while (0)
659
660#define REQ_CHECK_EXPR(Name, expr) \
661 do { \
662 if (RT_UNLIKELY(!(expr))) \
663 { \
664 OSDBGPRINT(( #Name ": %s\n", #expr)); \
665 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
666 } \
667 } while (0)
668
669#define REQ_CHECK_EXPR_FMT(expr, fmt) \
670 do { \
671 if (RT_UNLIKELY(!(expr))) \
672 { \
673 OSDBGPRINT( fmt ); \
674 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
675 } \
676 } while (0)
677
678
679 /*
680 * The switch.
681 */
682 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
683 {
684 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
685 {
686 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
687 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
688 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
689 {
690 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
691 pReq->Hdr.rc = VERR_INVALID_MAGIC;
692 return 0;
693 }
694
695#if 0
696 /*
697 * Call out to the OS specific code and let it do permission checks on the
698 * client process.
699 */
700 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
701 {
702 pReq->u.Out.u32Cookie = 0xffffffff;
703 pReq->u.Out.u32SessionCookie = 0xffffffff;
704 pReq->u.Out.u32SessionVersion = 0xffffffff;
705 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
706 pReq->u.Out.pSession = NULL;
707 pReq->u.Out.cFunctions = 0;
708 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
709 return 0;
710 }
711#endif
712
713 /*
714 * Match the version.
715 * The current logic is very simple, match the major interface version.
716 */
717 if ( pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
718 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
719 {
720 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
721 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
722 pReq->u.Out.u32Cookie = 0xffffffff;
723 pReq->u.Out.u32SessionCookie = 0xffffffff;
724 pReq->u.Out.u32SessionVersion = 0xffffffff;
725 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
726 pReq->u.Out.pSession = NULL;
727 pReq->u.Out.cFunctions = 0;
728 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
729 return 0;
730 }
731
732 /*
733 * Fill in return data and be gone.
734 * N.B. The first one to change SUPDRVIOC_VERSION shall makes sure that
735 * u32SessionVersion <= u32ReqVersion!
736 */
737 /** @todo Somehow validate the client and negotiate a secure cookie... */
738 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
739 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
740 pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
741 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
742 pReq->u.Out.pSession = pSession;
743 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
744 pReq->Hdr.rc = VINF_SUCCESS;
745 return 0;
746 }
747
748 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
749 {
750 /* validate */
751 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
752 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
753
754 /* execute */
755 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
756 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
757 pReq->Hdr.rc = VINF_SUCCESS;
758 return 0;
759 }
760
761 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
762 {
763 /* validate */
764 PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
765 REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);
766
767 /* execute */
768#ifdef VBOX_WITH_IDT_PATCHING
769 pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
770#else
771 pReq->u.Out.u8Idt = 3;
772 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
773#endif
774 return 0;
775 }
776
777 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
778 {
779 /* validate */
780 PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
781 REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);
782
783 /* execute */
784#ifdef VBOX_WITH_IDT_PATCHING
785 pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
786#else
787 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
788#endif
789 return 0;
790 }
791
792 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
793 {
794 /* validate */
795 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
796 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
797 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
798 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
799 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
800
801 /* execute */
802 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
803 if (RT_FAILURE(pReq->Hdr.rc))
804 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
805 return 0;
806 }
807
808 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
809 {
810 /* validate */
811 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
812 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
813
814 /* execute */
815 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
816 return 0;
817 }
818
819 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
820 {
821 /* validate */
822 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
823 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
824
825 /* execute */
826 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
827 if (RT_FAILURE(pReq->Hdr.rc))
828 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
829 return 0;
830 }
831
832 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
833 {
834 /* validate */
835 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
836 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
837
838 /* execute */
839 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
840 return 0;
841 }
842
843 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
844 {
845 /* validate */
846 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
847 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
848 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
849 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
850 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
851 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
852 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !strpbrk(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
853
854 /* execute */
855 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
856 return 0;
857 }
858
859 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
860 {
861 /* validate */
862 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
863 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
864 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
865 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
866 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
867 || ( pReq->u.In.offSymbols < pReq->u.In.cbImage
868 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
869 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
870 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
871 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
872 || ( pReq->u.In.offStrTab < pReq->u.In.cbImage
873 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
874 && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
875 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
876 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));
877
878 if (pReq->u.In.cSymbols)
879 {
880 uint32_t i;
881 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
882 for (i = 0; i < pReq->u.In.cSymbols; i++)
883 {
884 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
885 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
886 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
887 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
888 REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
889 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
890 }
891 }
892
893 /* execute */
894 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
895 return 0;
896 }
897
898 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
899 {
900 /* validate */
901 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
902 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
903
904 /* execute */
905 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
906 return 0;
907 }
908
909 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
910 {
911 /* validate */
912 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
913 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
914 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));
915
916 /* execute */
917 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
918 return 0;
919 }
920
921 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
922 {
923 /* validate */
924 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
925 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
926 {
927 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
928
929 /* execute */
930 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
931 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
932 else
933 pReq->Hdr.rc = VERR_WRONG_ORDER;
934 }
935 else
936 {
937 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
938 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
939 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
940 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
941 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
942
943 /* execute */
944 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
945 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
946 else
947 pReq->Hdr.rc = VERR_WRONG_ORDER;
948 }
949 return 0;
950 }
951
952 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
953 {
954 /* validate */
955 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
956 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
957
958 /* execute */
959 pReq->Hdr.rc = VINF_SUCCESS;
960 pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
961 return 0;
962 }
963
964 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
965 {
966 /* validate */
967 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
968 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
969 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
970
971 /* execute */
972 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
973 if (RT_FAILURE(pReq->Hdr.rc))
974 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
975 return 0;
976 }
977
978 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
979 {
980 /* validate */
981 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
982 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
983
984 /* execute */
985 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
986 return 0;
987 }
988
989 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
990 {
991 /* validate */
992 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
993 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
994
995 /* execute */
996 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
997 if (RT_SUCCESS(pReq->Hdr.rc))
998 pReq->u.Out.pGipR0 = pDevExt->pGip;
999 return 0;
1000 }
1001
1002 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1003 {
1004 /* validate */
1005 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1006 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1007
1008 /* execute */
1009 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1010 return 0;
1011 }
1012
1013 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1014 {
1015 /* validate */
1016 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1017 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1018 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1019 || ( VALID_PTR(pReq->u.In.pVMR0)
1020 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1021 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1022 /* execute */
1023 pSession->pVM = pReq->u.In.pVMR0;
1024 pReq->Hdr.rc = VINF_SUCCESS;
1025 return 0;
1026 }
1027
1028 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
1029 {
1030 /* validate */
1031 PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
1032 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
1033 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1034
1035 /* execute */
1036 pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1037 if (RT_FAILURE(pReq->Hdr.rc))
1038 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1039 return 0;
1040 }
1041
1042 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1043 {
1044 /* validate */
1045 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1046 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1047
1048 /* execute */
1049 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1050 return 0;
1051 }
1052
1053 default:
1054 dprintf(("Unknown IOCTL %#lx\n", (long)uIOCtl));
1055 break;
1056 }
1057 return SUPDRV_ERR_GENERAL_FAILURE;
1058}
1059
1060
1061/**
1062 * Register a object for reference counting.
1063 * The object is registered with one reference in the specified session.
1064 *
1065 * @returns Unique identifier on success (pointer).
1066 * All future reference must use this identifier.
1067 * @returns NULL on failure.
1068 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
1069 * @param pvUser1 The first user argument.
1070 * @param pvUser2 The second user argument.
1071 */
1072SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
1073{
1074 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1075 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1076 PSUPDRVOBJ pObj;
1077 PSUPDRVUSAGE pUsage;
1078
1079 /*
1080 * Validate the input.
1081 */
1082 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
1083 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
1084 AssertPtrReturn(pfnDestructor, NULL);
1085
1086 /*
1087 * Allocate and initialize the object.
1088 */
1089 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
1090 if (!pObj)
1091 return NULL;
1092 pObj->u32Magic = SUPDRVOBJ_MAGIC;
1093 pObj->enmType = enmType;
1094 pObj->pNext = NULL;
1095 pObj->cUsage = 1;
1096 pObj->pfnDestructor = pfnDestructor;
1097 pObj->pvUser1 = pvUser1;
1098 pObj->pvUser2 = pvUser2;
1099 pObj->CreatorUid = pSession->Uid;
1100 pObj->CreatorGid = pSession->Gid;
1101 pObj->CreatorProcess= pSession->Process;
1102 supdrvOSObjInitCreator(pObj, pSession);
1103
1104 /*
1105 * Allocate the usage record.
1106 * (We keep freed usage records around to simplity SUPR0ObjAddRef().)
1107 */
1108 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1109
1110 pUsage = pDevExt->pUsageFree;
1111 if (pUsage)
1112 pDevExt->pUsageFree = pUsage->pNext;
1113 else
1114 {
1115 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1116 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
1117 if (!pUsage)
1118 {
1119 RTMemFree(pObj);
1120 return NULL;
1121 }
1122 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1123 }
1124
1125 /*
1126 * Insert the object and create the session usage record.
1127 */
1128 /* The object. */
1129 pObj->pNext = pDevExt->pObjs;
1130 pDevExt->pObjs = pObj;
1131
1132 /* The session record. */
1133 pUsage->cUsage = 1;
1134 pUsage->pObj = pObj;
1135 pUsage->pNext = pSession->pUsage;
1136 dprintf(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1137 pSession->pUsage = pUsage;
1138
1139 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1140
1141 dprintf(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
1142 return pObj;
1143}
1144
1145
1146/**
1147 * Increment the reference counter for the object associating the reference
1148 * with the specified session.
1149 *
1150 * @returns IPRT status code.
1151 * @param pvObj The identifier returned by SUPR0ObjRegister().
1152 * @param pSession The session which is referencing the object.
1153 */
1154SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
1155{
1156 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1157 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1158 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1159 PSUPDRVUSAGE pUsagePre;
1160 PSUPDRVUSAGE pUsage;
1161
1162 /*
1163 * Validate the input.
1164 */
1165 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1166 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1167 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1168 VERR_INVALID_PARAMETER);
1169
1170 /*
1171 * Preallocate the usage record.
1172 */
1173 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1174
1175 pUsagePre = pDevExt->pUsageFree;
1176 if (pUsagePre)
1177 pDevExt->pUsageFree = pUsagePre->pNext;
1178 else
1179 {
1180 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1181 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
1182 if (!pUsagePre)
1183 return VERR_NO_MEMORY;
1184 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1185 }
1186
1187 /*
1188 * Reference the object.
1189 */
1190 pObj->cUsage++;
1191
1192 /*
1193 * Look for the session record.
1194 */
1195 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
1196 {
1197 dprintf(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1198 if (pUsage->pObj == pObj)
1199 break;
1200 }
1201 if (pUsage)
1202 pUsage->cUsage++;
1203 else
1204 {
1205 /* create a new session record. */
1206 pUsagePre->cUsage = 1;
1207 pUsagePre->pObj = pObj;
1208 pUsagePre->pNext = pSession->pUsage;
1209 pSession->pUsage = pUsagePre;
1210 dprintf(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));
1211
1212 pUsagePre = NULL;
1213 }
1214
1215 /*
1216 * Put any unused usage record into the free list..
1217 */
1218 if (pUsagePre)
1219 {
1220 pUsagePre->pNext = pDevExt->pUsageFree;
1221 pDevExt->pUsageFree = pUsagePre;
1222 }
1223
1224 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1225
1226 return VINF_SUCCESS;
1227}
1228
1229
1230/**
1231 * Decrement / destroy a reference counter record for an object.
1232 *
1233 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
1234 *
1235 * @returns IPRT status code.
1236 * @param pvObj The identifier returned by SUPR0ObjRegister().
1237 * @param pSession The session which is referencing the object.
1238 */
1239SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
1240{
1241 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1242 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1243 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1244 bool fDestroy = false;
1245 PSUPDRVUSAGE pUsage;
1246 PSUPDRVUSAGE pUsagePrev;
1247
1248 /*
1249 * Validate the input.
1250 */
1251 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1252 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1253 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1254 VERR_INVALID_PARAMETER);
1255
1256 /*
1257 * Acquire the spinlock and look for the usage record.
1258 */
1259 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1260
1261 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
1262 pUsage;
1263 pUsagePrev = pUsage, pUsage = pUsage->pNext)
1264 {
1265 dprintf(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1266 if (pUsage->pObj == pObj)
1267 {
1268 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
1269 if (pUsage->cUsage > 1)
1270 {
1271 pObj->cUsage--;
1272 pUsage->cUsage--;
1273 }
1274 else
1275 {
1276 /*
1277 * Free the session record.
1278 */
1279 if (pUsagePrev)
1280 pUsagePrev->pNext = pUsage->pNext;
1281 else
1282 pSession->pUsage = pUsage->pNext;
1283 pUsage->pNext = pDevExt->pUsageFree;
1284 pDevExt->pUsageFree = pUsage;
1285
1286 /* What about the object? */
1287 if (pObj->cUsage > 1)
1288 pObj->cUsage--;
1289 else
1290 {
1291 /*
1292 * Object is to be destroyed, unlink it.
1293 */
1294 fDestroy = true;
1295 if (pDevExt->pObjs == pObj)
1296 pDevExt->pObjs = pObj->pNext;
1297 else
1298 {
1299 PSUPDRVOBJ pObjPrev;
1300 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
1301 if (pObjPrev->pNext == pObj)
1302 {
1303 pObjPrev->pNext = pObj->pNext;
1304 break;
1305 }
1306 Assert(pObjPrev);
1307 }
1308 }
1309 }
1310 break;
1311 }
1312 }
1313
1314 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1315
1316 /*
1317 * Call the destructor and free the object if required.
1318 */
1319 if (fDestroy)
1320 {
1321 pObj->u32Magic++;
1322 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
1323 RTMemFree(pObj);
1324 }
1325
1326 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
1327 return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
1328}
1329
1330/**
1331 * Verifies that the current process can access the specified object.
1332 *
1333 * @returns The following IPRT status code:
1334 * @retval VINF_SUCCESS if access was granted.
1335 * @retval VERR_PERMISSION_DENIED if denied access.
1336 * @retval VERR_INVALID_PARAMETER if invalid parameter.
1337 *
1338 * @param pvObj The identifier returned by SUPR0ObjRegister().
1339 * @param pSession The session which wishes to access the object.
1340 * @param pszObjName Object string name. This is optional and depends on the object type.
1341 *
1342 * @remark The caller is responsible for making sure the object isn't removed while
1343 * we're inside this function. If uncertain about this, just call AddRef before calling us.
1344 */
1345SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
1346{
1347 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1348 int rc;
1349
1350 /*
1351 * Validate the input.
1352 */
1353 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1354 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1355 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1356 VERR_INVALID_PARAMETER);
1357
1358 /*
1359 * Check access. (returns true if a decision has been made.)
1360 */
1361 rc = VERR_INTERNAL_ERROR;
1362 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
1363 return rc;
1364
1365 /*
1366 * Default policy is to allow the user to access his own
1367 * stuff but nothing else.
1368 */
1369 if (pObj->CreatorUid == pSession->Uid)
1370 return VINF_SUCCESS;
1371 return VERR_PERMISSION_DENIED;
1372}
1373
1374
1375/**
1376 * Lock pages.
1377 *
1378 * @returns IPRT status code.
1379 * @param pSession Session to which the locked memory should be associated.
1380 * @param pvR3 Start of the memory range to lock.
1381 * This must be page aligned.
1382 * @param cb Size of the memory range to lock.
1383 * This must be page aligned.
1384 */
1385SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1386{
1387 int rc;
1388 SUPDRVMEMREF Mem = {0};
1389 const size_t cb = (size_t)cPages << PAGE_SHIFT;
1390 dprintf(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
1391
1392 /*
1393 * Verify input.
1394 */
1395 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1396 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1397 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
1398 || !pvR3)
1399 {
1400 dprintf(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
1401 return VERR_INVALID_PARAMETER;
1402 }
1403
1404#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
1405 /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
1406 rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
1407 if (RT_SUCCESS(rc))
1408 return rc;
1409#endif
1410
1411 /*
1412 * Let IPRT do the job.
1413 */
1414 Mem.eType = MEMREF_TYPE_LOCKED;
1415 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
1416 if (RT_SUCCESS(rc))
1417 {
1418 uint32_t iPage = cPages;
1419 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
1420 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
1421
1422 while (iPage-- > 0)
1423 {
1424 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1425 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
1426 {
1427 AssertMsgFailed(("iPage=%d\n", iPage));
1428 rc = VERR_INTERNAL_ERROR;
1429 break;
1430 }
1431 }
1432 if (RT_SUCCESS(rc))
1433 rc = supdrvMemAdd(&Mem, pSession);
1434 if (RT_FAILURE(rc))
1435 {
1436 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
1437 AssertRC(rc2);
1438 }
1439 }
1440
1441 return rc;
1442}
1443
1444
1445/**
1446 * Unlocks the memory pointed to by pv.
1447 *
1448 * @returns IPRT status code.
1449 * @param pSession Session to which the memory was locked.
1450 * @param pvR3 Memory to unlock.
1451 */
1452SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1453{
1454 dprintf(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1455 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1456#ifdef RT_OS_WINDOWS
1457 /*
1458 * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
1459 * allocations; ignore this call.
1460 */
1461 if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
1462 {
1463 dprintf(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
1464 return VINF_SUCCESS;
1465 }
1466#endif
1467 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
1468}
1469
1470
1471/**
1472 * Allocates a chunk of page aligned memory with contiguous and fixed physical
1473 * backing.
1474 *
1475 * @returns IPRT status code.
1476 * @param pSession Session data.
1477 * @param cb Number of bytes to allocate.
1478 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
1479 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
1480 * @param pHCPhys Where to put the physical address of allocated memory.
1481 */
1482SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1483{
1484 int rc;
1485 SUPDRVMEMREF Mem = {0};
1486 dprintf(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
1487
1488 /*
1489 * Validate input.
1490 */
1491 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1492 if (!ppvR3 || !ppvR0 || !pHCPhys)
1493 {
1494 dprintf(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
1495 pSession, ppvR0, ppvR3, pHCPhys));
1496 return VERR_INVALID_PARAMETER;
1497
1498 }
1499 if (cPages < 1 || cPages >= 256)
1500 {
1501 dprintf(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
1502 return VERR_INVALID_PARAMETER;
1503 }
1504
1505 /*
1506 * Let IPRT do the job.
1507 */
1508 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
1509 if (RT_SUCCESS(rc))
1510 {
1511 int rc2;
1512 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1513 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1514 if (RT_SUCCESS(rc))
1515 {
1516 Mem.eType = MEMREF_TYPE_CONT;
1517 rc = supdrvMemAdd(&Mem, pSession);
1518 if (!rc)
1519 {
1520 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1521 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1522 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
1523 return 0;
1524 }
1525
1526 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1527 AssertRC(rc2);
1528 }
1529 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1530 AssertRC(rc2);
1531 }
1532
1533 return rc;
1534}
1535
1536
1537/**
1538 * Frees memory allocated using SUPR0ContAlloc().
1539 *
1540 * @returns IPRT status code.
1541 * @param pSession The session to which the memory was allocated.
1542 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1543 */
1544SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1545{
1546 dprintf(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1547 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1548 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1549}
1550
1551
1552/**
1553 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1554 *
1555 * @returns IPRT status code.
1556 * @param pSession Session data.
1557 * @param cPages Number of pages to allocate.
1558 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1559 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1560 * @param paPages Where to put the physical addresses of allocated memory.
1561 */
1562SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1563{
1564 unsigned iPage;
1565 int rc;
1566 SUPDRVMEMREF Mem = {0};
1567 dprintf(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1568
1569 /*
1570 * Validate input.
1571 */
1572 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1573 if (!ppvR3 || !ppvR0 || !paPages)
1574 {
1575 dprintf(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1576 pSession, ppvR3, ppvR0, paPages));
1577 return VERR_INVALID_PARAMETER;
1578
1579 }
1580 if (cPages < 1 || cPages > 256)
1581 {
1582 dprintf(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
1583 return VERR_INVALID_PARAMETER;
1584 }
1585
1586 /*
1587 * Let IPRT do the work.
1588 */
1589 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1590 if (RT_SUCCESS(rc))
1591 {
1592 int rc2;
1593 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1594 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1595 if (RT_SUCCESS(rc))
1596 {
1597 Mem.eType = MEMREF_TYPE_LOW;
1598 rc = supdrvMemAdd(&Mem, pSession);
1599 if (!rc)
1600 {
1601 for (iPage = 0; iPage < cPages; iPage++)
1602 {
1603 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1604 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", paPages[iPage]));
1605 }
1606 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1607 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1608 return 0;
1609 }
1610
1611 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1612 AssertRC(rc2);
1613 }
1614
1615 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1616 AssertRC(rc2);
1617 }
1618
1619 return rc;
1620}
1621
1622
1623/**
1624 * Frees memory allocated using SUPR0LowAlloc().
1625 *
1626 * @returns IPRT status code.
1627 * @param pSession The session to which the memory was allocated.
1628 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1629 */
1630SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1631{
1632 dprintf(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1633 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1634 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1635}
1636
1637
1638
1639/**
1640 * Allocates a chunk of memory with both R0 and R3 mappings.
1641 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1642 *
1643 * @returns IPRT status code.
1644 * @param pSession The session to associated the allocation with.
1645 * @param cb Number of bytes to allocate.
1646 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1647 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1648 */
1649SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1650{
1651 int rc;
1652 SUPDRVMEMREF Mem = {0};
1653 dprintf(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1654
1655 /*
1656 * Validate input.
1657 */
1658 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1659 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1660 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1661 if (cb < 1 || cb >= _4M)
1662 {
1663 dprintf(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1664 return VERR_INVALID_PARAMETER;
1665 }
1666
1667 /*
1668 * Let IPRT do the work.
1669 */
1670 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1671 if (RT_SUCCESS(rc))
1672 {
1673 int rc2;
1674 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1675 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1676 if (RT_SUCCESS(rc))
1677 {
1678 Mem.eType = MEMREF_TYPE_MEM;
1679 rc = supdrvMemAdd(&Mem, pSession);
1680 if (!rc)
1681 {
1682 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1683 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1684 return VINF_SUCCESS;
1685 }
1686 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1687 AssertRC(rc2);
1688 }
1689
1690 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1691 AssertRC(rc2);
1692 }
1693
1694 return rc;
1695}
1696
1697
1698/**
1699 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1700 *
1701 * @returns IPRT status code.
1702 * @param pSession The session to which the memory was allocated.
1703 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1704 * @param paPages Where to store the physical addresses.
1705 */
1706SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1707{
1708 PSUPDRVBUNDLE pBundle;
1709 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1710 dprintf(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1711
1712 /*
1713 * Validate input.
1714 */
1715 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1716 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1717 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1718
1719 /*
1720 * Search for the address.
1721 */
1722 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1723 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1724 {
1725 if (pBundle->cUsed > 0)
1726 {
1727 unsigned i;
1728 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1729 {
1730 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1731 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1732 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1733 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1734 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1735 )
1736 )
1737 {
1738 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1739 unsigned iPage;
1740 for (iPage = 0; iPage < cPages; iPage++)
1741 {
1742 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1743 paPages[iPage].uReserved = 0;
1744 }
1745 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1746 return VINF_SUCCESS;
1747 }
1748 }
1749 }
1750 }
1751 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1752 dprintf(("Failed to find %p!!!\n", (void *)uPtr));
1753 return VERR_INVALID_PARAMETER;
1754}
1755
1756
1757/**
1758 * Free memory allocated by SUPR0MemAlloc().
1759 *
1760 * @returns IPRT status code.
1761 * @param pSession The session owning the allocation.
1762 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1763 */
1764SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1765{
1766 dprintf(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1767 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1768 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1769}
1770
1771
1772/**
1773 * Allocates a chunk of memory with only a R3 mappings.
1774 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1775 *
1776 * @returns IPRT status code.
1777 * @param pSession The session to associated the allocation with.
1778 * @param cPages The number of pages to allocate.
1779 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1780 * @param paPages Where to store the addresses of the pages. Optional.
1781 */
1782SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1783{
1784 int rc;
1785 SUPDRVMEMREF Mem = {0};
1786 dprintf(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1787
1788 /*
1789 * Validate input.
1790 */
1791 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1792 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1793 if (cPages < 1 || cPages >= 4096)
1794 {
1795 dprintf(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than 16MB.\n", cPages));
1796 return VERR_INVALID_PARAMETER;
1797 }
1798
1799 /*
1800 * Let IPRT do the work.
1801 */
1802 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1803 if (RT_SUCCESS(rc))
1804 {
1805 int rc2;
1806 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1807 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1808 if (RT_SUCCESS(rc))
1809 {
1810 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1811 rc = supdrvMemAdd(&Mem, pSession);
1812 if (!rc)
1813 {
1814 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1815 if (paPages)
1816 {
1817 uint32_t iPage = cPages;
1818 while (iPage-- > 0)
1819 {
1820 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1821 Assert(paPages[iPage] != NIL_RTHCPHYS);
1822 }
1823 }
1824 return VINF_SUCCESS;
1825 }
1826 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1827 AssertRC(rc2);
1828 }
1829
1830 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1831 AssertRC(rc2);
1832 }
1833 return rc;
1834}
1835
1836
1837#ifdef RT_OS_WINDOWS
1838/**
1839 * Check if the pages were locked by SUPR0PageAlloc
1840 *
1841 * This function will be removed along with the lock/unlock hacks when
1842 * we've cleaned up the ring-3 code properly.
1843 *
1844 * @returns boolean
1845 * @param pSession The session to which the memory was allocated.
1846 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1847 */
1848static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1849{
1850 PSUPDRVBUNDLE pBundle;
1851 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1852 dprintf(("SUPR0PageIsLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1853
1854 /*
1855 * Search for the address.
1856 */
1857 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1858 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1859 {
1860 if (pBundle->cUsed > 0)
1861 {
1862 unsigned i;
1863 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1864 {
1865 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1866 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1867 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1868 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1869 {
1870 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1871 return true;
1872 }
1873 }
1874 }
1875 }
1876 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1877 return false;
1878}
1879
1880
1881/**
1882 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
1883 *
1884 * This function will be removed along with the lock/unlock hacks when
1885 * we've cleaned up the ring-3 code properly.
1886 *
1887 * @returns IPRT status code.
1888 * @param pSession The session to which the memory was allocated.
1889 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1890 * @param cPages Number of pages in paPages
1891 * @param paPages Where to store the physical addresses.
1892 */
1893static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1894{
1895 PSUPDRVBUNDLE pBundle;
1896 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1897 dprintf(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
1898
1899 /*
1900 * Search for the address.
1901 */
1902 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1903 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1904 {
1905 if (pBundle->cUsed > 0)
1906 {
1907 unsigned i;
1908 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1909 {
1910 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1911 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1912 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1913 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1914 {
1915 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1916 cPages = RT_MIN(iPage, cPages);
1917 for (iPage = 0; iPage < cPages; iPage++)
1918 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1919 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1920 return VINF_SUCCESS;
1921 }
1922 }
1923 }
1924 }
1925 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1926 return VERR_INVALID_PARAMETER;
1927}
1928#endif /* RT_OS_WINDOWS */
1929
1930
1931/**
1932 * Free memory allocated by SUPR0PageAlloc().
1933 *
1934 * @returns IPRT status code.
1935 * @param pSession The session owning the allocation.
1936 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1937 */
1938SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1939{
1940 dprintf(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1941 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1942 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
1943}
1944
1945
1946/**
1947 * Maps the GIP into userspace and/or get the physical address of the GIP.
1948 *
1949 * @returns IPRT status code.
1950 * @param pSession Session to which the GIP mapping should belong.
1951 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
1952 * @param pHCPhysGip Where to store the physical address. (optional)
1953 *
1954 * @remark There is no reference counting on the mapping, so one call to this function
1955 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
1956 * and remove the session as a GIP user.
1957 */
1958SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
1959{
1960 int rc = 0;
1961 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1962 RTR3PTR pGip = NIL_RTR3PTR;
1963 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1964 dprintf(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
1965
1966 /*
1967 * Validate
1968 */
1969 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1970 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
1971 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
1972
1973 RTSemFastMutexRequest(pDevExt->mtxGip);
1974 if (pDevExt->pGip)
1975 {
1976 /*
1977 * Map it?
1978 */
1979 if (ppGipR3)
1980 {
1981#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
1982 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
1983 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
1984 RTMEM_PROT_READ, RTR0ProcHandleSelf());
1985 if (RT_SUCCESS(rc))
1986 {
1987 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
1988 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
1989 }
1990#else /* !USE_NEW_OS_INTERFACE_FOR_GIP */
1991 if (!pSession->pGip)
1992 rc = supdrvOSGipMap(pSession->pDevExt, &pSession->pGip);
1993 if (!rc)
1994 pGip = (RTR3PTR)pSession->pGip;
1995#endif /* !USE_NEW_OS_INTERFACE_FOR_GIP */
1996 }
1997
1998 /*
1999 * Get physical address.
2000 */
2001 if (pHCPhysGip && !rc)
2002 HCPhys = pDevExt->HCPhysGip;
2003
2004 /*
2005 * Reference globally.
2006 */
2007 if (!pSession->fGipReferenced && !rc)
2008 {
2009 pSession->fGipReferenced = 1;
2010 pDevExt->cGipUsers++;
2011 if (pDevExt->cGipUsers == 1)
2012 {
2013 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2014 unsigned i;
2015
2016 dprintf(("SUPR0GipMap: Resumes GIP updating\n"));
2017
2018 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2019 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2020 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2021
2022#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2023 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2024 AssertRC(rc); rc = VINF_SUCCESS;
2025#else
2026 supdrvOSGipResume(pDevExt);
2027#endif
2028 }
2029 }
2030 }
2031 else
2032 {
2033 rc = SUPDRV_ERR_GENERAL_FAILURE;
2034 dprintf(("SUPR0GipMap: GIP is not available!\n"));
2035 }
2036 RTSemFastMutexRelease(pDevExt->mtxGip);
2037
2038 /*
2039 * Write returns.
2040 */
2041 if (pHCPhysGip)
2042 *pHCPhysGip = HCPhys;
2043 if (ppGipR3)
2044 *ppGipR3 = pGip;
2045
2046#ifdef DEBUG_DARWIN_GIP
2047 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2048#else
2049 dprintf(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2050#endif
2051 return rc;
2052}
2053
2054
2055/**
2056 * Unmaps any user mapping of the GIP and terminates all GIP access
2057 * from this session.
2058 *
2059 * @returns IPRT status code.
2060 * @param pSession Session to which the GIP mapping should belong.
2061 */
2062SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2063{
2064 int rc = VINF_SUCCESS;
2065 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2066#ifdef DEBUG_DARWIN_GIP
2067 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2068 pSession,
2069 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2070 pSession->GipMapObjR3));
2071#else
2072 dprintf(("SUPR0GipUnmap: pSession=%p\n", pSession));
2073#endif
2074 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2075
2076 RTSemFastMutexRequest(pDevExt->mtxGip);
2077
2078 /*
2079 * Unmap anything?
2080 */
2081#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2082 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2083 {
2084 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2085 AssertRC(rc);
2086 if (RT_SUCCESS(rc))
2087 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2088 }
2089#else
2090 if (pSession->pGip)
2091 {
2092 rc = supdrvOSGipUnmap(pDevExt, pSession->pGip);
2093 if (!rc)
2094 pSession->pGip = NULL;
2095 }
2096#endif
2097
2098 /*
2099 * Dereference global GIP.
2100 */
2101 if (pSession->fGipReferenced && !rc)
2102 {
2103 pSession->fGipReferenced = 0;
2104 if ( pDevExt->cGipUsers > 0
2105 && !--pDevExt->cGipUsers)
2106 {
2107 dprintf(("SUPR0GipUnmap: Suspends GIP updating\n"));
2108#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2109 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2110#else
2111 supdrvOSGipSuspend(pDevExt);
2112#endif
2113 }
2114 }
2115
2116 RTSemFastMutexRelease(pDevExt->mtxGip);
2117
2118 return rc;
2119}
2120
2121
2122/**
2123 * Adds a memory object to the session.
2124 *
2125 * @returns IPRT status code.
2126 * @param pMem Memory tracking structure containing the
2127 * information to track.
2128 * @param pSession The session.
2129 */
2130static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2131{
2132 PSUPDRVBUNDLE pBundle;
2133 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2134
2135 /*
2136 * Find free entry and record the allocation.
2137 */
2138 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2139 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2140 {
2141 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2142 {
2143 unsigned i;
2144 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2145 {
2146 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2147 {
2148 pBundle->cUsed++;
2149 pBundle->aMem[i] = *pMem;
2150 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2151 return VINF_SUCCESS;
2152 }
2153 }
2154 AssertFailed(); /* !!this can't be happening!!! */
2155 }
2156 }
2157 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2158
2159 /*
2160 * Need to allocate a new bundle.
2161 * Insert into the last entry in the bundle.
2162 */
2163 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2164 if (!pBundle)
2165 return VERR_NO_MEMORY;
2166
2167 /* take last entry. */
2168 pBundle->cUsed++;
2169 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2170
2171 /* insert into list. */
2172 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2173 pBundle->pNext = pSession->Bundle.pNext;
2174 pSession->Bundle.pNext = pBundle;
2175 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2176
2177 return VINF_SUCCESS;
2178}
2179
2180
2181/**
2182 * Releases a memory object referenced by pointer and type.
2183 *
2184 * @returns IPRT status code.
2185 * @param pSession Session data.
2186 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2187 * @param eType Memory type.
2188 */
2189static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2190{
2191 PSUPDRVBUNDLE pBundle;
2192 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2193
2194 /*
2195 * Validate input.
2196 */
2197 if (!uPtr)
2198 {
2199 dprintf(("Illegal address %p\n", (void *)uPtr));
2200 return VERR_INVALID_PARAMETER;
2201 }
2202
2203 /*
2204 * Search for the address.
2205 */
2206 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2207 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2208 {
2209 if (pBundle->cUsed > 0)
2210 {
2211 unsigned i;
2212 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2213 {
2214 if ( pBundle->aMem[i].eType == eType
2215 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2216 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2217 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2218 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2219 )
2220 {
2221 /* Make a copy of it and release it outside the spinlock. */
2222 SUPDRVMEMREF Mem = pBundle->aMem[i];
2223 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2224 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2225 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2226 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2227
2228 if (Mem.MapObjR3)
2229 {
2230 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2231 AssertRC(rc); /** @todo figure out how to handle this. */
2232 }
2233 if (Mem.MemObj)
2234 {
2235 int rc = RTR0MemObjFree(Mem.MemObj, false);
2236 AssertRC(rc); /** @todo figure out how to handle this. */
2237 }
2238 return VINF_SUCCESS;
2239 }
2240 }
2241 }
2242 }
2243 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2244 dprintf(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2245 return VERR_INVALID_PARAMETER;
2246}
2247
2248
2249#ifdef VBOX_WITH_IDT_PATCHING
2250/**
2251 * Install IDT for the current CPU.
2252 *
2253 * @returns One of the following IPRT status codes:
2254 * @retval VINF_SUCCESS on success.
2255 * @retval VERR_IDT_FAILED.
2256 * @retval VERR_NO_MEMORY.
2257 * @param pDevExt The device extension.
2258 * @param pSession The session data.
2259 * @param pReq The request.
2260 */
2261static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2262{
2263 PSUPDRVPATCHUSAGE pUsagePre;
2264 PSUPDRVPATCH pPatchPre;
2265 RTIDTR Idtr;
2266 PSUPDRVPATCH pPatch;
2267 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2268 dprintf(("supdrvIOCtl_IdtInstall\n"));
2269
2270 /*
2271 * Preallocate entry for this CPU cause we don't wanna do
2272 * that inside the spinlock!
2273 */
2274 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2275 if (!pUsagePre)
2276 return VERR_NO_MEMORY;
2277
2278 /*
2279 * Take the spinlock and see what we need to do.
2280 */
2281 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2282
2283 /* check if we already got a free patch. */
2284 if (!pDevExt->pIdtPatchesFree)
2285 {
2286 /*
2287 * Allocate a patch - outside the spinlock of course.
2288 */
2289 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2290
2291 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2292 if (!pPatchPre)
2293 return VERR_NO_MEMORY;
2294
2295 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2296 }
2297 else
2298 {
2299 pPatchPre = pDevExt->pIdtPatchesFree;
2300 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2301 }
2302
2303 /* look for matching patch entry */
2304 ASMGetIDTR(&Idtr);
2305 pPatch = pDevExt->pIdtPatches;
2306 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2307 pPatch = pPatch->pNext;
2308
2309 if (!pPatch)
2310 {
2311 /*
2312 * Create patch.
2313 */
2314 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2315 if (pPatch)
2316 pPatchPre = NULL; /* mark as used. */
2317 }
2318 else
2319 {
2320 /*
2321 * Simply increment patch usage.
2322 */
2323 pPatch->cUsage++;
2324 }
2325
2326 if (pPatch)
2327 {
2328 /*
2329 * Increment and add if need be the session usage record for this patch.
2330 */
2331 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2332 while (pUsage && pUsage->pPatch != pPatch)
2333 pUsage = pUsage->pNext;
2334
2335 if (!pUsage)
2336 {
2337 /*
2338 * Add usage record.
2339 */
2340 pUsagePre->cUsage = 1;
2341 pUsagePre->pPatch = pPatch;
2342 pUsagePre->pNext = pSession->pPatchUsage;
2343 pSession->pPatchUsage = pUsagePre;
2344 pUsagePre = NULL; /* mark as used. */
2345 }
2346 else
2347 {
2348 /*
2349 * Increment usage count.
2350 */
2351 pUsage->cUsage++;
2352 }
2353 }
2354
2355 /* free patch - we accumulate them for paranoid saftly reasons. */
2356 if (pPatchPre)
2357 {
2358 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2359 pDevExt->pIdtPatchesFree = pPatchPre;
2360 }
2361
2362 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2363
2364 /*
2365 * Free unused preallocated buffers.
2366 */
2367 if (pUsagePre)
2368 RTMemFree(pUsagePre);
2369
2370 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2371
2372 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2373}
2374
2375
2376/**
2377 * This creates a IDT patch entry.
2378 * If the first patch being installed it'll also determin the IDT entry
2379 * to use.
2380 *
2381 * @returns pPatch on success.
2382 * @returns NULL on failure.
2383 * @param pDevExt Pointer to globals.
2384 * @param pPatch Patch entry to use.
2385 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2386 * successful return.
2387 * @remark Call must be owning the SUPDRVDEVEXT::Spinlock!
2388 */
2389static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2390{
2391 RTIDTR Idtr;
2392 PSUPDRVIDTE paIdt;
2393 dprintf(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));
2394
2395 /*
2396 * Get IDT.
2397 */
2398 ASMGetIDTR(&Idtr);
2399 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2400 /*
2401 * Recent Linux kernels can be configured to 1G user /3G kernel.
2402 */
2403 if ((uintptr_t)paIdt < 0x40000000)
2404 {
2405 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2406 return NULL;
2407 }
2408
2409 if (!pDevExt->u8Idt)
2410 {
2411 /*
2412 * Test out the alternatives.
2413 *
2414 * At the moment we do not support chaining thus we ASSUME that one of
2415 * these 48 entries is unused (which is not a problem on Win32 and
2416 * Linux to my knowledge).
2417 */
2418 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2419 * combined with gathering info about which guest system call gates we can hook up directly. */
2420 unsigned i;
2421 uint8_t u8Idt = 0;
2422 static uint8_t au8Ints[] =
2423 {
2424#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on linux (ef is IPI,
2425 * local apic timer, or some other frequently fireing thing). */
2426 0xef, 0xee, 0xed, 0xec,
2427#endif
2428 0xeb, 0xea, 0xe9, 0xe8,
2429 0xdf, 0xde, 0xdd, 0xdc,
2430 0x7b, 0x7a, 0x79, 0x78,
2431 0xbf, 0xbe, 0xbd, 0xbc,
2432 };
2433#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2434 static int s_iWobble = 0;
2435 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2436 dprintf(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2437 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2438 {
2439 dprintf(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
2440 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2441 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2442 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2443 }
2444#endif
2445 /* look for entries which are not present or otherwise unused. */
2446 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2447 {
2448 u8Idt = au8Ints[i];
2449 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2450 && ( !paIdt[u8Idt].u1Present
2451 || paIdt[u8Idt].u5Type2 == 0))
2452 break;
2453 u8Idt = 0;
2454 }
2455 if (!u8Idt)
2456 {
2457 /* try again, look for a compatible entry .*/
2458 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2459 {
2460 u8Idt = au8Ints[i];
2461 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2462 && paIdt[u8Idt].u1Present
2463 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2464 && !(paIdt[u8Idt].u16SegSel & 3))
2465 break;
2466 u8Idt = 0;
2467 }
2468 if (!u8Idt)
2469 {
2470 dprintf(("Failed to find appropirate IDT entry!!\n"));
2471 return NULL;
2472 }
2473 }
2474 pDevExt->u8Idt = u8Idt;
2475 dprintf(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
2476 }
2477
2478 /*
2479 * Prepare the patch
2480 */
2481 memset(pPatch, 0, sizeof(*pPatch));
2482 pPatch->pvIdt = paIdt;
2483 pPatch->cUsage = 1;
2484 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2485 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2486 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2487 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2488#ifdef RT_ARCH_AMD64
2489 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2490#endif
2491 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2492#ifdef RT_ARCH_AMD64
2493 pPatch->ChangedIdt.u3IST = 0;
2494 pPatch->ChangedIdt.u5Reserved = 0;
2495#else /* x86 */
2496 pPatch->ChangedIdt.u5Reserved = 0;
2497 pPatch->ChangedIdt.u3Type1 = 0;
2498#endif /* x86 */
2499 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2500 pPatch->ChangedIdt.u2DPL = 3;
2501 pPatch->ChangedIdt.u1Present = 1;
2502
2503 /*
2504 * Generate the patch code.
2505 */
2506 {
2507#ifdef RT_ARCH_AMD64
2508 union
2509 {
2510 uint8_t *pb;
2511 uint32_t *pu32;
2512 uint64_t *pu64;
2513 } u, uFixJmp, uFixCall, uNotNested;
2514 u.pb = &pPatch->auCode[0];
2515
2516 /* check the cookie */
2517 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2518 *u.pu32++ = pDevExt->u32Cookie;
2519
2520 *u.pb++ = 0x74; // jz @VBoxCall
2521 *u.pb++ = 2;
2522
2523 /* jump to forwarder code. */
2524 *u.pb++ = 0xeb;
2525 uFixJmp = u;
2526 *u.pb++ = 0xfe;
2527
2528 // @VBoxCall:
2529 *u.pb++ = 0x0f; // swapgs
2530 *u.pb++ = 0x01;
2531 *u.pb++ = 0xf8;
2532
2533 /*
2534 * Call VMMR0Entry
2535 * We don't have to push the arguments here, but we have top
2536 * reserve some stack space for the interrupt forwarding.
2537 */
2538# ifdef RT_OS_WINDOWS
2539 *u.pb++ = 0x50; // push rax ; alignment filler.
2540 *u.pb++ = 0x41; // push r8 ; uArg
2541 *u.pb++ = 0x50;
2542 *u.pb++ = 0x52; // push rdx ; uOperation
2543 *u.pb++ = 0x51; // push rcx ; pVM
2544# else
2545 *u.pb++ = 0x51; // push rcx ; alignment filler.
2546 *u.pb++ = 0x52; // push rdx ; uArg
2547 *u.pb++ = 0x56; // push rsi ; uOperation
2548 *u.pb++ = 0x57; // push rdi ; pVM
2549# endif
2550
2551 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2552 *u.pb++ = 0x15;
2553 uFixCall = u;
2554 *u.pu32++ = 0;
2555
2556 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2557 *u.pb++ = 0x81;
2558 *u.pb++ = 0xc4;
2559 *u.pu32++ = 0x20;
2560
2561 *u.pb++ = 0x0f; // swapgs
2562 *u.pb++ = 0x01;
2563 *u.pb++ = 0xf8;
2564
2565 /* Return to R3. */
2566 uNotNested = u;
2567 *u.pb++ = 0x48; // iretq
2568 *u.pb++ = 0xcf;
2569
2570 while ((uintptr_t)u.pb & 0x7) // align 8
2571 *u.pb++ = 0xcc;
2572
2573 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2574 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2575 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2576 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2577
2578 /* stub entry. */ // StubVMMR0Entry:
2579 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2580 *u.pb++ = 0x33; // xor eax, eax
2581 *u.pb++ = 0xc0;
2582
2583 *u.pb++ = 0x48; // dec rax
2584 *u.pb++ = 0xff;
2585 *u.pb++ = 0xc8;
2586
2587 *u.pb++ = 0xc3; // ret
2588
2589 /* forward to the original handler using a retf. */
2590 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2591
2592 *u.pb++ = 0x68; // push <target cs>
2593 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2594
2595 *u.pb++ = 0x68; // push <low target rip>
2596 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2597 ? (uint32_t)(uintptr_t)uNotNested.pb
2598 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2599 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2600
2601 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2602 *u.pb++ = 0x44;
2603 *u.pb++ = 0x24;
2604 *u.pb++ = 0x04;
2605 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2606 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2607 : pPatch->SavedIdt.u32OffsetTop;
2608
2609 *u.pb++ = 0x48; // retf ; does this require prefix?
2610 *u.pb++ = 0xcb;
2611
2612#else /* RT_ARCH_X86 */
2613
2614 union
2615 {
2616 uint8_t *pb;
2617 uint16_t *pu16;
2618 uint32_t *pu32;
2619 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2620 u.pb = &pPatch->auCode[0];
2621
2622 /* check the cookie */
2623 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2624 *u.pb++ = 0xfe;
2625 *u.pu32++ = pDevExt->u32Cookie;
2626
2627 *u.pb++ = 0x74; // jz VBoxCall
2628 uFixJmp = u;
2629 *u.pb++ = 0;
2630
2631 /* jump (far) to the original handler / not-nested-stub. */
2632 *u.pb++ = 0xea; // jmp far NotNested
2633 uFixJmpNotNested = u;
2634 *u.pu32++ = 0;
2635 *u.pu16++ = 0;
2636
2637 /* save selector registers. */ // VBoxCall:
2638 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2639 *u.pb++ = 0x0f; // push fs
2640 *u.pb++ = 0xa0;
2641
2642 *u.pb++ = 0x1e; // push ds
2643
2644 *u.pb++ = 0x06; // push es
2645
2646 /* call frame */
2647 *u.pb++ = 0x51; // push ecx
2648
2649 *u.pb++ = 0x52; // push edx
2650
2651 *u.pb++ = 0x50; // push eax
2652
2653 /* load ds, es and perhaps fs before call. */
2654 *u.pb++ = 0xb8; // mov eax, KernelDS
2655 *u.pu32++ = ASMGetDS();
2656
2657 *u.pb++ = 0x8e; // mov ds, eax
2658 *u.pb++ = 0xd8;
2659
2660 *u.pb++ = 0x8e; // mov es, eax
2661 *u.pb++ = 0xc0;
2662
2663#ifdef RT_OS_WINDOWS
2664 *u.pb++ = 0xb8; // mov eax, KernelFS
2665 *u.pu32++ = ASMGetFS();
2666
2667 *u.pb++ = 0x8e; // mov fs, eax
2668 *u.pb++ = 0xe0;
2669#endif
2670
2671 /* do the call. */
2672 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2673 uFixCall = u;
2674 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2675 *u.pu32++ = 0xfffffffb;
2676
2677 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2678 *u.pb++ = 0xc4;
2679 *u.pb++ = 0x0c;
2680
2681 /* restore selector registers. */
2682 *u.pb++ = 0x07; // pop es
2683 //
2684 *u.pb++ = 0x1f; // pop ds
2685
2686 *u.pb++ = 0x0f; // pop fs
2687 *u.pb++ = 0xa1;
2688
2689 uNotNested = u; // NotNested:
2690 *u.pb++ = 0xcf; // iretd
2691
2692 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2693 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2694 *u.pb++ = 0x33; // xor eax, eax
2695 *u.pb++ = 0xc0;
2696
2697 *u.pb++ = 0x48; // dec eax
2698
2699 *u.pb++ = 0xc3; // ret
2700
2701 /* Fixup the VMMR0Entry call. */
2702 if (pDevExt->pvVMMR0)
2703 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2704 else
2705 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2706
2707 /* Fixup the forward / nested far jump. */
2708 if (!pPatch->SavedIdt.u5Type2)
2709 {
2710 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2711 *uFixJmpNotNested.pu16++ = ASMGetCS();
2712 }
2713 else
2714 {
2715 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2716 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2717 }
2718#endif /* RT_ARCH_X86 */
2719 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2720#if 0
2721 /* dump the patch code */
2722 dprintf(("patch code: %p\n", &pPatch->auCode[0]));
2723 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2724 dprintf(("0x%02x,\n", *uFixCall.pb));
2725#endif
2726 }
2727
2728 /*
2729 * Install the patch.
2730 */
2731 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2732 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));
2733
2734 /*
2735 * Link in the patch.
2736 */
2737 pPatch->pNext = pDevExt->pIdtPatches;
2738 pDevExt->pIdtPatches = pPatch;
2739
2740 return pPatch;
2741}
2742
2743
2744/**
2745 * Removes the sessions IDT references.
2746 * This will uninstall our IDT patch if we left unreferenced.
2747 *
2748 * @returns VINF_SUCCESS.
2749 * @param pDevExt Device globals.
2750 * @param pSession Session data.
2751 */
2752static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2753{
2754 PSUPDRVPATCHUSAGE pUsage;
2755 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2756 dprintf(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2757
2758 /*
2759 * Take the spinlock.
2760 */
2761 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2762
2763 /*
2764 * Walk usage list, removing patches as their usage count reaches zero.
2765 */
2766 pUsage = pSession->pPatchUsage;
2767 while (pUsage)
2768 {
2769 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2770 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2771 else
2772 pUsage->pPatch->cUsage -= pUsage->cUsage;
2773
2774 /* next */
2775 pUsage = pUsage->pNext;
2776 }
2777
2778 /*
2779 * Empty the usage chain and we're done inside the spinlock.
2780 */
2781 pUsage = pSession->pPatchUsage;
2782 pSession->pPatchUsage = NULL;
2783
2784 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2785
2786 /*
2787 * Free usage entries.
2788 */
2789 while (pUsage)
2790 {
2791 void *pvToFree = pUsage;
2792 pUsage->cUsage = 0;
2793 pUsage->pPatch = NULL;
2794 pUsage = pUsage->pNext;
2795 RTMemFree(pvToFree);
2796 }
2797
2798 return VINF_SUCCESS;
2799}
2800
2801
2802/**
2803 * Remove one patch.
2804 *
2805 * Worker for supdrvIOCtl_IdtRemoveAll.
2806 *
2807 * @param pDevExt Device globals.
2808 * @param pPatch Patch entry to remove.
2809 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2810 */
2811static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2812{
2813 dprintf(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2814
2815 pPatch->cUsage = 0;
2816
2817 /*
2818 * If the IDT entry was changed it have to kick around for ever!
2819 * This will be attempted freed again, perhaps next time we'll succeed :-)
2820 */
2821 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2822 {
2823 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2824 return;
2825 }
2826
2827 /*
2828 * Unlink it.
2829 */
2830 if (pDevExt->pIdtPatches != pPatch)
2831 {
2832 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2833 while (pPatchPrev)
2834 {
2835 if (pPatchPrev->pNext == pPatch)
2836 {
2837 pPatchPrev->pNext = pPatch->pNext;
2838 break;
2839 }
2840 pPatchPrev = pPatchPrev->pNext;
2841 }
2842 Assert(!pPatchPrev);
2843 }
2844 else
2845 pDevExt->pIdtPatches = pPatch->pNext;
2846 pPatch->pNext = NULL;
2847
2848
2849 /*
2850 * Verify and restore the IDT.
2851 */
2852 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2853 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2854 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2855
2856 /*
2857 * Put it in the free list.
2858 * (This free list stuff is to calm my paranoia.)
2859 */
2860 pPatch->pvIdt = NULL;
2861 pPatch->pIdtEntry = NULL;
2862
2863 pPatch->pNext = pDevExt->pIdtPatchesFree;
2864 pDevExt->pIdtPatchesFree = pPatch;
2865}
2866
2867
2868/**
2869 * Write to an IDT entry.
2870 *
2871 * @param pvIdtEntry Where to write.
2872 * @param pNewIDTEntry What to write.
2873 */
2874static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
2875{
2876 RTUINTREG uCR0;
2877 RTUINTREG uFlags;
2878
2879 /*
2880 * On SMP machines (P4 hyperthreading included) we must preform a
2881 * 64-bit locked write when updating the IDT entry.
2882 *
2883 * The F00F bugfix for linux (and probably other OSes) causes
2884 * the IDT to be pointing to an readonly mapping. We get around that
2885 * by temporarily turning of WP. Since we're inside a spinlock at this
2886 * point, interrupts are disabled and there isn't any way the WP bit
2887 * flipping can cause any trouble.
2888 */
2889
2890 /* Save & Clear interrupt flag; Save & clear WP. */
2891 uFlags = ASMGetFlags();
2892 ASMSetFlags(uFlags & ~(RTUINTREG)(1 << 9)); /*X86_EFL_IF*/
2893 Assert(!(ASMGetFlags() & (1 << 9)));
2894 uCR0 = ASMGetCR0();
2895 ASMSetCR0(uCR0 & ~(RTUINTREG)(1 << 16)); /*X86_CR0_WP*/
2896
2897 /* Update IDT Entry */
2898#ifdef RT_ARCH_AMD64
2899 ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
2900#else
2901 ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
2902#endif
2903
2904 /* Restore CR0 & Flags */
2905 ASMSetCR0(uCR0);
2906 ASMSetFlags(uFlags);
2907}
2908#endif /* VBOX_WITH_IDT_PATCHING */
2909
2910
2911/**
2912 * Opens an image. If it's the first time it's opened the call must upload
2913 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
2914 *
2915 * This is the 1st step of the loading.
2916 *
2917 * @returns IPRT status code.
2918 * @param pDevExt Device globals.
2919 * @param pSession Session data.
2920 * @param pReq The open request.
2921 */
2922static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
2923{
2924 PSUPDRVLDRIMAGE pImage;
2925 unsigned cb;
2926 void *pv;
2927 dprintf(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
2928
2929 /*
2930 * Check if we got an instance of the image already.
2931 */
2932 RTSemFastMutexRequest(pDevExt->mtxLdr);
2933 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
2934 {
2935 if (!strcmp(pImage->szName, pReq->u.In.szName))
2936 {
2937 pImage->cUsage++;
2938 pReq->u.Out.pvImageBase = pImage->pvImage;
2939 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
2940 supdrvLdrAddUsage(pSession, pImage);
2941 RTSemFastMutexRelease(pDevExt->mtxLdr);
2942 return VINF_SUCCESS;
2943 }
2944 }
2945 /* (not found - add it!) */
2946
2947 /*
2948 * Allocate memory.
2949 */
2950 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
2951 pv = RTMemExecAlloc(cb);
2952 if (!pv)
2953 {
2954 RTSemFastMutexRelease(pDevExt->mtxLdr);
2955 return VERR_NO_MEMORY;
2956 }
2957
2958 /*
2959 * Setup and link in the LDR stuff.
2960 */
2961 pImage = (PSUPDRVLDRIMAGE)pv;
2962 pImage->pvImage = ALIGNP(pImage + 1, 32);
2963 pImage->cbImage = pReq->u.In.cbImage;
2964 pImage->pfnModuleInit = NULL;
2965 pImage->pfnModuleTerm = NULL;
2966 pImage->uState = SUP_IOCTL_LDR_OPEN;
2967 pImage->cUsage = 1;
2968 strcpy(pImage->szName, pReq->u.In.szName);
2969
2970 pImage->pNext = pDevExt->pLdrImages;
2971 pDevExt->pLdrImages = pImage;
2972
2973 supdrvLdrAddUsage(pSession, pImage);
2974
2975 pReq->u.Out.pvImageBase = pImage->pvImage;
2976 pReq->u.Out.fNeedsLoading = true;
2977 RTSemFastMutexRelease(pDevExt->mtxLdr);
2978 return VINF_SUCCESS;
2979}
2980
2981
2982/**
2983 * Loads the image bits.
2984 *
2985 * This is the 2nd step of the loading.
2986 *
2987 * @returns IPRT status code.
2988 * @param pDevExt Device globals.
2989 * @param pSession Session data.
2990 * @param pReq The request.
2991 */
2992static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
2993{
2994 PSUPDRVLDRUSAGE pUsage;
2995 PSUPDRVLDRIMAGE pImage;
2996 int rc;
2997 dprintf(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));
2998
2999 /*
3000 * Find the ldr image.
3001 */
3002 RTSemFastMutexRequest(pDevExt->mtxLdr);
3003 pUsage = pSession->pLdrUsage;
3004 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3005 pUsage = pUsage->pNext;
3006 if (!pUsage)
3007 {
3008 RTSemFastMutexRelease(pDevExt->mtxLdr);
3009 dprintf(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
3010 return VERR_INVALID_HANDLE;
3011 }
3012 pImage = pUsage->pImage;
3013 if (pImage->cbImage != pReq->u.In.cbImage)
3014 {
3015 RTSemFastMutexRelease(pDevExt->mtxLdr);
3016 dprintf(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
3017 return VERR_INVALID_HANDLE;
3018 }
3019 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
3020 {
3021 unsigned uState = pImage->uState;
3022 RTSemFastMutexRelease(pDevExt->mtxLdr);
3023 if (uState != SUP_IOCTL_LDR_LOAD)
3024 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
3025 return SUPDRV_ERR_ALREADY_LOADED;
3026 }
3027 switch (pReq->u.In.eEPType)
3028 {
3029 case SUPLDRLOADEP_NOTHING:
3030 break;
3031 case SUPLDRLOADEP_VMMR0:
3032 if ( !pReq->u.In.EP.VMMR0.pvVMMR0
3033 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
3034 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
3035 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
3036 {
3037 RTSemFastMutexRelease(pDevExt->mtxLdr);
3038 dprintf(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
3039 pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3040 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3041 return VERR_INVALID_PARAMETER;
3042 }
3043 if ( (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3044 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3045 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3046 {
3047 RTSemFastMutexRelease(pDevExt->mtxLdr);
3048 dprintf(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p is NULL!\n",
3049 pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3050 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3051 return VERR_INVALID_PARAMETER;
3052 }
3053 break;
3054 default:
3055 RTSemFastMutexRelease(pDevExt->mtxLdr);
3056 dprintf(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
3057 return VERR_INVALID_PARAMETER;
3058 }
3059 if ( pReq->u.In.pfnModuleInit
3060 && (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3061 {
3062 RTSemFastMutexRelease(pDevExt->mtxLdr);
3063 dprintf(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
3064 pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
3065 return VERR_INVALID_PARAMETER;
3066 }
3067 if ( pReq->u.In.pfnModuleTerm
3068 && (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3069 {
3070 RTSemFastMutexRelease(pDevExt->mtxLdr);
3071 dprintf(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
3072 pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
3073 return VERR_INVALID_PARAMETER;
3074 }
3075
3076 /*
3077 * Copy the memory.
3078 */
3079 /* no need to do try/except as this is a buffered request. */
3080 memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
3081 pImage->uState = SUP_IOCTL_LDR_LOAD;
3082 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
3083 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
3084 pImage->offSymbols = pReq->u.In.offSymbols;
3085 pImage->cSymbols = pReq->u.In.cSymbols;
3086 pImage->offStrTab = pReq->u.In.offStrTab;
3087 pImage->cbStrTab = pReq->u.In.cbStrTab;
3088
3089 /*
3090 * Update any entry points.
3091 */
3092 switch (pReq->u.In.eEPType)
3093 {
3094 default:
3095 case SUPLDRLOADEP_NOTHING:
3096 rc = VINF_SUCCESS;
3097 break;
3098 case SUPLDRLOADEP_VMMR0:
3099 rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3100 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
3101 break;
3102 }
3103
3104 /*
3105 * On success call the module initialization.
3106 */
3107 dprintf(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
3108 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
3109 {
3110 dprintf(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
3111 rc = pImage->pfnModuleInit();
3112 if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
3113 supdrvLdrUnsetR0EP(pDevExt);
3114 }
3115
3116 if (rc)
3117 pImage->uState = SUP_IOCTL_LDR_OPEN;
3118
3119 RTSemFastMutexRelease(pDevExt->mtxLdr);
3120 return rc;
3121}
3122
3123
3124/**
3125 * Frees a previously loaded (prep'ed) image.
3126 *
3127 * @returns IPRT status code.
3128 * @param pDevExt Device globals.
3129 * @param pSession Session data.
3130 * @param pReq The request.
3131 */
3132static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
3133{
3134 PSUPDRVLDRUSAGE pUsagePrev;
3135 PSUPDRVLDRUSAGE pUsage;
3136 PSUPDRVLDRIMAGE pImage;
3137 dprintf(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
3138
3139 /*
3140 * Find the ldr image.
3141 */
3142 RTSemFastMutexRequest(pDevExt->mtxLdr);
3143 pUsagePrev = NULL;
3144 pUsage = pSession->pLdrUsage;
3145 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3146 {
3147 pUsagePrev = pUsage;
3148 pUsage = pUsage->pNext;
3149 }
3150 if (!pUsage)
3151 {
3152 RTSemFastMutexRelease(pDevExt->mtxLdr);
3153 dprintf(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
3154 return VERR_INVALID_HANDLE;
3155 }
3156
3157 /*
3158 * Check if we can remove anything.
3159 */
3160 pImage = pUsage->pImage;
3161 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
3162 {
3163 /* unlink it */
3164 if (pUsagePrev)
3165 pUsagePrev->pNext = pUsage->pNext;
3166 else
3167 pSession->pLdrUsage = pUsage->pNext;
3168 /* free it */
3169 pUsage->pImage = NULL;
3170 pUsage->pNext = NULL;
3171 RTMemFree(pUsage);
3172
3173 /*
3174 * Derefrence the image.
3175 */
3176 if (pImage->cUsage <= 1)
3177 supdrvLdrFree(pDevExt, pImage);
3178 else
3179 pImage->cUsage--;
3180 }
3181 else
3182 {
3183 /*
3184 * Dereference both image and usage.
3185 */
3186 pImage->cUsage--;
3187 pUsage->cUsage--;
3188 }
3189
3190 RTSemFastMutexRelease(pDevExt->mtxLdr);
3191 return VINF_SUCCESS;
3192}
3193
3194
3195/**
3196 * Gets the address of a symbol in an open image.
3197 *
3198 * @returns 0 on success.
3199 * @returns SUPDRV_ERR_* on failure.
3200 * @param pDevExt Device globals.
3201 * @param pSession Session data.
3202 * @param pReq The request buffer.
3203 */
3204static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3205{
3206 PSUPDRVLDRIMAGE pImage;
3207 PSUPDRVLDRUSAGE pUsage;
3208 uint32_t i;
3209 PSUPLDRSYM paSyms;
3210 const char *pchStrings;
3211 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3212 void *pvSymbol = NULL;
3213 int rc = VERR_GENERAL_FAILURE;
3214 dprintf2(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3215
3216 /*
3217 * Find the ldr image.
3218 */
3219 RTSemFastMutexRequest(pDevExt->mtxLdr);
3220 pUsage = pSession->pLdrUsage;
3221 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3222 pUsage = pUsage->pNext;
3223 if (!pUsage)
3224 {
3225 RTSemFastMutexRelease(pDevExt->mtxLdr);
3226 dprintf(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3227 return VERR_INVALID_HANDLE;
3228 }
3229 pImage = pUsage->pImage;
3230 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3231 {
3232 unsigned uState = pImage->uState;
3233 RTSemFastMutexRelease(pDevExt->mtxLdr);
3234 dprintf(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3235 return VERR_ALREADY_LOADED;
3236 }
3237
3238 /*
3239 * Search the symbol string.
3240 */
3241 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3242 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3243 for (i = 0; i < pImage->cSymbols; i++)
3244 {
3245 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3246 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3247 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3248 {
3249 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3250 rc = VINF_SUCCESS;
3251 break;
3252 }
3253 }
3254 RTSemFastMutexRelease(pDevExt->mtxLdr);
3255 pReq->u.Out.pvSymbol = pvSymbol;
3256 return rc;
3257}
3258
3259
3260/**
3261 * Updates the IDT patches to point to the specified VMM R0 entry
3262 * point (i.e. VMMR0Enter()).
3263 *
3264 * @returns IPRT status code.
3265 * @param pDevExt Device globals.
3266 * @param pSession Session data.
3267 * @param pVMMR0 VMMR0 image handle.
3268 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3269 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3270 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3271 * @remark Caller must own the loader mutex.
3272 */
3273static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3274{
3275 int rc = VINF_SUCCESS;
3276 dprintf(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3277
3278
3279 /*
3280 * Check if not yet set.
3281 */
3282 if (!pDevExt->pvVMMR0)
3283 {
3284#ifdef VBOX_WITH_IDT_PATCHING
3285 PSUPDRVPATCH pPatch;
3286#endif
3287
3288 /*
3289 * Set it and update IDT patch code.
3290 */
3291 pDevExt->pvVMMR0 = pvVMMR0;
3292 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3293 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3294 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3295#ifdef VBOX_WITH_IDT_PATCHING
3296 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3297 {
3298# ifdef RT_ARCH_AMD64
3299 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3300# else /* RT_ARCH_X86 */
3301 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3302 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3303# endif
3304 }
3305#endif /* VBOX_WITH_IDT_PATCHING */
3306 }
3307 else
3308 {
3309 /*
3310 * Return failure or success depending on whether the values match or not.
3311 */
3312 if ( pDevExt->pvVMMR0 != pvVMMR0
3313 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3314 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3315 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3316 {
3317 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3318 rc = VERR_INVALID_PARAMETER;
3319 }
3320 }
3321 return rc;
3322}
3323
3324
3325/**
3326 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3327 *
3328 * @param pDevExt Device globals.
3329 */
3330static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3331{
3332#ifdef VBOX_WITH_IDT_PATCHING
3333 PSUPDRVPATCH pPatch;
3334#endif
3335
3336 pDevExt->pvVMMR0 = NULL;
3337 pDevExt->pfnVMMR0EntryInt = NULL;
3338 pDevExt->pfnVMMR0EntryFast = NULL;
3339 pDevExt->pfnVMMR0EntryEx = NULL;
3340
3341#ifdef VBOX_WITH_IDT_PATCHING
3342 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3343 {
3344# ifdef RT_ARCH_AMD64
3345 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3346 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3347# else /* RT_ARCH_X86 */
3348 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3349 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3350# endif
3351 }
3352#endif /* VBOX_WITH_IDT_PATCHING */
3353}
3354
3355
3356/**
3357 * Adds a usage reference in the specified session of an image.
3358 *
3359 * @param pSession Session in question.
3360 * @param pImage Image which the session is using.
3361 */
3362static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3363{
3364 PSUPDRVLDRUSAGE pUsage;
3365 dprintf(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3366
3367 /*
3368 * Referenced it already?
3369 */
3370 pUsage = pSession->pLdrUsage;
3371 while (pUsage)
3372 {
3373 if (pUsage->pImage == pImage)
3374 {
3375 pUsage->cUsage++;
3376 return;
3377 }
3378 pUsage = pUsage->pNext;
3379 }
3380
3381 /*
3382 * Allocate new usage record.
3383 */
3384 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3385 Assert(pUsage);
3386 if (pUsage)
3387 {
3388 pUsage->cUsage = 1;
3389 pUsage->pImage = pImage;
3390 pUsage->pNext = pSession->pLdrUsage;
3391 pSession->pLdrUsage = pUsage;
3392 }
3393 /* ignore errors... */
3394}
3395
3396
3397/**
3398 * Frees a load image.
3399 *
3400 * @param pDevExt Pointer to device extension.
3401 * @param pImage Pointer to the image we're gonna free.
3402 * This image must exit!
3403 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3404 */
3405static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3406{
3407 PSUPDRVLDRIMAGE pImagePrev;
3408 dprintf(("supdrvLdrFree: pImage=%p\n", pImage));
3409
3410 /* find it - arg. should've used doubly linked list. */
3411 Assert(pDevExt->pLdrImages);
3412 pImagePrev = NULL;
3413 if (pDevExt->pLdrImages != pImage)
3414 {
3415 pImagePrev = pDevExt->pLdrImages;
3416 while (pImagePrev->pNext != pImage)
3417 pImagePrev = pImagePrev->pNext;
3418 Assert(pImagePrev->pNext == pImage);
3419 }
3420
3421 /* unlink */
3422 if (pImagePrev)
3423 pImagePrev->pNext = pImage->pNext;
3424 else
3425 pDevExt->pLdrImages = pImage->pNext;
3426
3427 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3428 if (pDevExt->pvVMMR0 == pImage->pvImage)
3429 supdrvLdrUnsetR0EP(pDevExt);
3430
3431 /* call termination function if fully loaded. */
3432 if ( pImage->pfnModuleTerm
3433 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3434 {
3435 dprintf(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3436 pImage->pfnModuleTerm();
3437 }
3438
3439 /* free the image */
3440 pImage->cUsage = 0;
3441 pImage->pNext = 0;
3442 pImage->uState = SUP_IOCTL_LDR_FREE;
3443 RTMemExecFree(pImage);
3444}
3445
3446
3447/**
3448 * Gets the current paging mode of the CPU and stores in in pOut.
3449 */
3450static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3451{
3452 SUPPAGINGMODE enmMode;
3453
3454 RTUINTREG cr0 = ASMGetCR0();
3455 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3456 enmMode = SUPPAGINGMODE_INVALID;
3457 else
3458 {
3459 RTUINTREG cr4 = ASMGetCR4();
3460 uint32_t fNXEPlusLMA = 0;
3461 if (cr4 & X86_CR4_PAE)
3462 {
3463 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3464 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3465 {
3466 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3467 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3468 fNXEPlusLMA |= BIT(0);
3469 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3470 fNXEPlusLMA |= BIT(1);
3471 }
3472 }
3473
3474 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3475 {
3476 case 0:
3477 enmMode = SUPPAGINGMODE_32_BIT;
3478 break;
3479
3480 case X86_CR4_PGE:
3481 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3482 break;
3483
3484 case X86_CR4_PAE:
3485 enmMode = SUPPAGINGMODE_PAE;
3486 break;
3487
3488 case X86_CR4_PAE | BIT(0):
3489 enmMode = SUPPAGINGMODE_PAE_NX;
3490 break;
3491
3492 case X86_CR4_PAE | X86_CR4_PGE:
3493 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3494 break;
3495
3496 case X86_CR4_PAE | X86_CR4_PGE | BIT(0):
3497 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3498 break;
3499
3500 case BIT(1) | X86_CR4_PAE:
3501 enmMode = SUPPAGINGMODE_AMD64;
3502 break;
3503
3504 case BIT(1) | X86_CR4_PAE | BIT(0):
3505 enmMode = SUPPAGINGMODE_AMD64_NX;
3506 break;
3507
3508 case BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3509 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3510 break;
3511
3512 case BIT(1) | X86_CR4_PAE | X86_CR4_PGE | BIT(0):
3513 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3514 break;
3515
3516 default:
3517 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3518 enmMode = SUPPAGINGMODE_INVALID;
3519 break;
3520 }
3521 }
3522 return enmMode;
3523}
3524
3525
3526#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
3527/**
3528 * Creates the GIP.
3529 *
3530 * @returns negative errno.
3531 * @param pDevExt Instance data. GIP stuff may be updated.
3532 */
3533static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
3534{
3535 PSUPGLOBALINFOPAGE pGip;
3536 RTHCPHYS HCPhysGip;
3537 uint32_t u32SystemResolution;
3538 uint32_t u32Interval;
3539 int rc;
3540
3541 dprintf(("supdrvGipCreate:\n"));
3542
3543 /* assert order */
3544 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
3545 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
3546 Assert(!pDevExt->pGipTimer);
3547
3548 /*
3549 * Allocate a suitable page with a default kernel mapping.
3550 */
3551 rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
3552 if (RT_FAILURE(rc))
3553 {
3554 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
3555 return rc;
3556 }
3557 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
3558 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
3559
3560 /*
3561 * Try bump up the system timer resolution.
3562 * The more interrupts the better...
3563 */
3564 if ( RT_SUCCESS(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3565 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3566 || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /* 256 HZ */, &u32SystemResolution))
3567 || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /* 250 HZ */, &u32SystemResolution))
3568 || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /* 128 HZ */, &u32SystemResolution))
3569 || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /* 100 HZ */, &u32SystemResolution))
3570 || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /* 64 HZ */, &u32SystemResolution))
3571 || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /* 32 HZ */, &u32SystemResolution))
3572 )
3573 {
3574 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3575 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3576 }
3577
3578 /*
3579 * Find a reasonable update interval, something close to 10ms would be nice,
3580 * and create a recurring timer.
3581 */
3582 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
3583 while (u32Interval < 10000000 /* 10 ms */)
3584 u32Interval += u32SystemResolution;
3585
3586 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipTimer, pDevExt);
3587 if (RT_FAILURE(rc))
3588 {
3589 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %RU32 ns interval. rc=%d\n", u32Interval, rc));
3590 Assert(!pDevExt->pGipTimer);
3591 supdrvGipDestroy(pDevExt);
3592 return rc;
3593 }
3594
3595 /*
3596 * We're good.
3597 */
3598 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);
3599 return VINF_SUCCESS;
3600}
3601
3602
3603/**
3604 * Terminates the GIP.
3605 *
3606 * @param pDevExt Instance data. GIP stuff may be updated.
3607 */
3608static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3609{
3610 int rc;
3611#ifdef DEBUG_DARWIN_GIP
3612 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3613 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3614 pDevExt->pGipTimer, pDevExt->GipMemObj));
3615#endif
3616
3617 /*
3618 * Invalid the GIP data.
3619 */
3620 if (pDevExt->pGip)
3621 {
3622 supdrvGipTerm(pDevExt->pGip);
3623 pDevExt->pGip = NULL;
3624 }
3625
3626 /*
3627 * Destroy the timer and free the GIP memory object.
3628 */
3629 if (pDevExt->pGipTimer)
3630 {
3631 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3632 pDevExt->pGipTimer = NULL;
3633 }
3634
3635 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3636 {
3637 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3638 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3639 }
3640
3641 /*
3642 * Finally, release the system timer resolution request if one succeeded.
3643 */
3644 if (pDevExt->u32SystemTimerGranularityGrant)
3645 {
3646 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3647 pDevExt->u32SystemTimerGranularityGrant = 0;
3648 }
3649}
3650
3651
3652/**
3653 * Timer callback function.
3654 * @param pTimer The timer.
3655 * @param pvUser The device extension.
3656 */
3657static DECLCALLBACK(void) supdrvGipTimer(PRTTIMER pTimer, void *pvUser)
3658{
3659 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3660 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3661}
3662#endif /* USE_NEW_OS_INTERFACE_FOR_GIP */
3663
3664
3665/**
3666 * Initializes the GIP data.
3667 *
3668 * @returns IPRT status code.
3669 * @param pDevExt Pointer to the device instance data.
3670 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3671 * @param HCPhys The physical address of the GIP.
3672 * @param u64NanoTS The current nanosecond timestamp.
3673 * @param uUpdateHz The update freqence.
3674 */
3675int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3676{
3677 unsigned i;
3678#ifdef DEBUG_DARWIN_GIP
3679 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3680#else
3681 dprintf(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3682#endif
3683
3684 /*
3685 * Initialize the structure.
3686 */
3687 memset(pGip, 0, PAGE_SIZE);
3688 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3689 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3690 pGip->u32Mode = supdrvGipDeterminTscMode();
3691 pGip->u32UpdateHz = uUpdateHz;
3692 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3693 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3694
3695 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3696 {
3697 pGip->aCPUs[i].u32TransactionId = 2;
3698 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3699 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3700
3701 /*
3702 * We don't know the following values until we've executed updates.
3703 * So, we'll just insert very high values.
3704 */
3705 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3706 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3707 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3708 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3709 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3710 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3711 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3712 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3713 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3714 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3715 }
3716
3717 /*
3718 * Link it to the device extension.
3719 */
3720 pDevExt->pGip = pGip;
3721 pDevExt->HCPhysGip = HCPhys;
3722 pDevExt->cGipUsers = 0;
3723
3724 return VINF_SUCCESS;
3725}
3726
3727
3728/**
3729 * Determin the GIP TSC mode.
3730 *
3731 * @returns The most suitable TSC mode.
3732 */
3733static SUPGIPMODE supdrvGipDeterminTscMode(void)
3734{
3735#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
3736 /*
3737 * The problem here is that AMD processors with power management features
3738 * may easily end up with different TSCs because the CPUs or even cores
3739 * on the same physical chip run at different frequencies to save power.
3740 *
3741 * It is rumoured that this will be corrected with Barcelona and it's
3742 * expected that this will be indicated by the TscInvariant bit in
3743 * cpuid(0x80000007). So, the "difficult" bit here is to correctly
3744 * identify the older CPUs which don't do different frequency and
3745 * can be relied upon to have somewhat uniform TSC between the cpus.
3746 */
3747 if (supdrvOSGetCPUCount() > 1)
3748 {
3749 uint32_t uEAX, uEBX, uECX, uEDX;
3750
3751 /* Permit user users override. */
3752 if (supdrvOSGetForcedAsyncTscMode())
3753 return SUPGIPMODE_ASYNC_TSC;
3754
3755 /* Check for "AuthenticAMD" */
3756 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
3757 if (uEAX >= 1 && uEBX == 0x68747541 && uECX == 0x444d4163 && uEDX == 0x69746e65)
3758 {
3759 /* Check for APM support and that TscInvariant is cleared. */
3760 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
3761 if (uEAX >= 0x80000007)
3762 {
3763 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
3764 if ( !(uEDX & BIT(8))/* TscInvariant */
3765 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
3766 return SUPGIPMODE_ASYNC_TSC;
3767 }
3768 }
3769 }
3770#endif
3771 return SUPGIPMODE_SYNC_TSC;
3772}
3773
3774
3775/**
3776 * Invalidates the GIP data upon termination.
3777 *
3778 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3779 */
3780void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
3781{
3782 unsigned i;
3783 pGip->u32Magic = 0;
3784 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3785 {
3786 pGip->aCPUs[i].u64NanoTS = 0;
3787 pGip->aCPUs[i].u64TSC = 0;
3788 pGip->aCPUs[i].iTSCHistoryHead = 0;
3789 }
3790}
3791
3792
3793/**
3794 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
3795 * updates all the per cpu data except the transaction id.
3796 *
3797 * @param pGip The GIP.
3798 * @param pGipCpu Pointer to the per cpu data.
3799 * @param u64NanoTS The current time stamp.
3800 */
3801static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3802{
3803 uint64_t u64TSC;
3804 uint64_t u64TSCDelta;
3805 uint32_t u32UpdateIntervalTSC;
3806 uint32_t u32UpdateIntervalTSCSlack;
3807 unsigned iTSCHistoryHead;
3808 uint64_t u64CpuHz;
3809
3810 /*
3811 * Update the NanoTS.
3812 */
3813 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
3814
3815 /*
3816 * Calc TSC delta.
3817 */
3818 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
3819 u64TSC = ASMReadTSC();
3820 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
3821 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
3822
3823 if (u64TSCDelta >> 32)
3824 {
3825 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
3826 pGipCpu->cErrors++;
3827 }
3828
3829 /*
3830 * TSC History.
3831 */
3832 Assert(ELEMENTS(pGipCpu->au32TSCHistory) == 8);
3833
3834 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
3835 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
3836 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
3837
3838 /*
3839 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
3840 */
3841 if (pGip->u32UpdateHz >= 1000)
3842 {
3843 uint32_t u32;
3844 u32 = pGipCpu->au32TSCHistory[0];
3845 u32 += pGipCpu->au32TSCHistory[1];
3846 u32 += pGipCpu->au32TSCHistory[2];
3847 u32 += pGipCpu->au32TSCHistory[3];
3848 u32 >>= 2;
3849 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
3850 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
3851 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
3852 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
3853 u32UpdateIntervalTSC >>= 2;
3854 u32UpdateIntervalTSC += u32;
3855 u32UpdateIntervalTSC >>= 1;
3856
3857 /* Value choosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
3858 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
3859 }
3860 else if (pGip->u32UpdateHz >= 90)
3861 {
3862 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
3863 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
3864 u32UpdateIntervalTSC >>= 1;
3865
3866 /* value choosen on a 2GHz thinkpad running windows */
3867 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
3868 }
3869 else
3870 {
3871 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
3872
3873 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
3874 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
3875 }
3876 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
3877
3878 /*
3879 * CpuHz.
3880 */
3881 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
3882 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
3883}
3884
3885
3886/**
3887 * Updates the GIP.
3888 *
3889 * @param pGip Pointer to the GIP.
3890 * @param u64NanoTS The current nanosecond timesamp.
3891 */
3892void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
3893{
3894 /*
3895 * Determin the relevant CPU data.
3896 */
3897 PSUPGIPCPU pGipCpu;
3898 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
3899 pGipCpu = &pGip->aCPUs[0];
3900 else
3901 {
3902 unsigned iCpu = ASMGetApicId();
3903 if (RT_LIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
3904 return;
3905 pGipCpu = &pGip->aCPUs[iCpu];
3906 }
3907
3908 /*
3909 * Start update transaction.
3910 */
3911 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
3912 {
3913 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
3914 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
3915 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3916 pGipCpu->cErrors++;
3917 return;
3918 }
3919
3920 /*
3921 * Recalc the update frequency every 0x800th time.
3922 */
3923 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
3924 {
3925 if (pGip->u64NanoTSLastUpdateHz)
3926 {
3927#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
3928 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
3929 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
3930 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
3931 {
3932 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
3933 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
3934 }
3935#endif
3936 }
3937 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
3938 }
3939
3940 /*
3941 * Update the data.
3942 */
3943 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
3944
3945 /*
3946 * Complete transaction.
3947 */
3948 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3949}
3950
3951
3952/**
3953 * Updates the per cpu GIP data for the calling cpu.
3954 *
3955 * @param pGip Pointer to the GIP.
3956 * @param u64NanoTS The current nanosecond timesamp.
3957 * @param iCpu The CPU index.
3958 */
3959void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
3960{
3961 PSUPGIPCPU pGipCpu;
3962
3963 if (RT_LIKELY(iCpu <= RT_ELEMENTS(pGip->aCPUs)))
3964 {
3965 pGipCpu = &pGip->aCPUs[iCpu];
3966
3967 /*
3968 * Start update transaction.
3969 */
3970 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
3971 {
3972 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
3973 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3974 pGipCpu->cErrors++;
3975 return;
3976 }
3977
3978 /*
3979 * Update the data.
3980 */
3981 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
3982
3983 /*
3984 * Complete transaction.
3985 */
3986 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3987 }
3988}
3989
3990
3991#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
3992/**
3993 * Stub function for non-debug builds.
3994 */
3995RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
3996{
3997 return NULL;
3998}
3999
4000RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4001{
4002 return NULL;
4003}
4004
4005/**
4006 * Stub function for non-debug builds.
4007 */
4008RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4009{
4010 return 0;
4011}
4012
4013/**
4014 * Stub function for non-debug builds.
4015 */
4016RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4017{
4018}
4019
4020/**
4021 * Stub function for non-debug builds.
4022 */
4023RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4024{
4025}
4026
4027/**
4028 * Stub function for non-debug builds.
4029 */
4030RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4031{
4032}
4033
4034/**
4035 * Stub function for non-debug builds.
4036 */
4037RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4038{
4039}
4040
4041/**
4042 * Stub function for non-debug builds.
4043 */
4044RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4045{
4046}
4047#endif /* !DEBUG */
4048
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette