VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 10255

Last change on this file since 10255 was 10255, checked in by vboxsync, 16 years ago

SUPDRVShared.c -> SUPDrv.c

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 150.8 KB
Line 
1/* $Revision: 10255 $ */
2/** @file
3 * VirtualBox Support Driver - Shared code.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include "SUPDrvInternal.h"
36#ifndef PAGE_SHIFT
37# include <iprt/param.h>
38#endif
39#include <iprt/alloc.h>
40#include <iprt/semaphore.h>
41#include <iprt/spinlock.h>
42#include <iprt/thread.h>
43#include <iprt/process.h>
44#include <iprt/mp.h>
45#include <iprt/cpuset.h>
46#include <iprt/log.h>
47/* VBox/x86.h not compatible with the Linux kernel sources */
48#ifdef RT_OS_LINUX
49# define X86_CPUID_VENDOR_AMD_EBX 0x68747541
50# define X86_CPUID_VENDOR_AMD_ECX 0x444d4163
51# define X86_CPUID_VENDOR_AMD_EDX 0x69746e65
52#else
53# include <VBox/x86.h>
54#endif
55
56/*
57 * Logging assignments:
58 * Log - useful stuff, like failures.
59 * LogFlow - program flow, except the really noisy bits.
60 * Log2 - Cleanup and IDTE
61 * Log3 - Loader flow noise.
62 * Log4 - Call VMMR0 flow noise.
63 * Log5 - Native yet-to-be-defined noise.
64 * Log6 - Native ioctl flow noise.
65 *
66 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
 * instantiation in log-vbox.c(pp).
68 */
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/* from x86.h - clashes with linux thus this duplication */
75#undef X86_CR0_PG
76#define X86_CR0_PG RT_BIT(31)
77#undef X86_CR0_PE
78#define X86_CR0_PE RT_BIT(0)
79#undef X86_CPUID_AMD_FEATURE_EDX_NX
80#define X86_CPUID_AMD_FEATURE_EDX_NX RT_BIT(20)
81#undef MSR_K6_EFER
82#define MSR_K6_EFER 0xc0000080
83#undef MSR_K6_EFER_NXE
84#define MSR_K6_EFER_NXE RT_BIT(11)
85#undef MSR_K6_EFER_LMA
86#define MSR_K6_EFER_LMA RT_BIT(10)
87#undef X86_CR4_PGE
88#define X86_CR4_PGE RT_BIT(7)
89#undef X86_CR4_PAE
90#define X86_CR4_PAE RT_BIT(5)
91#undef X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
92#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE RT_BIT(29)
93
94
95/** The frequency by which we recalculate the u32UpdateHz and
96 * u32UpdateIntervalNS GIP members. The value must be a power of 2. */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/**
100 * Validates a session pointer.
101 *
102 * @returns true/false accordingly.
103 * @param pSession The session.
104 */
105#define SUP_IS_SESSION_VALID(pSession) \
106 ( VALID_PTR(pSession) \
107 && pSession->u32Cookie == BIRD_INV)
108
109
110/*******************************************************************************
111* Global Variables *
112*******************************************************************************/
113/**
114 * Array of the R0 SUP API.
115 */
116static SUPFUNC g_aFunctions[] =
117{
118 /* name function */
119 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
120 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
121 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
122 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
123 { "SUPR0LockMem", (void *)SUPR0LockMem },
124 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
125 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
126 { "SUPR0ContFree", (void *)SUPR0ContFree },
127 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
128 { "SUPR0LowFree", (void *)SUPR0LowFree },
129 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
130 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
131 { "SUPR0MemFree", (void *)SUPR0MemFree },
132 { "SUPR0PageAlloc", (void *)SUPR0PageAlloc },
133 { "SUPR0PageFree", (void *)SUPR0PageFree },
134 { "SUPR0Printf", (void *)SUPR0Printf },
135 { "RTMemAlloc", (void *)RTMemAlloc },
136 { "RTMemAllocZ", (void *)RTMemAllocZ },
137 { "RTMemFree", (void *)RTMemFree },
138 /*{ "RTMemDup", (void *)RTMemDup },*/
139 { "RTMemRealloc", (void *)RTMemRealloc },
140 { "RTR0MemObjAllocLow", (void *)RTR0MemObjAllocLow },
141 { "RTR0MemObjAllocPage", (void *)RTR0MemObjAllocPage },
142 { "RTR0MemObjAllocPhys", (void *)RTR0MemObjAllocPhys },
143 { "RTR0MemObjAllocPhysNC", (void *)RTR0MemObjAllocPhysNC },
144 { "RTR0MemObjAllocCont", (void *)RTR0MemObjAllocCont },
145 { "RTR0MemObjLockUser", (void *)RTR0MemObjLockUser },
146 { "RTR0MemObjMapKernel", (void *)RTR0MemObjMapKernel },
147 { "RTR0MemObjMapUser", (void *)RTR0MemObjMapUser },
148 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
149 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
150 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
151 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
152 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
153 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
154/* These don't work yet on linux - use fast mutexes!
155 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
156 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
157 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
158 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
159*/
160 { "RTProcSelf", (void *)RTProcSelf },
161 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
162 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
163 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
164 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
165 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
166 { "RTSemEventCreate", (void *)RTSemEventCreate },
167 { "RTSemEventSignal", (void *)RTSemEventSignal },
168 { "RTSemEventWait", (void *)RTSemEventWait },
169 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
170 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
171 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
172 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
173 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
174 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
175 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
176 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
177 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
178 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
179 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
180 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
181 { "RTSpinlockAcquireNoInts", (void *)RTSpinlockAcquireNoInts },
182 { "RTSpinlockReleaseNoInts", (void *)RTSpinlockReleaseNoInts },
183 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
184 { "RTThreadSleep", (void *)RTThreadSleep },
185 { "RTThreadYield", (void *)RTThreadYield },
186#if 0 /* Thread APIs, Part 2. */
187 { "RTThreadSelf", (void *)RTThreadSelf },
188 { "RTThreadCreate", (void *)RTThreadCreate },
189 { "RTThreadGetNative", (void *)RTThreadGetNative },
190 { "RTThreadWait", (void *)RTThreadWait },
191 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
192 { "RTThreadGetName", (void *)RTThreadGetName },
193 { "RTThreadSelfName", (void *)RTThreadSelfName },
194 { "RTThreadGetType", (void *)RTThreadGetType },
195 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
196 { "RTThreadUserReset", (void *)RTThreadUserReset },
197 { "RTThreadUserWait", (void *)RTThreadUserWait },
198 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
199#endif
200 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
201 { "RTMpCpuId", (void *)RTMpCpuId },
202 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
203 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
204 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
205 { "RTMpGetCount", (void *)RTMpGetCount },
206 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
207 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
208 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
209 { "RTMpGetSet", (void *)RTMpGetSet },
210 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
211 { "RTMpOnAll", (void *)RTMpOnAll },
212 { "RTMpOnOthers", (void *)RTMpOnOthers },
213 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
214 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
215 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
216 { "RTLogLogger", (void *)RTLogLogger },
217 { "RTLogLoggerEx", (void *)RTLogLoggerEx },
218 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
219 { "RTLogPrintf", (void *)RTLogPrintf },
220 { "RTLogPrintfV", (void *)RTLogPrintfV },
221 { "AssertMsg1", (void *)AssertMsg1 },
222 { "AssertMsg2", (void *)AssertMsg2 },
223};
224
225
226/*******************************************************************************
227* Internal Functions *
228*******************************************************************************/
229static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
230static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
231#ifdef VBOX_WITH_IDT_PATCHING
232static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
233static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
234static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
235static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
236static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
237#endif /* VBOX_WITH_IDT_PATCHING */
238static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
239static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
240static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
241static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
242static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
243static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
244static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
245static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
246static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
247static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt);
248#ifdef RT_OS_WINDOWS
249static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
250static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
251#endif /* RT_OS_WINDOWS */
252static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
253static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
254static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
255static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
256static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
257
258
259/**
260 * Initializes the device extentsion structure.
261 *
262 * @returns IPRT status code.
263 * @param pDevExt The device extension to initialize.
264 */
265int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
266{
267 /*
268 * Initialize it.
269 */
270 int rc;
271 memset(pDevExt, 0, sizeof(*pDevExt));
272 rc = RTSpinlockCreate(&pDevExt->Spinlock);
273 if (!rc)
274 {
275 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
276 if (!rc)
277 {
278 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
279 if (!rc)
280 {
281 rc = supdrvGipCreate(pDevExt);
282 if (RT_SUCCESS(rc))
283 {
284 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
285 return VINF_SUCCESS;
286 }
287
288 RTSemFastMutexDestroy(pDevExt->mtxGip);
289 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
290 }
291 RTSemFastMutexDestroy(pDevExt->mtxLdr);
292 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
293 }
294 RTSpinlockDestroy(pDevExt->Spinlock);
295 pDevExt->Spinlock = NIL_RTSPINLOCK;
296 }
297 return rc;
298}
299
300
301/**
302 * Delete the device extension (e.g. cleanup members).
303 *
304 * @param pDevExt The device extension to delete.
305 */
306void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
307{
308#ifdef VBOX_WITH_IDT_PATCHING
309 PSUPDRVPATCH pPatch;
310#endif
311 PSUPDRVOBJ pObj;
312 PSUPDRVUSAGE pUsage;
313
314 /*
315 * Kill mutexes and spinlocks.
316 */
317 RTSemFastMutexDestroy(pDevExt->mtxGip);
318 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
319 RTSemFastMutexDestroy(pDevExt->mtxLdr);
320 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
321 RTSpinlockDestroy(pDevExt->Spinlock);
322 pDevExt->Spinlock = NIL_RTSPINLOCK;
323
324 /*
325 * Free lists.
326 */
327#ifdef VBOX_WITH_IDT_PATCHING
328 /* patches */
329 /** @todo make sure we don't uninstall patches which has been patched by someone else. */
330 pPatch = pDevExt->pIdtPatchesFree;
331 pDevExt->pIdtPatchesFree = NULL;
332 while (pPatch)
333 {
334 void *pvFree = pPatch;
335 pPatch = pPatch->pNext;
336 RTMemExecFree(pvFree);
337 }
338#endif /* VBOX_WITH_IDT_PATCHING */
339
340 /* objects. */
341 pObj = pDevExt->pObjs;
342#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
343 Assert(!pObj); /* (can trigger on forced unloads) */
344#endif
345 pDevExt->pObjs = NULL;
346 while (pObj)
347 {
348 void *pvFree = pObj;
349 pObj = pObj->pNext;
350 RTMemFree(pvFree);
351 }
352
353 /* usage records. */
354 pUsage = pDevExt->pUsageFree;
355 pDevExt->pUsageFree = NULL;
356 while (pUsage)
357 {
358 void *pvFree = pUsage;
359 pUsage = pUsage->pNext;
360 RTMemFree(pvFree);
361 }
362
363 /* kill the GIP */
364 supdrvGipDestroy(pDevExt);
365}
366
367
368/**
369 * Create session.
370 *
371 * @returns IPRT status code.
372 * @param pDevExt Device extension.
373 * @param ppSession Where to store the pointer to the session data.
374 */
375int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
376{
377 /*
378 * Allocate memory for the session data.
379 */
380 int rc = VERR_NO_MEMORY;
381 PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
382 if (pSession)
383 {
384 /* Initialize session data. */
385 rc = RTSpinlockCreate(&pSession->Spinlock);
386 if (!rc)
387 {
388 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
389 pSession->pDevExt = pDevExt;
390 pSession->u32Cookie = BIRD_INV;
391 /*pSession->pLdrUsage = NULL;
392 pSession->pPatchUsage = NULL;
393 pSession->pUsage = NULL;
394 pSession->pGip = NULL;
395 pSession->fGipReferenced = false;
396 pSession->Bundle.cUsed = 0 */
397
398 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
399 return VINF_SUCCESS;
400 }
401
402 RTMemFree(pSession);
403 *ppSession = NULL;
404 Log(("Failed to create spinlock, rc=%d!\n", rc));
405 }
406
407 return rc;
408}
409
410
411/**
412 * Shared code for cleaning up a session.
413 *
414 * @param pDevExt Device extension.
415 * @param pSession Session data.
416 * This data will be freed by this routine.
417 */
418void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
419{
420 /*
421 * Cleanup the session first.
422 */
423 supdrvCleanupSession(pDevExt, pSession);
424
425 /*
426 * Free the rest of the session stuff.
427 */
428 RTSpinlockDestroy(pSession->Spinlock);
429 pSession->Spinlock = NIL_RTSPINLOCK;
430 pSession->pDevExt = NULL;
431 RTMemFree(pSession);
432 LogFlow(("supdrvCloseSession: returns\n"));
433}
434
435
/**
 * Shared code for cleaning up a session (but not quite freeing it).
 *
 * This is primarily intended for MAC OS X where we have to clean up the memory
 * stuff before the file handle is closed.
 *
 * Cleanup order: logger instances, IDT patches (if enabled), object
 * references, memory bundles, loaded image references, and finally the GIP
 * mapping.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;
    LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));

    /*
     * Remove logger instances related to this session.
     */
    RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);

#ifdef VBOX_WITH_IDT_PATCHING
    /*
     * Uninstall any IDT patches installed for this session.
     */
    supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#endif

    /*
     * Release object references made in this session.
     * In theory there should be no one racing us in this session.
     */
    Log2(("release objects - start\n"));
    if (pSession->pUsage)
    {
        RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        PSUPDRVUSAGE pUsage;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

        /* Pop usage records off the session list one at a time; the spinlock
           is held while manipulating the lists but dropped around the free /
           destructor calls (see below) and re-acquired at the loop bottom. */
        while ((pUsage = pSession->pUsage) != NULL)
        {
            PSUPDRVOBJ pObj = pUsage->pObj;
            pSession->pUsage = pUsage->pNext;

            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage < pObj->cUsage)
            {
                /* Other sessions still reference the object: just subtract
                   this session's references. */
                pObj->cUsage -= pUsage->cUsage;
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
            }
            else
            {
                /* Destroy the object and free the record. */
                if (pDevExt->pObjs == pObj)
                    pDevExt->pObjs = pObj->pNext;
                else
                {
                    /* Unlink from the middle of the global object list. */
                    PSUPDRVOBJ pObjPrev;
                    for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                        if (pObjPrev->pNext == pObj)
                        {
                            pObjPrev->pNext = pObj->pNext;
                            break;
                        }
                    Assert(pObjPrev);
                }
                /* NOTE(review): the spinlock is released before invoking the
                   destructor - presumably so it may block or call back into
                   the SUPR0Obj* APIs; confirm before changing the ordering. */
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

                Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
                     pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
                if (pObj->pfnDestructor)
                    pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
                RTMemFree(pObj);
            }

            /* free it and continue. */
            RTMemFree(pUsage);

            RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        }

        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
    }
    Log2(("release objects - done\n"));

    /*
     * Release memory allocated in the session.
     *
     * We do not serialize this as we assume that the application will
     * not allocated memory while closing the file handle object.
     */
    Log2(("freeing memory:\n"));
    pBundle = &pSession->Bundle;
    while (pBundle)
    {
        PSUPDRVBUNDLE pToFree;
        unsigned i;

        /*
         * Check and unlock all entries in the bundle.
         */
        for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
        {
            if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
            {
                int rc;
                Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
                      (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
                /* Free any ring-3 mapping before the backing memory object. */
                if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
                {
                    rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
                    AssertRC(rc); /** @todo figure out how to handle this. */
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                }
                rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
                AssertRC(rc); /** @todo figure out how to handle this. */
                pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
            }
        }

        /*
         * Advance and free previous bundle.
         */
        pToFree = pBundle;
        pBundle = pBundle->pNext;

        pToFree->pNext = NULL;
        pToFree->cUsed = 0;
        /* The first bundle is embedded in the session structure and must
           not be freed separately. */
        if (pToFree != &pSession->Bundle)
            RTMemFree(pToFree);
    }
    Log2(("freeing memory - done\n"));

    /*
     * Loaded images needs to be dereferenced and possibly freed up.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    Log2(("freeing images:\n"));
    if (pSession->pLdrUsage)
    {
        PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
        pSession->pLdrUsage = NULL;
        while (pUsage)
        {
            void *pvFree = pUsage;
            PSUPDRVLDRIMAGE pImage = pUsage->pImage;
            /* Drop this session's references; free the image when it holds
               the last ones. */
            if (pImage->cUsage > pUsage->cUsage)
                pImage->cUsage -= pUsage->cUsage;
            else
                supdrvLdrFree(pDevExt, pImage);
            pUsage->pImage = NULL;
            pUsage = pUsage->pNext;
            RTMemFree(pvFree);
        }
    }
    RTSemFastMutexRelease(pDevExt->mtxLdr);
    Log2(("freeing images - done\n"));

    /*
     * Unmap the GIP.
     */
    Log2(("umapping GIP:\n"));
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        SUPR0GipUnmap(pSession);
        pSession->fGipReferenced = 0;
    }
    Log2(("umapping GIP - done\n"));
}
606
607
608/**
609 * Fast path I/O Control worker.
610 *
611 * @returns VBox status code that should be passed down to ring-3 unchanged.
612 * @param uIOCtl Function number.
613 * @param pDevExt Device extention.
614 * @param pSession Session data.
615 */
616int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
617{
618 int rc;
619
620 /*
621 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
622 */
623 if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
624 {
625 switch (uIOCtl)
626 {
627 case SUP_IOCTL_FAST_DO_RAW_RUN:
628 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
629 break;
630 case SUP_IOCTL_FAST_DO_HWACC_RUN:
631 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
632 break;
633 case SUP_IOCTL_FAST_DO_NOP:
634 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
635 break;
636 default:
637 rc = VERR_INTERNAL_ERROR;
638 break;
639 }
640 }
641 else
642 rc = VERR_INTERNAL_ERROR;
643
644 return rc;
645}
646
647
/**
 * Helper for supdrvIOCtl.  Checks whether pszStr contains any character of
 * pszChars.  We would use strpbrk here if this function would be contained
 * in the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @return 1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    /* Hand-rolled strpbrk-style scan: for each character in the string,
       look for a match in the character set. */
    for (; *pszStr != '\0'; pszStr++)
    {
        const char *pszSet;
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *pszStr)
                return 1;
    }
    return 0;
}
671
672
673/**
674 * I/O Control worker.
675 *
676 * @returns 0 on success.
677 * @returns VERR_INVALID_PARAMETER if the request is invalid.
678 *
679 * @param uIOCtl Function number.
680 * @param pDevExt Device extention.
681 * @param pSession Session data.
682 * @param pReqHdr The request header.
683 */
684int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
685{
686 /*
687 * Validate the request.
688 */
689 /* this first check could probably be omitted as its also done by the OS specific code... */
690 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
691 || pReqHdr->cbIn < sizeof(*pReqHdr)
692 || pReqHdr->cbOut < sizeof(*pReqHdr)))
693 {
694 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
695 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
696 return VERR_INVALID_PARAMETER;
697 }
698 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
699 {
700 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
701 {
702 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
703 return VERR_INVALID_PARAMETER;
704 }
705 }
706 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
707 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
708 {
709 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
710 return VERR_INVALID_PARAMETER;
711 }
712
713/*
714 * Validation macros
715 */
716#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
717 do { \
718 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
719 { \
720 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
721 (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
722 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
723 } \
724 } while (0)
725
726#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
727
728#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
729 do { \
730 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
731 { \
732 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
733 (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
734 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
735 } \
736 } while (0)
737
738#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
739 do { \
740 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
741 { \
742 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
743 (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
744 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
745 } \
746 } while (0)
747
748#define REQ_CHECK_EXPR(Name, expr) \
749 do { \
750 if (RT_UNLIKELY(!(expr))) \
751 { \
752 OSDBGPRINT(( #Name ": %s\n", #expr)); \
753 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
754 } \
755 } while (0)
756
757#define REQ_CHECK_EXPR_FMT(expr, fmt) \
758 do { \
759 if (RT_UNLIKELY(!(expr))) \
760 { \
761 OSDBGPRINT( fmt ); \
762 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
763 } \
764 } while (0)
765
766
767 /*
768 * The switch.
769 */
770 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
771 {
772 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
773 {
774 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
775 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
776 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
777 {
778 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
779 pReq->Hdr.rc = VERR_INVALID_MAGIC;
780 return 0;
781 }
782
783#if 0
784 /*
785 * Call out to the OS specific code and let it do permission checks on the
786 * client process.
787 */
788 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
789 {
790 pReq->u.Out.u32Cookie = 0xffffffff;
791 pReq->u.Out.u32SessionCookie = 0xffffffff;
792 pReq->u.Out.u32SessionVersion = 0xffffffff;
793 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
794 pReq->u.Out.pSession = NULL;
795 pReq->u.Out.cFunctions = 0;
796 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
797 return 0;
798 }
799#endif
800
801 /*
802 * Match the version.
803 * The current logic is very simple, match the major interface version.
804 */
805 if ( pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
806 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
807 {
808 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
809 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
810 pReq->u.Out.u32Cookie = 0xffffffff;
811 pReq->u.Out.u32SessionCookie = 0xffffffff;
812 pReq->u.Out.u32SessionVersion = 0xffffffff;
813 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
814 pReq->u.Out.pSession = NULL;
815 pReq->u.Out.cFunctions = 0;
816 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
817 return 0;
818 }
819
820 /*
821 * Fill in return data and be gone.
822 * N.B. The first one to change SUPDRVIOC_VERSION shall makes sure that
823 * u32SessionVersion <= u32ReqVersion!
824 */
825 /** @todo Somehow validate the client and negotiate a secure cookie... */
826 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
827 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
828 pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
829 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
830 pReq->u.Out.pSession = pSession;
831 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
832 pReq->Hdr.rc = VINF_SUCCESS;
833 return 0;
834 }
835
836 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
837 {
838 /* validate */
839 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
840 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
841
842 /* execute */
843 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
844 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
845 pReq->Hdr.rc = VINF_SUCCESS;
846 return 0;
847 }
848
849 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
850 {
851 /* validate */
852 PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
853 REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);
854
855 /* execute */
856#ifdef VBOX_WITH_IDT_PATCHING
857 pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
858#else
859 pReq->u.Out.u8Idt = 3;
860 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
861#endif
862 return 0;
863 }
864
865 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
866 {
867 /* validate */
868 PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
869 REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);
870
871 /* execute */
872#ifdef VBOX_WITH_IDT_PATCHING
873 pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
874#else
875 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
876#endif
877 return 0;
878 }
879
880 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
881 {
882 /* validate */
883 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
884 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
885 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
886 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
887 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
888
889 /* execute */
890 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
891 if (RT_FAILURE(pReq->Hdr.rc))
892 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
893 return 0;
894 }
895
896 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
897 {
898 /* validate */
899 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
900 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
901
902 /* execute */
903 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
904 return 0;
905 }
906
907 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
908 {
909 /* validate */
910 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
911 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
912
913 /* execute */
914 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
915 if (RT_FAILURE(pReq->Hdr.rc))
916 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
917 return 0;
918 }
919
920 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
921 {
922 /* validate */
923 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
924 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
925
926 /* execute */
927 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
928 return 0;
929 }
930
931 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
932 {
933 /* validate */
934 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
935 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
936 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
937 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
938 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
939 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
940 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
941
942 /* execute */
943 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
944 return 0;
945 }
946
947 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
948 {
949 /* validate */
950 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
951 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
952 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
953 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
954 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
955 || ( pReq->u.In.offSymbols < pReq->u.In.cbImage
956 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
957 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
958 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
959 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
960 || ( pReq->u.In.offStrTab < pReq->u.In.cbImage
961 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
962 && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
963 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
964 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));
965
966 if (pReq->u.In.cSymbols)
967 {
968 uint32_t i;
969 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
970 for (i = 0; i < pReq->u.In.cSymbols; i++)
971 {
972 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
973 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
974 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
975 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
976 REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
977 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
978 }
979 }
980
981 /* execute */
982 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
983 return 0;
984 }
985
986 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
987 {
988 /* validate */
989 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
990 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
991
992 /* execute */
993 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
994 return 0;
995 }
996
997 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
998 {
999 /* validate */
1000 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1001 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1002 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));
1003
1004 /* execute */
1005 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1006 return 0;
1007 }
1008
1009 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1010 {
1011 /* validate */
1012 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1013 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1014 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1015
1016 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1017 {
1018 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1019
1020 /* execute */
1021 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1022 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
1023 else
1024 pReq->Hdr.rc = VERR_WRONG_ORDER;
1025 }
1026 else
1027 {
1028 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1029 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1030 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1031 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1032 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1033
1034 /* execute */
1035 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1036 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
1037 else
1038 pReq->Hdr.rc = VERR_WRONG_ORDER;
1039 }
1040
1041 if ( RT_FAILURE(pReq->Hdr.rc)
1042 && pReq->Hdr.rc != VERR_INTERRUPTED
1043 && pReq->Hdr.rc != VERR_TIMEOUT)
1044 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1045 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1046 else
1047 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1048 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1049 return 0;
1050 }
1051
1052 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1053 {
1054 /* validate */
1055 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1056 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1057
1058 /* execute */
1059 pReq->Hdr.rc = VINF_SUCCESS;
1060 pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
1061 return 0;
1062 }
1063
1064 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1065 {
1066 /* validate */
1067 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1068 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1069 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1070
1071 /* execute */
1072 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1073 if (RT_FAILURE(pReq->Hdr.rc))
1074 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1075 return 0;
1076 }
1077
1078 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1079 {
1080 /* validate */
1081 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1082 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1083
1084 /* execute */
1085 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1086 return 0;
1087 }
1088
1089 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1090 {
1091 /* validate */
1092 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1093 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1094
1095 /* execute */
1096 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1097 if (RT_SUCCESS(pReq->Hdr.rc))
1098 pReq->u.Out.pGipR0 = pDevExt->pGip;
1099 return 0;
1100 }
1101
1102 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1103 {
1104 /* validate */
1105 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1106 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1107
1108 /* execute */
1109 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1110 return 0;
1111 }
1112
1113 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1114 {
1115 /* validate */
1116 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1117 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1118 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1119 || ( VALID_PTR(pReq->u.In.pVMR0)
1120 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1121 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1122 /* execute */
1123 pSession->pVM = pReq->u.In.pVMR0;
1124 pReq->Hdr.rc = VINF_SUCCESS;
1125 return 0;
1126 }
1127
1128 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
1129 {
1130 /* validate */
1131 PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
1132 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
1133 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1134
1135 /* execute */
1136 pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1137 if (RT_FAILURE(pReq->Hdr.rc))
1138 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1139 return 0;
1140 }
1141
1142 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1143 {
1144 /* validate */
1145 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1146 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1147
1148 /* execute */
1149 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1150 return 0;
1151 }
1152
1153 default:
1154 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
1155 break;
1156 }
1157 return SUPDRV_ERR_GENERAL_FAILURE;
1158}
1159
1160
/**
 * Registers an object for reference counting.
 * The object is registered with one reference in the specified session.
 *
 * @returns Unique identifier on success (pointer).
 *          All future references must use this identifier.
 * @returns NULL on failure.
 * @param   pSession        The session which gets the initial reference.
 * @param   enmType         The object type; must be within (SUPDRVOBJTYPE_INVALID, SUPDRVOBJTYPE_END).
 * @param   pfnDestructor   The destructor function which will be called when the reference count reaches 0.
 * @param   pvUser1         The first user argument.
 * @param   pvUser2         The second user argument.
 */
SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
{
    RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    AssertPtrReturn(pfnDestructor, NULL);

    /*
     * Allocate and initialize the object before taking any locks.
     */
    pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    if (!pObj)
        return NULL;
    pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    pObj->enmType       = enmType;
    pObj->pNext         = NULL;
    pObj->cUsage        = 1;        /* the initial reference held by pSession */
    pObj->pfnDestructor = pfnDestructor;
    pObj->pvUser1       = pvUser1;
    pObj->pvUser2       = pvUser2;
    pObj->CreatorUid    = pSession->Uid;
    pObj->CreatorGid    = pSession->Gid;
    pObj->CreatorProcess= pSession->Process;
    supdrvOSObjInitCreator(pObj, pSession);

    /*
     * Allocate the usage record.
     * (We keep freed usage records around to simplify SUPR0ObjAddRef().)
     *
     * If the free list is empty the spinlock is dropped while calling
     * RTMemAlloc (which may block) and reacquired afterwards.
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    pUsage = pDevExt->pUsageFree;
    if (pUsage)
        pDevExt->pUsageFree = pUsage->pNext;
    else
    {
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
        if (!pUsage)
        {
            RTMemFree(pObj);
            return NULL;
        }
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
    }

    /*
     * Insert the object and create the session usage record.
     */
    /* The object. */
    pObj->pNext         = pDevExt->pObjs;
    pDevExt->pObjs      = pObj;

    /* The session record. */
    pUsage->cUsage      = 1;
    pUsage->pObj        = pObj;
    pUsage->pNext       = pSession->pUsage;
    Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
    pSession->pUsage    = pUsage;

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
    return pObj;
}
1244
1245
1246/**
1247 * Increment the reference counter for the object associating the reference
1248 * with the specified session.
1249 *
1250 * @returns IPRT status code.
1251 * @param pvObj The identifier returned by SUPR0ObjRegister().
1252 * @param pSession The session which is referencing the object.
1253 */
1254SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
1255{
1256 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1257 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1258 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1259 PSUPDRVUSAGE pUsagePre;
1260 PSUPDRVUSAGE pUsage;
1261
1262 /*
1263 * Validate the input.
1264 */
1265 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1266 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1267 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1268 VERR_INVALID_PARAMETER);
1269
1270 /*
1271 * Preallocate the usage record.
1272 */
1273 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1274
1275 pUsagePre = pDevExt->pUsageFree;
1276 if (pUsagePre)
1277 pDevExt->pUsageFree = pUsagePre->pNext;
1278 else
1279 {
1280 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1281 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
1282 if (!pUsagePre)
1283 return VERR_NO_MEMORY;
1284 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1285 }
1286
1287 /*
1288 * Reference the object.
1289 */
1290 pObj->cUsage++;
1291
1292 /*
1293 * Look for the session record.
1294 */
1295 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
1296 {
1297 Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1298 if (pUsage->pObj == pObj)
1299 break;
1300 }
1301 if (pUsage)
1302 pUsage->cUsage++;
1303 else
1304 {
1305 /* create a new session record. */
1306 pUsagePre->cUsage = 1;
1307 pUsagePre->pObj = pObj;
1308 pUsagePre->pNext = pSession->pUsage;
1309 pSession->pUsage = pUsagePre;
1310 Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));
1311
1312 pUsagePre = NULL;
1313 }
1314
1315 /*
1316 * Put any unused usage record into the free list..
1317 */
1318 if (pUsagePre)
1319 {
1320 pUsagePre->pNext = pDevExt->pUsageFree;
1321 pDevExt->pUsageFree = pUsagePre;
1322 }
1323
1324 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1325
1326 return VINF_SUCCESS;
1327}
1328
1329
1330/**
1331 * Decrement / destroy a reference counter record for an object.
1332 *
1333 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
1334 *
1335 * @returns IPRT status code.
1336 * @param pvObj The identifier returned by SUPR0ObjRegister().
1337 * @param pSession The session which is referencing the object.
1338 */
1339SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
1340{
1341 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1342 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1343 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1344 bool fDestroy = false;
1345 PSUPDRVUSAGE pUsage;
1346 PSUPDRVUSAGE pUsagePrev;
1347
1348 /*
1349 * Validate the input.
1350 */
1351 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1352 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1353 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1354 VERR_INVALID_PARAMETER);
1355
1356 /*
1357 * Acquire the spinlock and look for the usage record.
1358 */
1359 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1360
1361 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
1362 pUsage;
1363 pUsagePrev = pUsage, pUsage = pUsage->pNext)
1364 {
1365 Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1366 if (pUsage->pObj == pObj)
1367 {
1368 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
1369 if (pUsage->cUsage > 1)
1370 {
1371 pObj->cUsage--;
1372 pUsage->cUsage--;
1373 }
1374 else
1375 {
1376 /*
1377 * Free the session record.
1378 */
1379 if (pUsagePrev)
1380 pUsagePrev->pNext = pUsage->pNext;
1381 else
1382 pSession->pUsage = pUsage->pNext;
1383 pUsage->pNext = pDevExt->pUsageFree;
1384 pDevExt->pUsageFree = pUsage;
1385
1386 /* What about the object? */
1387 if (pObj->cUsage > 1)
1388 pObj->cUsage--;
1389 else
1390 {
1391 /*
1392 * Object is to be destroyed, unlink it.
1393 */
1394 pObj->u32Magic = SUPDRVOBJ_MAGIC + 1;
1395 fDestroy = true;
1396 if (pDevExt->pObjs == pObj)
1397 pDevExt->pObjs = pObj->pNext;
1398 else
1399 {
1400 PSUPDRVOBJ pObjPrev;
1401 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
1402 if (pObjPrev->pNext == pObj)
1403 {
1404 pObjPrev->pNext = pObj->pNext;
1405 break;
1406 }
1407 Assert(pObjPrev);
1408 }
1409 }
1410 }
1411 break;
1412 }
1413 }
1414
1415 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1416
1417 /*
1418 * Call the destructor and free the object if required.
1419 */
1420 if (fDestroy)
1421 {
1422 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
1423 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
1424 if (pObj->pfnDestructor)
1425 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
1426 RTMemFree(pObj);
1427 }
1428
1429 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
1430 return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
1431}
1432
1433/**
1434 * Verifies that the current process can access the specified object.
1435 *
1436 * @returns The following IPRT status code:
1437 * @retval VINF_SUCCESS if access was granted.
1438 * @retval VERR_PERMISSION_DENIED if denied access.
1439 * @retval VERR_INVALID_PARAMETER if invalid parameter.
1440 *
1441 * @param pvObj The identifier returned by SUPR0ObjRegister().
1442 * @param pSession The session which wishes to access the object.
1443 * @param pszObjName Object string name. This is optional and depends on the object type.
1444 *
1445 * @remark The caller is responsible for making sure the object isn't removed while
1446 * we're inside this function. If uncertain about this, just call AddRef before calling us.
1447 */
1448SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
1449{
1450 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1451 int rc;
1452
1453 /*
1454 * Validate the input.
1455 */
1456 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1457 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1458 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1459 VERR_INVALID_PARAMETER);
1460
1461 /*
1462 * Check access. (returns true if a decision has been made.)
1463 */
1464 rc = VERR_INTERNAL_ERROR;
1465 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
1466 return rc;
1467
1468 /*
1469 * Default policy is to allow the user to access his own
1470 * stuff but nothing else.
1471 */
1472 if (pObj->CreatorUid == pSession->Uid)
1473 return VINF_SUCCESS;
1474 return VERR_PERMISSION_DENIED;
1475}
1476
1477
1478/**
1479 * Lock pages.
1480 *
1481 * @returns IPRT status code.
1482 * @param pSession Session to which the locked memory should be associated.
1483 * @param pvR3 Start of the memory range to lock.
1484 * This must be page aligned.
1485 * @param cb Size of the memory range to lock.
1486 * This must be page aligned.
1487 */
1488SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1489{
1490 int rc;
1491 SUPDRVMEMREF Mem = {0};
1492 const size_t cb = (size_t)cPages << PAGE_SHIFT;
1493 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
1494
1495 /*
1496 * Verify input.
1497 */
1498 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1499 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1500 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
1501 || !pvR3)
1502 {
1503 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
1504 return VERR_INVALID_PARAMETER;
1505 }
1506
1507#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
1508 /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
1509 rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
1510 if (RT_SUCCESS(rc))
1511 return rc;
1512#endif
1513
1514 /*
1515 * Let IPRT do the job.
1516 */
1517 Mem.eType = MEMREF_TYPE_LOCKED;
1518 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
1519 if (RT_SUCCESS(rc))
1520 {
1521 uint32_t iPage = cPages;
1522 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
1523 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
1524
1525 while (iPage-- > 0)
1526 {
1527 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1528 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
1529 {
1530 AssertMsgFailed(("iPage=%d\n", iPage));
1531 rc = VERR_INTERNAL_ERROR;
1532 break;
1533 }
1534 }
1535 if (RT_SUCCESS(rc))
1536 rc = supdrvMemAdd(&Mem, pSession);
1537 if (RT_FAILURE(rc))
1538 {
1539 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
1540 AssertRC(rc2);
1541 }
1542 }
1543
1544 return rc;
1545}
1546
1547
1548/**
1549 * Unlocks the memory pointed to by pv.
1550 *
1551 * @returns IPRT status code.
1552 * @param pSession Session to which the memory was locked.
1553 * @param pvR3 Memory to unlock.
1554 */
1555SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1556{
1557 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1558 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1559#ifdef RT_OS_WINDOWS
1560 /*
1561 * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
1562 * allocations; ignore this call.
1563 */
1564 if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
1565 {
1566 LogFlow(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
1567 return VINF_SUCCESS;
1568 }
1569#endif
1570 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
1571}
1572
1573
/**
 * Allocates a chunk of page aligned memory with contiguous and fixed physical
 * backing.
 *
 * @returns IPRT status code.
 * @param   pSession    Session data.
 * @param   cPages      Number of pages to allocate; must be in the range [1, 255].
 * @param   ppvR0       Where to put the address of Ring-0 mapping the allocated memory.
 * @param   ppvR3       Where to put the address of Ring-3 mapping the allocated memory.
 * @param   pHCPhys     Where to put the physical address of allocated memory.
 */
SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
{
    int             rc;
    SUPDRVMEMREF    Mem = {0};
    LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    if (!ppvR3 || !ppvR0 || !pHCPhys)
    {
        Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
             pSession, ppvR0, ppvR3, pHCPhys));
        return VERR_INVALID_PARAMETER;

    }
    if (cPages < 1 || cPages >= 256)
    {
        Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Let IPRT do the job.
     * On failure every partially acquired resource is released in reverse order.
     */
    rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
    if (RT_SUCCESS(rc))
    {
        int rc2;
        rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
                               RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
        if (RT_SUCCESS(rc))
        {
            Mem.eType = MEMREF_TYPE_CONT;
            rc = supdrvMemAdd(&Mem, pSession);
            if (!rc)
            {
                *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
                *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
                /* The backing is physically contiguous, so the address of page 0
                   is the physical address of the whole chunk. */
                *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
                return 0;
            }

            rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
            AssertRC(rc2);
        }
        rc2 = RTR0MemObjFree(Mem.MemObj, false);
        AssertRC(rc2);
    }

    return rc;
}
1638
1639
1640/**
1641 * Frees memory allocated using SUPR0ContAlloc().
1642 *
1643 * @returns IPRT status code.
1644 * @param pSession The session to which the memory was allocated.
1645 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1646 */
1647SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1648{
1649 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1650 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1651 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1652}
1653
1654
1655/**
1656 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1657 *
1658 * The memory isn't zeroed.
1659 *
1660 * @returns IPRT status code.
1661 * @param pSession Session data.
1662 * @param cPages Number of pages to allocate.
1663 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1664 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1665 * @param paPages Where to put the physical addresses of allocated memory.
1666 */
1667SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1668{
1669 unsigned iPage;
1670 int rc;
1671 SUPDRVMEMREF Mem = {0};
1672 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1673
1674 /*
1675 * Validate input.
1676 */
1677 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1678 if (!ppvR3 || !ppvR0 || !paPages)
1679 {
1680 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1681 pSession, ppvR3, ppvR0, paPages));
1682 return VERR_INVALID_PARAMETER;
1683
1684 }
1685 if (cPages < 1 || cPages > 256)
1686 {
1687 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
1688 return VERR_INVALID_PARAMETER;
1689 }
1690
1691 /*
1692 * Let IPRT do the work.
1693 */
1694 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1695 if (RT_SUCCESS(rc))
1696 {
1697 int rc2;
1698 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1699 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1700 if (RT_SUCCESS(rc))
1701 {
1702 Mem.eType = MEMREF_TYPE_LOW;
1703 rc = supdrvMemAdd(&Mem, pSession);
1704 if (!rc)
1705 {
1706 for (iPage = 0; iPage < cPages; iPage++)
1707 {
1708 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1709 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", paPages[iPage]));
1710 }
1711 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1712 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1713 return 0;
1714 }
1715
1716 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1717 AssertRC(rc2);
1718 }
1719
1720 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1721 AssertRC(rc2);
1722 }
1723
1724 return rc;
1725}
1726
1727
1728/**
1729 * Frees memory allocated using SUPR0LowAlloc().
1730 *
1731 * @returns IPRT status code.
1732 * @param pSession The session to which the memory was allocated.
1733 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1734 */
1735SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1736{
1737 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1738 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1739 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1740}
1741
1742
1743
1744/**
1745 * Allocates a chunk of memory with both R0 and R3 mappings.
1746 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1747 *
1748 * @returns IPRT status code.
1749 * @param pSession The session to associated the allocation with.
1750 * @param cb Number of bytes to allocate.
1751 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1752 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1753 */
1754SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1755{
1756 int rc;
1757 SUPDRVMEMREF Mem = {0};
1758 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1759
1760 /*
1761 * Validate input.
1762 */
1763 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1764 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1765 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1766 if (cb < 1 || cb >= _4M)
1767 {
1768 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1769 return VERR_INVALID_PARAMETER;
1770 }
1771
1772 /*
1773 * Let IPRT do the work.
1774 */
1775 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1776 if (RT_SUCCESS(rc))
1777 {
1778 int rc2;
1779 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1780 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1781 if (RT_SUCCESS(rc))
1782 {
1783 Mem.eType = MEMREF_TYPE_MEM;
1784 rc = supdrvMemAdd(&Mem, pSession);
1785 if (!rc)
1786 {
1787 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1788 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1789 return VINF_SUCCESS;
1790 }
1791 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1792 AssertRC(rc2);
1793 }
1794
1795 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1796 AssertRC(rc2);
1797 }
1798
1799 return rc;
1800}
1801
1802
/**
 * Gets the physical addresses of memory allocated using SUPR0MemAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session to which the memory was allocated.
 * @param   uPtr        The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
 * @param   paPages     Where to store the physical addresses; the caller must
 *                      provide room for one entry per allocated page.
 */
SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
{
    PSUPDRVBUNDLE pBundle;
    RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_POINTER);
    AssertReturn(uPtr, VERR_INVALID_PARAMETER);

    /*
     * Search the session's bundles for a MEMREF_TYPE_MEM entry whose ring-0
     * address or ring-3 mapping address matches uPtr (either may be passed).
     */
    RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    pBundle->aMem[i].eType == MEMREF_TYPE_MEM
                    &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                         || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                             && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
                        )
                   )
                {
                    /* Found it - copy out the physical address of every page. */
                    const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
                    unsigned iPage;
                    for (iPage = 0; iPage < cPages; iPage++)
                    {
                        paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
                        paPages[iPage].uReserved = 0;
                    }
                    RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
    Log(("Failed to find %p!!!\n", (void *)uPtr));
    return VERR_INVALID_PARAMETER;
}
1860
1861
1862/**
1863 * Free memory allocated by SUPR0MemAlloc().
1864 *
1865 * @returns IPRT status code.
1866 * @param pSession The session owning the allocation.
1867 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1868 */
1869SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1870{
1871 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1872 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1873 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1874}
1875
1876
1877/**
1878 * Allocates a chunk of memory with only a R3 mappings.
1879 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1880 *
1881 * @returns IPRT status code.
1882 * @param pSession The session to associated the allocation with.
1883 * @param cPages The number of pages to allocate.
1884 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1885 * @param paPages Where to store the addresses of the pages. Optional.
1886 */
1887SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1888{
1889 int rc;
1890 SUPDRVMEMREF Mem = {0};
1891 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1892
1893 /*
1894 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
1895 */
1896 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1897 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1898 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1899 {
1900 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than 128MB.\n", cPages));
1901 return VERR_INVALID_PARAMETER;
1902 }
1903
1904 /*
1905 * Let IPRT do the work.
1906 */
1907 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1908 if (RT_SUCCESS(rc))
1909 {
1910 int rc2;
1911 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1912 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1913 if (RT_SUCCESS(rc))
1914 {
1915 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1916 rc = supdrvMemAdd(&Mem, pSession);
1917 if (!rc)
1918 {
1919 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1920 if (paPages)
1921 {
1922 uint32_t iPage = cPages;
1923 while (iPage-- > 0)
1924 {
1925 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1926 Assert(paPages[iPage] != NIL_RTHCPHYS);
1927 }
1928 }
1929 return VINF_SUCCESS;
1930 }
1931 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1932 AssertRC(rc2);
1933 }
1934
1935 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1936 AssertRC(rc2);
1937 }
1938 return rc;
1939}
1940
1941
1942#ifdef RT_OS_WINDOWS
1943/**
1944 * Check if the pages were locked by SUPR0PageAlloc
1945 *
1946 * This function will be removed along with the lock/unlock hacks when
1947 * we've cleaned up the ring-3 code properly.
1948 *
1949 * @returns boolean
1950 * @param pSession The session to which the memory was allocated.
1951 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1952 */
1953static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1954{
1955 PSUPDRVBUNDLE pBundle;
1956 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1957 LogFlow(("SUPR0PageIsLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1958
1959 /*
1960 * Search for the address.
1961 */
1962 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1963 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1964 {
1965 if (pBundle->cUsed > 0)
1966 {
1967 unsigned i;
1968 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1969 {
1970 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1971 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1972 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1973 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1974 {
1975 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1976 return true;
1977 }
1978 }
1979 }
1980 }
1981 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1982 return false;
1983}
1984
1985
1986/**
1987 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
1988 *
1989 * This function will be removed along with the lock/unlock hacks when
1990 * we've cleaned up the ring-3 code properly.
1991 *
1992 * @returns IPRT status code.
1993 * @param pSession The session to which the memory was allocated.
1994 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1995 * @param cPages Number of pages in paPages
1996 * @param paPages Where to store the physical addresses.
1997 */
1998static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1999{
2000 PSUPDRVBUNDLE pBundle;
2001 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2002 LogFlow(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
2003
2004 /*
2005 * Search for the address.
2006 */
2007 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2008 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2009 {
2010 if (pBundle->cUsed > 0)
2011 {
2012 unsigned i;
2013 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2014 {
2015 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
2016 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2017 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2018 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2019 {
2020 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
2021 cPages = RT_MIN(iPage, cPages);
2022 for (iPage = 0; iPage < cPages; iPage++)
2023 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
2024 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2025 return VINF_SUCCESS;
2026 }
2027 }
2028 }
2029 }
2030 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2031 return VERR_INVALID_PARAMETER;
2032}
2033#endif /* RT_OS_WINDOWS */
2034
2035
2036/**
2037 * Free memory allocated by SUPR0PageAlloc().
2038 *
2039 * @returns IPRT status code.
2040 * @param pSession The session owning the allocation.
2041 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2042 */
2043SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2044{
2045 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2046 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2047 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
2048}
2049
2050
2051/**
2052 * Maps the GIP into userspace and/or get the physical address of the GIP.
2053 *
2054 * @returns IPRT status code.
2055 * @param pSession Session to which the GIP mapping should belong.
2056 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
2057 * @param pHCPhysGip Where to store the physical address. (optional)
2058 *
2059 * @remark There is no reference counting on the mapping, so one call to this function
2060 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
2061 * and remove the session as a GIP user.
2062 */
2063SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
2064{
2065 int rc = 0;
2066 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2067 RTR3PTR pGip = NIL_RTR3PTR;
2068 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2069 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
2070
2071 /*
2072 * Validate
2073 */
2074 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2075 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
2076 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
2077
2078 RTSemFastMutexRequest(pDevExt->mtxGip);
2079 if (pDevExt->pGip)
2080 {
2081 /*
2082 * Map it?
2083 */
2084 if (ppGipR3)
2085 {
2086 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2087 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2088 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2089 if (RT_SUCCESS(rc))
2090 {
2091 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2092 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2093 }
2094 }
2095
2096 /*
2097 * Get physical address.
2098 */
2099 if (pHCPhysGip && !rc)
2100 HCPhys = pDevExt->HCPhysGip;
2101
2102 /*
2103 * Reference globally.
2104 */
2105 if (!pSession->fGipReferenced && !rc)
2106 {
2107 pSession->fGipReferenced = 1;
2108 pDevExt->cGipUsers++;
2109 if (pDevExt->cGipUsers == 1)
2110 {
2111 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2112 unsigned i;
2113
2114 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
2115
2116 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2117 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2118 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2119
2120 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2121 AssertRC(rc); rc = VINF_SUCCESS;
2122 }
2123 }
2124 }
2125 else
2126 {
2127 rc = SUPDRV_ERR_GENERAL_FAILURE;
2128 Log(("SUPR0GipMap: GIP is not available!\n"));
2129 }
2130 RTSemFastMutexRelease(pDevExt->mtxGip);
2131
2132 /*
2133 * Write returns.
2134 */
2135 if (pHCPhysGip)
2136 *pHCPhysGip = HCPhys;
2137 if (ppGipR3)
2138 *ppGipR3 = pGip;
2139
2140#ifdef DEBUG_DARWIN_GIP
2141 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2142#else
2143 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2144#endif
2145 return rc;
2146}
2147
2148
2149/**
2150 * Unmaps any user mapping of the GIP and terminates all GIP access
2151 * from this session.
2152 *
2153 * @returns IPRT status code.
2154 * @param pSession Session to which the GIP mapping should belong.
2155 */
2156SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2157{
2158 int rc = VINF_SUCCESS;
2159 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2160#ifdef DEBUG_DARWIN_GIP
2161 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2162 pSession,
2163 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2164 pSession->GipMapObjR3));
2165#else
2166 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
2167#endif
2168 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2169
2170 RTSemFastMutexRequest(pDevExt->mtxGip);
2171
2172 /*
2173 * Unmap anything?
2174 */
2175 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2176 {
2177 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2178 AssertRC(rc);
2179 if (RT_SUCCESS(rc))
2180 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2181 }
2182
2183 /*
2184 * Dereference global GIP.
2185 */
2186 if (pSession->fGipReferenced && !rc)
2187 {
2188 pSession->fGipReferenced = 0;
2189 if ( pDevExt->cGipUsers > 0
2190 && !--pDevExt->cGipUsers)
2191 {
2192 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
2193 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2194 }
2195 }
2196
2197 RTSemFastMutexRelease(pDevExt->mtxGip);
2198
2199 return rc;
2200}
2201
2202
2203/**
2204 * Adds a memory object to the session.
2205 *
2206 * @returns IPRT status code.
2207 * @param pMem Memory tracking structure containing the
2208 * information to track.
2209 * @param pSession The session.
2210 */
2211static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2212{
2213 PSUPDRVBUNDLE pBundle;
2214 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2215
2216 /*
2217 * Find free entry and record the allocation.
2218 */
2219 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2220 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2221 {
2222 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2223 {
2224 unsigned i;
2225 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2226 {
2227 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2228 {
2229 pBundle->cUsed++;
2230 pBundle->aMem[i] = *pMem;
2231 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2232 return VINF_SUCCESS;
2233 }
2234 }
2235 AssertFailed(); /* !!this can't be happening!!! */
2236 }
2237 }
2238 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2239
2240 /*
2241 * Need to allocate a new bundle.
2242 * Insert into the last entry in the bundle.
2243 */
2244 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2245 if (!pBundle)
2246 return VERR_NO_MEMORY;
2247
2248 /* take last entry. */
2249 pBundle->cUsed++;
2250 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2251
2252 /* insert into list. */
2253 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2254 pBundle->pNext = pSession->Bundle.pNext;
2255 pSession->Bundle.pNext = pBundle;
2256 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2257
2258 return VINF_SUCCESS;
2259}
2260
2261
2262/**
2263 * Releases a memory object referenced by pointer and type.
2264 *
2265 * @returns IPRT status code.
2266 * @param pSession Session data.
2267 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2268 * @param eType Memory type.
2269 */
2270static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2271{
2272 PSUPDRVBUNDLE pBundle;
2273 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2274
2275 /*
2276 * Validate input.
2277 */
2278 if (!uPtr)
2279 {
2280 Log(("Illegal address %p\n", (void *)uPtr));
2281 return VERR_INVALID_PARAMETER;
2282 }
2283
2284 /*
2285 * Search for the address.
2286 */
2287 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2288 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2289 {
2290 if (pBundle->cUsed > 0)
2291 {
2292 unsigned i;
2293 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2294 {
2295 if ( pBundle->aMem[i].eType == eType
2296 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2297 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2298 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2299 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2300 )
2301 {
2302 /* Make a copy of it and release it outside the spinlock. */
2303 SUPDRVMEMREF Mem = pBundle->aMem[i];
2304 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2305 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2306 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2307 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2308
2309 if (Mem.MapObjR3)
2310 {
2311 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2312 AssertRC(rc); /** @todo figure out how to handle this. */
2313 }
2314 if (Mem.MemObj)
2315 {
2316 int rc = RTR0MemObjFree(Mem.MemObj, false);
2317 AssertRC(rc); /** @todo figure out how to handle this. */
2318 }
2319 return VINF_SUCCESS;
2320 }
2321 }
2322 }
2323 }
2324 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2325 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2326 return VERR_INVALID_PARAMETER;
2327}
2328
2329
2330#ifdef VBOX_WITH_IDT_PATCHING
2331/**
2332 * Install IDT for the current CPU.
2333 *
2334 * @returns One of the following IPRT status codes:
2335 * @retval VINF_SUCCESS on success.
2336 * @retval VERR_IDT_FAILED.
2337 * @retval VERR_NO_MEMORY.
2338 * @param pDevExt The device extension.
2339 * @param pSession The session data.
2340 * @param pReq The request.
2341 */
2342static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2343{
2344 PSUPDRVPATCHUSAGE pUsagePre;
2345 PSUPDRVPATCH pPatchPre;
2346 RTIDTR Idtr;
2347 PSUPDRVPATCH pPatch;
2348 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2349 LogFlow(("supdrvIOCtl_IdtInstall\n"));
2350
2351 /*
2352 * Preallocate entry for this CPU cause we don't wanna do
2353 * that inside the spinlock!
2354 */
2355 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2356 if (!pUsagePre)
2357 return VERR_NO_MEMORY;
2358
2359 /*
2360 * Take the spinlock and see what we need to do.
2361 */
2362 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2363
2364 /* check if we already got a free patch. */
2365 if (!pDevExt->pIdtPatchesFree)
2366 {
2367 /*
2368 * Allocate a patch - outside the spinlock of course.
2369 */
2370 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2371
2372 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2373 if (!pPatchPre)
2374 return VERR_NO_MEMORY;
2375
2376 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2377 }
2378 else
2379 {
2380 pPatchPre = pDevExt->pIdtPatchesFree;
2381 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2382 }
2383
2384 /* look for matching patch entry */
2385 ASMGetIDTR(&Idtr);
2386 pPatch = pDevExt->pIdtPatches;
2387 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2388 pPatch = pPatch->pNext;
2389
2390 if (!pPatch)
2391 {
2392 /*
2393 * Create patch.
2394 */
2395 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2396 if (pPatch)
2397 pPatchPre = NULL; /* mark as used. */
2398 }
2399 else
2400 {
2401 /*
2402 * Simply increment patch usage.
2403 */
2404 pPatch->cUsage++;
2405 }
2406
2407 if (pPatch)
2408 {
2409 /*
2410 * Increment and add if need be the session usage record for this patch.
2411 */
2412 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2413 while (pUsage && pUsage->pPatch != pPatch)
2414 pUsage = pUsage->pNext;
2415
2416 if (!pUsage)
2417 {
2418 /*
2419 * Add usage record.
2420 */
2421 pUsagePre->cUsage = 1;
2422 pUsagePre->pPatch = pPatch;
2423 pUsagePre->pNext = pSession->pPatchUsage;
2424 pSession->pPatchUsage = pUsagePre;
2425 pUsagePre = NULL; /* mark as used. */
2426 }
2427 else
2428 {
2429 /*
2430 * Increment usage count.
2431 */
2432 pUsage->cUsage++;
2433 }
2434 }
2435
2436 /* free patch - we accumulate them for paranoid saftly reasons. */
2437 if (pPatchPre)
2438 {
2439 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2440 pDevExt->pIdtPatchesFree = pPatchPre;
2441 }
2442
2443 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2444
2445 /*
2446 * Free unused preallocated buffers.
2447 */
2448 if (pUsagePre)
2449 RTMemFree(pUsagePre);
2450
2451 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2452
2453 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2454}
2455
2456
2457/**
2458 * This creates a IDT patch entry.
2459 * If the first patch being installed it'll also determin the IDT entry
2460 * to use.
2461 *
2462 * @returns pPatch on success.
2463 * @returns NULL on failure.
2464 * @param pDevExt Pointer to globals.
2465 * @param pPatch Patch entry to use.
2466 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2467 * successful return.
2468 * @remark Call must be owning the SUPDRVDEVEXT::Spinlock!
2469 */
2470static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2471{
2472 RTIDTR Idtr;
2473 PSUPDRVIDTE paIdt;
2474 LogFlow(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));
2475
2476 /*
2477 * Get IDT.
2478 */
2479 ASMGetIDTR(&Idtr);
2480 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2481 /*
2482 * Recent Linux kernels can be configured to 1G user /3G kernel.
2483 */
2484 if ((uintptr_t)paIdt < 0x40000000)
2485 {
2486 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2487 return NULL;
2488 }
2489
2490 if (!pDevExt->u8Idt)
2491 {
2492 /*
2493 * Test out the alternatives.
2494 *
2495 * At the moment we do not support chaining thus we ASSUME that one of
2496 * these 48 entries is unused (which is not a problem on Win32 and
2497 * Linux to my knowledge).
2498 */
2499 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2500 * combined with gathering info about which guest system call gates we can hook up directly. */
2501 unsigned i;
2502 uint8_t u8Idt = 0;
2503 static uint8_t au8Ints[] =
2504 {
2505#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on linux (ef is IPI,
2506 * local apic timer, or some other frequently fireing thing). */
2507 0xef, 0xee, 0xed, 0xec,
2508#endif
2509 0xeb, 0xea, 0xe9, 0xe8,
2510 0xdf, 0xde, 0xdd, 0xdc,
2511 0x7b, 0x7a, 0x79, 0x78,
2512 0xbf, 0xbe, 0xbd, 0xbc,
2513 };
2514#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2515 static int s_iWobble = 0;
2516 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2517 Log2(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2518 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2519 {
2520 Log2(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
2521 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2522 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2523 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2524 }
2525#endif
2526 /* look for entries which are not present or otherwise unused. */
2527 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2528 {
2529 u8Idt = au8Ints[i];
2530 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2531 && ( !paIdt[u8Idt].u1Present
2532 || paIdt[u8Idt].u5Type2 == 0))
2533 break;
2534 u8Idt = 0;
2535 }
2536 if (!u8Idt)
2537 {
2538 /* try again, look for a compatible entry .*/
2539 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2540 {
2541 u8Idt = au8Ints[i];
2542 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2543 && paIdt[u8Idt].u1Present
2544 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2545 && !(paIdt[u8Idt].u16SegSel & 3))
2546 break;
2547 u8Idt = 0;
2548 }
2549 if (!u8Idt)
2550 {
2551 Log(("Failed to find appropirate IDT entry!!\n"));
2552 return NULL;
2553 }
2554 }
2555 pDevExt->u8Idt = u8Idt;
2556 LogFlow(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
2557 }
2558
2559 /*
2560 * Prepare the patch
2561 */
2562 memset(pPatch, 0, sizeof(*pPatch));
2563 pPatch->pvIdt = paIdt;
2564 pPatch->cUsage = 1;
2565 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2566 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2567 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2568 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2569#ifdef RT_ARCH_AMD64
2570 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2571#endif
2572 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2573#ifdef RT_ARCH_AMD64
2574 pPatch->ChangedIdt.u3IST = 0;
2575 pPatch->ChangedIdt.u5Reserved = 0;
2576#else /* x86 */
2577 pPatch->ChangedIdt.u5Reserved = 0;
2578 pPatch->ChangedIdt.u3Type1 = 0;
2579#endif /* x86 */
2580 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2581 pPatch->ChangedIdt.u2DPL = 3;
2582 pPatch->ChangedIdt.u1Present = 1;
2583
2584 /*
2585 * Generate the patch code.
2586 */
2587 {
2588#ifdef RT_ARCH_AMD64
2589 union
2590 {
2591 uint8_t *pb;
2592 uint32_t *pu32;
2593 uint64_t *pu64;
2594 } u, uFixJmp, uFixCall, uNotNested;
2595 u.pb = &pPatch->auCode[0];
2596
2597 /* check the cookie */
2598 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2599 *u.pu32++ = pDevExt->u32Cookie;
2600
2601 *u.pb++ = 0x74; // jz @VBoxCall
2602 *u.pb++ = 2;
2603
2604 /* jump to forwarder code. */
2605 *u.pb++ = 0xeb;
2606 uFixJmp = u;
2607 *u.pb++ = 0xfe;
2608
2609 // @VBoxCall:
2610 *u.pb++ = 0x0f; // swapgs
2611 *u.pb++ = 0x01;
2612 *u.pb++ = 0xf8;
2613
2614 /*
2615 * Call VMMR0Entry
2616 * We don't have to push the arguments here, but we have top
2617 * reserve some stack space for the interrupt forwarding.
2618 */
2619# ifdef RT_OS_WINDOWS
2620 *u.pb++ = 0x50; // push rax ; alignment filler.
2621 *u.pb++ = 0x41; // push r8 ; uArg
2622 *u.pb++ = 0x50;
2623 *u.pb++ = 0x52; // push rdx ; uOperation
2624 *u.pb++ = 0x51; // push rcx ; pVM
2625# else
2626 *u.pb++ = 0x51; // push rcx ; alignment filler.
2627 *u.pb++ = 0x52; // push rdx ; uArg
2628 *u.pb++ = 0x56; // push rsi ; uOperation
2629 *u.pb++ = 0x57; // push rdi ; pVM
2630# endif
2631
2632 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2633 *u.pb++ = 0x15;
2634 uFixCall = u;
2635 *u.pu32++ = 0;
2636
2637 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2638 *u.pb++ = 0x81;
2639 *u.pb++ = 0xc4;
2640 *u.pu32++ = 0x20;
2641
2642 *u.pb++ = 0x0f; // swapgs
2643 *u.pb++ = 0x01;
2644 *u.pb++ = 0xf8;
2645
2646 /* Return to R3. */
2647 uNotNested = u;
2648 *u.pb++ = 0x48; // iretq
2649 *u.pb++ = 0xcf;
2650
2651 while ((uintptr_t)u.pb & 0x7) // align 8
2652 *u.pb++ = 0xcc;
2653
2654 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2655 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2656 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2657 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2658
2659 /* stub entry. */ // StubVMMR0Entry:
2660 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2661 *u.pb++ = 0x33; // xor eax, eax
2662 *u.pb++ = 0xc0;
2663
2664 *u.pb++ = 0x48; // dec rax
2665 *u.pb++ = 0xff;
2666 *u.pb++ = 0xc8;
2667
2668 *u.pb++ = 0xc3; // ret
2669
2670 /* forward to the original handler using a retf. */
2671 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2672
2673 *u.pb++ = 0x68; // push <target cs>
2674 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2675
2676 *u.pb++ = 0x68; // push <low target rip>
2677 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2678 ? (uint32_t)(uintptr_t)uNotNested.pb
2679 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2680 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2681
2682 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2683 *u.pb++ = 0x44;
2684 *u.pb++ = 0x24;
2685 *u.pb++ = 0x04;
2686 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2687 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2688 : pPatch->SavedIdt.u32OffsetTop;
2689
2690 *u.pb++ = 0x48; // retf ; does this require prefix?
2691 *u.pb++ = 0xcb;
2692
2693#else /* RT_ARCH_X86 */
2694
2695 union
2696 {
2697 uint8_t *pb;
2698 uint16_t *pu16;
2699 uint32_t *pu32;
2700 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2701 u.pb = &pPatch->auCode[0];
2702
2703 /* check the cookie */
2704 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2705 *u.pb++ = 0xfe;
2706 *u.pu32++ = pDevExt->u32Cookie;
2707
2708 *u.pb++ = 0x74; // jz VBoxCall
2709 uFixJmp = u;
2710 *u.pb++ = 0;
2711
2712 /* jump (far) to the original handler / not-nested-stub. */
2713 *u.pb++ = 0xea; // jmp far NotNested
2714 uFixJmpNotNested = u;
2715 *u.pu32++ = 0;
2716 *u.pu16++ = 0;
2717
2718 /* save selector registers. */ // VBoxCall:
2719 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2720 *u.pb++ = 0x0f; // push fs
2721 *u.pb++ = 0xa0;
2722
2723 *u.pb++ = 0x1e; // push ds
2724
2725 *u.pb++ = 0x06; // push es
2726
2727 /* call frame */
2728 *u.pb++ = 0x51; // push ecx
2729
2730 *u.pb++ = 0x52; // push edx
2731
2732 *u.pb++ = 0x50; // push eax
2733
2734 /* load ds, es and perhaps fs before call. */
2735 *u.pb++ = 0xb8; // mov eax, KernelDS
2736 *u.pu32++ = ASMGetDS();
2737
2738 *u.pb++ = 0x8e; // mov ds, eax
2739 *u.pb++ = 0xd8;
2740
2741 *u.pb++ = 0x8e; // mov es, eax
2742 *u.pb++ = 0xc0;
2743
2744#ifdef RT_OS_WINDOWS
2745 *u.pb++ = 0xb8; // mov eax, KernelFS
2746 *u.pu32++ = ASMGetFS();
2747
2748 *u.pb++ = 0x8e; // mov fs, eax
2749 *u.pb++ = 0xe0;
2750#endif
2751
2752 /* do the call. */
2753 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2754 uFixCall = u;
2755 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2756 *u.pu32++ = 0xfffffffb;
2757
2758 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2759 *u.pb++ = 0xc4;
2760 *u.pb++ = 0x0c;
2761
2762 /* restore selector registers. */
2763 *u.pb++ = 0x07; // pop es
2764 //
2765 *u.pb++ = 0x1f; // pop ds
2766
2767 *u.pb++ = 0x0f; // pop fs
2768 *u.pb++ = 0xa1;
2769
2770 uNotNested = u; // NotNested:
2771 *u.pb++ = 0xcf; // iretd
2772
2773 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2774 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2775 *u.pb++ = 0x33; // xor eax, eax
2776 *u.pb++ = 0xc0;
2777
2778 *u.pb++ = 0x48; // dec eax
2779
2780 *u.pb++ = 0xc3; // ret
2781
2782 /* Fixup the VMMR0Entry call. */
2783 if (pDevExt->pvVMMR0)
2784 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2785 else
2786 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2787
2788 /* Fixup the forward / nested far jump. */
2789 if (!pPatch->SavedIdt.u5Type2)
2790 {
2791 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2792 *uFixJmpNotNested.pu16++ = ASMGetCS();
2793 }
2794 else
2795 {
2796 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2797 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2798 }
2799#endif /* RT_ARCH_X86 */
2800 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2801#if 0
2802 /* dump the patch code */
2803 Log2(("patch code: %p\n", &pPatch->auCode[0]));
2804 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2805 Log2(("0x%02x,\n", *uFixCall.pb));
2806#endif
2807 }
2808
2809 /*
2810 * Install the patch.
2811 */
2812 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2813 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));
2814
2815 /*
2816 * Link in the patch.
2817 */
2818 pPatch->pNext = pDevExt->pIdtPatches;
2819 pDevExt->pIdtPatches = pPatch;
2820
2821 return pPatch;
2822}
2823
2824
2825/**
2826 * Removes the sessions IDT references.
2827 * This will uninstall our IDT patch if we left unreferenced.
2828 *
2829 * @returns VINF_SUCCESS.
2830 * @param pDevExt Device globals.
2831 * @param pSession Session data.
2832 */
2833static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2834{
2835 PSUPDRVPATCHUSAGE pUsage;
2836 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2837 LogFlow(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2838
2839 /*
2840 * Take the spinlock.
2841 */
2842 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2843
2844 /*
2845 * Walk usage list, removing patches as their usage count reaches zero.
2846 */
2847 pUsage = pSession->pPatchUsage;
2848 while (pUsage)
2849 {
2850 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2851 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2852 else
2853 pUsage->pPatch->cUsage -= pUsage->cUsage;
2854
2855 /* next */
2856 pUsage = pUsage->pNext;
2857 }
2858
2859 /*
2860 * Empty the usage chain and we're done inside the spinlock.
2861 */
2862 pUsage = pSession->pPatchUsage;
2863 pSession->pPatchUsage = NULL;
2864
2865 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2866
2867 /*
2868 * Free usage entries.
2869 */
2870 while (pUsage)
2871 {
2872 void *pvToFree = pUsage;
2873 pUsage->cUsage = 0;
2874 pUsage->pPatch = NULL;
2875 pUsage = pUsage->pNext;
2876 RTMemFree(pvToFree);
2877 }
2878
2879 return VINF_SUCCESS;
2880}
2881
2882
2883/**
2884 * Remove one patch.
2885 *
2886 * Worker for supdrvIOCtl_IdtRemoveAll.
2887 *
2888 * @param pDevExt Device globals.
2889 * @param pPatch Patch entry to remove.
2890 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2891 */
2892static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2893{
2894 LogFlow(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2895
2896 pPatch->cUsage = 0;
2897
2898 /*
2899 * If the IDT entry was changed it have to kick around for ever!
2900 * This will be attempted freed again, perhaps next time we'll succeed :-)
2901 */
2902 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2903 {
2904 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2905 return;
2906 }
2907
2908 /*
2909 * Unlink it.
2910 */
2911 if (pDevExt->pIdtPatches != pPatch)
2912 {
2913 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2914 while (pPatchPrev)
2915 {
2916 if (pPatchPrev->pNext == pPatch)
2917 {
2918 pPatchPrev->pNext = pPatch->pNext;
2919 break;
2920 }
2921 pPatchPrev = pPatchPrev->pNext;
2922 }
2923 Assert(!pPatchPrev);
2924 }
2925 else
2926 pDevExt->pIdtPatches = pPatch->pNext;
2927 pPatch->pNext = NULL;
2928
2929
2930 /*
2931 * Verify and restore the IDT.
2932 */
2933 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2934 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2935 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2936
2937 /*
2938 * Put it in the free list.
2939 * (This free list stuff is to calm my paranoia.)
2940 */
2941 pPatch->pvIdt = NULL;
2942 pPatch->pIdtEntry = NULL;
2943
2944 pPatch->pNext = pDevExt->pIdtPatchesFree;
2945 pDevExt->pIdtPatchesFree = pPatch;
2946}
2947
2948
/**
 * Write to an IDT entry.
 *
 * @param   pvIdtEntry      Where to write.
 * @param   pNewIDTEntry    What to write.
 */
static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
{
    RTR0UINTREG uCR0;
    RTR0UINTREG uFlags;

    /*
     * On SMP machines (P4 hyperthreading included) we must perform a
     * 64-bit locked write when updating the IDT entry.
     *
     * The F00F bugfix for linux (and probably other OSes) causes
     * the IDT to be pointing to a readonly mapping. We get around that
     * by temporarily turning off WP. Since we're inside a spinlock at this
     * point, interrupts are disabled and there isn't any way the WP bit
     * flipping can cause any trouble.
     */

    /* Save & Clear interrupt flag; Save & clear WP. */
    uFlags = ASMGetFlags();
    ASMSetFlags(uFlags & ~(RTR0UINTREG)(1 << 9)); /*X86_EFL_IF*/
    Assert(!(ASMGetFlags() & (1 << 9)));
    uCR0 = ASMGetCR0();
    ASMSetCR0(uCR0 & ~(RTR0UINTREG)(1 << 16)); /*X86_CR0_WP*/

    /* Update IDT Entry - a single atomic exchange of the full descriptor
       (16 bytes on AMD64, 8 bytes on x86) so no CPU ever sees a torn entry. */
#ifdef RT_ARCH_AMD64
    ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
#else
    ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
#endif

    /* Restore CR0 & Flags - reverse order of the save/clear above. */
    ASMSetCR0(uCR0);
    ASMSetFlags(uFlags);
}
2989#endif /* VBOX_WITH_IDT_PATCHING */
2990
2991
2992/**
2993 * Opens an image. If it's the first time it's opened the call must upload
2994 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
2995 *
2996 * This is the 1st step of the loading.
2997 *
2998 * @returns IPRT status code.
2999 * @param pDevExt Device globals.
3000 * @param pSession Session data.
3001 * @param pReq The open request.
3002 */
3003static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
3004{
3005 PSUPDRVLDRIMAGE pImage;
3006 unsigned cb;
3007 void *pv;
3008 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
3009
3010 /*
3011 * Check if we got an instance of the image already.
3012 */
3013 RTSemFastMutexRequest(pDevExt->mtxLdr);
3014 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
3015 {
3016 if (!strcmp(pImage->szName, pReq->u.In.szName))
3017 {
3018 pImage->cUsage++;
3019 pReq->u.Out.pvImageBase = pImage->pvImage;
3020 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
3021 supdrvLdrAddUsage(pSession, pImage);
3022 RTSemFastMutexRelease(pDevExt->mtxLdr);
3023 return VINF_SUCCESS;
3024 }
3025 }
3026 /* (not found - add it!) */
3027
3028 /*
3029 * Allocate memory.
3030 */
3031 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
3032 pv = RTMemExecAlloc(cb);
3033 if (!pv)
3034 {
3035 RTSemFastMutexRelease(pDevExt->mtxLdr);
3036 Log(("supdrvIOCtl_LdrOpen: RTMemExecAlloc(%u) failed\n", cb));
3037 return VERR_NO_MEMORY;
3038 }
3039
3040 /*
3041 * Setup and link in the LDR stuff.
3042 */
3043 pImage = (PSUPDRVLDRIMAGE)pv;
3044 pImage->pvImage = RT_ALIGN_P(pImage + 1, 32);
3045 pImage->cbImage = pReq->u.In.cbImage;
3046 pImage->pfnModuleInit = NULL;
3047 pImage->pfnModuleTerm = NULL;
3048 pImage->uState = SUP_IOCTL_LDR_OPEN;
3049 pImage->cUsage = 1;
3050 strcpy(pImage->szName, pReq->u.In.szName);
3051
3052 pImage->pNext = pDevExt->pLdrImages;
3053 pDevExt->pLdrImages = pImage;
3054
3055 supdrvLdrAddUsage(pSession, pImage);
3056
3057 pReq->u.Out.pvImageBase = pImage->pvImage;
3058 pReq->u.Out.fNeedsLoading = true;
3059 RTSemFastMutexRelease(pDevExt->mtxLdr);
3060 return VINF_SUCCESS;
3061}
3062
3063
/**
 * Loads the image bits.
 *
 * This is the 2nd step of the loading. The image must have been opened with
 * supdrvIOCtl_LdrOpen() by the same session first.
 *
 * @returns IPRT status code.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));

    /*
     * Find the ldr image via this session's usage records.
     * The loader mutex is held across the whole operation; every exit
     * path below must release it.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;
    /* The size must match what was given to SUP_IOCTL_LDR_OPEN. */
    if (pImage->cbImage != pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
        return VERR_INVALID_HANDLE;
    }
    /* Only an OPEN image may be loaded; LOAD means someone else beat us to it.
       NOTE(review): returns old-style SUPDRV_ERR_ALREADY_LOADED here while
       other paths use VERR_* codes - confirm callers expect this. */
    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return SUPDRV_ERR_ALREADY_LOADED;
    }
    /*
     * Validate the requested entry points: non-NULL and within the image.
     * (The subtraction-based range check also rejects addresses below the
     * image base since the difference wraps to a huge unsigned value.)
     */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;
        case SUPLDRLOADEP_VMMR0:
            if (    !pReq->u.In.EP.VMMR0.pvVMMR0
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
            {
                RTSemFastMutexRelease(pDevExt->mtxLdr);
                Log(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
                     pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                     pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
                return VERR_INVALID_PARAMETER;
            }
            if (    (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
                ||  (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
                ||  (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
            {
                RTSemFastMutexRelease(pDevExt->mtxLdr);
                Log(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p is NULL!\n",
                     pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                     pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
                return VERR_INVALID_PARAMETER;
            }
            break;
        default:
            RTSemFastMutexRelease(pDevExt->mtxLdr);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }
    /* Optional module init/term callbacks must also lie within the image. */
    if (    pReq->u.In.pfnModuleInit
        &&  (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
             pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
        return VERR_INVALID_PARAMETER;
    }
    if (    pReq->u.In.pfnModuleTerm
        &&  (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
             pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Copy the memory and record the symbol/string table layout.
     */
    /* no need to do try/except as this is a buffered request. */
    memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
    pImage->uState = SUP_IOCTL_LDR_LOAD;
    pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
    pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
    pImage->offSymbols = pReq->u.In.offSymbols;
    pImage->cSymbols = pReq->u.In.cSymbols;
    pImage->offStrTab = pReq->u.In.offStrTab;
    pImage->cbStrTab = pReq->u.In.cbStrTab;

    /*
     * Update any entry points.
     */
    switch (pReq->u.In.eEPType)
    {
        default:
        case SUPLDRLOADEP_NOTHING:
            rc = VINF_SUCCESS;
            break;
        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                  pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
            break;
    }

    /*
     * On success call the module initialization. If that fails for the VMMR0
     * image, roll back the R0 entry point registration.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        rc = pImage->pfnModuleInit();
        if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetR0EP(pDevExt);
    }

    /* On any failure drop the image back to the OPEN state so it can be retried. */
    if (rc)
        pImage->uState = SUP_IOCTL_LDR_OPEN;

    RTSemFastMutexRelease(pDevExt->mtxLdr);
    return rc;
}
3204
3205
/**
 * Frees a previously loaded (prep'ed) image.
 *
 * @returns IPRT status code.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
{
    int rc;
    PSUPDRVLDRUSAGE pUsagePrev;
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));

    /*
     * Find the ldr image in this session's usage list, keeping track of
     * the predecessor so we can unlink the record later.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    pUsagePrev = NULL;
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    {
        pUsagePrev = pUsage;
        pUsage = pUsage->pNext;
    }
    if (!pUsage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Check if we can remove anything. Removal only happens when this is
     * the last reference either globally (image) or for this session (usage).
     */
    rc = VINF_SUCCESS;
    pImage = pUsage->pImage;
    if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
    {
        /*
         * Check if there are any objects with destructors in the image, if
         * so leave it for the session cleanup routine so we get a chance to
         * clean things up in the right order and not leave them all dangling.
         */
        RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        if (pImage->cUsage <= 1)
        {
            /* Last global reference: scan ALL objects in the device. */
            PSUPDRVOBJ pObj;
            for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
                if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
                {
                    rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
                    break;
                }
        }
        else
        {
            /* Last session reference only: scan just this session's objects. */
            PSUPDRVUSAGE pGenUsage;
            for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
                if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
                {
                    rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
                    break;
                }
        }
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        if (rc == VINF_SUCCESS)
        {
            /* unlink it */
            if (pUsagePrev)
                pUsagePrev->pNext = pUsage->pNext;
            else
                pSession->pLdrUsage = pUsage->pNext;

            /* free it */
            pUsage->pImage = NULL;
            pUsage->pNext = NULL;
            RTMemFree(pUsage);

            /*
             * Derefrence the image.
             */
            if (pImage->cUsage <= 1)
                supdrvLdrFree(pDevExt, pImage);
            else
                pImage->cUsage--;
        }
        else
            /* Dangling objects: leave usage record and image in place for
               session cleanup. */
            Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
    }
    else
    {
        /*
         * Dereference both image and usage.
         */
        pImage->cUsage--;
        pUsage->cUsage--;
    }

    /* NOTE(review): VINF_SUCCESS is returned even in the dangling-objects
       case (the free is merely deferred) - confirm callers don't need to
       see the VERR_SHARING_VIOLATION. */
    RTSemFastMutexRelease(pDevExt->mtxLdr);
    return VINF_SUCCESS;
}
3311
3312
3313/**
3314 * Gets the address of a symbol in an open image.
3315 *
3316 * @returns 0 on success.
3317 * @returns SUPDRV_ERR_* on failure.
3318 * @param pDevExt Device globals.
3319 * @param pSession Session data.
3320 * @param pReq The request buffer.
3321 */
3322static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3323{
3324 PSUPDRVLDRIMAGE pImage;
3325 PSUPDRVLDRUSAGE pUsage;
3326 uint32_t i;
3327 PSUPLDRSYM paSyms;
3328 const char *pchStrings;
3329 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3330 void *pvSymbol = NULL;
3331 int rc = VERR_GENERAL_FAILURE;
3332 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3333
3334 /*
3335 * Find the ldr image.
3336 */
3337 RTSemFastMutexRequest(pDevExt->mtxLdr);
3338 pUsage = pSession->pLdrUsage;
3339 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3340 pUsage = pUsage->pNext;
3341 if (!pUsage)
3342 {
3343 RTSemFastMutexRelease(pDevExt->mtxLdr);
3344 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3345 return VERR_INVALID_HANDLE;
3346 }
3347 pImage = pUsage->pImage;
3348 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3349 {
3350 unsigned uState = pImage->uState;
3351 RTSemFastMutexRelease(pDevExt->mtxLdr);
3352 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3353 return VERR_ALREADY_LOADED;
3354 }
3355
3356 /*
3357 * Search the symbol string.
3358 */
3359 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3360 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3361 for (i = 0; i < pImage->cSymbols; i++)
3362 {
3363 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3364 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3365 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3366 {
3367 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3368 rc = VINF_SUCCESS;
3369 break;
3370 }
3371 }
3372 RTSemFastMutexRelease(pDevExt->mtxLdr);
3373 pReq->u.Out.pvSymbol = pvSymbol;
3374 return rc;
3375}
3376
3377
/**
 * Updates the IDT patches to point to the specified VMM R0 entry
 * point (i.e. VMMR0Enter()).
 *
 * @returns IPRT status code.
 * @param   pDevExt             Device globals.
 * @param   pSession            Session data.
 * @param   pVMMR0              VMMR0 image handle.
 * @param   pvVMMR0EntryInt     VMMR0EntryInt address.
 * @param   pvVMMR0EntryFast    VMMR0EntryFast address.
 * @param   pvVMMR0EntryEx      VMMR0EntryEx address.
 * @remark  Caller must own the loader mutex.
 */
static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
{
    int rc = VINF_SUCCESS;
    LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));


    /*
     * Check if not yet set.
     */
    if (!pDevExt->pvVMMR0)
    {
#ifdef VBOX_WITH_IDT_PATCHING
        PSUPDRVPATCH pPatch;
#endif

        /*
         * Set the entry points and patch every installed IDT stub so that
         * it jumps to the new VMMR0 entry.
         */
        pDevExt->pvVMMR0 = pvVMMR0;
        pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
        pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
        pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
#ifdef VBOX_WITH_IDT_PATCHING
        for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
        {
# ifdef RT_ARCH_AMD64
            /* AMD64: absolute 64-bit target address. */
            ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
# else /* RT_ARCH_X86 */
            /* x86: rel32 displacement from the end of the fixup operand. */
            ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
                             (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
# endif
        }
#endif /* VBOX_WITH_IDT_PATCHING */
    }
    else
    {
        /*
         * Return failure or success depending on whether the values match or not.
         */
        if (    pDevExt->pvVMMR0 != pvVMMR0
            ||  (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
            ||  (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
            ||  (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
        {
            AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
            rc = VERR_INVALID_PARAMETER;
        }
    }
    return rc;
}
3441
3442
/**
 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
 *
 * Clears the cached entry points and redirects every installed IDT patch
 * back to its built-in stub code.
 *
 * @param   pDevExt     Device globals.
 */
static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
{
#ifdef VBOX_WITH_IDT_PATCHING
    PSUPDRVPATCH pPatch;
#endif

    pDevExt->pvVMMR0 = NULL;
    pDevExt->pfnVMMR0EntryInt = NULL;
    pDevExt->pfnVMMR0EntryFast = NULL;
    pDevExt->pfnVMMR0EntryEx = NULL;

#ifdef VBOX_WITH_IDT_PATCHING
    for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
    {
# ifdef RT_ARCH_AMD64
        /* AMD64: absolute 64-bit address of the stub. */
        ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
                         (uint64_t)&pPatch->auCode[pPatch->offStub]);
# else /* RT_ARCH_X86 */
        /* x86: rel32 displacement to the stub from the end of the fixup operand. */
        ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
                         (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
# endif
    }
#endif /* VBOX_WITH_IDT_PATCHING */
}
3472
3473
3474/**
3475 * Adds a usage reference in the specified session of an image.
3476 *
3477 * @param pSession Session in question.
3478 * @param pImage Image which the session is using.
3479 */
3480static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3481{
3482 PSUPDRVLDRUSAGE pUsage;
3483 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3484
3485 /*
3486 * Referenced it already?
3487 */
3488 pUsage = pSession->pLdrUsage;
3489 while (pUsage)
3490 {
3491 if (pUsage->pImage == pImage)
3492 {
3493 pUsage->cUsage++;
3494 return;
3495 }
3496 pUsage = pUsage->pNext;
3497 }
3498
3499 /*
3500 * Allocate new usage record.
3501 */
3502 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3503 Assert(pUsage);
3504 if (pUsage)
3505 {
3506 pUsage->cUsage = 1;
3507 pUsage->pImage = pImage;
3508 pUsage->pNext = pSession->pLdrUsage;
3509 pSession->pLdrUsage = pUsage;
3510 }
3511 /* ignore errors... */
3512}
3513
3514
3515/**
3516 * Frees a load image.
3517 *
3518 * @param pDevExt Pointer to device extension.
3519 * @param pImage Pointer to the image we're gonna free.
3520 * This image must exit!
3521 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3522 */
3523static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3524{
3525 PSUPDRVLDRIMAGE pImagePrev;
3526 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
3527
3528 /* find it - arg. should've used doubly linked list. */
3529 Assert(pDevExt->pLdrImages);
3530 pImagePrev = NULL;
3531 if (pDevExt->pLdrImages != pImage)
3532 {
3533 pImagePrev = pDevExt->pLdrImages;
3534 while (pImagePrev->pNext != pImage)
3535 pImagePrev = pImagePrev->pNext;
3536 Assert(pImagePrev->pNext == pImage);
3537 }
3538
3539 /* unlink */
3540 if (pImagePrev)
3541 pImagePrev->pNext = pImage->pNext;
3542 else
3543 pDevExt->pLdrImages = pImage->pNext;
3544
3545 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3546 if (pDevExt->pvVMMR0 == pImage->pvImage)
3547 supdrvLdrUnsetR0EP(pDevExt);
3548
3549 /* check for objects with destructors in this image. (Shouldn't happen.) */
3550 if (pDevExt->pObjs)
3551 {
3552 unsigned cObjs = 0;
3553 PSUPDRVOBJ pObj;
3554 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3555 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3556 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3557 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3558 {
3559 pObj->pfnDestructor = NULL;
3560 cObjs++;
3561 }
3562 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3563 if (cObjs)
3564 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3565 }
3566
3567 /* call termination function if fully loaded. */
3568 if ( pImage->pfnModuleTerm
3569 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3570 {
3571 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3572 pImage->pfnModuleTerm();
3573 }
3574
3575 /* free the image */
3576 pImage->cUsage = 0;
3577 pImage->pNext = 0;
3578 pImage->uState = SUP_IOCTL_LDR_FREE;
3579 RTMemExecFree(pImage);
3580}
3581
3582
3583/**
3584 * Gets the current paging mode of the CPU and stores in in pOut.
3585 */
3586static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3587{
3588 SUPPAGINGMODE enmMode;
3589
3590 RTR0UINTREG cr0 = ASMGetCR0();
3591 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3592 enmMode = SUPPAGINGMODE_INVALID;
3593 else
3594 {
3595 RTR0UINTREG cr4 = ASMGetCR4();
3596 uint32_t fNXEPlusLMA = 0;
3597 if (cr4 & X86_CR4_PAE)
3598 {
3599 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3600 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3601 {
3602 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3603 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3604 fNXEPlusLMA |= RT_BIT(0);
3605 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3606 fNXEPlusLMA |= RT_BIT(1);
3607 }
3608 }
3609
3610 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3611 {
3612 case 0:
3613 enmMode = SUPPAGINGMODE_32_BIT;
3614 break;
3615
3616 case X86_CR4_PGE:
3617 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3618 break;
3619
3620 case X86_CR4_PAE:
3621 enmMode = SUPPAGINGMODE_PAE;
3622 break;
3623
3624 case X86_CR4_PAE | RT_BIT(0):
3625 enmMode = SUPPAGINGMODE_PAE_NX;
3626 break;
3627
3628 case X86_CR4_PAE | X86_CR4_PGE:
3629 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3630 break;
3631
3632 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3633 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3634 break;
3635
3636 case RT_BIT(1) | X86_CR4_PAE:
3637 enmMode = SUPPAGINGMODE_AMD64;
3638 break;
3639
3640 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3641 enmMode = SUPPAGINGMODE_AMD64_NX;
3642 break;
3643
3644 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3645 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3646 break;
3647
3648 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3649 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3650 break;
3651
3652 default:
3653 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3654 enmMode = SUPPAGINGMODE_INVALID;
3655 break;
3656 }
3657 }
3658 return enmMode;
3659}
3660
3661
/**
 * Creates the GIP.
 *
 * @returns negative errno.
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
{
    PSUPGLOBALINFOPAGE pGip;
    RTHCPHYS HCPhysGip;
    uint32_t u32SystemResolution;
    uint32_t u32Interval;
    int rc;

    LogFlow(("supdrvGipCreate:\n"));

    /* assert order */
    Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
    Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
    Assert(!pDevExt->pGipTimer);

    /*
     * Allocate a suitable page with a default kernel mapping.
     */
    rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
    if (RT_FAILURE(rc))
    {
        OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
        return rc;
    }
    pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
    HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);

#if 0 /** @todo Disabled this as we didn't used to do it before and causes unnecessary stress on laptops.
       * It only applies to Windows and should probably revisited later, if possible made part of the
       * timer code (return min granularity in RTTimerGetSystemGranularity and set it in RTTimerStart). */
    /*
     * Try bump up the system timer resolution.
     * The more interrupts the better...
     */
    if (   RT_SUCCESS(RTTimerRequestSystemGranularity(  488281 /* 2048 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(  500000 /* 2000 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 1953125 /*  512 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 2000000 /*  500 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /*  256 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /*  250 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /*  128 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /*  100 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /*   64 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /*   32 HZ */, &u32SystemResolution))
       )
    {
        Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
        pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
    }
#endif

    /*
     * Find a reasonable update interval (a multiple of the system timer
     * granularity, at least 10 ms) and initialize the structure.
     */
    u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
    while (u32Interval < 10000000 /* 10 ms */)
        u32Interval += u32SystemResolution;

    supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);

    /*
     * Create the timer.
     * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
     */
    if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    {
        rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
        if (rc == VERR_NOT_SUPPORTED)
        {
            OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
            pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
        }
    }
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipSyncTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        /* Async mode needs CPU on/offline notifications to migrate the GIP master. */
        if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
            rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
        if (RT_SUCCESS(rc))
        {
            /*
             * We're good.
             */
            dprintf(("supdrvGipCreate: %ld ns interval.\n", (long)u32Interval));
            return VINF_SUCCESS;
        }

        OSDBGPRINT(("supdrvGipCreate: failed register MP event notfication. rc=%d\n", rc));
    }
    else
    {
        OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %ld ns interval. rc=%d\n", (long)u32Interval, rc));
        Assert(!pDevExt->pGipTimer);
    }
    /* Undo whatever was set up before the failure. */
    supdrvGipDestroy(pDevExt);
    return rc;
}
3768
3769
/**
 * Terminates the GIP.
 *
 * Safe to call on partially initialized state (used as failure cleanup by
 * supdrvGipCreate too): each resource is checked before being torn down.
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Invalidate the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }

    /*
     * Destroy the timer and free the GIP memory object.
     * (Timer first so no callback can touch the page after it's gone.)
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, release the system timer resolution request if one succeeded.
     */
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}
3817
3818
3819/**
3820 * Timer callback function sync GIP mode.
3821 * @param pTimer The timer.
3822 * @param pvUser The device extension.
3823 */
3824static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3825{
3826 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3827 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3828}
3829
3830
3831/**
3832 * Timer callback function for async GIP mode.
3833 * @param pTimer The timer.
3834 * @param pvUser The device extension.
3835 */
3836static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3837{
3838 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3839 RTCPUID idCpu = RTMpCpuId();
3840 uint64_t NanoTS = RTTimeSystemNanoTS();
3841
3842 /** @todo reset the transaction number and whatnot when iTick == 1. */
3843 if (pDevExt->idGipMaster == idCpu)
3844 supdrvGipUpdate(pDevExt->pGip, NanoTS);
3845 else
3846 supdrvGipUpdatePerCpu(pDevExt->pGip, NanoTS, ASMGetApicId());
3847}
3848
3849
/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU.
 *
 * @param   enmEvent    The event.
 * @param   idCpu       The cpu it applies to.
 * @param   pvUser      Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster;
        ASMAtomicReadSize(&pDevExt->idGipMaster, &idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The master is going offline: pick any other online CPU as
             * the new GIP master.
             */
            bool fIgnored;
            unsigned i;
            RTCPUID idNewGipMaster = NIL_RTCPUID;
            RTCPUSET OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
            {
                RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                if (    RTCpuSetIsMember(&OnlineCpus, idCurCpu)
                    &&  idCurCpu != idGipMaster)
                {
                    idNewGipMaster = idCurCpu;
                    break;
                }
            }

            /* CmpXchg so we only take over if nobody else changed the master
               since we read it above; the result is deliberately ignored. */
            dprintf(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}
3895
3896
3897/**
3898 * Initializes the GIP data.
3899 *
3900 * @returns IPRT status code.
3901 * @param pDevExt Pointer to the device instance data.
3902 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3903 * @param HCPhys The physical address of the GIP.
3904 * @param u64NanoTS The current nanosecond timestamp.
3905 * @param uUpdateHz The update freqence.
3906 */
3907int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3908{
3909 unsigned i;
3910#ifdef DEBUG_DARWIN_GIP
3911 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3912#else
3913 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3914#endif
3915
3916 /*
3917 * Initialize the structure.
3918 */
3919 memset(pGip, 0, PAGE_SIZE);
3920 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3921 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3922 pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
3923 pGip->u32UpdateHz = uUpdateHz;
3924 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3925 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3926
3927 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3928 {
3929 pGip->aCPUs[i].u32TransactionId = 2;
3930 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3931 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3932
3933 /*
3934 * We don't know the following values until we've executed updates.
3935 * So, we'll just insert very high values.
3936 */
3937 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3938 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3939 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3940 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3941 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3942 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3943 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3944 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3945 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3946 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3947 }
3948
3949 /*
3950 * Link it to the device extension.
3951 */
3952 pDevExt->pGip = pGip;
3953 pDevExt->HCPhysGip = HCPhys;
3954 pDevExt->cGipUsers = 0;
3955
3956 return VINF_SUCCESS;
3957}
3958
3959
3960/**
3961 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
3962 *
3963 * @param idCpu Ignored.
3964 * @param pvUser1 Where to put the TSC.
3965 * @param pvUser2 Ignored.
3966 */
3967static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3968{
3969#if 1
3970 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
3971#else
3972 *(uint64_t *)pvUser1 = ASMReadTSC();
3973#endif
3974}
3975
3976
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) never runs backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation. This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Where to store the smallest observed TSC delta between
 *                      successive per-CPU reads (rough RTMpOnSpecific overhead).
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
bool VBOXCALL supdrvDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing. We don't bother taking TSC rollover into account.
     */
    RTCPUSET    CpuSet;
    int         iLastCpu = RTCpuLastIndex(RTMpGetSet(&CpuSet));
    int         iCpu;
    int         cLoops = 8;
    bool        fAsync = false;
    int         rc;                       /* set by the first RTMpOnSpecific call below */
    uint64_t    offMax = 0;
    uint64_t    offMin = ~(uint64_t)0;
    uint64_t    PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu <= iLastCpu; iCpu++)
        {
            uint64_t CurTsc;
            /* Sample the TSC on the target CPU; the worker stores it atomically into CurTsc. */
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
            if (RT_SUCCESS(rc))
            {
                /* A non-increasing reading relative to the previous CPU means the
                   TSCs drift apart -> async mode required; stop measuring. */
                if (CurTsc <= PrevTsc)
                {
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    dprintf(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                             iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time, where PrevTsc was read locally). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    dprintf2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                break;                    /* per-CPU dispatch unsupported on this host; give up. */
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the inner loop (drift detected or not supported) -> stop. */
        if (iCpu <= iLastCpu)
            break;
    }

    *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    dprintf(("supdrvDetermineAsyncTsc: returns %d; iLastCpu=%d rc=%d offMin=%llx offMax=%llx\n",
             fAsync, iLastCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
4055
4056
4057/**
4058 * Determin the GIP TSC mode.
4059 *
4060 * @returns The most suitable TSC mode.
4061 * @param pDevExt Pointer to the device instance data.
4062 */
4063static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
4064{
4065 /*
4066 * On SMP we're faced with two problems:
4067 * (1) There might be a skew between the CPU, so that cpu0
4068 * returns a TSC that is sligtly different from cpu1.
4069 * (2) Power management (and other things) may cause the TSC
4070 * to run at a non-constant speed, and cause the speed
4071 * to be different on the cpus. This will result in (1).
4072 *
4073 * So, on SMP systems we'll have to select the ASYNC update method
4074 * if there are symphoms of these problems.
4075 */
4076 if (RTMpGetCount() > 1)
4077 {
4078 uint32_t uEAX, uEBX, uECX, uEDX;
4079 uint64_t u64DiffCoresIgnored;
4080
4081 /* Permit the user and/or the OS specfic bits to force async mode. */
4082 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
4083 return SUPGIPMODE_ASYNC_TSC;
4084
4085 /* Try check for current differences between the cpus. */
4086 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
4087 return SUPGIPMODE_ASYNC_TSC;
4088
4089 /*
4090 * If the CPU supports power management and is an AMD one we
4091 * won't trust it unless it has the TscInvariant bit is set.
4092 */
4093 /* Check for "AuthenticAMD" */
4094 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
4095 if ( uEAX >= 1
4096 && uEBX == X86_CPUID_VENDOR_AMD_EBX
4097 && uECX == X86_CPUID_VENDOR_AMD_ECX
4098 && uEDX == X86_CPUID_VENDOR_AMD_EDX)
4099 {
4100 /* Check for APM support and that TscInvariant is cleared. */
4101 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
4102 if (uEAX >= 0x80000007)
4103 {
4104 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
4105 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
4106 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
4107 return SUPGIPMODE_ASYNC_TSC;
4108 }
4109 }
4110 }
4111 return SUPGIPMODE_SYNC_TSC;
4112}
4113
4114
4115/**
4116 * Invalidates the GIP data upon termination.
4117 *
4118 * @param pGip Pointer to the read-write kernel mapping of the GIP.
4119 */
4120void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
4121{
4122 unsigned i;
4123 pGip->u32Magic = 0;
4124 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
4125 {
4126 pGip->aCPUs[i].u64NanoTS = 0;
4127 pGip->aCPUs[i].u64TSC = 0;
4128 pGip->aCPUs[i].iTSCHistoryHead = 0;
4129 }
4130}
4131
4132
/**
 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
 * updates all the per cpu data except the transaction id.
 *
 * Derives the TSC delta since the previous update, maintains the 8-entry
 * delta history, and recomputes the smoothed update interval and CPU
 * frequency estimate that user land reads from the GIP.
 *
 * @param   pGip        The GIP.
 * @param   pGipCpu     Pointer to the per cpu data.
 * @param   u64NanoTS   The current time stamp.
 */
static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    uint64_t u64TSC;
    uint64_t u64TSCDelta;
    uint32_t u32UpdateIntervalTSC;
    uint32_t u32UpdateIntervalTSCSlack;
    unsigned iTSCHistoryHead;
    uint64_t u64CpuHz;

    /*
     * Update the NanoTS.
     */
    ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
    u64TSC = ASMReadTSC();
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);

    /* A delta that doesn't fit in 32 bits is bogus (presumably a long gap
       between updates — TODO confirm); fall back on the current interval
       estimate and record the incident. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * TSC History. The head index wraps within the fixed 8-entry ring.
     */
    Assert(ELEMENTS(pGipCpu->au32TSCHistory) == 8);

    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     */
    if (pGip->u32UpdateHz >= 1000)
    {
        /* High update rate: average the whole 8-entry history
           (two 4-entry partial sums, then their mean). */
        uint32_t u32;
        u32  = pGipCpu->au32TSCHistory[0];
        u32 += pGipCpu->au32TSCHistory[1];
        u32 += pGipCpu->au32TSCHistory[2];
        u32 += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value choosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        /* Medium update rate: average of the current delta and the previous one. */
        u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* value choosen on a 2GHz thinkpad running windows */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        /* Low update rate: just use the latest delta. */
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz = ticks per update interval * update frequency.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
    ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
4224
4225
4226/**
4227 * Updates the GIP.
4228 *
4229 * @param pGip Pointer to the GIP.
4230 * @param u64NanoTS The current nanosecond timesamp.
4231 */
4232void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
4233{
4234 /*
4235 * Determin the relevant CPU data.
4236 */
4237 PSUPGIPCPU pGipCpu;
4238 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
4239 pGipCpu = &pGip->aCPUs[0];
4240 else
4241 {
4242 unsigned iCpu = ASMGetApicId();
4243 if (RT_LIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
4244 return;
4245 pGipCpu = &pGip->aCPUs[iCpu];
4246 }
4247
4248 /*
4249 * Start update transaction.
4250 */
4251 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4252 {
4253 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
4254 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4255 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4256 pGipCpu->cErrors++;
4257 return;
4258 }
4259
4260 /*
4261 * Recalc the update frequency every 0x800th time.
4262 */
4263 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
4264 {
4265 if (pGip->u64NanoTSLastUpdateHz)
4266 {
4267#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4268 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4269 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4270 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4271 {
4272 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4273 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4274 }
4275#endif
4276 }
4277 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4278 }
4279
4280 /*
4281 * Update the data.
4282 */
4283 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4284
4285 /*
4286 * Complete transaction.
4287 */
4288 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4289}
4290
4291
4292/**
4293 * Updates the per cpu GIP data for the calling cpu.
4294 *
4295 * @param pGip Pointer to the GIP.
4296 * @param u64NanoTS The current nanosecond timesamp.
4297 * @param iCpu The CPU index.
4298 */
4299void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4300{
4301 PSUPGIPCPU pGipCpu;
4302
4303 if (RT_LIKELY(iCpu < RT_ELEMENTS(pGip->aCPUs)))
4304 {
4305 pGipCpu = &pGip->aCPUs[iCpu];
4306
4307 /*
4308 * Start update transaction.
4309 */
4310 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4311 {
4312 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4313 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4314 pGipCpu->cErrors++;
4315 return;
4316 }
4317
4318 /*
4319 * Update the data.
4320 */
4321 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4322
4323 /*
4324 * Complete transaction.
4325 */
4326 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4327 }
4328}
4329
4330
#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
/**
 * Stub function for non-debug builds.
 * @returns NULL (no default logger instance).
 */
RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
{
    return NULL;
}

/**
 * Stub function for non-debug builds.
 * @returns NULL (no release logger instance).
 */
RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
{
    return NULL;
}

/**
 * Stub function for non-debug builds.
 * @returns 0.
 */
RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
{
    return 0;
}

/**
 * Stub function for non-debug builds. Discards the log request.
 */
RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
{
}

/**
 * Stub function for non-debug builds. Discards the log request.
 */
RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
{
}

/**
 * Stub function for non-debug builds. Discards the log request.
 */
RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
{
}

/**
 * Stub function for non-debug builds. Discards the log request.
 */
RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
{
}

/**
 * Stub function for non-debug builds. Discards the log request.
 */
RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
{
}
#endif /* !DEBUG */
4388
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette