VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDRVShared.c@ 9656

Last change on this file since 9656 was 9621, checked in by vboxsync, 17 years ago

The TSC checks are now fully done in the generic code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 151.0 KB
Line 
1/* $Revision: 9621 $ */
2/** @file
3 * VirtualBox Support Driver - Shared code.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include "SUPDRV.h"
36#ifndef PAGE_SHIFT
37# include <iprt/param.h>
38#endif
39#include <iprt/alloc.h>
40#include <iprt/semaphore.h>
41#include <iprt/spinlock.h>
42#include <iprt/thread.h>
43#include <iprt/process.h>
44#include <iprt/mp.h>
45#include <iprt/cpuset.h>
46#include <iprt/log.h>
47/* VBox/x86.h not compatible with the Linux kernel sources */
48#ifdef RT_OS_LINUX
49# define X86_CPUID_VENDOR_AMD_EBX 0x68747541
50# define X86_CPUID_VENDOR_AMD_ECX 0x444d4163
51# define X86_CPUID_VENDOR_AMD_EDX 0x69746e65
52#else
53# include <VBox/x86.h>
54#endif
55
56/*
57 * Logging assignments:
58 * Log - useful stuff, like failures.
59 * LogFlow - program flow, except the really noisy bits.
60 * Log2 - Cleanup and IDTE
61 * Log3 - Loader flow noise.
62 * Log4 - Call VMMR0 flow noise.
63 * Log5 - Native yet-to-be-defined noise.
64 * Log6 - Native ioctl flow noise.
65 *
66 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
67 * instantiation in log-vbox.c(pp).
68 */
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
/*
 * Register, MSR and CPUID bit definitions taken from x86.h.
 *
 * x86.h clashes with the Linux headers, hence the duplication here.  Every
 * name is #undef'ed first so that whatever definition is already in effect
 * gets replaced by ours.
 */

/* CR0 bits. */
#undef  X86_CR0_PE
#define X86_CR0_PE                              RT_BIT(0)
#undef  X86_CR0_PG
#define X86_CR0_PG                              RT_BIT(31)

/* CR4 bits. */
#undef  X86_CR4_PAE
#define X86_CR4_PAE                             RT_BIT(5)
#undef  X86_CR4_PGE
#define X86_CR4_PGE                             RT_BIT(7)

/* The EFER MSR and its bits. */
#undef  MSR_K6_EFER
#define MSR_K6_EFER                             0xc0000080
#undef  MSR_K6_EFER_LMA
#define MSR_K6_EFER_LMA                         RT_BIT(10)
#undef  MSR_K6_EFER_NXE
#define MSR_K6_EFER_NXE                         RT_BIT(11)

/* AMD CPUID feature bits (EDX). */
#undef  X86_CPUID_AMD_FEATURE_EDX_NX
#define X86_CPUID_AMD_FEATURE_EDX_NX            RT_BIT(20)
#undef  X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE     RT_BIT(29)
93
94
/**
 * The frequency by which the u32UpdateHz and u32UpdateIntervalNS GIP members
 * are recalculated.
 *
 * @remarks The value must be a power of 2.
 */
#define GIP_UPDATEHZ_RECALC_FREQ                0x800
98
/**
 * Validates a session pointer.
 *
 * The pointer has to pass VALID_PTR and the session it points to has to
 * carry the BIRD_INV cookie (the value supdrvCreateSession stores in
 * u32Cookie).
 *
 * @returns true/false accordingly.
 * @param   pSession    The session.  The argument is expanded twice, so it
 *                      must not have side effects.  It is parenthesised in
 *                      the member access so that expression arguments
 *                      (e.g. "paSessions + i") work as expected.
 */
#define SUP_IS_SESSION_VALID(pSession)  \
    (   VALID_PTR(pSession) \
     && (pSession)->u32Cookie == BIRD_INV)
108
109
110/*******************************************************************************
111* Global Variables *
112*******************************************************************************/
113/**
114 * Array of the R0 SUP API.
115 */
116static SUPFUNC g_aFunctions[] =
117{
118 /* name function */
119 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
120 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
121 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
122 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
123 { "SUPR0LockMem", (void *)SUPR0LockMem },
124 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
125 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
126 { "SUPR0ContFree", (void *)SUPR0ContFree },
127 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
128 { "SUPR0LowFree", (void *)SUPR0LowFree },
129 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
130 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
131 { "SUPR0MemFree", (void *)SUPR0MemFree },
132 { "SUPR0PageAlloc", (void *)SUPR0PageAlloc },
133 { "SUPR0PageFree", (void *)SUPR0PageFree },
134 { "SUPR0Printf", (void *)SUPR0Printf },
135 { "RTMemAlloc", (void *)RTMemAlloc },
136 { "RTMemAllocZ", (void *)RTMemAllocZ },
137 { "RTMemFree", (void *)RTMemFree },
138 /*{ "RTMemDup", (void *)RTMemDup },*/
139 { "RTMemRealloc", (void *)RTMemRealloc },
140 { "RTR0MemObjAllocLow", (void *)RTR0MemObjAllocLow },
141 { "RTR0MemObjAllocPage", (void *)RTR0MemObjAllocPage },
142 { "RTR0MemObjAllocPhys", (void *)RTR0MemObjAllocPhys },
143 { "RTR0MemObjAllocPhysNC", (void *)RTR0MemObjAllocPhysNC },
144 { "RTR0MemObjAllocCont", (void *)RTR0MemObjAllocCont },
145 { "RTR0MemObjLockUser", (void *)RTR0MemObjLockUser },
146 { "RTR0MemObjMapKernel", (void *)RTR0MemObjMapKernel },
147 { "RTR0MemObjMapUser", (void *)RTR0MemObjMapUser },
148 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
149 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
150 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
151 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
152 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
153 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
154/* These don't work yet on linux - use fast mutexes!
155 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
156 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
157 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
158 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
159*/
160 { "RTProcSelf", (void *)RTProcSelf },
161 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
162 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
163 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
164 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
165 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
166 { "RTSemEventCreate", (void *)RTSemEventCreate },
167 { "RTSemEventSignal", (void *)RTSemEventSignal },
168 { "RTSemEventWait", (void *)RTSemEventWait },
169 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
170 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
171 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
172 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
173 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
174 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
175 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
176 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
177 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
178 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
179 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
180 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
181 { "RTSpinlockAcquireNoInts", (void *)RTSpinlockAcquireNoInts },
182 { "RTSpinlockReleaseNoInts", (void *)RTSpinlockReleaseNoInts },
183 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
184 { "RTThreadSleep", (void *)RTThreadSleep },
185 { "RTThreadYield", (void *)RTThreadYield },
186#if 0 /* Thread APIs, Part 2. */
187 { "RTThreadSelf", (void *)RTThreadSelf },
188 { "RTThreadCreate", (void *)RTThreadCreate },
189 { "RTThreadGetNative", (void *)RTThreadGetNative },
190 { "RTThreadWait", (void *)RTThreadWait },
191 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
192 { "RTThreadGetName", (void *)RTThreadGetName },
193 { "RTThreadSelfName", (void *)RTThreadSelfName },
194 { "RTThreadGetType", (void *)RTThreadGetType },
195 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
196 { "RTThreadUserReset", (void *)RTThreadUserReset },
197 { "RTThreadUserWait", (void *)RTThreadUserWait },
198 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
199#endif
200 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
201 { "RTMpCpuId", (void *)RTMpCpuId },
202 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
203 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
204 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
205 { "RTMpGetCount", (void *)RTMpGetCount },
206 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
207 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
208 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
209 { "RTMpGetSet", (void *)RTMpGetSet },
210 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
211 { "RTMpOnAll", (void *)RTMpOnAll },
212 { "RTMpOnOthers", (void *)RTMpOnOthers },
213 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
214 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
215 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
216 { "RTLogLogger", (void *)RTLogLogger },
217 { "RTLogLoggerEx", (void *)RTLogLoggerEx },
218 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
219 { "RTLogPrintf", (void *)RTLogPrintf },
220 { "RTLogPrintfV", (void *)RTLogPrintfV },
221 { "AssertMsg1", (void *)AssertMsg1 },
222 { "AssertMsg2", (void *)AssertMsg2 },
223};
224
225
226/*******************************************************************************
227* Internal Functions *
228*******************************************************************************/
229static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
230static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
231#ifdef VBOX_WITH_IDT_PATCHING
232static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
233static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
234static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
235static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
236static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
237#endif /* VBOX_WITH_IDT_PATCHING */
238static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
239static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
240static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
241static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
242static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
243static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
244static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
245static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
246static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
247static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt);
248#ifdef RT_OS_WINDOWS
249static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
250static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
251#endif
252#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
253static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
254static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
255static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
256static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
257static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
258#endif
259
260
261/**
262 * Initializes the device extentsion structure.
263 *
264 * @returns IPRT status code.
265 * @param pDevExt The device extension to initialize.
266 */
267int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
268{
269 /*
270 * Initialize it.
271 */
272 int rc;
273 memset(pDevExt, 0, sizeof(*pDevExt));
274 rc = RTSpinlockCreate(&pDevExt->Spinlock);
275 if (!rc)
276 {
277 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
278 if (!rc)
279 {
280 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
281 if (!rc)
282 {
283#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
284 rc = supdrvGipCreate(pDevExt);
285 if (RT_SUCCESS(rc))
286 {
287 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
288 return VINF_SUCCESS;
289 }
290#else
291 pDevExt->u32Cookie = BIRD;
292 return VINF_SUCCESS;
293#endif
294 }
295 RTSemFastMutexDestroy(pDevExt->mtxLdr);
296 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
297 }
298 RTSpinlockDestroy(pDevExt->Spinlock);
299 pDevExt->Spinlock = NIL_RTSPINLOCK;
300 }
301 return rc;
302}
303
304
305/**
306 * Delete the device extension (e.g. cleanup members).
307 *
308 * @param pDevExt The device extension to delete.
309 */
310void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
311{
312#ifdef VBOX_WITH_IDT_PATCHING
313 PSUPDRVPATCH pPatch;
314#endif
315 PSUPDRVOBJ pObj;
316 PSUPDRVUSAGE pUsage;
317
318 /*
319 * Kill mutexes and spinlocks.
320 */
321 RTSemFastMutexDestroy(pDevExt->mtxGip);
322 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
323 RTSemFastMutexDestroy(pDevExt->mtxLdr);
324 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
325 RTSpinlockDestroy(pDevExt->Spinlock);
326 pDevExt->Spinlock = NIL_RTSPINLOCK;
327
328 /*
329 * Free lists.
330 */
331#ifdef VBOX_WITH_IDT_PATCHING
332 /* patches */
333 /** @todo make sure we don't uninstall patches which has been patched by someone else. */
334 pPatch = pDevExt->pIdtPatchesFree;
335 pDevExt->pIdtPatchesFree = NULL;
336 while (pPatch)
337 {
338 void *pvFree = pPatch;
339 pPatch = pPatch->pNext;
340 RTMemExecFree(pvFree);
341 }
342#endif /* VBOX_WITH_IDT_PATCHING */
343
344 /* objects. */
345 pObj = pDevExt->pObjs;
346#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
347 Assert(!pObj); /* (can trigger on forced unloads) */
348#endif
349 pDevExt->pObjs = NULL;
350 while (pObj)
351 {
352 void *pvFree = pObj;
353 pObj = pObj->pNext;
354 RTMemFree(pvFree);
355 }
356
357 /* usage records. */
358 pUsage = pDevExt->pUsageFree;
359 pDevExt->pUsageFree = NULL;
360 while (pUsage)
361 {
362 void *pvFree = pUsage;
363 pUsage = pUsage->pNext;
364 RTMemFree(pvFree);
365 }
366
367#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
368 /* kill the GIP */
369 supdrvGipDestroy(pDevExt);
370#endif
371}
372
373
374/**
375 * Create session.
376 *
377 * @returns IPRT status code.
378 * @param pDevExt Device extension.
379 * @param ppSession Where to store the pointer to the session data.
380 */
381int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
382{
383 /*
384 * Allocate memory for the session data.
385 */
386 int rc = VERR_NO_MEMORY;
387 PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
388 if (pSession)
389 {
390 /* Initialize session data. */
391 rc = RTSpinlockCreate(&pSession->Spinlock);
392 if (!rc)
393 {
394 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
395 pSession->pDevExt = pDevExt;
396 pSession->u32Cookie = BIRD_INV;
397 /*pSession->pLdrUsage = NULL;
398 pSession->pPatchUsage = NULL;
399 pSession->pUsage = NULL;
400 pSession->pGip = NULL;
401 pSession->fGipReferenced = false;
402 pSession->Bundle.cUsed = 0 */
403
404 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
405 return VINF_SUCCESS;
406 }
407
408 RTMemFree(pSession);
409 *ppSession = NULL;
410 Log(("Failed to create spinlock, rc=%d!\n", rc));
411 }
412
413 return rc;
414}
415
416
417/**
418 * Shared code for cleaning up a session.
419 *
420 * @param pDevExt Device extension.
421 * @param pSession Session data.
422 * This data will be freed by this routine.
423 */
424void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
425{
426 /*
427 * Cleanup the session first.
428 */
429 supdrvCleanupSession(pDevExt, pSession);
430
431 /*
432 * Free the rest of the session stuff.
433 */
434 RTSpinlockDestroy(pSession->Spinlock);
435 pSession->Spinlock = NIL_RTSPINLOCK;
436 pSession->pDevExt = NULL;
437 RTMemFree(pSession);
438 LogFlow(("supdrvCloseSession: returns\n"));
439}
440
441
442/**
443 * Shared code for cleaning up a session (but not quite freeing it).
444 *
445 * This is primarily intended for MAC OS X where we have to clean up the memory
446 * stuff before the file handle is closed.
447 *
448 * @param pDevExt Device extension.
449 * @param pSession Session data.
450 * This data will be freed by this routine.
451 */
452void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
453{
454 PSUPDRVBUNDLE pBundle;
455 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
456
457 /*
458 * Remove logger instances related to this session.
459 */
460 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
461
462#ifdef VBOX_WITH_IDT_PATCHING
463 /*
464 * Uninstall any IDT patches installed for this session.
465 */
466 supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
467#endif
468
469 /*
470 * Release object references made in this session.
471 * In theory there should be noone racing us in this session.
472 */
473 Log2(("release objects - start\n"));
474 if (pSession->pUsage)
475 {
476 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
477 PSUPDRVUSAGE pUsage;
478 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
479
480 while ((pUsage = pSession->pUsage) != NULL)
481 {
482 PSUPDRVOBJ pObj = pUsage->pObj;
483 pSession->pUsage = pUsage->pNext;
484
485 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
486 if (pUsage->cUsage < pObj->cUsage)
487 {
488 pObj->cUsage -= pUsage->cUsage;
489 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
490 }
491 else
492 {
493 /* Destroy the object and free the record. */
494 if (pDevExt->pObjs == pObj)
495 pDevExt->pObjs = pObj->pNext;
496 else
497 {
498 PSUPDRVOBJ pObjPrev;
499 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
500 if (pObjPrev->pNext == pObj)
501 {
502 pObjPrev->pNext = pObj->pNext;
503 break;
504 }
505 Assert(pObjPrev);
506 }
507 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
508
509 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
510 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
511 if (pObj->pfnDestructor)
512 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
513 RTMemFree(pObj);
514 }
515
516 /* free it and continue. */
517 RTMemFree(pUsage);
518
519 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
520 }
521
522 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
523 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
524 }
525 Log2(("release objects - done\n"));
526
527 /*
528 * Release memory allocated in the session.
529 *
530 * We do not serialize this as we assume that the application will
531 * not allocated memory while closing the file handle object.
532 */
533 Log2(("freeing memory:\n"));
534 pBundle = &pSession->Bundle;
535 while (pBundle)
536 {
537 PSUPDRVBUNDLE pToFree;
538 unsigned i;
539
540 /*
541 * Check and unlock all entries in the bundle.
542 */
543 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
544 {
545 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
546 {
547 int rc;
548 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
549 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
550 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
551 {
552 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
553 AssertRC(rc); /** @todo figure out how to handle this. */
554 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
555 }
556 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
557 AssertRC(rc); /** @todo figure out how to handle this. */
558 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
559 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
560 }
561 }
562
563 /*
564 * Advance and free previous bundle.
565 */
566 pToFree = pBundle;
567 pBundle = pBundle->pNext;
568
569 pToFree->pNext = NULL;
570 pToFree->cUsed = 0;
571 if (pToFree != &pSession->Bundle)
572 RTMemFree(pToFree);
573 }
574 Log2(("freeing memory - done\n"));
575
576 /*
577 * Loaded images needs to be dereferenced and possibly freed up.
578 */
579 RTSemFastMutexRequest(pDevExt->mtxLdr);
580 Log2(("freeing images:\n"));
581 if (pSession->pLdrUsage)
582 {
583 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
584 pSession->pLdrUsage = NULL;
585 while (pUsage)
586 {
587 void *pvFree = pUsage;
588 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
589 if (pImage->cUsage > pUsage->cUsage)
590 pImage->cUsage -= pUsage->cUsage;
591 else
592 supdrvLdrFree(pDevExt, pImage);
593 pUsage->pImage = NULL;
594 pUsage = pUsage->pNext;
595 RTMemFree(pvFree);
596 }
597 }
598 RTSemFastMutexRelease(pDevExt->mtxLdr);
599 Log2(("freeing images - done\n"));
600
601 /*
602 * Unmap the GIP.
603 */
604 Log2(("umapping GIP:\n"));
605#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
606 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
607#else
608 if (pSession->pGip)
609#endif
610 {
611 SUPR0GipUnmap(pSession);
612#ifndef USE_NEW_OS_INTERFACE_FOR_GIP
613 pSession->pGip = NULL;
614#endif
615 pSession->fGipReferenced = 0;
616 }
617 Log2(("umapping GIP - done\n"));
618}
619
620
621/**
622 * Fast path I/O Control worker.
623 *
624 * @returns VBox status code that should be passed down to ring-3 unchanged.
625 * @param uIOCtl Function number.
626 * @param pDevExt Device extention.
627 * @param pSession Session data.
628 */
629int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
630{
631 int rc;
632
633 /*
634 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
635 */
636 if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
637 {
638 switch (uIOCtl)
639 {
640 case SUP_IOCTL_FAST_DO_RAW_RUN:
641 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
642 break;
643 case SUP_IOCTL_FAST_DO_HWACC_RUN:
644 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
645 break;
646 case SUP_IOCTL_FAST_DO_NOP:
647 rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
648 break;
649 default:
650 rc = VERR_INTERNAL_ERROR;
651 break;
652 }
653 }
654 else
655 rc = VERR_INTERNAL_ERROR;
656
657 return rc;
658}
659
660
/**
 * Helper for supdrvIOCtl. Checks whether pszStr contains any of the
 * characters in pszChars.
 *
 * We would use strpbrk here if this function would be contained in the RedHat
 * kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @return  1 if pszStr does contain any character of pszChars, 0 otherwise
 *          (which includes either string being empty).
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;

    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        const char *pszSet;

        /* Compare the current character against every member of the set. */
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *pszCur)
                return 1;
    }
    return 0;
}
684
685
686/**
687 * I/O Control worker.
688 *
689 * @returns 0 on success.
690 * @returns VERR_INVALID_PARAMETER if the request is invalid.
691 *
692 * @param uIOCtl Function number.
693 * @param pDevExt Device extension.
694 * @param pSession Session data.
695 * @param pReqHdr The request header.
696 */
697int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
698{
699 /*
700 * Validate the request.
701 */
702 /* this first check could probably be omitted as its also done by the OS specific code... */
703 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
704 || pReqHdr->cbIn < sizeof(*pReqHdr)
705 || pReqHdr->cbOut < sizeof(*pReqHdr)))
706 {
707 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
708 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
709 return VERR_INVALID_PARAMETER;
710 }
711 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
712 {
713 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
714 {
715 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
716 return VERR_INVALID_PARAMETER;
717 }
718 }
719 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
720 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
721 {
722 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
723 return VERR_INVALID_PARAMETER;
724 }
725
726/*
727 * Validation macros
728 */
729#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
730 do { \
731 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
732 { \
733 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
734 (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
735 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
736 } \
737 } while (0)
738
739#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
740
741#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
742 do { \
743 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
744 { \
745 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
746 (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
747 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
748 } \
749 } while (0)
750
751#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
752 do { \
753 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
754 { \
755 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
756 (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
757 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
758 } \
759 } while (0)
760
761#define REQ_CHECK_EXPR(Name, expr) \
762 do { \
763 if (RT_UNLIKELY(!(expr))) \
764 { \
765 OSDBGPRINT(( #Name ": %s\n", #expr)); \
766 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
767 } \
768 } while (0)
769
770#define REQ_CHECK_EXPR_FMT(expr, fmt) \
771 do { \
772 if (RT_UNLIKELY(!(expr))) \
773 { \
774 OSDBGPRINT( fmt ); \
775 return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
776 } \
777 } while (0)
778
779
780 /*
781 * The switch.
782 */
783 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
784 {
785 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
786 {
787 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
788 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
789 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
790 {
791 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
792 pReq->Hdr.rc = VERR_INVALID_MAGIC;
793 return 0;
794 }
795
796#if 0
797 /*
798 * Call out to the OS specific code and let it do permission checks on the
799 * client process.
800 */
801 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
802 {
803 pReq->u.Out.u32Cookie = 0xffffffff;
804 pReq->u.Out.u32SessionCookie = 0xffffffff;
805 pReq->u.Out.u32SessionVersion = 0xffffffff;
806 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
807 pReq->u.Out.pSession = NULL;
808 pReq->u.Out.cFunctions = 0;
809 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
810 return 0;
811 }
812#endif
813
814 /*
815 * Match the version.
816 * The current logic is very simple, match the major interface version.
817 */
818 if ( pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
819 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
820 {
821 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
822 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
823 pReq->u.Out.u32Cookie = 0xffffffff;
824 pReq->u.Out.u32SessionCookie = 0xffffffff;
825 pReq->u.Out.u32SessionVersion = 0xffffffff;
826 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
827 pReq->u.Out.pSession = NULL;
828 pReq->u.Out.cFunctions = 0;
829 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
830 return 0;
831 }
832
833 /*
834 * Fill in return data and be gone.
835 * N.B. The first one to change SUPDRVIOC_VERSION shall makes sure that
836 * u32SessionVersion <= u32ReqVersion!
837 */
838 /** @todo Somehow validate the client and negotiate a secure cookie... */
839 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
840 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
841 pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
842 pReq->u.Out.u32DriverVersion = SUPDRVIOC_VERSION;
843 pReq->u.Out.pSession = pSession;
844 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
845 pReq->Hdr.rc = VINF_SUCCESS;
846 return 0;
847 }
848
849 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
850 {
851 /* validate */
852 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
853 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
854
855 /* execute */
856 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
857 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
858 pReq->Hdr.rc = VINF_SUCCESS;
859 return 0;
860 }
861
862 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
863 {
864 /* validate */
865 PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
866 REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);
867
868 /* execute */
869#ifdef VBOX_WITH_IDT_PATCHING
870 pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
871#else
872 pReq->u.Out.u8Idt = 3;
873 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
874#endif
875 return 0;
876 }
877
878 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
879 {
880 /* validate */
881 PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
882 REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);
883
884 /* execute */
885#ifdef VBOX_WITH_IDT_PATCHING
886 pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
887#else
888 pReq->Hdr.rc = VERR_NOT_SUPPORTED;
889#endif
890 return 0;
891 }
892
893 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
894 {
895 /* validate */
896 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
897 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
898 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
899 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
900 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
901
902 /* execute */
903 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
904 if (RT_FAILURE(pReq->Hdr.rc))
905 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
906 return 0;
907 }
908
909 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
910 {
911 /* validate */
912 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
913 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
914
915 /* execute */
916 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
917 return 0;
918 }
919
920 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
921 {
922 /* validate */
923 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
924 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
925
926 /* execute */
927 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
928 if (RT_FAILURE(pReq->Hdr.rc))
929 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
930 return 0;
931 }
932
933 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
934 {
935 /* validate */
936 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
937 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
938
939 /* execute */
940 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
941 return 0;
942 }
943
944 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
945 {
946 /* validate */
947 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
948 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
949 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
950 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
951 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
952 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
953 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
954
955 /* execute */
956 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
957 return 0;
958 }
959
960 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
961 {
962 /* validate */
963 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
964 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
965 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
966 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
967 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
968 || ( pReq->u.In.offSymbols < pReq->u.In.cbImage
969 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
970 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
971 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
972 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
973 || ( pReq->u.In.offStrTab < pReq->u.In.cbImage
974 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
975 && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
976 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
977 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));
978
979 if (pReq->u.In.cSymbols)
980 {
981 uint32_t i;
982 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
983 for (i = 0; i < pReq->u.In.cSymbols; i++)
984 {
985 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
986 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
987 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
988 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
989 REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
990 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
991 }
992 }
993
994 /* execute */
995 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
996 return 0;
997 }
998
999 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1000 {
1001 /* validate */
1002 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1003 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1004
1005 /* execute */
1006 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1007 return 0;
1008 }
1009
1010 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1011 {
1012 /* validate */
1013 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1014 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1015 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));
1016
1017 /* execute */
1018 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1019 return 0;
1020 }
1021
1022 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1023 {
1024 /* validate */
1025 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1026 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1027 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1028
1029 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1030 {
1031 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1032
1033 /* execute */
1034 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1035 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
1036 else
1037 pReq->Hdr.rc = VERR_WRONG_ORDER;
1038 }
1039 else
1040 {
1041 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1042 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1043 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1044 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1045 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1046
1047 /* execute */
1048 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1049 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
1050 else
1051 pReq->Hdr.rc = VERR_WRONG_ORDER;
1052 }
1053
1054 if ( RT_FAILURE(pReq->Hdr.rc)
1055 && pReq->Hdr.rc != VERR_INTERRUPTED
1056 && pReq->Hdr.rc != VERR_TIMEOUT)
1057 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1058 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1059 else
1060 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1061 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1062 return 0;
1063 }
1064
1065 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1066 {
1067 /* validate */
1068 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1069 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1070
1071 /* execute */
1072 pReq->Hdr.rc = VINF_SUCCESS;
1073 pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
1074 return 0;
1075 }
1076
1077 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1078 {
1079 /* validate */
1080 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1081 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1082 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1083
1084 /* execute */
1085 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1086 if (RT_FAILURE(pReq->Hdr.rc))
1087 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1088 return 0;
1089 }
1090
1091 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1092 {
1093 /* validate */
1094 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1095 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1096
1097 /* execute */
1098 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1099 return 0;
1100 }
1101
1102 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1103 {
1104 /* validate */
1105 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1106 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1107
1108 /* execute */
1109 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1110 if (RT_SUCCESS(pReq->Hdr.rc))
1111 pReq->u.Out.pGipR0 = pDevExt->pGip;
1112 return 0;
1113 }
1114
1115 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1116 {
1117 /* validate */
1118 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1119 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1120
1121 /* execute */
1122 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1123 return 0;
1124 }
1125
1126 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1127 {
1128 /* validate */
1129 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1130 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1131 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1132 || ( VALID_PTR(pReq->u.In.pVMR0)
1133 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1134 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1135 /* execute */
1136 pSession->pVM = pReq->u.In.pVMR0;
1137 pReq->Hdr.rc = VINF_SUCCESS;
1138 return 0;
1139 }
1140
1141 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
1142 {
1143 /* validate */
1144 PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
1145 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
1146 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1147
1148 /* execute */
1149 pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1150 if (RT_FAILURE(pReq->Hdr.rc))
1151 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1152 return 0;
1153 }
1154
1155 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1156 {
1157 /* validate */
1158 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1159 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1160
1161 /* execute */
1162 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1163 return 0;
1164 }
1165
1166 default:
1167 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
1168 break;
1169 }
1170 return SUPDRV_ERR_GENERAL_FAILURE;
1171}
1172
1173
1174/**
1175 * Register a object for reference counting.
1176 * The object is registered with one reference in the specified session.
1177 *
1178 * @returns Unique identifier on success (pointer).
1179 * All future reference must use this identifier.
1180 * @returns NULL on failure.
1181 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
1182 * @param pvUser1 The first user argument.
1183 * @param pvUser2 The second user argument.
1184 */
1185SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
1186{
1187 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1188 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1189 PSUPDRVOBJ pObj;
1190 PSUPDRVUSAGE pUsage;
1191
1192 /*
1193 * Validate the input.
1194 */
1195 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
1196 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
1197 AssertPtrReturn(pfnDestructor, NULL);
1198
1199 /*
1200 * Allocate and initialize the object.
1201 */
1202 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
1203 if (!pObj)
1204 return NULL;
1205 pObj->u32Magic = SUPDRVOBJ_MAGIC;
1206 pObj->enmType = enmType;
1207 pObj->pNext = NULL;
1208 pObj->cUsage = 1;
1209 pObj->pfnDestructor = pfnDestructor;
1210 pObj->pvUser1 = pvUser1;
1211 pObj->pvUser2 = pvUser2;
1212 pObj->CreatorUid = pSession->Uid;
1213 pObj->CreatorGid = pSession->Gid;
1214 pObj->CreatorProcess= pSession->Process;
1215 supdrvOSObjInitCreator(pObj, pSession);
1216
1217 /*
1218 * Allocate the usage record.
1219 * (We keep freed usage records around to simplity SUPR0ObjAddRef().)
1220 */
1221 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1222
1223 pUsage = pDevExt->pUsageFree;
1224 if (pUsage)
1225 pDevExt->pUsageFree = pUsage->pNext;
1226 else
1227 {
1228 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1229 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
1230 if (!pUsage)
1231 {
1232 RTMemFree(pObj);
1233 return NULL;
1234 }
1235 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1236 }
1237
1238 /*
1239 * Insert the object and create the session usage record.
1240 */
1241 /* The object. */
1242 pObj->pNext = pDevExt->pObjs;
1243 pDevExt->pObjs = pObj;
1244
1245 /* The session record. */
1246 pUsage->cUsage = 1;
1247 pUsage->pObj = pObj;
1248 pUsage->pNext = pSession->pUsage;
1249 Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1250 pSession->pUsage = pUsage;
1251
1252 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1253
1254 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
1255 return pObj;
1256}
1257
1258
1259/**
1260 * Increment the reference counter for the object associating the reference
1261 * with the specified session.
1262 *
1263 * @returns IPRT status code.
1264 * @param pvObj The identifier returned by SUPR0ObjRegister().
1265 * @param pSession The session which is referencing the object.
1266 */
1267SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
1268{
1269 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1270 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1271 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1272 PSUPDRVUSAGE pUsagePre;
1273 PSUPDRVUSAGE pUsage;
1274
1275 /*
1276 * Validate the input.
1277 */
1278 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1279 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1280 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1281 VERR_INVALID_PARAMETER);
1282
1283 /*
1284 * Preallocate the usage record.
1285 */
1286 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1287
1288 pUsagePre = pDevExt->pUsageFree;
1289 if (pUsagePre)
1290 pDevExt->pUsageFree = pUsagePre->pNext;
1291 else
1292 {
1293 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1294 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
1295 if (!pUsagePre)
1296 return VERR_NO_MEMORY;
1297 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1298 }
1299
1300 /*
1301 * Reference the object.
1302 */
1303 pObj->cUsage++;
1304
1305 /*
1306 * Look for the session record.
1307 */
1308 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
1309 {
1310 Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1311 if (pUsage->pObj == pObj)
1312 break;
1313 }
1314 if (pUsage)
1315 pUsage->cUsage++;
1316 else
1317 {
1318 /* create a new session record. */
1319 pUsagePre->cUsage = 1;
1320 pUsagePre->pObj = pObj;
1321 pUsagePre->pNext = pSession->pUsage;
1322 pSession->pUsage = pUsagePre;
1323 Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));
1324
1325 pUsagePre = NULL;
1326 }
1327
1328 /*
1329 * Put any unused usage record into the free list..
1330 */
1331 if (pUsagePre)
1332 {
1333 pUsagePre->pNext = pDevExt->pUsageFree;
1334 pDevExt->pUsageFree = pUsagePre;
1335 }
1336
1337 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1338
1339 return VINF_SUCCESS;
1340}
1341
1342
1343/**
1344 * Decrement / destroy a reference counter record for an object.
1345 *
1346 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
1347 *
1348 * @returns IPRT status code.
1349 * @param pvObj The identifier returned by SUPR0ObjRegister().
1350 * @param pSession The session which is referencing the object.
1351 */
1352SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
1353{
1354 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1355 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
1356 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1357 bool fDestroy = false;
1358 PSUPDRVUSAGE pUsage;
1359 PSUPDRVUSAGE pUsagePrev;
1360
1361 /*
1362 * Validate the input.
1363 */
1364 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1365 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1366 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1367 VERR_INVALID_PARAMETER);
1368
1369 /*
1370 * Acquire the spinlock and look for the usage record.
1371 */
1372 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
1373
1374 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
1375 pUsage;
1376 pUsagePrev = pUsage, pUsage = pUsage->pNext)
1377 {
1378 Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
1379 if (pUsage->pObj == pObj)
1380 {
1381 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
1382 if (pUsage->cUsage > 1)
1383 {
1384 pObj->cUsage--;
1385 pUsage->cUsage--;
1386 }
1387 else
1388 {
1389 /*
1390 * Free the session record.
1391 */
1392 if (pUsagePrev)
1393 pUsagePrev->pNext = pUsage->pNext;
1394 else
1395 pSession->pUsage = pUsage->pNext;
1396 pUsage->pNext = pDevExt->pUsageFree;
1397 pDevExt->pUsageFree = pUsage;
1398
1399 /* What about the object? */
1400 if (pObj->cUsage > 1)
1401 pObj->cUsage--;
1402 else
1403 {
1404 /*
1405 * Object is to be destroyed, unlink it.
1406 */
1407 pObj->u32Magic = SUPDRVOBJ_MAGIC + 1;
1408 fDestroy = true;
1409 if (pDevExt->pObjs == pObj)
1410 pDevExt->pObjs = pObj->pNext;
1411 else
1412 {
1413 PSUPDRVOBJ pObjPrev;
1414 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
1415 if (pObjPrev->pNext == pObj)
1416 {
1417 pObjPrev->pNext = pObj->pNext;
1418 break;
1419 }
1420 Assert(pObjPrev);
1421 }
1422 }
1423 }
1424 break;
1425 }
1426 }
1427
1428 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
1429
1430 /*
1431 * Call the destructor and free the object if required.
1432 */
1433 if (fDestroy)
1434 {
1435 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
1436 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
1437 if (pObj->pfnDestructor)
1438 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
1439 RTMemFree(pObj);
1440 }
1441
1442 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
1443 return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
1444}
1445
1446/**
1447 * Verifies that the current process can access the specified object.
1448 *
1449 * @returns The following IPRT status code:
1450 * @retval VINF_SUCCESS if access was granted.
1451 * @retval VERR_PERMISSION_DENIED if denied access.
1452 * @retval VERR_INVALID_PARAMETER if invalid parameter.
1453 *
1454 * @param pvObj The identifier returned by SUPR0ObjRegister().
1455 * @param pSession The session which wishes to access the object.
1456 * @param pszObjName Object string name. This is optional and depends on the object type.
1457 *
1458 * @remark The caller is responsible for making sure the object isn't removed while
1459 * we're inside this function. If uncertain about this, just call AddRef before calling us.
1460 */
1461SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
1462{
1463 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
1464 int rc;
1465
1466 /*
1467 * Validate the input.
1468 */
1469 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1470 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
1471 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
1472 VERR_INVALID_PARAMETER);
1473
1474 /*
1475 * Check access. (returns true if a decision has been made.)
1476 */
1477 rc = VERR_INTERNAL_ERROR;
1478 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
1479 return rc;
1480
1481 /*
1482 * Default policy is to allow the user to access his own
1483 * stuff but nothing else.
1484 */
1485 if (pObj->CreatorUid == pSession->Uid)
1486 return VINF_SUCCESS;
1487 return VERR_PERMISSION_DENIED;
1488}
1489
1490
1491/**
1492 * Lock pages.
1493 *
1494 * @returns IPRT status code.
1495 * @param pSession Session to which the locked memory should be associated.
1496 * @param pvR3 Start of the memory range to lock.
1497 * This must be page aligned.
1498 * @param cb Size of the memory range to lock.
1499 * This must be page aligned.
1500 */
1501SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
1502{
1503 int rc;
1504 SUPDRVMEMREF Mem = {0};
1505 const size_t cb = (size_t)cPages << PAGE_SHIFT;
1506 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
1507
1508 /*
1509 * Verify input.
1510 */
1511 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1512 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1513 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
1514 || !pvR3)
1515 {
1516 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
1517 return VERR_INVALID_PARAMETER;
1518 }
1519
1520#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
1521 /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
1522 rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
1523 if (RT_SUCCESS(rc))
1524 return rc;
1525#endif
1526
1527 /*
1528 * Let IPRT do the job.
1529 */
1530 Mem.eType = MEMREF_TYPE_LOCKED;
1531 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
1532 if (RT_SUCCESS(rc))
1533 {
1534 uint32_t iPage = cPages;
1535 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
1536 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
1537
1538 while (iPage-- > 0)
1539 {
1540 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1541 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
1542 {
1543 AssertMsgFailed(("iPage=%d\n", iPage));
1544 rc = VERR_INTERNAL_ERROR;
1545 break;
1546 }
1547 }
1548 if (RT_SUCCESS(rc))
1549 rc = supdrvMemAdd(&Mem, pSession);
1550 if (RT_FAILURE(rc))
1551 {
1552 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
1553 AssertRC(rc2);
1554 }
1555 }
1556
1557 return rc;
1558}
1559
1560
1561/**
1562 * Unlocks the memory pointed to by pv.
1563 *
1564 * @returns IPRT status code.
1565 * @param pSession Session to which the memory was locked.
1566 * @param pvR3 Memory to unlock.
1567 */
1568SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1569{
1570 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1571 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1572#ifdef RT_OS_WINDOWS
1573 /*
1574 * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
1575 * allocations; ignore this call.
1576 */
1577 if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
1578 {
1579 LogFlow(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
1580 return VINF_SUCCESS;
1581 }
1582#endif
1583 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
1584}
1585
1586
1587/**
1588 * Allocates a chunk of page aligned memory with contiguous and fixed physical
1589 * backing.
1590 *
1591 * @returns IPRT status code.
1592 * @param pSession Session data.
1593 * @param cb Number of bytes to allocate.
1594 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
1595 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
1596 * @param pHCPhys Where to put the physical address of allocated memory.
1597 */
1598SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1599{
1600 int rc;
1601 SUPDRVMEMREF Mem = {0};
1602 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
1603
1604 /*
1605 * Validate input.
1606 */
1607 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1608 if (!ppvR3 || !ppvR0 || !pHCPhys)
1609 {
1610 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
1611 pSession, ppvR0, ppvR3, pHCPhys));
1612 return VERR_INVALID_PARAMETER;
1613
1614 }
1615 if (cPages < 1 || cPages >= 256)
1616 {
1617 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
1618 return VERR_INVALID_PARAMETER;
1619 }
1620
1621 /*
1622 * Let IPRT do the job.
1623 */
1624 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
1625 if (RT_SUCCESS(rc))
1626 {
1627 int rc2;
1628 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1629 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1630 if (RT_SUCCESS(rc))
1631 {
1632 Mem.eType = MEMREF_TYPE_CONT;
1633 rc = supdrvMemAdd(&Mem, pSession);
1634 if (!rc)
1635 {
1636 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1637 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1638 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
1639 return 0;
1640 }
1641
1642 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1643 AssertRC(rc2);
1644 }
1645 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1646 AssertRC(rc2);
1647 }
1648
1649 return rc;
1650}
1651
1652
1653/**
1654 * Frees memory allocated using SUPR0ContAlloc().
1655 *
1656 * @returns IPRT status code.
1657 * @param pSession The session to which the memory was allocated.
1658 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1659 */
1660SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1661{
1662 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1663 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1664 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1665}
1666
1667
1668/**
1669 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1670 *
1671 * The memory isn't zeroed.
1672 *
1673 * @returns IPRT status code.
1674 * @param pSession Session data.
1675 * @param cPages Number of pages to allocate.
1676 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1677 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1678 * @param paPages Where to put the physical addresses of allocated memory.
1679 */
1680SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1681{
1682 unsigned iPage;
1683 int rc;
1684 SUPDRVMEMREF Mem = {0};
1685 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1686
1687 /*
1688 * Validate input.
1689 */
1690 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1691 if (!ppvR3 || !ppvR0 || !paPages)
1692 {
1693 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1694 pSession, ppvR3, ppvR0, paPages));
1695 return VERR_INVALID_PARAMETER;
1696
1697 }
1698 if (cPages < 1 || cPages > 256)
1699 {
1700 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
1701 return VERR_INVALID_PARAMETER;
1702 }
1703
1704 /*
1705 * Let IPRT do the work.
1706 */
1707 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1708 if (RT_SUCCESS(rc))
1709 {
1710 int rc2;
1711 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1712 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1713 if (RT_SUCCESS(rc))
1714 {
1715 Mem.eType = MEMREF_TYPE_LOW;
1716 rc = supdrvMemAdd(&Mem, pSession);
1717 if (!rc)
1718 {
1719 for (iPage = 0; iPage < cPages; iPage++)
1720 {
1721 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1722 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", paPages[iPage]));
1723 }
1724 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1725 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1726 return 0;
1727 }
1728
1729 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1730 AssertRC(rc2);
1731 }
1732
1733 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1734 AssertRC(rc2);
1735 }
1736
1737 return rc;
1738}
1739
1740
1741/**
1742 * Frees memory allocated using SUPR0LowAlloc().
1743 *
1744 * @returns IPRT status code.
1745 * @param pSession The session to which the memory was allocated.
1746 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1747 */
1748SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1749{
1750 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1751 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1752 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1753}
1754
1755
1756
1757/**
1758 * Allocates a chunk of memory with both R0 and R3 mappings.
1759 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1760 *
1761 * @returns IPRT status code.
1762 * @param pSession The session to associated the allocation with.
1763 * @param cb Number of bytes to allocate.
1764 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1765 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1766 */
1767SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1768{
1769 int rc;
1770 SUPDRVMEMREF Mem = {0};
1771 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1772
1773 /*
1774 * Validate input.
1775 */
1776 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1777 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1778 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1779 if (cb < 1 || cb >= _4M)
1780 {
1781 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1782 return VERR_INVALID_PARAMETER;
1783 }
1784
1785 /*
1786 * Let IPRT do the work.
1787 */
1788 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1789 if (RT_SUCCESS(rc))
1790 {
1791 int rc2;
1792 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1793 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1794 if (RT_SUCCESS(rc))
1795 {
1796 Mem.eType = MEMREF_TYPE_MEM;
1797 rc = supdrvMemAdd(&Mem, pSession);
1798 if (!rc)
1799 {
1800 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1801 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1802 return VINF_SUCCESS;
1803 }
1804 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1805 AssertRC(rc2);
1806 }
1807
1808 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1809 AssertRC(rc2);
1810 }
1811
1812 return rc;
1813}
1814
1815
1816/**
1817 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1818 *
1819 * @returns IPRT status code.
1820 * @param pSession The session to which the memory was allocated.
1821 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1822 * @param paPages Where to store the physical addresses.
1823 */
1824SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1825{
1826 PSUPDRVBUNDLE pBundle;
1827 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1828 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1829
1830 /*
1831 * Validate input.
1832 */
1833 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1834 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1835 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1836
1837 /*
1838 * Search for the address.
1839 */
1840 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1841 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1842 {
1843 if (pBundle->cUsed > 0)
1844 {
1845 unsigned i;
1846 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1847 {
1848 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1849 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1850 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1851 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1852 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1853 )
1854 )
1855 {
1856 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1857 unsigned iPage;
1858 for (iPage = 0; iPage < cPages; iPage++)
1859 {
1860 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1861 paPages[iPage].uReserved = 0;
1862 }
1863 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1864 return VINF_SUCCESS;
1865 }
1866 }
1867 }
1868 }
1869 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1870 Log(("Failed to find %p!!!\n", (void *)uPtr));
1871 return VERR_INVALID_PARAMETER;
1872}
1873
1874
1875/**
1876 * Free memory allocated by SUPR0MemAlloc().
1877 *
1878 * @returns IPRT status code.
1879 * @param pSession The session owning the allocation.
1880 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1881 */
1882SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1883{
1884 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1885 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1886 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1887}
1888
1889
1890/**
1891 * Allocates a chunk of memory with only a R3 mappings.
1892 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1893 *
1894 * @returns IPRT status code.
1895 * @param pSession The session to associated the allocation with.
1896 * @param cPages The number of pages to allocate.
1897 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1898 * @param paPages Where to store the addresses of the pages. Optional.
1899 */
1900SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1901{
1902 int rc;
1903 SUPDRVMEMREF Mem = {0};
1904 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1905
1906 /*
1907 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
1908 */
1909 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1910 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1911 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1912 {
1913 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than 128MB.\n", cPages));
1914 return VERR_INVALID_PARAMETER;
1915 }
1916
1917 /*
1918 * Let IPRT do the work.
1919 */
1920 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1921 if (RT_SUCCESS(rc))
1922 {
1923 int rc2;
1924 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1925 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1926 if (RT_SUCCESS(rc))
1927 {
1928 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
1929 rc = supdrvMemAdd(&Mem, pSession);
1930 if (!rc)
1931 {
1932 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1933 if (paPages)
1934 {
1935 uint32_t iPage = cPages;
1936 while (iPage-- > 0)
1937 {
1938 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
1939 Assert(paPages[iPage] != NIL_RTHCPHYS);
1940 }
1941 }
1942 return VINF_SUCCESS;
1943 }
1944 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1945 AssertRC(rc2);
1946 }
1947
1948 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1949 AssertRC(rc2);
1950 }
1951 return rc;
1952}
1953
1954
1955#ifdef RT_OS_WINDOWS
1956/**
1957 * Check if the pages were locked by SUPR0PageAlloc
1958 *
1959 * This function will be removed along with the lock/unlock hacks when
1960 * we've cleaned up the ring-3 code properly.
1961 *
1962 * @returns boolean
1963 * @param pSession The session to which the memory was allocated.
1964 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
1965 */
1966static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
1967{
1968 PSUPDRVBUNDLE pBundle;
1969 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1970 LogFlow(("SUPR0PageIsLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
1971
1972 /*
1973 * Search for the address.
1974 */
1975 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1976 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1977 {
1978 if (pBundle->cUsed > 0)
1979 {
1980 unsigned i;
1981 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1982 {
1983 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
1984 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1985 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1986 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
1987 {
1988 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1989 return true;
1990 }
1991 }
1992 }
1993 }
1994 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1995 return false;
1996}
1997
1998
1999/**
2000 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
2001 *
2002 * This function will be removed along with the lock/unlock hacks when
2003 * we've cleaned up the ring-3 code properly.
2004 *
2005 * @returns IPRT status code.
2006 * @param pSession The session to which the memory was allocated.
2007 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2008 * @param cPages Number of pages in paPages
2009 * @param paPages Where to store the physical addresses.
2010 */
2011static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2012{
2013 PSUPDRVBUNDLE pBundle;
2014 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2015 LogFlow(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
2016
2017 /*
2018 * Search for the address.
2019 */
2020 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2021 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2022 {
2023 if (pBundle->cUsed > 0)
2024 {
2025 unsigned i;
2026 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2027 {
2028 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
2029 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2030 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2031 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2032 {
2033 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
2034 cPages = RT_MIN(iPage, cPages);
2035 for (iPage = 0; iPage < cPages; iPage++)
2036 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
2037 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2038 return VINF_SUCCESS;
2039 }
2040 }
2041 }
2042 }
2043 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2044 return VERR_INVALID_PARAMETER;
2045}
2046#endif /* RT_OS_WINDOWS */
2047
2048
2049/**
2050 * Free memory allocated by SUPR0PageAlloc().
2051 *
2052 * @returns IPRT status code.
2053 * @param pSession The session owning the allocation.
2054 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2055 */
2056SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2057{
2058 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2059 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2060 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
2061}
2062
2063
2064/**
2065 * Maps the GIP into userspace and/or get the physical address of the GIP.
2066 *
2067 * @returns IPRT status code.
2068 * @param pSession Session to which the GIP mapping should belong.
2069 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
2070 * @param pHCPhysGip Where to store the physical address. (optional)
2071 *
2072 * @remark There is no reference counting on the mapping, so one call to this function
2073 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
2074 * and remove the session as a GIP user.
2075 */
2076SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
2077{
2078 int rc = 0;
2079 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2080 RTR3PTR pGip = NIL_RTR3PTR;
2081 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2082 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
2083
2084 /*
2085 * Validate
2086 */
2087 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2088 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
2089 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
2090
2091 RTSemFastMutexRequest(pDevExt->mtxGip);
2092 if (pDevExt->pGip)
2093 {
2094 /*
2095 * Map it?
2096 */
2097 if (ppGipR3)
2098 {
2099#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2100 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2101 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2102 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2103 if (RT_SUCCESS(rc))
2104 {
2105 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2106 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2107 }
2108#else /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2109 if (!pSession->pGip)
2110 rc = supdrvOSGipMap(pSession->pDevExt, &pSession->pGip);
2111 if (!rc)
2112 pGip = (RTR3PTR)pSession->pGip;
2113#endif /* !USE_NEW_OS_INTERFACE_FOR_GIP */
2114 }
2115
2116 /*
2117 * Get physical address.
2118 */
2119 if (pHCPhysGip && !rc)
2120 HCPhys = pDevExt->HCPhysGip;
2121
2122 /*
2123 * Reference globally.
2124 */
2125 if (!pSession->fGipReferenced && !rc)
2126 {
2127 pSession->fGipReferenced = 1;
2128 pDevExt->cGipUsers++;
2129 if (pDevExt->cGipUsers == 1)
2130 {
2131 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2132 unsigned i;
2133
2134 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
2135
2136 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2137 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2138 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2139
2140#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2141 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2142 AssertRC(rc); rc = VINF_SUCCESS;
2143#else
2144 supdrvOSGipResume(pDevExt);
2145#endif
2146 }
2147 }
2148 }
2149 else
2150 {
2151 rc = SUPDRV_ERR_GENERAL_FAILURE;
2152 Log(("SUPR0GipMap: GIP is not available!\n"));
2153 }
2154 RTSemFastMutexRelease(pDevExt->mtxGip);
2155
2156 /*
2157 * Write returns.
2158 */
2159 if (pHCPhysGip)
2160 *pHCPhysGip = HCPhys;
2161 if (ppGipR3)
2162 *ppGipR3 = pGip;
2163
2164#ifdef DEBUG_DARWIN_GIP
2165 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2166#else
2167 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2168#endif
2169 return rc;
2170}
2171
2172
2173/**
2174 * Unmaps any user mapping of the GIP and terminates all GIP access
2175 * from this session.
2176 *
2177 * @returns IPRT status code.
2178 * @param pSession Session to which the GIP mapping should belong.
2179 */
2180SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2181{
2182 int rc = VINF_SUCCESS;
2183 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2184#ifdef DEBUG_DARWIN_GIP
2185 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2186 pSession,
2187 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2188 pSession->GipMapObjR3));
2189#else
2190 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
2191#endif
2192 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2193
2194 RTSemFastMutexRequest(pDevExt->mtxGip);
2195
2196 /*
2197 * Unmap anything?
2198 */
2199#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2200 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2201 {
2202 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2203 AssertRC(rc);
2204 if (RT_SUCCESS(rc))
2205 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2206 }
2207#else
2208 if (pSession->pGip)
2209 {
2210 rc = supdrvOSGipUnmap(pDevExt, pSession->pGip);
2211 if (!rc)
2212 pSession->pGip = NULL;
2213 }
2214#endif
2215
2216 /*
2217 * Dereference global GIP.
2218 */
2219 if (pSession->fGipReferenced && !rc)
2220 {
2221 pSession->fGipReferenced = 0;
2222 if ( pDevExt->cGipUsers > 0
2223 && !--pDevExt->cGipUsers)
2224 {
2225 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
2226#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
2227 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2228#else
2229 supdrvOSGipSuspend(pDevExt);
2230#endif
2231 }
2232 }
2233
2234 RTSemFastMutexRelease(pDevExt->mtxGip);
2235
2236 return rc;
2237}
2238
2239
2240/**
2241 * Adds a memory object to the session.
2242 *
2243 * @returns IPRT status code.
2244 * @param pMem Memory tracking structure containing the
2245 * information to track.
2246 * @param pSession The session.
2247 */
2248static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2249{
2250 PSUPDRVBUNDLE pBundle;
2251 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2252
2253 /*
2254 * Find free entry and record the allocation.
2255 */
2256 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2257 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2258 {
2259 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2260 {
2261 unsigned i;
2262 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2263 {
2264 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2265 {
2266 pBundle->cUsed++;
2267 pBundle->aMem[i] = *pMem;
2268 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2269 return VINF_SUCCESS;
2270 }
2271 }
2272 AssertFailed(); /* !!this can't be happening!!! */
2273 }
2274 }
2275 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2276
2277 /*
2278 * Need to allocate a new bundle.
2279 * Insert into the last entry in the bundle.
2280 */
2281 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2282 if (!pBundle)
2283 return VERR_NO_MEMORY;
2284
2285 /* take last entry. */
2286 pBundle->cUsed++;
2287 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2288
2289 /* insert into list. */
2290 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2291 pBundle->pNext = pSession->Bundle.pNext;
2292 pSession->Bundle.pNext = pBundle;
2293 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2294
2295 return VINF_SUCCESS;
2296}
2297
2298
2299/**
2300 * Releases a memory object referenced by pointer and type.
2301 *
2302 * @returns IPRT status code.
2303 * @param pSession Session data.
2304 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2305 * @param eType Memory type.
2306 */
2307static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2308{
2309 PSUPDRVBUNDLE pBundle;
2310 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2311
2312 /*
2313 * Validate input.
2314 */
2315 if (!uPtr)
2316 {
2317 Log(("Illegal address %p\n", (void *)uPtr));
2318 return VERR_INVALID_PARAMETER;
2319 }
2320
2321 /*
2322 * Search for the address.
2323 */
2324 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2325 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2326 {
2327 if (pBundle->cUsed > 0)
2328 {
2329 unsigned i;
2330 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2331 {
2332 if ( pBundle->aMem[i].eType == eType
2333 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2334 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2335 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2336 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2337 )
2338 {
2339 /* Make a copy of it and release it outside the spinlock. */
2340 SUPDRVMEMREF Mem = pBundle->aMem[i];
2341 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2342 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2343 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2344 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2345
2346 if (Mem.MapObjR3)
2347 {
2348 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2349 AssertRC(rc); /** @todo figure out how to handle this. */
2350 }
2351 if (Mem.MemObj)
2352 {
2353 int rc = RTR0MemObjFree(Mem.MemObj, false);
2354 AssertRC(rc); /** @todo figure out how to handle this. */
2355 }
2356 return VINF_SUCCESS;
2357 }
2358 }
2359 }
2360 }
2361 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2362 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2363 return VERR_INVALID_PARAMETER;
2364}
2365
2366
2367#ifdef VBOX_WITH_IDT_PATCHING
2368/**
2369 * Install IDT for the current CPU.
2370 *
2371 * @returns One of the following IPRT status codes:
2372 * @retval VINF_SUCCESS on success.
2373 * @retval VERR_IDT_FAILED.
2374 * @retval VERR_NO_MEMORY.
2375 * @param pDevExt The device extension.
2376 * @param pSession The session data.
2377 * @param pReq The request.
2378 */
2379static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2380{
2381 PSUPDRVPATCHUSAGE pUsagePre;
2382 PSUPDRVPATCH pPatchPre;
2383 RTIDTR Idtr;
2384 PSUPDRVPATCH pPatch;
2385 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2386 LogFlow(("supdrvIOCtl_IdtInstall\n"));
2387
2388 /*
2389 * Preallocate entry for this CPU cause we don't wanna do
2390 * that inside the spinlock!
2391 */
2392 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2393 if (!pUsagePre)
2394 return VERR_NO_MEMORY;
2395
2396 /*
2397 * Take the spinlock and see what we need to do.
2398 */
2399 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2400
2401 /* check if we already got a free patch. */
2402 if (!pDevExt->pIdtPatchesFree)
2403 {
2404 /*
2405 * Allocate a patch - outside the spinlock of course.
2406 */
2407 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2408
2409 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2410 if (!pPatchPre)
2411 return VERR_NO_MEMORY;
2412
2413 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2414 }
2415 else
2416 {
2417 pPatchPre = pDevExt->pIdtPatchesFree;
2418 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2419 }
2420
2421 /* look for matching patch entry */
2422 ASMGetIDTR(&Idtr);
2423 pPatch = pDevExt->pIdtPatches;
2424 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2425 pPatch = pPatch->pNext;
2426
2427 if (!pPatch)
2428 {
2429 /*
2430 * Create patch.
2431 */
2432 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2433 if (pPatch)
2434 pPatchPre = NULL; /* mark as used. */
2435 }
2436 else
2437 {
2438 /*
2439 * Simply increment patch usage.
2440 */
2441 pPatch->cUsage++;
2442 }
2443
2444 if (pPatch)
2445 {
2446 /*
2447 * Increment and add if need be the session usage record for this patch.
2448 */
2449 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2450 while (pUsage && pUsage->pPatch != pPatch)
2451 pUsage = pUsage->pNext;
2452
2453 if (!pUsage)
2454 {
2455 /*
2456 * Add usage record.
2457 */
2458 pUsagePre->cUsage = 1;
2459 pUsagePre->pPatch = pPatch;
2460 pUsagePre->pNext = pSession->pPatchUsage;
2461 pSession->pPatchUsage = pUsagePre;
2462 pUsagePre = NULL; /* mark as used. */
2463 }
2464 else
2465 {
2466 /*
2467 * Increment usage count.
2468 */
2469 pUsage->cUsage++;
2470 }
2471 }
2472
2473 /* free patch - we accumulate them for paranoid saftly reasons. */
2474 if (pPatchPre)
2475 {
2476 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2477 pDevExt->pIdtPatchesFree = pPatchPre;
2478 }
2479
2480 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2481
2482 /*
2483 * Free unused preallocated buffers.
2484 */
2485 if (pUsagePre)
2486 RTMemFree(pUsagePre);
2487
2488 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2489
2490 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2491}
2492
2493
2494/**
2495 * This creates a IDT patch entry.
2496 * If the first patch being installed it'll also determin the IDT entry
2497 * to use.
2498 *
2499 * @returns pPatch on success.
2500 * @returns NULL on failure.
2501 * @param pDevExt Pointer to globals.
2502 * @param pPatch Patch entry to use.
2503 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2504 * successful return.
2505 * @remark Call must be owning the SUPDRVDEVEXT::Spinlock!
2506 */
2507static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2508{
2509 RTIDTR Idtr;
2510 PSUPDRVIDTE paIdt;
2511 LogFlow(("supdrvIOCtl_IdtPatchOne: pPatch=%p\n", pPatch));
2512
2513 /*
2514 * Get IDT.
2515 */
2516 ASMGetIDTR(&Idtr);
2517 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2518 /*
2519 * Recent Linux kernels can be configured to 1G user /3G kernel.
2520 */
2521 if ((uintptr_t)paIdt < 0x40000000)
2522 {
2523 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2524 return NULL;
2525 }
2526
2527 if (!pDevExt->u8Idt)
2528 {
2529 /*
2530 * Test out the alternatives.
2531 *
2532 * At the moment we do not support chaining thus we ASSUME that one of
2533 * these 48 entries is unused (which is not a problem on Win32 and
2534 * Linux to my knowledge).
2535 */
2536 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2537 * combined with gathering info about which guest system call gates we can hook up directly. */
2538 unsigned i;
2539 uint8_t u8Idt = 0;
2540 static uint8_t au8Ints[] =
2541 {
2542#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on linux (ef is IPI,
2543 * local apic timer, or some other frequently fireing thing). */
2544 0xef, 0xee, 0xed, 0xec,
2545#endif
2546 0xeb, 0xea, 0xe9, 0xe8,
2547 0xdf, 0xde, 0xdd, 0xdc,
2548 0x7b, 0x7a, 0x79, 0x78,
2549 0xbf, 0xbe, 0xbd, 0xbc,
2550 };
2551#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2552 static int s_iWobble = 0;
2553 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2554 Log2(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2555 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2556 {
2557 Log2(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type1=%#x u32Reserved=%#x u5Reserved=%#x\n",
2558 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2559 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2560 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2561 }
2562#endif
2563 /* look for entries which are not present or otherwise unused. */
2564 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2565 {
2566 u8Idt = au8Ints[i];
2567 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2568 && ( !paIdt[u8Idt].u1Present
2569 || paIdt[u8Idt].u5Type2 == 0))
2570 break;
2571 u8Idt = 0;
2572 }
2573 if (!u8Idt)
2574 {
2575 /* try again, look for a compatible entry .*/
2576 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2577 {
2578 u8Idt = au8Ints[i];
2579 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2580 && paIdt[u8Idt].u1Present
2581 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2582 && !(paIdt[u8Idt].u16SegSel & 3))
2583 break;
2584 u8Idt = 0;
2585 }
2586 if (!u8Idt)
2587 {
2588 Log(("Failed to find appropirate IDT entry!!\n"));
2589 return NULL;
2590 }
2591 }
2592 pDevExt->u8Idt = u8Idt;
2593 LogFlow(("supdrvIOCtl_IdtPatchOne: u8Idt=%x\n", u8Idt));
2594 }
2595
2596 /*
2597 * Prepare the patch
2598 */
2599 memset(pPatch, 0, sizeof(*pPatch));
2600 pPatch->pvIdt = paIdt;
2601 pPatch->cUsage = 1;
2602 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2603 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2604 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2605 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2606#ifdef RT_ARCH_AMD64
2607 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2608#endif
2609 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2610#ifdef RT_ARCH_AMD64
2611 pPatch->ChangedIdt.u3IST = 0;
2612 pPatch->ChangedIdt.u5Reserved = 0;
2613#else /* x86 */
2614 pPatch->ChangedIdt.u5Reserved = 0;
2615 pPatch->ChangedIdt.u3Type1 = 0;
2616#endif /* x86 */
2617 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2618 pPatch->ChangedIdt.u2DPL = 3;
2619 pPatch->ChangedIdt.u1Present = 1;
2620
2621 /*
2622 * Generate the patch code.
2623 */
2624 {
2625#ifdef RT_ARCH_AMD64
2626 union
2627 {
2628 uint8_t *pb;
2629 uint32_t *pu32;
2630 uint64_t *pu64;
2631 } u, uFixJmp, uFixCall, uNotNested;
2632 u.pb = &pPatch->auCode[0];
2633
2634 /* check the cookie */
2635 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2636 *u.pu32++ = pDevExt->u32Cookie;
2637
2638 *u.pb++ = 0x74; // jz @VBoxCall
2639 *u.pb++ = 2;
2640
2641 /* jump to forwarder code. */
2642 *u.pb++ = 0xeb;
2643 uFixJmp = u;
2644 *u.pb++ = 0xfe;
2645
2646 // @VBoxCall:
2647 *u.pb++ = 0x0f; // swapgs
2648 *u.pb++ = 0x01;
2649 *u.pb++ = 0xf8;
2650
2651 /*
2652 * Call VMMR0Entry
2653 * We don't have to push the arguments here, but we have top
2654 * reserve some stack space for the interrupt forwarding.
2655 */
2656# ifdef RT_OS_WINDOWS
2657 *u.pb++ = 0x50; // push rax ; alignment filler.
2658 *u.pb++ = 0x41; // push r8 ; uArg
2659 *u.pb++ = 0x50;
2660 *u.pb++ = 0x52; // push rdx ; uOperation
2661 *u.pb++ = 0x51; // push rcx ; pVM
2662# else
2663 *u.pb++ = 0x51; // push rcx ; alignment filler.
2664 *u.pb++ = 0x52; // push rdx ; uArg
2665 *u.pb++ = 0x56; // push rsi ; uOperation
2666 *u.pb++ = 0x57; // push rdi ; pVM
2667# endif
2668
2669 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2670 *u.pb++ = 0x15;
2671 uFixCall = u;
2672 *u.pu32++ = 0;
2673
2674 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2675 *u.pb++ = 0x81;
2676 *u.pb++ = 0xc4;
2677 *u.pu32++ = 0x20;
2678
2679 *u.pb++ = 0x0f; // swapgs
2680 *u.pb++ = 0x01;
2681 *u.pb++ = 0xf8;
2682
2683 /* Return to R3. */
2684 uNotNested = u;
2685 *u.pb++ = 0x48; // iretq
2686 *u.pb++ = 0xcf;
2687
2688 while ((uintptr_t)u.pb & 0x7) // align 8
2689 *u.pb++ = 0xcc;
2690
2691 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2692 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2693 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2694 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2695
2696 /* stub entry. */ // StubVMMR0Entry:
2697 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2698 *u.pb++ = 0x33; // xor eax, eax
2699 *u.pb++ = 0xc0;
2700
2701 *u.pb++ = 0x48; // dec rax
2702 *u.pb++ = 0xff;
2703 *u.pb++ = 0xc8;
2704
2705 *u.pb++ = 0xc3; // ret
2706
2707 /* forward to the original handler using a retf. */
2708 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2709
2710 *u.pb++ = 0x68; // push <target cs>
2711 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2712
2713 *u.pb++ = 0x68; // push <low target rip>
2714 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2715 ? (uint32_t)(uintptr_t)uNotNested.pb
2716 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2717 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2718
2719 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2720 *u.pb++ = 0x44;
2721 *u.pb++ = 0x24;
2722 *u.pb++ = 0x04;
2723 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2724 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2725 : pPatch->SavedIdt.u32OffsetTop;
2726
2727 *u.pb++ = 0x48; // retf ; does this require prefix?
2728 *u.pb++ = 0xcb;
2729
2730#else /* RT_ARCH_X86 */
2731
2732 union
2733 {
2734 uint8_t *pb;
2735 uint16_t *pu16;
2736 uint32_t *pu32;
2737 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2738 u.pb = &pPatch->auCode[0];
2739
2740 /* check the cookie */
2741 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2742 *u.pb++ = 0xfe;
2743 *u.pu32++ = pDevExt->u32Cookie;
2744
2745 *u.pb++ = 0x74; // jz VBoxCall
2746 uFixJmp = u;
2747 *u.pb++ = 0;
2748
2749 /* jump (far) to the original handler / not-nested-stub. */
2750 *u.pb++ = 0xea; // jmp far NotNested
2751 uFixJmpNotNested = u;
2752 *u.pu32++ = 0;
2753 *u.pu16++ = 0;
2754
2755 /* save selector registers. */ // VBoxCall:
2756 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2757 *u.pb++ = 0x0f; // push fs
2758 *u.pb++ = 0xa0;
2759
2760 *u.pb++ = 0x1e; // push ds
2761
2762 *u.pb++ = 0x06; // push es
2763
2764 /* call frame */
2765 *u.pb++ = 0x51; // push ecx
2766
2767 *u.pb++ = 0x52; // push edx
2768
2769 *u.pb++ = 0x50; // push eax
2770
2771 /* load ds, es and perhaps fs before call. */
2772 *u.pb++ = 0xb8; // mov eax, KernelDS
2773 *u.pu32++ = ASMGetDS();
2774
2775 *u.pb++ = 0x8e; // mov ds, eax
2776 *u.pb++ = 0xd8;
2777
2778 *u.pb++ = 0x8e; // mov es, eax
2779 *u.pb++ = 0xc0;
2780
2781#ifdef RT_OS_WINDOWS
2782 *u.pb++ = 0xb8; // mov eax, KernelFS
2783 *u.pu32++ = ASMGetFS();
2784
2785 *u.pb++ = 0x8e; // mov fs, eax
2786 *u.pb++ = 0xe0;
2787#endif
2788
2789 /* do the call. */
2790 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2791 uFixCall = u;
2792 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2793 *u.pu32++ = 0xfffffffb;
2794
2795 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2796 *u.pb++ = 0xc4;
2797 *u.pb++ = 0x0c;
2798
2799 /* restore selector registers. */
2800 *u.pb++ = 0x07; // pop es
2801 //
2802 *u.pb++ = 0x1f; // pop ds
2803
2804 *u.pb++ = 0x0f; // pop fs
2805 *u.pb++ = 0xa1;
2806
2807 uNotNested = u; // NotNested:
2808 *u.pb++ = 0xcf; // iretd
2809
2810 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2811 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2812 *u.pb++ = 0x33; // xor eax, eax
2813 *u.pb++ = 0xc0;
2814
2815 *u.pb++ = 0x48; // dec eax
2816
2817 *u.pb++ = 0xc3; // ret
2818
2819 /* Fixup the VMMR0Entry call. */
2820 if (pDevExt->pvVMMR0)
2821 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2822 else
2823 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2824
2825 /* Fixup the forward / nested far jump. */
2826 if (!pPatch->SavedIdt.u5Type2)
2827 {
2828 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2829 *uFixJmpNotNested.pu16++ = ASMGetCS();
2830 }
2831 else
2832 {
2833 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2834 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2835 }
2836#endif /* RT_ARCH_X86 */
2837 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2838#if 0
2839 /* dump the patch code */
2840 Log2(("patch code: %p\n", &pPatch->auCode[0]));
2841 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2842 Log2(("0x%02x,\n", *uFixCall.pb));
2843#endif
2844 }
2845
2846 /*
2847 * Install the patch.
2848 */
2849 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2850 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));
2851
2852 /*
2853 * Link in the patch.
2854 */
2855 pPatch->pNext = pDevExt->pIdtPatches;
2856 pDevExt->pIdtPatches = pPatch;
2857
2858 return pPatch;
2859}
2860
2861
2862/**
2863 * Removes the sessions IDT references.
2864 * This will uninstall our IDT patch if we left unreferenced.
2865 *
2866 * @returns VINF_SUCCESS.
2867 * @param pDevExt Device globals.
2868 * @param pSession Session data.
2869 */
2870static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2871{
2872 PSUPDRVPATCHUSAGE pUsage;
2873 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2874 LogFlow(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2875
2876 /*
2877 * Take the spinlock.
2878 */
2879 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2880
2881 /*
2882 * Walk usage list, removing patches as their usage count reaches zero.
2883 */
2884 pUsage = pSession->pPatchUsage;
2885 while (pUsage)
2886 {
2887 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2888 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2889 else
2890 pUsage->pPatch->cUsage -= pUsage->cUsage;
2891
2892 /* next */
2893 pUsage = pUsage->pNext;
2894 }
2895
2896 /*
2897 * Empty the usage chain and we're done inside the spinlock.
2898 */
2899 pUsage = pSession->pPatchUsage;
2900 pSession->pPatchUsage = NULL;
2901
2902 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2903
2904 /*
2905 * Free usage entries.
2906 */
2907 while (pUsage)
2908 {
2909 void *pvToFree = pUsage;
2910 pUsage->cUsage = 0;
2911 pUsage->pPatch = NULL;
2912 pUsage = pUsage->pNext;
2913 RTMemFree(pvToFree);
2914 }
2915
2916 return VINF_SUCCESS;
2917}
2918
2919
2920/**
2921 * Remove one patch.
2922 *
2923 * Worker for supdrvIOCtl_IdtRemoveAll.
2924 *
2925 * @param pDevExt Device globals.
2926 * @param pPatch Patch entry to remove.
2927 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2928 */
2929static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2930{
2931 LogFlow(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2932
2933 pPatch->cUsage = 0;
2934
2935 /*
2936 * If the IDT entry was changed it have to kick around for ever!
2937 * This will be attempted freed again, perhaps next time we'll succeed :-)
2938 */
2939 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2940 {
2941 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2942 return;
2943 }
2944
2945 /*
2946 * Unlink it.
2947 */
2948 if (pDevExt->pIdtPatches != pPatch)
2949 {
2950 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
2951 while (pPatchPrev)
2952 {
2953 if (pPatchPrev->pNext == pPatch)
2954 {
2955 pPatchPrev->pNext = pPatch->pNext;
2956 break;
2957 }
2958 pPatchPrev = pPatchPrev->pNext;
2959 }
2960 Assert(!pPatchPrev);
2961 }
2962 else
2963 pDevExt->pIdtPatches = pPatch->pNext;
2964 pPatch->pNext = NULL;
2965
2966
2967 /*
2968 * Verify and restore the IDT.
2969 */
2970 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2971 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
2972 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
2973
2974 /*
2975 * Put it in the free list.
2976 * (This free list stuff is to calm my paranoia.)
2977 */
2978 pPatch->pvIdt = NULL;
2979 pPatch->pIdtEntry = NULL;
2980
2981 pPatch->pNext = pDevExt->pIdtPatchesFree;
2982 pDevExt->pIdtPatchesFree = pPatch;
2983}
2984
2985
/**
 * Writes a new value to an IDT entry using a single atomic exchange, with
 * interrupts disabled and CR0.WP cleared for the duration of the write.
 *
 * The previous RFLAGS/EFLAGS and CR0 values are restored before returning.
 *
 * @param   pvIdtEntry      Where to write.
 * @param   pNewIDTEntry    What to write.
 */
static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
{
    RTR0UINTREG uCR0;
    RTR0UINTREG uFlags;

    /*
     * On SMP machines (P4 hyperthreading included) we must perform a
     * locked write when updating the IDT entry (64-bit on x86, 128-bit
     * on AMD64 - see the exchange below).
     *
     * The F00F bugfix for linux (and probably other OSes) causes
     * the IDT to be pointing to a read-only mapping. We get around that
     * by temporarily turning off WP. Since we're inside a spinlock at this
     * point, interrupts are disabled and there isn't any way the WP bit
     * flipping can cause any trouble.
     */

    /* Save & Clear interrupt flag; Save & clear WP. */
    uFlags = ASMGetFlags();
    ASMSetFlags(uFlags & ~(RTR0UINTREG)(1 << 9)); /*X86_EFL_IF*/
    Assert(!(ASMGetFlags() & (1 << 9)));
    uCR0 = ASMGetCR0();
    ASMSetCR0(uCR0 & ~(RTR0UINTREG)(1 << 16)); /*X86_CR0_WP*/

    /* Update IDT Entry (the entry is exchanged as one 128-bit or 64-bit unit). */
#ifdef RT_ARCH_AMD64
    ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
#else
    ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
#endif

    /* Restore CR0 & Flags (in the reverse order of how they were changed). */
    ASMSetCR0(uCR0);
    ASMSetFlags(uFlags);
}
3026#endif /* VBOX_WITH_IDT_PATCHING */
3027
3028
3029/**
3030 * Opens an image. If it's the first time it's opened the call must upload
3031 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
3032 *
3033 * This is the 1st step of the loading.
3034 *
3035 * @returns IPRT status code.
3036 * @param pDevExt Device globals.
3037 * @param pSession Session data.
3038 * @param pReq The open request.
3039 */
3040static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
3041{
3042 PSUPDRVLDRIMAGE pImage;
3043 unsigned cb;
3044 void *pv;
3045 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
3046
3047 /*
3048 * Check if we got an instance of the image already.
3049 */
3050 RTSemFastMutexRequest(pDevExt->mtxLdr);
3051 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
3052 {
3053 if (!strcmp(pImage->szName, pReq->u.In.szName))
3054 {
3055 pImage->cUsage++;
3056 pReq->u.Out.pvImageBase = pImage->pvImage;
3057 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
3058 supdrvLdrAddUsage(pSession, pImage);
3059 RTSemFastMutexRelease(pDevExt->mtxLdr);
3060 return VINF_SUCCESS;
3061 }
3062 }
3063 /* (not found - add it!) */
3064
3065 /*
3066 * Allocate memory.
3067 */
3068 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
3069 pv = RTMemExecAlloc(cb);
3070 if (!pv)
3071 {
3072 RTSemFastMutexRelease(pDevExt->mtxLdr);
3073 Log(("supdrvIOCtl_LdrOpen: RTMemExecAlloc(%u) failed\n", cb));
3074 return VERR_NO_MEMORY;
3075 }
3076
3077 /*
3078 * Setup and link in the LDR stuff.
3079 */
3080 pImage = (PSUPDRVLDRIMAGE)pv;
3081 pImage->pvImage = RT_ALIGN_P(pImage + 1, 32);
3082 pImage->cbImage = pReq->u.In.cbImage;
3083 pImage->pfnModuleInit = NULL;
3084 pImage->pfnModuleTerm = NULL;
3085 pImage->uState = SUP_IOCTL_LDR_OPEN;
3086 pImage->cUsage = 1;
3087 strcpy(pImage->szName, pReq->u.In.szName);
3088
3089 pImage->pNext = pDevExt->pLdrImages;
3090 pDevExt->pLdrImages = pImage;
3091
3092 supdrvLdrAddUsage(pSession, pImage);
3093
3094 pReq->u.Out.pvImageBase = pImage->pvImage;
3095 pReq->u.Out.fNeedsLoading = true;
3096 RTSemFastMutexRelease(pDevExt->mtxLdr);
3097 return VINF_SUCCESS;
3098}
3099
3100
/**
 * Loads the image bits.
 *
 * This is the 2nd step of the loading: the image previously opened by the
 * session is located, the request is validated, the bits are copied into the
 * image memory, the entry points are registered (if requested) and finally
 * the module initialization function is called (if any).
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_HANDLE if the session has no image with the given
 *          base address, or if the image size does not match.
 * @retval  SUPDRV_ERR_ALREADY_LOADED if the image is not in the opened state.
 * @retval  VERR_INVALID_PARAMETER if the entry point type is unknown, or an
 *          entry point / init / term address is NULL or outside the image.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));

    /*
     * Find the ldr image.
     * Only images referenced by this session are considered.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;
    /* The size given now must equal the size given when the image was opened. */
    if (pImage->cbImage != pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
        return VERR_INVALID_HANDLE;
    }
    /* Only an image in the opened state can be loaded. */
    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return SUPDRV_ERR_ALREADY_LOADED;
    }
    /* Validate the entry points according to the entry point type. */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;
        case SUPLDRLOADEP_VMMR0:
            /* All four VMMR0 addresses must be given... */
            if (    !pReq->u.In.EP.VMMR0.pvVMMR0
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
                ||  !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
            {
                RTSemFastMutexRelease(pDevExt->mtxLdr);
                Log(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
                     pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                     pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
                return VERR_INVALID_PARAMETER;
            }
            /* ... and the three entry points must lie inside the image. The unsigned
               subtraction also rejects addresses below the image base. */
            if (    (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt  - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
                ||  (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
                ||  (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx   - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
            {
                RTSemFastMutexRelease(pDevExt->mtxLdr);
                Log(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p is NULL!\n",
                     pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                     pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
                return VERR_INVALID_PARAMETER;
            }
            break;
        default:
            RTSemFastMutexRelease(pDevExt->mtxLdr);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }
    /* The optional init and term functions must lie inside the image as well. */
    if (    pReq->u.In.pfnModuleInit
        &&  (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
             pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
        return VERR_INVALID_PARAMETER;
    }
    if (    pReq->u.In.pfnModuleTerm
        &&  (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
             pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Copy the memory.
     * The image is marked loaded and the symbol / string table layout from
     * the request is recorded for later symbol lookups.
     */
    /* no need to do try/except as this is a buffered request. */
    memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
    pImage->uState = SUP_IOCTL_LDR_LOAD;
    pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
    pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
    pImage->offSymbols    = pReq->u.In.offSymbols;
    pImage->cSymbols      = pReq->u.In.cSymbols;
    pImage->offStrTab     = pReq->u.In.offStrTab;
    pImage->cbStrTab      = pReq->u.In.cbStrTab;

    /*
     * Update any entry points.
     */
    switch (pReq->u.In.eEPType)
    {
        default:
        case SUPLDRLOADEP_NOTHING:
            rc = VINF_SUCCESS;
            break;
        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                  pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
            break;
    }

    /*
     * On success call the module initialization.
     * If the initialization fails and this image provides the registered
     * VMMR0 entry points, those are unregistered again.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        rc = pImage->pfnModuleInit();
        if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetR0EP(pDevExt);
    }

    /* Any non-zero status puts the image back into the opened (not loaded) state. */
    if (rc)
        pImage->uState = SUP_IOCTL_LDR_OPEN;

    RTSemFastMutexRelease(pDevExt->mtxLdr);
    return rc;
}
3241
3242
/**
 * Frees a previously loaded (prep'ed) image.
 *
 * Drops one of the session's references to the image. When it is the last
 * reference of the session (or of the image as a whole), the session's usage
 * record is removed and, if nobody else uses the image, the image itself is
 * freed - unless objects with destructors inside the image are still around.
 *
 * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE if the session does not
 *          reference an image with the given base address.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
{
    int rc;
    PSUPDRVLDRUSAGE pUsagePrev;
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));

    /*
     * Find the ldr image.
     * The predecessor is tracked so the usage record can be unlinked later.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    pUsagePrev = NULL;
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    {
        pUsagePrev = pUsage;
        pUsage = pUsage->pNext;
    }
    if (!pUsage)
    {
        RTSemFastMutexRelease(pDevExt->mtxLdr);
        Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Check if we can remove anything.
     */
    rc = VINF_SUCCESS;
    pImage = pUsage->pImage;
    if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
    {
        /*
         * Check if there are any objects with destructors in the image, if
         * so leave it for the session cleanup routine so we get a chance to
         * clean things up in the right order and not leave them all dangling.
         */
        RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        if (pImage->cUsage <= 1)
        {
            /* Last reference to the image: check every object in the device extension. */
            PSUPDRVOBJ pObj;
            for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
                if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
                {
                    rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
                    break;
                }
        }
        else
        {
            /* Other references remain: only check the objects used by this session. */
            PSUPDRVUSAGE pGenUsage;
            for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
                if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
                {
                    rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
                    break;
                }
        }
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        if (rc == VINF_SUCCESS)
        {
            /* unlink it */
            if (pUsagePrev)
                pUsagePrev->pNext = pUsage->pNext;
            else
                pSession->pLdrUsage = pUsage->pNext;

            /* free it */
            pUsage->pImage = NULL;
            pUsage->pNext = NULL;
            RTMemFree(pUsage);

            /*
             * Dereference the image.
             */
            if (pImage->cUsage <= 1)
                supdrvLdrFree(pDevExt, pImage);
            else
                pImage->cUsage--;
        }
        else
            Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
    }
    else
    {
        /*
         * Dereference both image and usage.
         */
        pImage->cUsage--;
        pUsage->cUsage--;
    }

    RTSemFastMutexRelease(pDevExt->mtxLdr);
    /* NOTE(review): rc is not returned; the dangling-objects case above is
       reported as success to the caller - confirm this is intentional. */
    return VINF_SUCCESS;
}
3348
3349
3350/**
3351 * Gets the address of a symbol in an open image.
3352 *
3353 * @returns 0 on success.
3354 * @returns SUPDRV_ERR_* on failure.
3355 * @param pDevExt Device globals.
3356 * @param pSession Session data.
3357 * @param pReq The request buffer.
3358 */
3359static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3360{
3361 PSUPDRVLDRIMAGE pImage;
3362 PSUPDRVLDRUSAGE pUsage;
3363 uint32_t i;
3364 PSUPLDRSYM paSyms;
3365 const char *pchStrings;
3366 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3367 void *pvSymbol = NULL;
3368 int rc = VERR_GENERAL_FAILURE;
3369 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3370
3371 /*
3372 * Find the ldr image.
3373 */
3374 RTSemFastMutexRequest(pDevExt->mtxLdr);
3375 pUsage = pSession->pLdrUsage;
3376 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3377 pUsage = pUsage->pNext;
3378 if (!pUsage)
3379 {
3380 RTSemFastMutexRelease(pDevExt->mtxLdr);
3381 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3382 return VERR_INVALID_HANDLE;
3383 }
3384 pImage = pUsage->pImage;
3385 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3386 {
3387 unsigned uState = pImage->uState;
3388 RTSemFastMutexRelease(pDevExt->mtxLdr);
3389 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3390 return VERR_ALREADY_LOADED;
3391 }
3392
3393 /*
3394 * Search the symbol string.
3395 */
3396 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3397 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3398 for (i = 0; i < pImage->cSymbols; i++)
3399 {
3400 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3401 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3402 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3403 {
3404 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3405 rc = VINF_SUCCESS;
3406 break;
3407 }
3408 }
3409 RTSemFastMutexRelease(pDevExt->mtxLdr);
3410 pReq->u.Out.pvSymbol = pvSymbol;
3411 return rc;
3412}
3413
3414
3415/**
3416 * Updates the IDT patches to point to the specified VMM R0 entry
3417 * point (i.e. VMMR0Enter()).
3418 *
3419 * @returns IPRT status code.
3420 * @param pDevExt Device globals.
3421 * @param pSession Session data.
3422 * @param pVMMR0 VMMR0 image handle.
3423 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3424 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3425 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3426 * @remark Caller must own the loader mutex.
3427 */
3428static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3429{
3430 int rc = VINF_SUCCESS;
3431 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3432
3433
3434 /*
3435 * Check if not yet set.
3436 */
3437 if (!pDevExt->pvVMMR0)
3438 {
3439#ifdef VBOX_WITH_IDT_PATCHING
3440 PSUPDRVPATCH pPatch;
3441#endif
3442
3443 /*
3444 * Set it and update IDT patch code.
3445 */
3446 pDevExt->pvVMMR0 = pvVMMR0;
3447 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3448 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3449 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3450#ifdef VBOX_WITH_IDT_PATCHING
3451 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3452 {
3453# ifdef RT_ARCH_AMD64
3454 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3455# else /* RT_ARCH_X86 */
3456 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3457 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3458# endif
3459 }
3460#endif /* VBOX_WITH_IDT_PATCHING */
3461 }
3462 else
3463 {
3464 /*
3465 * Return failure or success depending on whether the values match or not.
3466 */
3467 if ( pDevExt->pvVMMR0 != pvVMMR0
3468 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3469 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3470 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3471 {
3472 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3473 rc = VERR_INVALID_PARAMETER;
3474 }
3475 }
3476 return rc;
3477}
3478
3479
3480/**
3481 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3482 *
3483 * @param pDevExt Device globals.
3484 */
3485static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3486{
3487#ifdef VBOX_WITH_IDT_PATCHING
3488 PSUPDRVPATCH pPatch;
3489#endif
3490
3491 pDevExt->pvVMMR0 = NULL;
3492 pDevExt->pfnVMMR0EntryInt = NULL;
3493 pDevExt->pfnVMMR0EntryFast = NULL;
3494 pDevExt->pfnVMMR0EntryEx = NULL;
3495
3496#ifdef VBOX_WITH_IDT_PATCHING
3497 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3498 {
3499# ifdef RT_ARCH_AMD64
3500 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3501 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3502# else /* RT_ARCH_X86 */
3503 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3504 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3505# endif
3506 }
3507#endif /* VBOX_WITH_IDT_PATCHING */
3508}
3509
3510
3511/**
3512 * Adds a usage reference in the specified session of an image.
3513 *
3514 * @param pSession Session in question.
3515 * @param pImage Image which the session is using.
3516 */
3517static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3518{
3519 PSUPDRVLDRUSAGE pUsage;
3520 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3521
3522 /*
3523 * Referenced it already?
3524 */
3525 pUsage = pSession->pLdrUsage;
3526 while (pUsage)
3527 {
3528 if (pUsage->pImage == pImage)
3529 {
3530 pUsage->cUsage++;
3531 return;
3532 }
3533 pUsage = pUsage->pNext;
3534 }
3535
3536 /*
3537 * Allocate new usage record.
3538 */
3539 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3540 Assert(pUsage);
3541 if (pUsage)
3542 {
3543 pUsage->cUsage = 1;
3544 pUsage->pImage = pImage;
3545 pUsage->pNext = pSession->pLdrUsage;
3546 pSession->pLdrUsage = pUsage;
3547 }
3548 /* ignore errors... */
3549}
3550
3551
3552/**
3553 * Frees a load image.
3554 *
3555 * @param pDevExt Pointer to device extension.
3556 * @param pImage Pointer to the image we're gonna free.
3557 * This image must exit!
3558 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3559 */
3560static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3561{
3562 PSUPDRVLDRIMAGE pImagePrev;
3563 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
3564
3565 /* find it - arg. should've used doubly linked list. */
3566 Assert(pDevExt->pLdrImages);
3567 pImagePrev = NULL;
3568 if (pDevExt->pLdrImages != pImage)
3569 {
3570 pImagePrev = pDevExt->pLdrImages;
3571 while (pImagePrev->pNext != pImage)
3572 pImagePrev = pImagePrev->pNext;
3573 Assert(pImagePrev->pNext == pImage);
3574 }
3575
3576 /* unlink */
3577 if (pImagePrev)
3578 pImagePrev->pNext = pImage->pNext;
3579 else
3580 pDevExt->pLdrImages = pImage->pNext;
3581
3582 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3583 if (pDevExt->pvVMMR0 == pImage->pvImage)
3584 supdrvLdrUnsetR0EP(pDevExt);
3585
3586 /* check for objects with destructors in this image. (Shouldn't happen.) */
3587 if (pDevExt->pObjs)
3588 {
3589 unsigned cObjs = 0;
3590 PSUPDRVOBJ pObj;
3591 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3592 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3593 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3594 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3595 {
3596 pObj->pfnDestructor = NULL;
3597 cObjs++;
3598 }
3599 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3600 if (cObjs)
3601 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3602 }
3603
3604 /* call termination function if fully loaded. */
3605 if ( pImage->pfnModuleTerm
3606 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3607 {
3608 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3609 pImage->pfnModuleTerm();
3610 }
3611
3612 /* free the image */
3613 pImage->cUsage = 0;
3614 pImage->pNext = 0;
3615 pImage->uState = SUP_IOCTL_LDR_FREE;
3616 RTMemExecFree(pImage);
3617}
3618
3619
3620/**
3621 * Gets the current paging mode of the CPU and stores in in pOut.
3622 */
3623static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3624{
3625 SUPPAGINGMODE enmMode;
3626
3627 RTR0UINTREG cr0 = ASMGetCR0();
3628 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3629 enmMode = SUPPAGINGMODE_INVALID;
3630 else
3631 {
3632 RTR0UINTREG cr4 = ASMGetCR4();
3633 uint32_t fNXEPlusLMA = 0;
3634 if (cr4 & X86_CR4_PAE)
3635 {
3636 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3637 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3638 {
3639 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3640 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3641 fNXEPlusLMA |= RT_BIT(0);
3642 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3643 fNXEPlusLMA |= RT_BIT(1);
3644 }
3645 }
3646
3647 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3648 {
3649 case 0:
3650 enmMode = SUPPAGINGMODE_32_BIT;
3651 break;
3652
3653 case X86_CR4_PGE:
3654 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3655 break;
3656
3657 case X86_CR4_PAE:
3658 enmMode = SUPPAGINGMODE_PAE;
3659 break;
3660
3661 case X86_CR4_PAE | RT_BIT(0):
3662 enmMode = SUPPAGINGMODE_PAE_NX;
3663 break;
3664
3665 case X86_CR4_PAE | X86_CR4_PGE:
3666 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3667 break;
3668
3669 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3670 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3671 break;
3672
3673 case RT_BIT(1) | X86_CR4_PAE:
3674 enmMode = SUPPAGINGMODE_AMD64;
3675 break;
3676
3677 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3678 enmMode = SUPPAGINGMODE_AMD64_NX;
3679 break;
3680
3681 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3682 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3683 break;
3684
3685 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3686 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3687 break;
3688
3689 default:
3690 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3691 enmMode = SUPPAGINGMODE_INVALID;
3692 break;
3693 }
3694 }
3695 return enmMode;
3696}
3697
3698
3699#ifdef USE_NEW_OS_INTERFACE_FOR_GIP
/**
 * Creates the GIP.
 *
 * Allocates the GIP page, tries to raise the system timer resolution,
 * initializes the GIP data and creates the timer that updates it. In async
 * TSC mode a multiprocessor event callback is registered as well.
 *
 * On failure everything done so far is undone via supdrvGipDestroy().
 *
 * @returns IPRT status code (VINF_SUCCESS on success).
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
{
    PSUPGLOBALINFOPAGE pGip;
    RTHCPHYS HCPhysGip;
    uint32_t u32SystemResolution;
    uint32_t u32Interval;
    int rc;

    LogFlow(("supdrvGipCreate:\n"));

    /* assert order */
    Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
    Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
    Assert(!pDevExt->pGipTimer);

    /*
     * Allocate a suitable page with a default kernel mapping.
     */
    rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
    if (RT_FAILURE(rc))
    {
        OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
        return rc;
    }
    pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
    HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);

    /*
     * Try bump up the system timer resolution.
     * The more interrupts the better...
     * (The requests are tried in order and the first one that succeeds is
     * remembered so that supdrvGipDestroy can release it again.)
     */
    if (   RT_SUCCESS(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /*  256 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /*  250 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /*  128 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /*  100 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /*   64 HZ */, &u32SystemResolution))
        || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /*   32 HZ */, &u32SystemResolution))
       )
    {
        Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
        pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
    }

    /*
     * Find a reasonable update interval and initialize the structure.
     * The interval is the smallest multiple of the system granularity that
     * is at least 10 ms.
     */
    u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
    while (u32Interval < 10000000 /* 10 ms */)
        u32Interval += u32SystemResolution;

    supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);

    /*
     * Create the timer.
     * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
     */
    if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    {
        rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
        if (rc == VERR_NOT_SUPPORTED)
        {
            OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
            pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
        }
    }
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipSyncTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        /* The MP event callback is only registered in async mode. */
        if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
            rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
        if (RT_SUCCESS(rc))
        {
            /*
             * We're good.
             */
            dprintf(("supdrvGipCreate: %ld ns interval.\n", (long)u32Interval));
            return VINF_SUCCESS;
        }

        OSDBGPRINT(("supdrvGipCreate: failed register MP event notfication. rc=%d\n", rc));
    }
    else
    {
        OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %ld ns interval. rc=%d\n", (long)u32Interval, rc));
        Assert(!pDevExt->pGipTimer);
    }
    /* Failure: undo whatever was set up above. */
    supdrvGipDestroy(pDevExt);
    return rc;
}
3798
3799
3800/**
3801 * Terminates the GIP.
3802 *
3803 * @param pDevExt Instance data. GIP stuff may be updated.
3804 */
3805static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3806{
3807 int rc;
3808#ifdef DEBUG_DARWIN_GIP
3809 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3810 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3811 pDevExt->pGipTimer, pDevExt->GipMemObj));
3812#endif
3813
3814 /*
3815 * Invalid the GIP data.
3816 */
3817 if (pDevExt->pGip)
3818 {
3819 supdrvGipTerm(pDevExt->pGip);
3820 pDevExt->pGip = NULL;
3821 }
3822
3823 /*
3824 * Destroy the timer and free the GIP memory object.
3825 */
3826 if (pDevExt->pGipTimer)
3827 {
3828 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3829 pDevExt->pGipTimer = NULL;
3830 }
3831
3832 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3833 {
3834 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3835 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3836 }
3837
3838 /*
3839 * Finally, release the system timer resolution request if one succeeded.
3840 */
3841 if (pDevExt->u32SystemTimerGranularityGrant)
3842 {
3843 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3844 pDevExt->u32SystemTimerGranularityGrant = 0;
3845 }
3846}
3847
3848
3849/**
3850 * Timer callback function sync GIP mode.
3851 * @param pTimer The timer.
3852 * @param pvUser The device extension.
3853 */
3854static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3855{
3856 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3857 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3858}
3859
3860
3861/**
3862 * Timer callback function for async GIP mode.
3863 * @param pTimer The timer.
3864 * @param pvUser The device extension.
3865 */
3866static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3867{
3868 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3869 RTCPUID idCpu = RTMpCpuId();
3870 uint64_t NanoTS = RTTimeSystemNanoTS();
3871
3872 /** @todo reset the transaction number and whatnot when iTick == 1. */
3873 if (pDevExt->idGipMaster == idCpu)
3874 supdrvGipUpdate(pDevExt->pGip, NanoTS);
3875 else
3876 supdrvGipUpdatePerCpu(pDevExt->pGip, NanoTS, ASMGetApicId());
3877}
3878
3879
3880/**
3881 * Multiprocessor event notification callback.
3882 *
3883 * This is used to make sue that the GIP master gets passed on to
3884 * another CPU.
3885 *
3886 * @param enmEvent The event.
3887 * @param idCpu The cpu it applies to.
3888 * @param pvUser Pointer to the device extension.
3889 */
3890static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
3891{
3892 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3893 if (enmEvent == RTMPEVENT_OFFLINE)
3894 {
3895 RTCPUID idGipMaster;
3896 ASMAtomicReadSize(&pDevExt->idGipMaster, &idGipMaster);
3897 if (idGipMaster == idCpu)
3898 {
3899 /*
3900 * Find a new GIP master.
3901 */
3902 bool fIgnored;
3903 unsigned i;
3904 RTCPUID idNewGipMaster = NIL_RTCPUID;
3905 RTCPUSET OnlineCpus;
3906 RTMpGetOnlineSet(&OnlineCpus);
3907
3908 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
3909 {
3910 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
3911 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
3912 && idCurCpu != idGipMaster)
3913 {
3914 idNewGipMaster = idCurCpu;
3915 break;
3916 }
3917 }
3918
3919 dprintf(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
3920 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
3921 NOREF(fIgnored);
3922 }
3923 }
3924}
3925
3926#endif /* USE_NEW_OS_INTERFACE_FOR_GIP */
3927
3928
3929/**
3930 * Initializes the GIP data.
3931 *
3932 * @returns IPRT status code.
3933 * @param pDevExt Pointer to the device instance data.
3934 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3935 * @param HCPhys The physical address of the GIP.
3936 * @param u64NanoTS The current nanosecond timestamp.
3937 * @param uUpdateHz The update freqence.
3938 */
3939int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3940{
3941 unsigned i;
3942#ifdef DEBUG_DARWIN_GIP
3943 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3944#else
3945 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
3946#endif
3947
3948 /*
3949 * Initialize the structure.
3950 */
3951 memset(pGip, 0, PAGE_SIZE);
3952 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
3953 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
3954 pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
3955 pGip->u32UpdateHz = uUpdateHz;
3956 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
3957 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
3958
3959 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
3960 {
3961 pGip->aCPUs[i].u32TransactionId = 2;
3962 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
3963 pGip->aCPUs[i].u64TSC = ASMReadTSC();
3964
3965 /*
3966 * We don't know the following values until we've executed updates.
3967 * So, we'll just insert very high values.
3968 */
3969 pGip->aCPUs[i].u64CpuHz = _4G + 1;
3970 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
3971 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
3972 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
3973 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
3974 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
3975 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
3976 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
3977 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
3978 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
3979 }
3980
3981 /*
3982 * Link it to the device extension.
3983 */
3984 pDevExt->pGip = pGip;
3985 pDevExt->HCPhysGip = HCPhys;
3986 pDevExt->cGipUsers = 0;
3987
3988 return VINF_SUCCESS;
3989}
3990
3991
3992/**
3993 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
3994 *
3995 * @param idCpu Ignored.
3996 * @param pvUser1 Where to put the TSC.
3997 * @param pvUser2 Ignored.
3998 */
3999static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4000{
4001#if 1
4002 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
4003#else
4004 *(uint64_t *)pvUser1 = ASMReadTSC();
4005#endif
4006}
4007
4008
4009/**
4010 * Determine if Async GIP mode is required because of TSC drift.
4011 *
4012 * When using the default/normal timer code it is essential that the time stamp counter
4013 * (TSC) runs never backwards, that is, a read operation to the counter should return
4014 * a bigger value than any previous read operation. This is guaranteed by the latest
4015 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
4016 * case we have to choose the asynchronous timer mode.
4017 *
4018 * @param poffMin Pointer to the determined difference between different cores.
4019 * @return false if the time stamp counters appear to be synchron, true otherwise.
4020 */
4021bool VBOXCALL supdrvDetermineAsyncTsc(uint64_t *poffMin)
4022{
4023 /*
4024 * Just iterate all the cpus 8 times and make sure that the TSC is
4025 * ever increasing. We don't bother taking TSC rollover into account.
4026 */
4027 RTCPUSET CpuSet;
4028 int iLastCpu = RTCpuLastIndex(RTMpGetSet(&CpuSet));
4029 int iCpu;
4030 int cLoops = 8;
4031 bool fAsync = false;
4032 int rc;
4033 uint64_t offMax = 0;
4034 uint64_t offMin = ~(uint64_t)0;
4035 uint64_t PrevTsc = ASMReadTSC();
4036
4037 while (cLoops-- > 0)
4038 {
4039 for (iCpu = 0; iCpu <= iLastCpu; iCpu++)
4040 {
4041 uint64_t CurTsc;
4042 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
4043 if (RT_SUCCESS(rc))
4044 {
4045 if (CurTsc <= PrevTsc)
4046 {
4047 fAsync = true;
4048 offMin = offMax = PrevTsc - CurTsc;
4049 dprintf(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
4050 iCpu, cLoops, CurTsc, PrevTsc));
4051 break;
4052 }
4053
4054 /* Gather statistics (except the first time). */
4055 if (iCpu != 0 || cLoops != 7)
4056 {
4057 uint64_t off = CurTsc - PrevTsc;
4058 if (off < offMin)
4059 offMin = off;
4060 if (off > offMax)
4061 offMax = off;
4062 dprintf2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
4063 }
4064
4065 /* Next */
4066 PrevTsc = CurTsc;
4067 }
4068 else if (rc == VERR_NOT_SUPPORTED)
4069 break;
4070 else
4071 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
4072 }
4073
4074 /* broke out of the loop. */
4075 if (iCpu <= iLastCpu)
4076 break;
4077 }
4078
4079 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
4080 dprintf(("supdrvDetermineAsyncTsc: returns %d; iLastCpu=%d rc=%d offMin=%llx offMax=%llx\n",
4081 fAsync, iLastCpu, rc, offMin, offMax));
4082#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
4083 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
4084#endif
4085 return fAsync;
4086}
4087
4088
4089/**
4090 * Determin the GIP TSC mode.
4091 *
4092 * @returns The most suitable TSC mode.
4093 * @param pDevExt Pointer to the device instance data.
4094 */
4095static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
4096{
4097 /*
4098 * On SMP we're faced with two problems:
4099 * (1) There might be a skew between the CPU, so that cpu0
4100 * returns a TSC that is sligtly different from cpu1.
4101 * (2) Power management (and other things) may cause the TSC
4102 * to run at a non-constant speed, and cause the speed
4103 * to be different on the cpus. This will result in (1).
4104 *
4105 * So, on SMP systems we'll have to select the ASYNC update method
4106 * if there are symphoms of these problems.
4107 */
4108 if (RTMpGetCount() > 1)
4109 {
4110 uint32_t uEAX, uEBX, uECX, uEDX;
4111 uint64_t u64DiffCoresIgnored;
4112
4113 /* Permit the user and/or the OS specfic bits to force async mode. */
4114 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
4115 return SUPGIPMODE_ASYNC_TSC;
4116
4117 /* Try check for current differences between the cpus. */
4118 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
4119 return SUPGIPMODE_ASYNC_TSC;
4120
4121 /*
4122 * If the CPU supports power management and is an AMD one we
4123 * won't trust it unless it has the TscInvariant bit is set.
4124 */
4125 /* Check for "AuthenticAMD" */
4126 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
4127 if ( uEAX >= 1
4128 && uEBX == X86_CPUID_VENDOR_AMD_EBX
4129 && uECX == X86_CPUID_VENDOR_AMD_ECX
4130 && uEDX == X86_CPUID_VENDOR_AMD_EDX)
4131 {
4132 /* Check for APM support and that TscInvariant is cleared. */
4133 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
4134 if (uEAX >= 0x80000007)
4135 {
4136 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
4137 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
4138 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
4139 return SUPGIPMODE_ASYNC_TSC;
4140 }
4141 }
4142 }
4143 return SUPGIPMODE_SYNC_TSC;
4144}
4145
4146
4147/**
4148 * Invalidates the GIP data upon termination.
4149 *
4150 * @param pGip Pointer to the read-write kernel mapping of the GIP.
4151 */
4152void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
4153{
4154 unsigned i;
4155 pGip->u32Magic = 0;
4156 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
4157 {
4158 pGip->aCPUs[i].u64NanoTS = 0;
4159 pGip->aCPUs[i].u64TSC = 0;
4160 pGip->aCPUs[i].iTSCHistoryHead = 0;
4161 }
4162}
4163
4164
4165/**
4166 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
4167 * updates all the per cpu data except the transaction id.
4168 *
4169 * @param pGip The GIP.
4170 * @param pGipCpu Pointer to the per cpu data.
4171 * @param u64NanoTS The current time stamp.
4172 */
4173static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
4174{
4175 uint64_t u64TSC;
4176 uint64_t u64TSCDelta;
4177 uint32_t u32UpdateIntervalTSC;
4178 uint32_t u32UpdateIntervalTSCSlack;
4179 unsigned iTSCHistoryHead;
4180 uint64_t u64CpuHz;
4181
4182 /*
4183 * Update the NanoTS.
4184 */
4185 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
4186
4187 /*
4188 * Calc TSC delta.
4189 */
4190 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
4191 u64TSC = ASMReadTSC();
4192 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
4193 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
4194
4195 if (u64TSCDelta >> 32)
4196 {
4197 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
4198 pGipCpu->cErrors++;
4199 }
4200
4201 /*
4202 * TSC History.
4203 */
4204 Assert(ELEMENTS(pGipCpu->au32TSCHistory) == 8);
4205
4206 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
4207 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
4208 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
4209
4210 /*
4211 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
4212 */
4213 if (pGip->u32UpdateHz >= 1000)
4214 {
4215 uint32_t u32;
4216 u32 = pGipCpu->au32TSCHistory[0];
4217 u32 += pGipCpu->au32TSCHistory[1];
4218 u32 += pGipCpu->au32TSCHistory[2];
4219 u32 += pGipCpu->au32TSCHistory[3];
4220 u32 >>= 2;
4221 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
4222 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
4223 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
4224 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
4225 u32UpdateIntervalTSC >>= 2;
4226 u32UpdateIntervalTSC += u32;
4227 u32UpdateIntervalTSC >>= 1;
4228
4229 /* Value choosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
4230 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
4231 }
4232 else if (pGip->u32UpdateHz >= 90)
4233 {
4234 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4235 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
4236 u32UpdateIntervalTSC >>= 1;
4237
4238 /* value choosen on a 2GHz thinkpad running windows */
4239 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
4240 }
4241 else
4242 {
4243 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4244
4245 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
4246 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
4247 }
4248 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
4249
4250 /*
4251 * CpuHz.
4252 */
4253 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
4254 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
4255}
4256
4257
4258/**
4259 * Updates the GIP.
4260 *
4261 * @param pGip Pointer to the GIP.
4262 * @param u64NanoTS The current nanosecond timesamp.
4263 */
4264void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
4265{
4266 /*
4267 * Determin the relevant CPU data.
4268 */
4269 PSUPGIPCPU pGipCpu;
4270 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
4271 pGipCpu = &pGip->aCPUs[0];
4272 else
4273 {
4274 unsigned iCpu = ASMGetApicId();
4275 if (RT_LIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
4276 return;
4277 pGipCpu = &pGip->aCPUs[iCpu];
4278 }
4279
4280 /*
4281 * Start update transaction.
4282 */
4283 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4284 {
4285 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
4286 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4287 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4288 pGipCpu->cErrors++;
4289 return;
4290 }
4291
4292 /*
4293 * Recalc the update frequency every 0x800th time.
4294 */
4295 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
4296 {
4297 if (pGip->u64NanoTSLastUpdateHz)
4298 {
4299#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4300 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4301 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4302 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4303 {
4304 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4305 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4306 }
4307#endif
4308 }
4309 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4310 }
4311
4312 /*
4313 * Update the data.
4314 */
4315 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4316
4317 /*
4318 * Complete transaction.
4319 */
4320 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4321}
4322
4323
4324/**
4325 * Updates the per cpu GIP data for the calling cpu.
4326 *
4327 * @param pGip Pointer to the GIP.
4328 * @param u64NanoTS The current nanosecond timesamp.
4329 * @param iCpu The CPU index.
4330 */
4331void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4332{
4333 PSUPGIPCPU pGipCpu;
4334
4335 if (RT_LIKELY(iCpu < RT_ELEMENTS(pGip->aCPUs)))
4336 {
4337 pGipCpu = &pGip->aCPUs[iCpu];
4338
4339 /*
4340 * Start update transaction.
4341 */
4342 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4343 {
4344 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4345 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4346 pGipCpu->cErrors++;
4347 return;
4348 }
4349
4350 /*
4351 * Update the data.
4352 */
4353 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4354
4355 /*
4356 * Complete transaction.
4357 */
4358 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4359 }
4360}
4361
4362
4363#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
4364/**
4365 * Stub function for non-debug builds.
4366 */
4367RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
4368{
4369 return NULL;
4370}
4371
4372RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4373{
4374 return NULL;
4375}
4376
4377/**
4378 * Stub function for non-debug builds.
4379 */
4380RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4381{
4382 return 0;
4383}
4384
4385/**
4386 * Stub function for non-debug builds.
4387 */
4388RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4389{
4390}
4391
4392/**
4393 * Stub function for non-debug builds.
4394 */
4395RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4396{
4397}
4398
4399/**
4400 * Stub function for non-debug builds.
4401 */
4402RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4403{
4404}
4405
4406/**
4407 * Stub function for non-debug builds.
4408 */
4409RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4410{
4411}
4412
4413/**
4414 * Stub function for non-debug builds.
4415 */
4416RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4417{
4418}
4419#endif /* !DEBUG */
4420
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette