VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53054

Last change on this file since 53054 was 53054, checked in by vboxsync, 10 years ago

HostDrivers/Support: Kernel thread to service TSC-delta measurement requests. Currently disabled.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 272.0 KB
Line 
1/* $Id: SUPDrv.c 53054 2014-10-14 14:52:50Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
/*
 * Static probe macros.  On Solaris and Darwin the dtrace header is included
 * (presumably supplying the VBOXDRV_* probes - confirm against that header);
 * on every other host they are defined here as empty statements so the call
 * sites compile to nothing.
 */
#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
# include "dtrace/SUPDrv.h"
#else
# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2. */
#define GIP_UPDATEHZ_RECALC_FREQ 0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The number of loops during which we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
/** Stop measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_STOP 0
/** Start measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_START 1
/** Worker thread is ready for reading the TSC. */
#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
/** Worker thread is done updating TSC delta info. */
#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
 * with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
/** When IPRT isn't concurrent safe: Worker is ready after waiting for
 * master with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5

/* The primer loops must be fewer than the read-time loops, and the two
   together must leave room within the total loop count. */
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
123
/** @def VBOX_SVN_REV
 * The makefile should define this if it can.  Falls back to 0 when the build
 * does not supply a value. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

/* Debugging aid, compiled out by default: flip the #if to 1 to define
   DO_NOT_START_GIP. */
#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif
133
134
135/*******************************************************************************
136* Internal Functions *
137*******************************************************************************/
/* Session object handle table callbacks. */
static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
/* Per-session memory reference bookkeeping. */
static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
/* Loader (Ldr) request workers and helpers. */
static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
/* Other ioctl request workers. */
static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
/* GIP (global info page) creation, timers, updating and TSC delta measurement. */
static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
static bool supdrvIsInvariantTsc(void);
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned cCpus);
static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
/* Keyboard resume request worker. */
static int supdrvIOCtl_ResumeSuspendedKbds(void);
173
174
175/*******************************************************************************
176* Global Variables *
177*******************************************************************************/
/** Exported pointer to the global info page, NULL until it is set up.  Its
 * address is also published by name through g_aFunctions. */
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
179
/**
 * The TSC delta synchronization struct, padded out to 64 bytes (the cache
 * line size assumed here) and size-checked at compile time.
 */
typedef union SUPTSCDELTASYNC
{
    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t u;
    /** Padding to cache line size. */
    uint8_t u8Padding[64];
} SUPTSCDELTASYNC;
AssertCompileSize(SUPTSCDELTASYNC, 64);
typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;
192
/** Pointer to the TSC delta sync. struct (presumably the raw, unaligned
 * allocation backing g_pTscDeltaSync - TODO confirm at the allocation site). */
static void *g_pvTscDeltaSync;
/** Aligned pointer to the TSC delta sync. struct. */
static PSUPTSCDELTASYNC g_pTscDeltaSync;
/** The TSC delta measurement initiator Cpu Id, NIL_RTCPUID initially. */
static volatile RTCPUID g_idTscDeltaInitiator = NIL_RTCPUID;
/** Number of online/offline events, incremented each time a CPU goes online
 * or offline. */
static volatile uint32_t g_cMpOnOffEvents;
202
/**
 * Array of the R0 SUP API.
 *
 * Each entry pairs an exported symbol name with its address.  The first ten
 * entries (indexes 0 thru 9, the SUPR0Abs* symbols) start out as zero and are
 * patched by index in supdrvInitDevExt(), so their position in the table must
 * not change.  The "SED:" marker comments are kept verbatim; presumably an
 * external script keys on them - confirm before touching them.
 */
static SUPFUNC g_aFunctions[] =
{
/* SED: START */
    /* name function */
    /* Entries with absolute addresses determined at runtime, fixup
       code makes ugly ASSUMPTIONS about the order here: */
    { "SUPR0AbsIs64bit", (void *)0 },
    { "SUPR0Abs64bitKernelCS", (void *)0 },
    { "SUPR0Abs64bitKernelSS", (void *)0 },
    { "SUPR0Abs64bitKernelDS", (void *)0 },
    { "SUPR0AbsKernelCS", (void *)0 },
    { "SUPR0AbsKernelSS", (void *)0 },
    { "SUPR0AbsKernelDS", (void *)0 },
    { "SUPR0AbsKernelES", (void *)0 },
    { "SUPR0AbsKernelFS", (void *)0 },
    { "SUPR0AbsKernelGS", (void *)0 },
    /* Normal function pointers: */
    { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
    { "SUPGetGIP", (void *)SUPGetGIP },
    { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
    { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
    { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
    { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
    { "SUPR0ContFree", (void *)SUPR0ContFree },
    { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
    { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
    { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
    { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
    { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
    { "SUPR0LockMem", (void *)SUPR0LockMem },
    { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
    { "SUPR0LowFree", (void *)SUPR0LowFree },
    { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
    { "SUPR0MemFree", (void *)SUPR0MemFree },
    { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
    { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
    { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
    { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
    { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
    { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
    { "SUPR0PageFree", (void *)SUPR0PageFree },
    { "SUPR0Printf", (void *)SUPR0Printf },
    { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
    { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
    { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
    { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
    { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
    { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
    { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
    { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
    { "SUPSemEventClose", (void *)SUPSemEventClose },
    { "SUPSemEventCreate", (void *)SUPSemEventCreate },
    { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
    { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
    { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
    { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
    { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
    { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
    { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
    { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
    { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
    { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
    { "SUPSemEventSignal", (void *)SUPSemEventSignal },
    { "SUPSemEventWait", (void *)SUPSemEventWait },
    { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
    { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
    { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },

    { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
    { "RTAssertMayPanic", (void *)RTAssertMayPanic },
    { "RTAssertMsg1", (void *)RTAssertMsg1 },
    { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
    { "RTAssertMsg2V", (void *)RTAssertMsg2V },
    { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
    { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
    { "RTCrc32", (void *)RTCrc32 },
    { "RTCrc32Finish", (void *)RTCrc32Finish },
    { "RTCrc32Process", (void *)RTCrc32Process },
    { "RTCrc32Start", (void *)RTCrc32Start },
    { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
    { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
    { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
    { "RTHandleTableCreate", (void *)RTHandleTableCreate },
    { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
    { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
    { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
    { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
    { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
    { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
    { "RTLogLoggerExV", (void *)RTLogLoggerExV },
    { "RTLogPrintfV", (void *)RTLogPrintfV },
    { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
    { "RTMemAllocExTag", (void *)RTMemAllocExTag },
    { "RTMemAllocTag", (void *)RTMemAllocTag },
    { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
    { "RTMemAllocZTag", (void *)RTMemAllocZTag },
    { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
    { "RTMemDupExTag", (void *)RTMemDupExTag },
    { "RTMemDupTag", (void *)RTMemDupTag },
    { "RTMemFree", (void *)RTMemFree },
    { "RTMemFreeEx", (void *)RTMemFreeEx },
    { "RTMemReallocTag", (void *)RTMemReallocTag },
    { "RTMpCpuId", (void *)RTMpCpuId },
    { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
    { "RTMpGetArraySize", (void *)RTMpGetArraySize },
    { "RTMpGetCount", (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
    { "RTMpGetSet", (void *)RTMpGetSet },
    { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
    { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
    { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
    { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
    { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
    { "RTMpOnAll", (void *)RTMpOnAll },
    { "RTMpOnOthers", (void *)RTMpOnOthers },
    { "RTMpOnSpecific", (void *)RTMpOnSpecific },
    { "RTMpPokeCpu", (void *)RTMpPokeCpu },
    { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
    { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
    { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
    { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
    { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
    { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
    { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
    { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
    { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
    { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
    { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
    { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
    { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
    { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
    { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
    { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
    { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
    { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
    { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
    { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
    { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
    { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
    { "RTProcSelf", (void *)RTProcSelf },
    { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
    { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
    { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
    { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
    { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
    { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
    { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
    { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
    { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
    { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
    { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
    { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
    { "RTR0MemObjFree", (void *)RTR0MemObjFree },
    { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
    { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
    { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
    { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
    { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
    { "RTR0MemObjSize", (void *)RTR0MemObjSize },
    { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
    { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
    { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
    { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
    { "RTSemEventCreate", (void *)RTSemEventCreate },
    { "RTSemEventDestroy", (void *)RTSemEventDestroy },
    { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
    { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
    { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
    { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
    { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
    { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
    { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventSignal", (void *)RTSemEventSignal },
    { "RTSemEventWait", (void *)RTSemEventWait },
    { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
    { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
    { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
    { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
    { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
    { "RTSemMutexCreate", (void *)RTSemMutexCreate },
    { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
    { "RTSemMutexRelease", (void *)RTSemMutexRelease },
    { "RTSemMutexRequest", (void *)RTSemMutexRequest },
    { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
    { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
    { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
    { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
    { "RTSpinlockCreate", (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
    { "RTSpinlockRelease", (void *)RTSpinlockRelease },
    { "RTStrCopy", (void *)RTStrCopy },
    { "RTStrDupTag", (void *)RTStrDupTag },
    { "RTStrFormat", (void *)RTStrFormat },
    { "RTStrFormatNumber", (void *)RTStrFormatNumber },
    { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
    { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
    { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
    { "RTStrFormatV", (void *)RTStrFormatV },
    { "RTStrFree", (void *)RTStrFree },
    { "RTStrNCmp", (void *)RTStrNCmp },
    { "RTStrPrintf", (void *)RTStrPrintf },
    { "RTStrPrintfEx", (void *)RTStrPrintfEx },
    { "RTStrPrintfExV", (void *)RTStrPrintfExV },
    { "RTStrPrintfV", (void *)RTStrPrintfV },
    { "RTThreadCreate", (void *)RTThreadCreate },
    { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
    { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
    { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
    { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
    { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
    { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
    { "RTThreadGetName", (void *)RTThreadGetName },
    { "RTThreadGetNative", (void *)RTThreadGetNative },
    { "RTThreadGetType", (void *)RTThreadGetType },
    { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
    { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
    { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
    { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
    { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
    { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
    { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
    { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
    { "RTThreadSelf", (void *)RTThreadSelf },
    { "RTThreadSelfName", (void *)RTThreadSelfName },
    { "RTThreadSleep", (void *)RTThreadSleep },
    { "RTThreadUserReset", (void *)RTThreadUserReset },
    { "RTThreadUserSignal", (void *)RTThreadUserSignal },
    { "RTThreadUserWait", (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
    { "RTThreadWait", (void *)RTThreadWait },
    { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
    { "RTThreadYield", (void *)RTThreadYield },
    { "RTTimeMilliTS", (void *)RTTimeMilliTS },
    { "RTTimeNanoTS", (void *)RTTimeNanoTS },
    { "RTTimeNow", (void *)RTTimeNow },
    { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
    { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
    { "RTTimerCreate", (void *)RTTimerCreate },
    { "RTTimerCreateEx", (void *)RTTimerCreateEx },
    { "RTTimerDestroy", (void *)RTTimerDestroy },
    { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
    { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
    { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
    { "RTTimerStart", (void *)RTTimerStart },
    { "RTTimerStop", (void *)RTTimerStop },
    { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
    { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
    { "RTUuidCompare", (void *)RTUuidCompare },
    { "RTUuidCompareStr", (void *)RTUuidCompareStr },
    { "RTUuidFromStr", (void *)RTUuidFromStr },
/* SED: END */
};
472
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
/**
 * Drag in the rest of IPRT since we share it with the
 * rest of the kernel modules on darwin.
 *
 * NULL terminated; each entry is tagged with the module that needs it.
 */
PFNRT g_apfnVBoxDrvIPRTDeps[] =
{
    /* VBoxNetAdp */
    (PFNRT)RTRandBytes,
    /* VBoxUSB */
    (PFNRT)RTPathStripFilename,
    NULL
};
#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
487
488
489/**
490 * Initializes the device extentsion structure.
491 *
492 * @returns IPRT status code.
493 * @param pDevExt The device extension to initialize.
494 * @param cbSession The size of the session structure. The size of
495 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
496 * defined because we're skipping the OS specific members
497 * then.
498 */
499int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
500{
501 int rc;
502
503#ifdef SUPDRV_WITH_RELEASE_LOGGER
504 /*
505 * Create the release log.
506 */
507 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
508 PRTLOGGER pRelLogger;
509 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
510 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
511 if (RT_SUCCESS(rc))
512 RTLogRelSetDefaultInstance(pRelLogger);
513 /** @todo Add native hook for getting logger config parameters and setting
514 * them. On linux we should use the module parameter stuff... */
515#endif
516
517 /*
518 * Initialize it.
519 */
520 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
521 pDevExt->Spinlock = NIL_RTSPINLOCK;
522 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
523 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
524 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
525 if (RT_SUCCESS(rc))
526 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
527 if (RT_SUCCESS(rc))
528 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
529
530 if (RT_SUCCESS(rc))
531#ifdef SUPDRV_USE_MUTEX_FOR_LDR
532 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
533#else
534 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
535#endif
536 if (RT_SUCCESS(rc))
537 {
538 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
539 if (RT_SUCCESS(rc))
540 {
541#ifdef SUPDRV_USE_MUTEX_FOR_LDR
542 rc = RTSemMutexCreate(&pDevExt->mtxGip);
543#else
544 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
545#endif
546 if (RT_SUCCESS(rc))
547 {
548 rc = supdrvGipCreate(pDevExt);
549 if (RT_SUCCESS(rc))
550 {
551 rc = supdrvTracerInit(pDevExt);
552 if (RT_SUCCESS(rc))
553 {
554 pDevExt->pLdrInitImage = NULL;
555 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
556 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
557 pDevExt->cbSession = (uint32_t)cbSession;
558
559 /*
560 * Fixup the absolute symbols.
561 *
562 * Because of the table indexing assumptions we'll have a little #ifdef orgy
563 * here rather than distributing this to OS specific files. At least for now.
564 */
565#ifdef RT_OS_DARWIN
566# if ARCH_BITS == 32
567 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
568 {
569 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
570 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
571 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
572 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
573 }
574 else
575 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
576 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
577 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
578 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
579 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
580 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
581 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
582# else /* 64-bit darwin: */
583 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
584 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
585 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
586 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
587 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
588 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
589 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
590 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
591 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
592 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
593
594# endif
595#else /* !RT_OS_DARWIN */
596# if ARCH_BITS == 64
597 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
598 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
599 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
600 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
601# else
602 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
603# endif
604 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
605 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
606 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
607 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
608 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
609 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
610#endif /* !RT_OS_DARWIN */
611 return VINF_SUCCESS;
612 }
613
614 supdrvGipDestroy(pDevExt);
615 }
616
617#ifdef SUPDRV_USE_MUTEX_FOR_GIP
618 RTSemMutexDestroy(pDevExt->mtxGip);
619 pDevExt->mtxGip = NIL_RTSEMMUTEX;
620#else
621 RTSemFastMutexDestroy(pDevExt->mtxGip);
622 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
623#endif
624 }
625 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
626 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
627 }
628#ifdef SUPDRV_USE_MUTEX_FOR_LDR
629 RTSemMutexDestroy(pDevExt->mtxLdr);
630 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
631#else
632 RTSemFastMutexDestroy(pDevExt->mtxLdr);
633 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
634#endif
635 }
636
637 RTSpinlockDestroy(pDevExt->Spinlock);
638 pDevExt->Spinlock = NIL_RTSPINLOCK;
639 RTSpinlockDestroy(pDevExt->hGipSpinlock);
640 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
641 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
642 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
643
644#ifdef SUPDRV_WITH_RELEASE_LOGGER
645 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
646 RTLogDestroy(RTLogSetDefaultInstance(NULL));
647#endif
648
649 return rc;
650}
651
652
653/**
654 * Delete the device extension (e.g. cleanup members).
655 *
656 * @param pDevExt The device extension to delete.
657 */
658void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
659{
660 PSUPDRVOBJ pObj;
661 PSUPDRVUSAGE pUsage;
662
663 /*
664 * Kill mutexes and spinlocks.
665 */
666#ifdef SUPDRV_USE_MUTEX_FOR_GIP
667 RTSemMutexDestroy(pDevExt->mtxGip);
668 pDevExt->mtxGip = NIL_RTSEMMUTEX;
669#else
670 RTSemFastMutexDestroy(pDevExt->mtxGip);
671 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
672#endif
673#ifdef SUPDRV_USE_MUTEX_FOR_LDR
674 RTSemMutexDestroy(pDevExt->mtxLdr);
675 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
676#else
677 RTSemFastMutexDestroy(pDevExt->mtxLdr);
678 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
679#endif
680 RTSpinlockDestroy(pDevExt->Spinlock);
681 pDevExt->Spinlock = NIL_RTSPINLOCK;
682 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
683 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
684 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
685 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
686
687 /*
688 * Free lists.
689 */
690 /* objects. */
691 pObj = pDevExt->pObjs;
692 Assert(!pObj); /* (can trigger on forced unloads) */
693 pDevExt->pObjs = NULL;
694 while (pObj)
695 {
696 void *pvFree = pObj;
697 pObj = pObj->pNext;
698 RTMemFree(pvFree);
699 }
700
701 /* usage records. */
702 pUsage = pDevExt->pUsageFree;
703 pDevExt->pUsageFree = NULL;
704 while (pUsage)
705 {
706 void *pvFree = pUsage;
707 pUsage = pUsage->pNext;
708 RTMemFree(pvFree);
709 }
710
711 /* kill the GIP. */
712 supdrvGipDestroy(pDevExt);
713 RTSpinlockDestroy(pDevExt->hGipSpinlock);
714 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
715
716 supdrvTracerTerm(pDevExt);
717
718#ifdef SUPDRV_WITH_RELEASE_LOGGER
719 /* destroy the loggers. */
720 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
721 RTLogDestroy(RTLogSetDefaultInstance(NULL));
722#endif
723}
724
725
726/**
727 * Create session.
728 *
729 * @returns IPRT status code.
730 * @param pDevExt Device extension.
731 * @param fUser Flag indicating whether this is a user or kernel
732 * session.
733 * @param fUnrestricted Unrestricted access (system) or restricted access
734 * (user)?
735 * @param ppSession Where to store the pointer to the session data.
736 */
737int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
738{
739 int rc;
740 PSUPDRVSESSION pSession;
741
742 if (!SUP_IS_DEVEXT_VALID(pDevExt))
743 return VERR_INVALID_PARAMETER;
744
745 /*
746 * Allocate memory for the session data.
747 */
748 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
749 if (pSession)
750 {
751 /* Initialize session data. */
752 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
753 if (!rc)
754 {
755 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
756 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
757 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
758 if (RT_SUCCESS(rc))
759 {
760 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
761 pSession->pDevExt = pDevExt;
762 pSession->u32Cookie = BIRD_INV;
763 pSession->fUnrestricted = fUnrestricted;
764 /*pSession->fInHashTable = false; */
765 pSession->cRefs = 1;
766 /*pSession->pCommonNextHash = NULL;
767 pSession->ppOsSessionPtr = NULL; */
768 if (fUser)
769 {
770 pSession->Process = RTProcSelf();
771 pSession->R0Process = RTR0ProcHandleSelf();
772 }
773 else
774 {
775 pSession->Process = NIL_RTPROCESS;
776 pSession->R0Process = NIL_RTR0PROCESS;
777 }
778 /*pSession->pLdrUsage = NULL;
779 pSession->pVM = NULL;
780 pSession->pUsage = NULL;
781 pSession->pGip = NULL;
782 pSession->fGipReferenced = false;
783 pSession->Bundle.cUsed = 0; */
784 pSession->Uid = NIL_RTUID;
785 pSession->Gid = NIL_RTGID;
786 /*pSession->uTracerData = 0;*/
787 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
788 RTListInit(&pSession->TpProviders);
789 /*pSession->cTpProviders = 0;*/
790 /*pSession->cTpProbesFiring = 0;*/
791 RTListInit(&pSession->TpUmods);
792 /*RT_ZERO(pSession->apTpLookupTable);*/
793
794 VBOXDRV_SESSION_CREATE(pSession, fUser);
795 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
796 return VINF_SUCCESS;
797 }
798
799 RTSpinlockDestroy(pSession->Spinlock);
800 }
801 RTMemFree(pSession);
802 *ppSession = NULL;
803 Log(("Failed to create spinlock, rc=%d!\n", rc));
804 }
805 else
806 rc = VERR_NO_MEMORY;
807
808 return rc;
809}
810
811
812/**
813 * Cleans up the session in the context of the process to which it belongs, the
814 * caller will free the session and the session spinlock.
815 *
816 * This should normally occur when the session is closed or as the process
817 * exits. Careful reference counting in the OS specfic code makes sure that
818 * there cannot be any races between process/handle cleanup callbacks and
819 * threads doing I/O control calls.
820 *
821 * @param pDevExt The device extension.
822 * @param pSession Session data.
823 */
824static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
825{
826 int rc;
827 PSUPDRVBUNDLE pBundle;
828 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
829
830 Assert(!pSession->fInHashTable);
831 Assert(!pSession->ppOsSessionPtr);
832 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
833 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
834
835 /*
836 * Remove logger instances related to this session.
837 */
838 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
839
840 /*
841 * Destroy the handle table.
842 */
843 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
844 AssertRC(rc);
845 pSession->hHandleTable = NIL_RTHANDLETABLE;
846
847 /*
848 * Release object references made in this session.
849 * In theory there should be noone racing us in this session.
850 */
851 Log2(("release objects - start\n"));
852 if (pSession->pUsage)
853 {
854 PSUPDRVUSAGE pUsage;
855 RTSpinlockAcquire(pDevExt->Spinlock);
856
857 while ((pUsage = pSession->pUsage) != NULL)
858 {
859 PSUPDRVOBJ pObj = pUsage->pObj;
860 pSession->pUsage = pUsage->pNext;
861
862 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
863 if (pUsage->cUsage < pObj->cUsage)
864 {
865 pObj->cUsage -= pUsage->cUsage;
866 RTSpinlockRelease(pDevExt->Spinlock);
867 }
868 else
869 {
870 /* Destroy the object and free the record. */
871 if (pDevExt->pObjs == pObj)
872 pDevExt->pObjs = pObj->pNext;
873 else
874 {
875 PSUPDRVOBJ pObjPrev;
876 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
877 if (pObjPrev->pNext == pObj)
878 {
879 pObjPrev->pNext = pObj->pNext;
880 break;
881 }
882 Assert(pObjPrev);
883 }
884 RTSpinlockRelease(pDevExt->Spinlock);
885
886 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
887 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
888 if (pObj->pfnDestructor)
889 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
890 RTMemFree(pObj);
891 }
892
893 /* free it and continue. */
894 RTMemFree(pUsage);
895
896 RTSpinlockAcquire(pDevExt->Spinlock);
897 }
898
899 RTSpinlockRelease(pDevExt->Spinlock);
900 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
901 }
902 Log2(("release objects - done\n"));
903
904 /*
905 * Do tracer cleanups related to this session.
906 */
907 Log2(("release tracer stuff - start\n"));
908 supdrvTracerCleanupSession(pDevExt, pSession);
909 Log2(("release tracer stuff - end\n"));
910
911 /*
912 * Release memory allocated in the session.
913 *
914 * We do not serialize this as we assume that the application will
915 * not allocated memory while closing the file handle object.
916 */
917 Log2(("freeing memory:\n"));
918 pBundle = &pSession->Bundle;
919 while (pBundle)
920 {
921 PSUPDRVBUNDLE pToFree;
922 unsigned i;
923
924 /*
925 * Check and unlock all entries in the bundle.
926 */
927 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
928 {
929 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
930 {
931 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
932 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
933 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
934 {
935 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
936 AssertRC(rc); /** @todo figure out how to handle this. */
937 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
938 }
939 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
940 AssertRC(rc); /** @todo figure out how to handle this. */
941 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
942 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
943 }
944 }
945
946 /*
947 * Advance and free previous bundle.
948 */
949 pToFree = pBundle;
950 pBundle = pBundle->pNext;
951
952 pToFree->pNext = NULL;
953 pToFree->cUsed = 0;
954 if (pToFree != &pSession->Bundle)
955 RTMemFree(pToFree);
956 }
957 Log2(("freeing memory - done\n"));
958
959 /*
960 * Deregister component factories.
961 */
962 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
963 Log2(("deregistering component factories:\n"));
964 if (pDevExt->pComponentFactoryHead)
965 {
966 PSUPDRVFACTORYREG pPrev = NULL;
967 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
968 while (pCur)
969 {
970 if (pCur->pSession == pSession)
971 {
972 /* unlink it */
973 PSUPDRVFACTORYREG pNext = pCur->pNext;
974 if (pPrev)
975 pPrev->pNext = pNext;
976 else
977 pDevExt->pComponentFactoryHead = pNext;
978
979 /* free it */
980 pCur->pNext = NULL;
981 pCur->pSession = NULL;
982 pCur->pFactory = NULL;
983 RTMemFree(pCur);
984
985 /* next */
986 pCur = pNext;
987 }
988 else
989 {
990 /* next */
991 pPrev = pCur;
992 pCur = pCur->pNext;
993 }
994 }
995 }
996 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
997 Log2(("deregistering component factories - done\n"));
998
999 /*
1000 * Loaded images needs to be dereferenced and possibly freed up.
1001 */
1002 supdrvLdrLock(pDevExt);
1003 Log2(("freeing images:\n"));
1004 if (pSession->pLdrUsage)
1005 {
1006 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1007 pSession->pLdrUsage = NULL;
1008 while (pUsage)
1009 {
1010 void *pvFree = pUsage;
1011 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1012 if (pImage->cUsage > pUsage->cUsage)
1013 pImage->cUsage -= pUsage->cUsage;
1014 else
1015 supdrvLdrFree(pDevExt, pImage);
1016 pUsage->pImage = NULL;
1017 pUsage = pUsage->pNext;
1018 RTMemFree(pvFree);
1019 }
1020 }
1021 supdrvLdrUnlock(pDevExt);
1022 Log2(("freeing images - done\n"));
1023
1024 /*
1025 * Unmap the GIP.
1026 */
1027 Log2(("umapping GIP:\n"));
1028 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1029 {
1030 SUPR0GipUnmap(pSession);
1031 pSession->fGipReferenced = 0;
1032 }
1033 Log2(("umapping GIP - done\n"));
1034}
1035
1036
/**
 * Common code for freeing a session when the reference count reaches zero.
 *
 * Runs the common and the OS specific session cleanup, destroys the session
 * spinlock and finally frees the session structure itself.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine, so the caller
 *                      must not touch it afterwards.
 */
static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /* NOTE(review): presumably a tracepoint/probe macro, the counterpart of
       VBOXDRV_SESSION_CREATE used in supdrvCreateSession - confirm. */
    VBOXDRV_SESSION_CLOSE(pSession);

    /*
     * Cleanup the session first: common code, then the OS specific part.
     */
    supdrvCleanupSession(pDevExt, pSession);
    supdrvOSCleanupSession(pDevExt, pSession);

    /*
     * Free the rest of the session stuff.  The spinlock handle and the device
     * extension pointer are reset before the memory is released.
     */
    RTSpinlockDestroy(pSession->Spinlock);
    pSession->Spinlock = NIL_RTSPINLOCK;
    pSession->pDevExt = NULL;
    RTMemFree(pSession);
    LogFlow(("supdrvDestroySession: returns\n"));
}
1063
1064
1065/**
1066 * Inserts the session into the global hash table.
1067 *
1068 * @retval VINF_SUCCESS on success.
1069 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1070 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1071 * session (asserted).
1072 * @retval VERR_DUPLICATE if there is already a session for that pid.
1073 *
1074 * @param pDevExt The device extension.
1075 * @param pSession The session.
1076 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1077 * available and used. This will set to point to the
1078 * session while under the protection of the session
1079 * hash table spinlock. It will also be kept in
1080 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1081 * cleanup use.
1082 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1083 */
1084int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1085 void *pvUser)
1086{
1087 PSUPDRVSESSION pCur;
1088 unsigned iHash;
1089
1090 /*
1091 * Validate input.
1092 */
1093 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1094 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1095
1096 /*
1097 * Calculate the hash table index and acquire the spinlock.
1098 */
1099 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1100
1101 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1102
1103 /*
1104 * If there are a collisions, we need to carefully check if we got a
1105 * duplicate. There can only be one open session per process.
1106 */
1107 pCur = pDevExt->apSessionHashTab[iHash];
1108 if (pCur)
1109 {
1110 while (pCur && pCur->Process != pSession->Process)
1111 pCur = pCur->pCommonNextHash;
1112
1113 if (pCur)
1114 {
1115 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1116 if (pCur == pSession)
1117 {
1118 Assert(pSession->fInHashTable);
1119 AssertFailed();
1120 return VERR_WRONG_ORDER;
1121 }
1122 Assert(!pSession->fInHashTable);
1123 if (pCur->R0Process == pSession->R0Process)
1124 return VERR_RESOURCE_IN_USE;
1125 return VERR_DUPLICATE;
1126 }
1127 }
1128 Assert(!pSession->fInHashTable);
1129 Assert(!pSession->ppOsSessionPtr);
1130
1131 /*
1132 * Insert it, doing a callout to the OS specific code in case it has
1133 * anything it wishes to do while we're holding the spinlock.
1134 */
1135 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1136 pDevExt->apSessionHashTab[iHash] = pSession;
1137 pSession->fInHashTable = true;
1138 ASMAtomicIncS32(&pDevExt->cSessions);
1139
1140 pSession->ppOsSessionPtr = ppOsSessionPtr;
1141 if (ppOsSessionPtr)
1142 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1143
1144 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1145
1146 /*
1147 * Retain a reference for the pointer in the session table.
1148 */
1149 ASMAtomicIncU32(&pSession->cRefs);
1150
1151 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1152 return VINF_SUCCESS;
1153}
1154
1155
1156/**
1157 * Removes the session from the global hash table.
1158 *
1159 * @retval VINF_SUCCESS on success.
1160 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1161 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1162 * session (asserted).
1163 *
1164 * @param pDevExt The device extension.
1165 * @param pSession The session. The caller is expected to have a reference
1166 * to this so it won't croak on us when we release the hash
1167 * table reference.
1168 * @param pvUser OS specific context value for the
1169 * supdrvOSSessionHashTabInserted callback.
1170 */
1171int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1172{
1173 PSUPDRVSESSION pCur;
1174 unsigned iHash;
1175 int32_t cRefs;
1176
1177 /*
1178 * Validate input.
1179 */
1180 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1181 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1182
1183 /*
1184 * Calculate the hash table index and acquire the spinlock.
1185 */
1186 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1187
1188 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1189
1190 /*
1191 * Unlink it.
1192 */
1193 pCur = pDevExt->apSessionHashTab[iHash];
1194 if (pCur == pSession)
1195 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1196 else
1197 {
1198 PSUPDRVSESSION pPrev = pCur;
1199 while (pCur && pCur != pSession)
1200 {
1201 pPrev = pCur;
1202 pCur = pCur->pCommonNextHash;
1203 }
1204 if (pCur)
1205 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1206 else
1207 {
1208 Assert(!pSession->fInHashTable);
1209 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1210 return VERR_NOT_FOUND;
1211 }
1212 }
1213
1214 pSession->pCommonNextHash = NULL;
1215 pSession->fInHashTable = false;
1216
1217 ASMAtomicDecU32(&pDevExt->cSessions);
1218
1219 /*
1220 * Clear OS specific session pointer if available and do the OS callback.
1221 */
1222 if (pSession->ppOsSessionPtr)
1223 {
1224 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1225 pSession->ppOsSessionPtr = NULL;
1226 }
1227
1228 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1229
1230 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1231
1232 /*
1233 * Drop the reference the hash table had to the session. This shouldn't
1234 * be the last reference!
1235 */
1236 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1237 Assert(cRefs > 0 && cRefs < _1M);
1238 if (cRefs == 0)
1239 supdrvDestroySession(pDevExt, pSession);
1240
1241 return VINF_SUCCESS;
1242}
1243
1244
1245/**
1246 * Looks up the session for the current process in the global hash table or in
1247 * OS specific pointer.
1248 *
1249 * @returns Pointer to the session with a reference that the caller must
1250 * release. If no valid session was found, NULL is returned.
1251 *
1252 * @param pDevExt The device extension.
1253 * @param Process The process ID.
1254 * @param R0Process The ring-0 process handle.
1255 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1256 * this is used instead of the hash table. For
1257 * additional safety it must then be equal to the
1258 * SUPDRVSESSION::ppOsSessionPtr member.
1259 * This can be NULL even if the OS has a session
1260 * pointer.
1261 */
1262PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1263 PSUPDRVSESSION *ppOsSessionPtr)
1264{
1265 PSUPDRVSESSION pCur;
1266 unsigned iHash;
1267
1268 /*
1269 * Validate input.
1270 */
1271 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1272
1273 /*
1274 * Calculate the hash table index and acquire the spinlock.
1275 */
1276 iHash = SUPDRV_SESSION_HASH(Process);
1277
1278 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1279
1280 /*
1281 * If an OS session pointer is provided, always use it.
1282 */
1283 if (ppOsSessionPtr)
1284 {
1285 pCur = *ppOsSessionPtr;
1286 if ( pCur
1287 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1288 || pCur->Process != Process
1289 || pCur->R0Process != R0Process) )
1290 pCur = NULL;
1291 }
1292 else
1293 {
1294 /*
1295 * Otherwise, do the hash table lookup.
1296 */
1297 pCur = pDevExt->apSessionHashTab[iHash];
1298 while ( pCur
1299 && ( pCur->Process != Process
1300 || pCur->R0Process != R0Process) )
1301 pCur = pCur->pCommonNextHash;
1302 }
1303
1304 /*
1305 * Retain the session.
1306 */
1307 if (pCur)
1308 {
1309 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1310 NOREF(cRefs);
1311 Assert(cRefs > 1 && cRefs < _1M);
1312 }
1313
1314 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1315
1316 return pCur;
1317}
1318
1319
/**
 * Retain a session to make sure it doesn't go away while it is in use.
 *
 * Atomically increments the session reference counter.
 *
 * @returns New reference count on success, UINT32_MAX on failure (invalid
 *          pointer or session failing SUP_IS_SESSION_VALID, both asserted).
 * @param   pSession    Session data.
 */
uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
{
    uint32_t cRefs;
    AssertPtrReturn(pSession, UINT32_MAX);
    AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);

    cRefs = ASMAtomicIncU32(&pSession->cRefs);
    /* The caller must already hold a reference, so the new count is at least 2. */
    AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
    return cRefs;
}
1336
1337
/**
 * Releases a given session.
 *
 * Atomically decrements the session reference counter and destroys the
 * session via supdrvDestroySession when the counter reaches zero.  In that
 * case the session memory has been freed when this function returns.
 *
 * @returns New reference count on success (0 if closed), UINT32_MAX on failure
 *          (invalid pointer or session failing SUP_IS_SESSION_VALID, both
 *          asserted).
 * @param   pSession    Session data.
 */
uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
{
    uint32_t cRefs;
    AssertPtrReturn(pSession, UINT32_MAX);
    AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);

    cRefs = ASMAtomicDecU32(&pSession->cRefs);
    /* An underflow (releasing a count of zero) wraps to a huge value and trips this. */
    AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
    if (cRefs == 0)
        supdrvDestroySession(pSession->pDevExt, pSession);
    return cRefs;
}
1356
1357
1358/**
1359 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1360 *
1361 * @returns IPRT status code, see SUPR0ObjAddRef.
1362 * @param hHandleTable The handle table handle. Ignored.
1363 * @param pvObj The object pointer.
1364 * @param pvCtx Context, the handle type. Ignored.
1365 * @param pvUser Session pointer.
1366 */
1367static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1368{
1369 NOREF(pvCtx);
1370 NOREF(hHandleTable);
1371 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1372}
1373
1374
1375/**
1376 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1377 *
1378 * @param hHandleTable The handle table handle. Ignored.
1379 * @param h The handle value. Ignored.
1380 * @param pvObj The object pointer.
1381 * @param pvCtx Context, the handle type. Ignored.
1382 * @param pvUser Session pointer.
1383 */
1384static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1385{
1386 NOREF(pvCtx);
1387 NOREF(h);
1388 NOREF(hHandleTable);
1389 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1390}
1391
1392
1393/**
1394 * Fast path I/O Control worker.
1395 *
1396 * @returns VBox status code that should be passed down to ring-3 unchanged.
1397 * @param uIOCtl Function number.
1398 * @param idCpu VMCPU id.
1399 * @param pDevExt Device extention.
1400 * @param pSession Session data.
1401 */
1402int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1403{
1404 /*
1405 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1406 */
1407 if (RT_LIKELY( RT_VALID_PTR(pSession)
1408 && pSession->pVM
1409 && pDevExt->pfnVMMR0EntryFast))
1410 {
1411 switch (uIOCtl)
1412 {
1413 case SUP_IOCTL_FAST_DO_RAW_RUN:
1414 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1415 break;
1416 case SUP_IOCTL_FAST_DO_HM_RUN:
1417 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1418 break;
1419 case SUP_IOCTL_FAST_DO_NOP:
1420 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1421 break;
1422 default:
1423 return VERR_INTERNAL_ERROR;
1424 }
1425 return VINF_SUCCESS;
1426 }
1427 return VERR_INTERNAL_ERROR;
1428}
1429
1430
/**
 * Helper for supdrvIOCtl.  Checks whether pszStr contains any of the
 * characters listed in pszChars.
 *
 * This is a hand-rolled stand-in for strpbrk, which would be used here if it
 * were part of the RedHat kABI white list, see
 * http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr contains at least one character of pszChars, 0
 *          otherwise (including when either string is empty).
 * @param   pszStr      String to check.
 * @param   pszChars    Character set.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        /* Compare the current character against every member of the set. */
        const char *pszProbe;
        for (pszProbe = pszChars; *pszProbe != '\0'; pszProbe++)
            if (*pszProbe == *pszCur)
                return 1;
    }
    return 0;
}
1454
1455
1456
1457/**
1458 * I/O Control inner worker (tracing reasons).
1459 *
1460 * @returns IPRT status code.
1461 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1462 *
1463 * @param uIOCtl Function number.
1464 * @param pDevExt Device extension.
1465 * @param pSession Session data.
1466 * @param pReqHdr The request header.
1467 */
1468static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1469{
1470 /*
1471 * Validation macros
1472 */
1473#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1474 do { \
1475 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1476 { \
1477 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1478 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1479 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1480 } \
1481 } while (0)
1482
1483#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1484
1485#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1486 do { \
1487 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1488 { \
1489 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1490 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1491 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1492 } \
1493 } while (0)
1494
1495#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1496 do { \
1497 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1498 { \
1499 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1500 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1501 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1502 } \
1503 } while (0)
1504
1505#define REQ_CHECK_EXPR(Name, expr) \
1506 do { \
1507 if (RT_UNLIKELY(!(expr))) \
1508 { \
1509 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1510 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1511 } \
1512 } while (0)
1513
1514#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1515 do { \
1516 if (RT_UNLIKELY(!(expr))) \
1517 { \
1518 OSDBGPRINT( fmt ); \
1519 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1520 } \
1521 } while (0)
1522
1523 /*
1524 * The switch.
1525 */
1526 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1527 {
1528 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1529 {
1530 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1531 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1532 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1533 {
1534 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1535 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1536 return 0;
1537 }
1538
1539#if 0
1540 /*
1541 * Call out to the OS specific code and let it do permission checks on the
1542 * client process.
1543 */
1544 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1545 {
1546 pReq->u.Out.u32Cookie = 0xffffffff;
1547 pReq->u.Out.u32SessionCookie = 0xffffffff;
1548 pReq->u.Out.u32SessionVersion = 0xffffffff;
1549 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1550 pReq->u.Out.pSession = NULL;
1551 pReq->u.Out.cFunctions = 0;
1552 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1553 return 0;
1554 }
1555#endif
1556
1557 /*
1558 * Match the version.
1559 * The current logic is very simple, match the major interface version.
1560 */
1561 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1562 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1563 {
1564 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1565 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1566 pReq->u.Out.u32Cookie = 0xffffffff;
1567 pReq->u.Out.u32SessionCookie = 0xffffffff;
1568 pReq->u.Out.u32SessionVersion = 0xffffffff;
1569 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1570 pReq->u.Out.pSession = NULL;
1571 pReq->u.Out.cFunctions = 0;
1572 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1573 return 0;
1574 }
1575
1576 /*
1577 * Fill in return data and be gone.
1578 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1579 * u32SessionVersion <= u32ReqVersion!
1580 */
1581 /** @todo Somehow validate the client and negotiate a secure cookie... */
1582 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1583 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1584 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1585 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1586 pReq->u.Out.pSession = pSession;
1587 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1588 pReq->Hdr.rc = VINF_SUCCESS;
1589 return 0;
1590 }
1591
1592 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1593 {
1594 /* validate */
1595 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1596 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1597
1598 /* execute */
1599 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1600 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1601 pReq->Hdr.rc = VINF_SUCCESS;
1602 return 0;
1603 }
1604
1605 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1606 {
1607 /* validate */
1608 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1609 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1610 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1611 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1612 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1613
1614 /* execute */
1615 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1616 if (RT_FAILURE(pReq->Hdr.rc))
1617 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1618 return 0;
1619 }
1620
1621 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1622 {
1623 /* validate */
1624 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1625 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1626
1627 /* execute */
1628 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1629 return 0;
1630 }
1631
1632 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1633 {
1634 /* validate */
1635 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1636 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1637
1638 /* execute */
1639 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1640 if (RT_FAILURE(pReq->Hdr.rc))
1641 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1642 return 0;
1643 }
1644
1645 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1646 {
1647 /* validate */
1648 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1649 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1650
1651 /* execute */
1652 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1653 return 0;
1654 }
1655
1656 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1657 {
1658 /* validate */
1659 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1660 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1661 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1662 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1663 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1664 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1665 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1666 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1668 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1669 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1670
1671 /* execute */
1672 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1673 return 0;
1674 }
1675
1676 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1677 {
1678 /* validate */
1679 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1680 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1681 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1682 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1683 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1684 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1685 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1686 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1687 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1688 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1689 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1690 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1691 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1692 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1693 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1694
1695 if (pReq->u.In.cSymbols)
1696 {
1697 uint32_t i;
1698 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1699 for (i = 0; i < pReq->u.In.cSymbols; i++)
1700 {
1701 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1702 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1703 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1704 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1705 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1706 pReq->u.In.cbStrTab - paSyms[i].offName),
1707 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1708 }
1709 }
1710
1711 /* execute */
1712 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1713 return 0;
1714 }
1715
1716 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1717 {
1718 /* validate */
1719 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1720 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1721
1722 /* execute */
1723 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1724 return 0;
1725 }
1726
1727 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1728 {
1729 /* validate */
1730 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1731 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1732 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1733
1734 /* execute */
1735 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1736 return 0;
1737 }
1738
1739 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1740 {
1741 /* validate */
1742 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1743 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1744 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1745
1746 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1747 {
1748 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1749
1750 /* execute */
1751 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1752 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1753 else
1754 pReq->Hdr.rc = VERR_WRONG_ORDER;
1755 }
1756 else
1757 {
1758 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1759 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1760 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1761 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1762 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1763
1764 /* execute */
1765 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1766 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1767 else
1768 pReq->Hdr.rc = VERR_WRONG_ORDER;
1769 }
1770
1771 if ( RT_FAILURE(pReq->Hdr.rc)
1772 && pReq->Hdr.rc != VERR_INTERRUPTED
1773 && pReq->Hdr.rc != VERR_TIMEOUT)
1774 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1775 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1776 else
1777 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1778 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1779 return 0;
1780 }
1781
1782 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1783 {
1784 /* validate */
1785 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1786 PSUPVMMR0REQHDR pVMMReq;
1787 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1788 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1789
1790 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1791 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1792 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1793 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1794 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1795
1796 /* execute */
1797 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1798 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1799 else
1800 pReq->Hdr.rc = VERR_WRONG_ORDER;
1801
1802 if ( RT_FAILURE(pReq->Hdr.rc)
1803 && pReq->Hdr.rc != VERR_INTERRUPTED
1804 && pReq->Hdr.rc != VERR_TIMEOUT)
1805 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1806 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1807 else
1808 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1809 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1810 return 0;
1811 }
1812
1813 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1814 {
1815 /* validate */
1816 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1817 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1818
1819 /* execute */
1820 pReq->Hdr.rc = VINF_SUCCESS;
1821 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1822 return 0;
1823 }
1824
1825 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1826 {
1827 /* validate */
1828 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1829 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1830 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1831
1832 /* execute */
1833 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1834 if (RT_FAILURE(pReq->Hdr.rc))
1835 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1836 return 0;
1837 }
1838
1839 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1840 {
1841 /* validate */
1842 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1843 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1844
1845 /* execute */
1846 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1847 return 0;
1848 }
1849
1850 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1851 {
1852 /* validate */
1853 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1854 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1855
1856 /* execute */
1857 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1858 if (RT_SUCCESS(pReq->Hdr.rc))
1859 pReq->u.Out.pGipR0 = pDevExt->pGip;
1860 return 0;
1861 }
1862
1863 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1864 {
1865 /* validate */
1866 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1867 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1868
1869 /* execute */
1870 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1871 return 0;
1872 }
1873
1874 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1875 {
1876 /* validate */
1877 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1878 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1879 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1880 || ( VALID_PTR(pReq->u.In.pVMR0)
1881 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1882 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1883 /* execute */
1884 pSession->pVM = pReq->u.In.pVMR0;
1885 pReq->Hdr.rc = VINF_SUCCESS;
1886 return 0;
1887 }
1888
1889 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1890 {
1891 /* validate */
1892 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1893 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1894 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1895 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1896 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1897 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1898 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1899 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1900 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1901
1902 /* execute */
1903 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1904 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1905 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1906 &pReq->u.Out.aPages[0]);
1907 if (RT_FAILURE(pReq->Hdr.rc))
1908 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1909 return 0;
1910 }
1911
1912 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1913 {
1914 /* validate */
1915 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1916 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1917 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1918 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1919 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1920 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1921
1922 /* execute */
1923 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1924 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1925 if (RT_FAILURE(pReq->Hdr.rc))
1926 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1927 return 0;
1928 }
1929
1930 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1931 {
1932 /* validate */
1933 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1934 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1935 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1936 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1937 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1938 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1939 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1940
1941 /* execute */
1942 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1943 return 0;
1944 }
1945
1946 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1947 {
1948 /* validate */
1949 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1950 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1951
1952 /* execute */
1953 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1954 return 0;
1955 }
1956
1957 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1958 {
1959 /* validate */
1960 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1961 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1962 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1963
1964 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1965 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1966 else
1967 {
1968 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1969 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1970 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1971 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1972 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1973 }
1974 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1975
1976 /* execute */
1977 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1978 return 0;
1979 }
1980
1981 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1982 {
1983 /* validate */
1984 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1985 size_t cbStrTab;
1986 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1987 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1988 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1989 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1990 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1991 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1992 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1993 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1994 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1995 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1996 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1997
1998 /* execute */
1999 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2000 return 0;
2001 }
2002
2003 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2004 {
2005 /* validate */
2006 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2007 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2008 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2009
2010 /* execute */
2011 switch (pReq->u.In.uType)
2012 {
2013 case SUP_SEM_TYPE_EVENT:
2014 {
2015 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2016 switch (pReq->u.In.uOp)
2017 {
2018 case SUPSEMOP2_WAIT_MS_REL:
2019 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2020 break;
2021 case SUPSEMOP2_WAIT_NS_ABS:
2022 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2023 break;
2024 case SUPSEMOP2_WAIT_NS_REL:
2025 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2026 break;
2027 case SUPSEMOP2_SIGNAL:
2028 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2029 break;
2030 case SUPSEMOP2_CLOSE:
2031 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2032 break;
2033 case SUPSEMOP2_RESET:
2034 default:
2035 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2036 break;
2037 }
2038 break;
2039 }
2040
2041 case SUP_SEM_TYPE_EVENT_MULTI:
2042 {
2043 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2044 switch (pReq->u.In.uOp)
2045 {
2046 case SUPSEMOP2_WAIT_MS_REL:
2047 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2048 break;
2049 case SUPSEMOP2_WAIT_NS_ABS:
2050 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2051 break;
2052 case SUPSEMOP2_WAIT_NS_REL:
2053 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2054 break;
2055 case SUPSEMOP2_SIGNAL:
2056 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2057 break;
2058 case SUPSEMOP2_CLOSE:
2059 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2060 break;
2061 case SUPSEMOP2_RESET:
2062 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2063 break;
2064 default:
2065 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2066 break;
2067 }
2068 break;
2069 }
2070
2071 default:
2072 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2073 break;
2074 }
2075 return 0;
2076 }
2077
2078 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2079 {
2080 /* validate */
2081 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2082 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2083 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2084
2085 /* execute */
2086 switch (pReq->u.In.uType)
2087 {
2088 case SUP_SEM_TYPE_EVENT:
2089 {
2090 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2091 switch (pReq->u.In.uOp)
2092 {
2093 case SUPSEMOP3_CREATE:
2094 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2095 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2096 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2097 break;
2098 case SUPSEMOP3_GET_RESOLUTION:
2099 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2100 pReq->Hdr.rc = VINF_SUCCESS;
2101 pReq->Hdr.cbOut = sizeof(*pReq);
2102 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2103 break;
2104 default:
2105 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2106 break;
2107 }
2108 break;
2109 }
2110
2111 case SUP_SEM_TYPE_EVENT_MULTI:
2112 {
2113 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2114 switch (pReq->u.In.uOp)
2115 {
2116 case SUPSEMOP3_CREATE:
2117 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2118 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2119 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2120 break;
2121 case SUPSEMOP3_GET_RESOLUTION:
2122 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2123 pReq->Hdr.rc = VINF_SUCCESS;
2124 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2125 break;
2126 default:
2127 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2128 break;
2129 }
2130 break;
2131 }
2132
2133 default:
2134 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2135 break;
2136 }
2137 return 0;
2138 }
2139
2140 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2141 {
2142 /* validate */
2143 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2144 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2145
2146 /* execute */
2147 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2148 if (RT_FAILURE(pReq->Hdr.rc))
2149 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2150 return 0;
2151 }
2152
2153 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2154 {
2155 /* validate */
2156 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2157 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2158
2159 /* execute */
2160 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2161 return 0;
2162 }
2163
2164 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2165 {
2166 /* validate */
2167 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2168
2169 /* execute */
2170 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2171 return 0;
2172 }
2173
2174 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2175 {
2176 /* validate */
2177 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2178 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2179
2180 /* execute */
2181 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2182 return 0;
2183 }
2184
2185 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2186 {
2187 /* validate */
2188 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2189 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2190 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2191 return VERR_INVALID_PARAMETER;
2192
2193 /* execute */
2194 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2195 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2196 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2197 pReq->u.In.szName, pReq->u.In.fFlags);
2198 return 0;
2199 }
2200
2201 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2202 {
2203 /* validate */
2204 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2205 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2206
2207 /* execute */
2208 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2209 return 0;
2210 }
2211
2212 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2213 {
2214 /* validate */
2215 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2216 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2217
2218 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2219 pReqHdr->rc = VINF_SUCCESS;
2220 return 0;
2221 }
2222
2223 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2224 {
2225 /* validate */
2226 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2227 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2228 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2229 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2230
2231 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2232 return 0;
2233 }
2234
2235 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2236 {
2237 /* validate */
2238 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2239
2240 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2241 return 0;
2242 }
2243
2244 default:
2245 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2246 break;
2247 }
2248 return VERR_GENERAL_FAILURE;
2249}
2250
2251
2252/**
2253 * I/O Control inner worker for the restricted operations.
2254 *
2255 * @returns IPRT status code.
2256 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2257 *
2258 * @param uIOCtl Function number.
2259 * @param pDevExt Device extention.
2260 * @param pSession Session data.
2261 * @param pReqHdr The request header.
2262 */
2263static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2264{
2265 /*
2266 * The switch.
2267 */
2268 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2269 {
2270 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2271 {
2272 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2273 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2274 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2275 {
2276 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2277 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2278 return 0;
2279 }
2280
2281 /*
2282 * Match the version.
2283 * The current logic is very simple, match the major interface version.
2284 */
2285 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2286 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2287 {
2288 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2289 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2290 pReq->u.Out.u32Cookie = 0xffffffff;
2291 pReq->u.Out.u32SessionCookie = 0xffffffff;
2292 pReq->u.Out.u32SessionVersion = 0xffffffff;
2293 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2294 pReq->u.Out.pSession = NULL;
2295 pReq->u.Out.cFunctions = 0;
2296 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2297 return 0;
2298 }
2299
2300 /*
2301 * Fill in return data and be gone.
2302 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2303 * u32SessionVersion <= u32ReqVersion!
2304 */
2305 /** @todo Somehow validate the client and negotiate a secure cookie... */
2306 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2307 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2308 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2309 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2310 pReq->u.Out.pSession = pSession;
2311 pReq->u.Out.cFunctions = 0;
2312 pReq->Hdr.rc = VINF_SUCCESS;
2313 return 0;
2314 }
2315
2316 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2317 {
2318 /* validate */
2319 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2320 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2321
2322 /* execute */
2323 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2324 if (RT_FAILURE(pReq->Hdr.rc))
2325 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2326 return 0;
2327 }
2328
2329 default:
2330 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2331 break;
2332 }
2333 return VERR_GENERAL_FAILURE;
2334}
2335
2336
2337/**
2338 * I/O Control worker.
2339 *
2340 * @returns IPRT status code.
2341 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2342 *
2343 * @param uIOCtl Function number.
2344 * @param pDevExt Device extention.
2345 * @param pSession Session data.
2346 * @param pReqHdr The request header.
2347 */
2348int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2349{
2350 int rc;
2351 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2352
2353 /*
2354 * Validate the request.
2355 */
2356 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2357 {
2358 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2359 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2360 return VERR_INVALID_PARAMETER;
2361 }
2362 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2363 || pReqHdr->cbIn < sizeof(*pReqHdr)
2364 || pReqHdr->cbIn > cbReq
2365 || pReqHdr->cbOut < sizeof(*pReqHdr)
2366 || pReqHdr->cbOut > cbReq))
2367 {
2368 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2369 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2370 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2371 return VERR_INVALID_PARAMETER;
2372 }
2373 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2374 {
2375 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2376 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2377 return VERR_INVALID_PARAMETER;
2378 }
2379 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2380 {
2381 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2382 {
2383 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2384 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2385 return VERR_INVALID_PARAMETER;
2386 }
2387 }
2388 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2389 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2390 {
2391 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2392 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2393 return VERR_INVALID_PARAMETER;
2394 }
2395
2396 /*
2397 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2398 */
2399 if (pSession->fUnrestricted)
2400 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2401 else
2402 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2403
2404 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2405 return rc;
2406}
2407
2408
2409/**
2410 * Inter-Driver Communication (IDC) worker.
2411 *
2412 * @returns VBox status code.
2413 * @retval VINF_SUCCESS on success.
2414 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2415 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2416 *
2417 * @param uReq The request (function) code.
2418 * @param pDevExt Device extention.
2419 * @param pSession Session data.
2420 * @param pReqHdr The request header.
2421 */
2422int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2423{
2424 /*
2425 * The OS specific code has already validated the pSession
2426 * pointer, and the request size being greater or equal to
2427 * size of the header.
2428 *
2429 * So, just check that pSession is a kernel context session.
2430 */
2431 if (RT_UNLIKELY( pSession
2432 && pSession->R0Process != NIL_RTR0PROCESS))
2433 return VERR_INVALID_PARAMETER;
2434
2435/*
2436 * Validation macro.
2437 */
2438#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2439 do { \
2440 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2441 { \
2442 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2443 (long)pReqHdr->cb, (long)(cbExpect))); \
2444 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2445 } \
2446 } while (0)
2447
2448 switch (uReq)
2449 {
2450 case SUPDRV_IDC_REQ_CONNECT:
2451 {
2452 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2453 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2454
2455 /*
2456 * Validate the cookie and other input.
2457 */
2458 if (pReq->Hdr.pSession != NULL)
2459 {
2460 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2461 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2462 }
2463 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2464 {
2465 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2466 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2467 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2468 }
2469 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2470 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2471 {
2472 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2473 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2474 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2475 }
2476 if (pSession != NULL)
2477 {
2478 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2479 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2480 }
2481
2482 /*
2483 * Match the version.
2484 * The current logic is very simple, match the major interface version.
2485 */
2486 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2487 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2488 {
2489 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2490 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2491 pReq->u.Out.pSession = NULL;
2492 pReq->u.Out.uSessionVersion = 0xffffffff;
2493 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2494 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2495 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2496 return VINF_SUCCESS;
2497 }
2498
2499 pReq->u.Out.pSession = NULL;
2500 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2501 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2502 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2503
2504 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2505 if (RT_FAILURE(pReq->Hdr.rc))
2506 {
2507 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2508 return VINF_SUCCESS;
2509 }
2510
2511 pReq->u.Out.pSession = pSession;
2512 pReq->Hdr.pSession = pSession;
2513
2514 return VINF_SUCCESS;
2515 }
2516
2517 case SUPDRV_IDC_REQ_DISCONNECT:
2518 {
2519 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2520
2521 supdrvSessionRelease(pSession);
2522 return pReqHdr->rc = VINF_SUCCESS;
2523 }
2524
2525 case SUPDRV_IDC_REQ_GET_SYMBOL:
2526 {
2527 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2528 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2529
2530 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2531 return VINF_SUCCESS;
2532 }
2533
2534 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2535 {
2536 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2537 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2538
2539 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2540 return VINF_SUCCESS;
2541 }
2542
2543 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2544 {
2545 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2546 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2547
2548 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2549 return VINF_SUCCESS;
2550 }
2551
2552 default:
2553 Log(("Unknown IDC %#lx\n", (long)uReq));
2554 break;
2555 }
2556
2557#undef REQ_CHECK_IDC_SIZE
2558 return VERR_NOT_SUPPORTED;
2559}
2560
2561
2562/**
2563 * Register a object for reference counting.
2564 * The object is registered with one reference in the specified session.
2565 *
2566 * @returns Unique identifier on success (pointer).
2567 * All future reference must use this identifier.
2568 * @returns NULL on failure.
2569 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2570 * @param pvUser1 The first user argument.
2571 * @param pvUser2 The second user argument.
2572 */
2573SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2574{
2575 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2576 PSUPDRVOBJ pObj;
2577 PSUPDRVUSAGE pUsage;
2578
2579 /*
2580 * Validate the input.
2581 */
2582 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2583 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2584 AssertPtrReturn(pfnDestructor, NULL);
2585
2586 /*
2587 * Allocate and initialize the object.
2588 */
2589 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2590 if (!pObj)
2591 return NULL;
2592 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2593 pObj->enmType = enmType;
2594 pObj->pNext = NULL;
2595 pObj->cUsage = 1;
2596 pObj->pfnDestructor = pfnDestructor;
2597 pObj->pvUser1 = pvUser1;
2598 pObj->pvUser2 = pvUser2;
2599 pObj->CreatorUid = pSession->Uid;
2600 pObj->CreatorGid = pSession->Gid;
2601 pObj->CreatorProcess= pSession->Process;
2602 supdrvOSObjInitCreator(pObj, pSession);
2603
2604 /*
2605 * Allocate the usage record.
2606 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2607 */
2608 RTSpinlockAcquire(pDevExt->Spinlock);
2609
2610 pUsage = pDevExt->pUsageFree;
2611 if (pUsage)
2612 pDevExt->pUsageFree = pUsage->pNext;
2613 else
2614 {
2615 RTSpinlockRelease(pDevExt->Spinlock);
2616 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2617 if (!pUsage)
2618 {
2619 RTMemFree(pObj);
2620 return NULL;
2621 }
2622 RTSpinlockAcquire(pDevExt->Spinlock);
2623 }
2624
2625 /*
2626 * Insert the object and create the session usage record.
2627 */
2628 /* The object. */
2629 pObj->pNext = pDevExt->pObjs;
2630 pDevExt->pObjs = pObj;
2631
2632 /* The session record. */
2633 pUsage->cUsage = 1;
2634 pUsage->pObj = pObj;
2635 pUsage->pNext = pSession->pUsage;
2636 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2637 pSession->pUsage = pUsage;
2638
2639 RTSpinlockRelease(pDevExt->Spinlock);
2640
2641 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2642 return pObj;
2643}
2644
2645
2646/**
2647 * Increment the reference counter for the object associating the reference
2648 * with the specified session.
2649 *
2650 * @returns IPRT status code.
2651 * @param pvObj The identifier returned by SUPR0ObjRegister().
2652 * @param pSession The session which is referencing the object.
2653 *
2654 * @remarks The caller should not own any spinlocks and must carefully protect
2655 * itself against potential race with the destructor so freed memory
2656 * isn't accessed here.
2657 */
2658SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2659{
2660 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2661}
2662
2663
2664/**
2665 * Increment the reference counter for the object associating the reference
2666 * with the specified session.
2667 *
2668 * @returns IPRT status code.
2669 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2670 * couldn't be allocated. (If you see this you're not doing the right
2671 * thing and it won't ever work reliably.)
2672 *
2673 * @param pvObj The identifier returned by SUPR0ObjRegister().
2674 * @param pSession The session which is referencing the object.
2675 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2676 * first reference to an object in a session with this
2677 * argument set.
2678 *
2679 * @remarks The caller should not own any spinlocks and must carefully protect
2680 * itself against potential race with the destructor so freed memory
2681 * isn't accessed here.
2682 */
2683SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2684{
2685 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2686 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2687 int rc = VINF_SUCCESS;
2688 PSUPDRVUSAGE pUsagePre;
2689 PSUPDRVUSAGE pUsage;
2690
2691 /*
2692 * Validate the input.
2693 * Be ready for the destruction race (someone might be stuck in the
2694 * destructor waiting a lock we own).
2695 */
2696 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2697 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2698 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2699 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2700 VERR_INVALID_PARAMETER);
2701
2702 RTSpinlockAcquire(pDevExt->Spinlock);
2703
2704 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2705 {
2706 RTSpinlockRelease(pDevExt->Spinlock);
2707
2708 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2709 return VERR_WRONG_ORDER;
2710 }
2711
2712 /*
2713 * Preallocate the usage record if we can.
2714 */
2715 pUsagePre = pDevExt->pUsageFree;
2716 if (pUsagePre)
2717 pDevExt->pUsageFree = pUsagePre->pNext;
2718 else if (!fNoBlocking)
2719 {
2720 RTSpinlockRelease(pDevExt->Spinlock);
2721 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2722 if (!pUsagePre)
2723 return VERR_NO_MEMORY;
2724
2725 RTSpinlockAcquire(pDevExt->Spinlock);
2726 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2727 {
2728 RTSpinlockRelease(pDevExt->Spinlock);
2729
2730 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2731 return VERR_WRONG_ORDER;
2732 }
2733 }
2734
2735 /*
2736 * Reference the object.
2737 */
2738 pObj->cUsage++;
2739
2740 /*
2741 * Look for the session record.
2742 */
2743 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2744 {
2745 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2746 if (pUsage->pObj == pObj)
2747 break;
2748 }
2749 if (pUsage)
2750 pUsage->cUsage++;
2751 else if (pUsagePre)
2752 {
2753 /* create a new session record. */
2754 pUsagePre->cUsage = 1;
2755 pUsagePre->pObj = pObj;
2756 pUsagePre->pNext = pSession->pUsage;
2757 pSession->pUsage = pUsagePre;
2758 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2759
2760 pUsagePre = NULL;
2761 }
2762 else
2763 {
2764 pObj->cUsage--;
2765 rc = VERR_TRY_AGAIN;
2766 }
2767
2768 /*
2769 * Put any unused usage record into the free list..
2770 */
2771 if (pUsagePre)
2772 {
2773 pUsagePre->pNext = pDevExt->pUsageFree;
2774 pDevExt->pUsageFree = pUsagePre;
2775 }
2776
2777 RTSpinlockRelease(pDevExt->Spinlock);
2778
2779 return rc;
2780}
2781
2782
2783/**
2784 * Decrement / destroy a reference counter record for an object.
2785 *
2786 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2787 *
2788 * @returns IPRT status code.
2789 * @retval VINF_SUCCESS if not destroyed.
2790 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2791 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2792 * string builds.
2793 *
2794 * @param pvObj The identifier returned by SUPR0ObjRegister().
2795 * @param pSession The session which is referencing the object.
2796 */
2797SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2798{
2799 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2800 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2801 int rc = VERR_INVALID_PARAMETER;
2802 PSUPDRVUSAGE pUsage;
2803 PSUPDRVUSAGE pUsagePrev;
2804
2805 /*
2806 * Validate the input.
2807 */
2808 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2809 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2810 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2811 VERR_INVALID_PARAMETER);
2812
2813 /*
2814 * Acquire the spinlock and look for the usage record.
2815 */
2816 RTSpinlockAcquire(pDevExt->Spinlock);
2817
2818 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2819 pUsage;
2820 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2821 {
2822 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2823 if (pUsage->pObj == pObj)
2824 {
2825 rc = VINF_SUCCESS;
2826 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2827 if (pUsage->cUsage > 1)
2828 {
2829 pObj->cUsage--;
2830 pUsage->cUsage--;
2831 }
2832 else
2833 {
2834 /*
2835 * Free the session record.
2836 */
2837 if (pUsagePrev)
2838 pUsagePrev->pNext = pUsage->pNext;
2839 else
2840 pSession->pUsage = pUsage->pNext;
2841 pUsage->pNext = pDevExt->pUsageFree;
2842 pDevExt->pUsageFree = pUsage;
2843
2844 /* What about the object? */
2845 if (pObj->cUsage > 1)
2846 pObj->cUsage--;
2847 else
2848 {
2849 /*
2850 * Object is to be destroyed, unlink it.
2851 */
2852 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2853 rc = VINF_OBJECT_DESTROYED;
2854 if (pDevExt->pObjs == pObj)
2855 pDevExt->pObjs = pObj->pNext;
2856 else
2857 {
2858 PSUPDRVOBJ pObjPrev;
2859 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2860 if (pObjPrev->pNext == pObj)
2861 {
2862 pObjPrev->pNext = pObj->pNext;
2863 break;
2864 }
2865 Assert(pObjPrev);
2866 }
2867 }
2868 }
2869 break;
2870 }
2871 }
2872
2873 RTSpinlockRelease(pDevExt->Spinlock);
2874
2875 /*
2876 * Call the destructor and free the object if required.
2877 */
2878 if (rc == VINF_OBJECT_DESTROYED)
2879 {
2880 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2881 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2882 if (pObj->pfnDestructor)
2883 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2884 RTMemFree(pObj);
2885 }
2886
2887 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2888 return rc;
2889}
2890
2891
2892/**
2893 * Verifies that the current process can access the specified object.
2894 *
2895 * @returns The following IPRT status code:
2896 * @retval VINF_SUCCESS if access was granted.
2897 * @retval VERR_PERMISSION_DENIED if denied access.
2898 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2899 *
2900 * @param pvObj The identifier returned by SUPR0ObjRegister().
2901 * @param pSession The session which wishes to access the object.
2902 * @param pszObjName Object string name. This is optional and depends on the object type.
2903 *
2904 * @remark The caller is responsible for making sure the object isn't removed while
2905 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2906 */
2907SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2908{
2909 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2910 int rc;
2911
2912 /*
2913 * Validate the input.
2914 */
2915 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2916 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2917 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2918 VERR_INVALID_PARAMETER);
2919
2920 /*
2921 * Check access. (returns true if a decision has been made.)
2922 */
2923 rc = VERR_INTERNAL_ERROR;
2924 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2925 return rc;
2926
2927 /*
2928 * Default policy is to allow the user to access his own
2929 * stuff but nothing else.
2930 */
2931 if (pObj->CreatorUid == pSession->Uid)
2932 return VINF_SUCCESS;
2933 return VERR_PERMISSION_DENIED;
2934}
2935
2936
2937/**
2938 * Lock pages.
2939 *
2940 * @returns IPRT status code.
2941 * @param pSession Session to which the locked memory should be associated.
2942 * @param pvR3 Start of the memory range to lock.
2943 * This must be page aligned.
2944 * @param cPages Number of pages to lock.
2945 * @param paPages Where to put the physical addresses of locked memory.
2946 */
2947SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2948{
2949 int rc;
2950 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2951 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2952 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2953
2954 /*
2955 * Verify input.
2956 */
2957 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2958 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2959 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2960 || !pvR3)
2961 {
2962 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2963 return VERR_INVALID_PARAMETER;
2964 }
2965
2966 /*
2967 * Let IPRT do the job.
2968 */
2969 Mem.eType = MEMREF_TYPE_LOCKED;
2970 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2971 if (RT_SUCCESS(rc))
2972 {
2973 uint32_t iPage = cPages;
2974 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2975 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2976
2977 while (iPage-- > 0)
2978 {
2979 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2980 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2981 {
2982 AssertMsgFailed(("iPage=%d\n", iPage));
2983 rc = VERR_INTERNAL_ERROR;
2984 break;
2985 }
2986 }
2987 if (RT_SUCCESS(rc))
2988 rc = supdrvMemAdd(&Mem, pSession);
2989 if (RT_FAILURE(rc))
2990 {
2991 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
2992 AssertRC(rc2);
2993 }
2994 }
2995
2996 return rc;
2997}
2998
2999
3000/**
3001 * Unlocks the memory pointed to by pv.
3002 *
3003 * @returns IPRT status code.
3004 * @param pSession Session to which the memory was locked.
3005 * @param pvR3 Memory to unlock.
3006 */
3007SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3008{
3009 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3010 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3011 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3012}
3013
3014
3015/**
3016 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3017 * backing.
3018 *
3019 * @returns IPRT status code.
3020 * @param pSession Session data.
3021 * @param cPages Number of pages to allocate.
3022 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3023 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3024 * @param pHCPhys Where to put the physical address of allocated memory.
3025 */
3026SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3027{
3028 int rc;
3029 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3030 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3031
3032 /*
3033 * Validate input.
3034 */
3035 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3036 if (!ppvR3 || !ppvR0 || !pHCPhys)
3037 {
3038 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3039 pSession, ppvR0, ppvR3, pHCPhys));
3040 return VERR_INVALID_PARAMETER;
3041
3042 }
3043 if (cPages < 1 || cPages >= 256)
3044 {
3045 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3046 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3047 }
3048
3049 /*
3050 * Let IPRT do the job.
3051 */
3052 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3053 if (RT_SUCCESS(rc))
3054 {
3055 int rc2;
3056 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3057 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3058 if (RT_SUCCESS(rc))
3059 {
3060 Mem.eType = MEMREF_TYPE_CONT;
3061 rc = supdrvMemAdd(&Mem, pSession);
3062 if (!rc)
3063 {
3064 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3065 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3066 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3067 return 0;
3068 }
3069
3070 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3071 AssertRC(rc2);
3072 }
3073 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3074 AssertRC(rc2);
3075 }
3076
3077 return rc;
3078}
3079
3080
3081/**
3082 * Frees memory allocated using SUPR0ContAlloc().
3083 *
3084 * @returns IPRT status code.
3085 * @param pSession The session to which the memory was allocated.
3086 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3087 */
3088SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3089{
3090 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3091 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3092 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3093}
3094
3095
3096/**
3097 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3098 *
3099 * The memory isn't zeroed.
3100 *
3101 * @returns IPRT status code.
3102 * @param pSession Session data.
3103 * @param cPages Number of pages to allocate.
3104 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3105 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3106 * @param paPages Where to put the physical addresses of allocated memory.
3107 */
3108SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3109{
3110 unsigned iPage;
3111 int rc;
3112 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3113 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3114
3115 /*
3116 * Validate input.
3117 */
3118 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3119 if (!ppvR3 || !ppvR0 || !paPages)
3120 {
3121 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3122 pSession, ppvR3, ppvR0, paPages));
3123 return VERR_INVALID_PARAMETER;
3124
3125 }
3126 if (cPages < 1 || cPages >= 256)
3127 {
3128 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3129 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3130 }
3131
3132 /*
3133 * Let IPRT do the work.
3134 */
3135 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3136 if (RT_SUCCESS(rc))
3137 {
3138 int rc2;
3139 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3140 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3141 if (RT_SUCCESS(rc))
3142 {
3143 Mem.eType = MEMREF_TYPE_LOW;
3144 rc = supdrvMemAdd(&Mem, pSession);
3145 if (!rc)
3146 {
3147 for (iPage = 0; iPage < cPages; iPage++)
3148 {
3149 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3150 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3151 }
3152 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3153 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3154 return 0;
3155 }
3156
3157 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3158 AssertRC(rc2);
3159 }
3160
3161 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3162 AssertRC(rc2);
3163 }
3164
3165 return rc;
3166}
3167
3168
3169/**
3170 * Frees memory allocated using SUPR0LowAlloc().
3171 *
3172 * @returns IPRT status code.
3173 * @param pSession The session to which the memory was allocated.
3174 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3175 */
3176SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3177{
3178 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3179 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3180 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3181}
3182
3183
3184
3185/**
3186 * Allocates a chunk of memory with both R0 and R3 mappings.
3187 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3188 *
3189 * @returns IPRT status code.
3190 * @param pSession The session to associated the allocation with.
3191 * @param cb Number of bytes to allocate.
3192 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3193 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3194 */
3195SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3196{
3197 int rc;
3198 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3199 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3200
3201 /*
3202 * Validate input.
3203 */
3204 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3205 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3206 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3207 if (cb < 1 || cb >= _4M)
3208 {
3209 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3210 return VERR_INVALID_PARAMETER;
3211 }
3212
3213 /*
3214 * Let IPRT do the work.
3215 */
3216 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3217 if (RT_SUCCESS(rc))
3218 {
3219 int rc2;
3220 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3221 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3222 if (RT_SUCCESS(rc))
3223 {
3224 Mem.eType = MEMREF_TYPE_MEM;
3225 rc = supdrvMemAdd(&Mem, pSession);
3226 if (!rc)
3227 {
3228 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3229 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3230 return VINF_SUCCESS;
3231 }
3232
3233 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3234 AssertRC(rc2);
3235 }
3236
3237 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3238 AssertRC(rc2);
3239 }
3240
3241 return rc;
3242}
3243
3244
3245/**
3246 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3247 *
3248 * @returns IPRT status code.
3249 * @param pSession The session to which the memory was allocated.
3250 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3251 * @param paPages Where to store the physical addresses.
3252 */
3253SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3254{
3255 PSUPDRVBUNDLE pBundle;
3256 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3257
3258 /*
3259 * Validate input.
3260 */
3261 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3262 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3263 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3264
3265 /*
3266 * Search for the address.
3267 */
3268 RTSpinlockAcquire(pSession->Spinlock);
3269 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3270 {
3271 if (pBundle->cUsed > 0)
3272 {
3273 unsigned i;
3274 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3275 {
3276 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3277 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3278 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3279 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3280 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3281 )
3282 )
3283 {
3284 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3285 size_t iPage;
3286 for (iPage = 0; iPage < cPages; iPage++)
3287 {
3288 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3289 paPages[iPage].uReserved = 0;
3290 }
3291 RTSpinlockRelease(pSession->Spinlock);
3292 return VINF_SUCCESS;
3293 }
3294 }
3295 }
3296 }
3297 RTSpinlockRelease(pSession->Spinlock);
3298 Log(("Failed to find %p!!!\n", (void *)uPtr));
3299 return VERR_INVALID_PARAMETER;
3300}
3301
3302
3303/**
3304 * Free memory allocated by SUPR0MemAlloc().
3305 *
3306 * @returns IPRT status code.
3307 * @param pSession The session owning the allocation.
3308 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3309 */
3310SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3311{
3312 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3313 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3314 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3315}
3316
3317
3318/**
3319 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3320 *
3321 * The memory is fixed and it's possible to query the physical addresses using
3322 * SUPR0MemGetPhys().
3323 *
3324 * @returns IPRT status code.
3325 * @param pSession The session to associated the allocation with.
3326 * @param cPages The number of pages to allocate.
3327 * @param fFlags Flags, reserved for the future. Must be zero.
3328 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3329 * NULL if no ring-3 mapping.
3330 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3331 * NULL if no ring-0 mapping.
3332 * @param paPages Where to store the addresses of the pages. Optional.
3333 */
3334SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3335{
3336 int rc;
3337 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3338 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3339
3340 /*
3341 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3342 */
3343 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3344 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3345 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3346 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3347 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3348 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3349 {
3350 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3351 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3352 }
3353
3354 /*
3355 * Let IPRT do the work.
3356 */
3357 if (ppvR0)
3358 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3359 else
3360 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3361 if (RT_SUCCESS(rc))
3362 {
3363 int rc2;
3364 if (ppvR3)
3365 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3366 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3367 else
3368 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3369 if (RT_SUCCESS(rc))
3370 {
3371 Mem.eType = MEMREF_TYPE_PAGE;
3372 rc = supdrvMemAdd(&Mem, pSession);
3373 if (!rc)
3374 {
3375 if (ppvR3)
3376 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3377 if (ppvR0)
3378 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3379 if (paPages)
3380 {
3381 uint32_t iPage = cPages;
3382 while (iPage-- > 0)
3383 {
3384 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3385 Assert(paPages[iPage] != NIL_RTHCPHYS);
3386 }
3387 }
3388 return VINF_SUCCESS;
3389 }
3390
3391 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3392 AssertRC(rc2);
3393 }
3394
3395 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3396 AssertRC(rc2);
3397 }
3398 return rc;
3399}
3400
3401
3402/**
3403 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3404 * space.
3405 *
3406 * @returns IPRT status code.
3407 * @param pSession The session to associated the allocation with.
3408 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3409 * @param offSub Where to start mapping. Must be page aligned.
3410 * @param cbSub How much to map. Must be page aligned.
3411 * @param fFlags Flags, MBZ.
3412 * @param ppvR0 Where to return the address of the ring-0 mapping on
3413 * success.
3414 */
3415SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3416 uint32_t fFlags, PRTR0PTR ppvR0)
3417{
3418 int rc;
3419 PSUPDRVBUNDLE pBundle;
3420 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3421 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3422
3423 /*
3424 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3425 */
3426 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3427 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3428 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3429 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3430 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3431 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3432
3433 /*
3434 * Find the memory object.
3435 */
3436 RTSpinlockAcquire(pSession->Spinlock);
3437 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3438 {
3439 if (pBundle->cUsed > 0)
3440 {
3441 unsigned i;
3442 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3443 {
3444 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3445 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3446 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3447 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3448 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3449 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3450 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3451 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3452 {
3453 hMemObj = pBundle->aMem[i].MemObj;
3454 break;
3455 }
3456 }
3457 }
3458 }
3459 RTSpinlockRelease(pSession->Spinlock);
3460
3461 rc = VERR_INVALID_PARAMETER;
3462 if (hMemObj != NIL_RTR0MEMOBJ)
3463 {
3464 /*
3465 * Do some further input validations before calling IPRT.
3466 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3467 */
3468 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3469 if ( offSub < cbMemObj
3470 && cbSub <= cbMemObj
3471 && offSub + cbSub <= cbMemObj)
3472 {
3473 RTR0MEMOBJ hMapObj;
3474 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3475 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3476 if (RT_SUCCESS(rc))
3477 *ppvR0 = RTR0MemObjAddress(hMapObj);
3478 }
3479 else
3480 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3481
3482 }
3483 return rc;
3484}
3485
3486
3487/**
3488 * Changes the page level protection of one or more pages previously allocated
3489 * by SUPR0PageAllocEx.
3490 *
3491 * @returns IPRT status code.
3492 * @param pSession The session to associated the allocation with.
3493 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3494 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3495 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3496 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3497 * @param offSub Where to start changing. Must be page aligned.
3498 * @param cbSub How much to change. Must be page aligned.
3499 * @param fProt The new page level protection, see RTMEM_PROT_*.
3500 */
3501SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3502{
3503 int rc;
3504 PSUPDRVBUNDLE pBundle;
3505 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3506 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3507 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3508
3509 /*
3510 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3511 */
3512 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3513 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3514 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3515 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3516 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3517
3518 /*
3519 * Find the memory object.
3520 */
3521 RTSpinlockAcquire(pSession->Spinlock);
3522 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3523 {
3524 if (pBundle->cUsed > 0)
3525 {
3526 unsigned i;
3527 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3528 {
3529 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3530 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3531 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3532 || pvR3 == NIL_RTR3PTR)
3533 && ( pvR0 == NIL_RTR0PTR
3534 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3535 && ( pvR3 == NIL_RTR3PTR
3536 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3537 {
3538 if (pvR0 != NIL_RTR0PTR)
3539 hMemObjR0 = pBundle->aMem[i].MemObj;
3540 if (pvR3 != NIL_RTR3PTR)
3541 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3542 break;
3543 }
3544 }
3545 }
3546 }
3547 RTSpinlockRelease(pSession->Spinlock);
3548
3549 rc = VERR_INVALID_PARAMETER;
3550 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3551 || hMemObjR3 != NIL_RTR0MEMOBJ)
3552 {
3553 /*
3554 * Do some further input validations before calling IPRT.
3555 */
3556 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3557 if ( offSub < cbMemObj
3558 && cbSub <= cbMemObj
3559 && offSub + cbSub <= cbMemObj)
3560 {
3561 rc = VINF_SUCCESS;
3562 if (hMemObjR3 != NIL_RTR0PTR)
3563 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3564 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3565 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3566 }
3567 else
3568 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3569
3570 }
3571 return rc;
3572
3573}
3574
3575
3576/**
3577 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3578 *
3579 * @returns IPRT status code.
3580 * @param pSession The session owning the allocation.
3581 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3582 * SUPR0PageAllocEx().
3583 */
3584SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3585{
3586 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3587 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3588 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3589}
3590
3591
3592/**
3593 * Gets the paging mode of the current CPU.
3594 *
3595 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3596 */
3597SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3598{
3599 SUPPAGINGMODE enmMode;
3600
3601 RTR0UINTREG cr0 = ASMGetCR0();
3602 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3603 enmMode = SUPPAGINGMODE_INVALID;
3604 else
3605 {
3606 RTR0UINTREG cr4 = ASMGetCR4();
3607 uint32_t fNXEPlusLMA = 0;
3608 if (cr4 & X86_CR4_PAE)
3609 {
3610 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3611 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3612 {
3613 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3614 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3615 fNXEPlusLMA |= RT_BIT(0);
3616 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3617 fNXEPlusLMA |= RT_BIT(1);
3618 }
3619 }
3620
3621 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3622 {
3623 case 0:
3624 enmMode = SUPPAGINGMODE_32_BIT;
3625 break;
3626
3627 case X86_CR4_PGE:
3628 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3629 break;
3630
3631 case X86_CR4_PAE:
3632 enmMode = SUPPAGINGMODE_PAE;
3633 break;
3634
3635 case X86_CR4_PAE | RT_BIT(0):
3636 enmMode = SUPPAGINGMODE_PAE_NX;
3637 break;
3638
3639 case X86_CR4_PAE | X86_CR4_PGE:
3640 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3641 break;
3642
3643 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3644 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3645 break;
3646
3647 case RT_BIT(1) | X86_CR4_PAE:
3648 enmMode = SUPPAGINGMODE_AMD64;
3649 break;
3650
3651 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3652 enmMode = SUPPAGINGMODE_AMD64_NX;
3653 break;
3654
3655 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3656 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3657 break;
3658
3659 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3660 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3661 break;
3662
3663 default:
3664 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3665 enmMode = SUPPAGINGMODE_INVALID;
3666 break;
3667 }
3668 }
3669 return enmMode;
3670}
3671
3672
3673/**
3674 * Enables or disabled hardware virtualization extensions using native OS APIs.
3675 *
3676 * @returns VBox status code.
3677 * @retval VINF_SUCCESS on success.
3678 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3679 *
3680 * @param fEnable Whether to enable or disable.
3681 */
3682SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3683{
3684#ifdef RT_OS_DARWIN
3685 return supdrvOSEnableVTx(fEnable);
3686#else
3687 return VERR_NOT_SUPPORTED;
3688#endif
3689}
3690
3691
3692/**
3693 * Suspends hardware virtualization extensions using the native OS API.
3694 *
3695 * This is called prior to entering raw-mode context.
3696 *
3697 * @returns @c true if suspended, @c false if not.
3698 */
3699SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3700{
3701#ifdef RT_OS_DARWIN
3702 return supdrvOSSuspendVTxOnCpu();
3703#else
3704 return false;
3705#endif
3706}
3707
3708
3709/**
3710 * Resumes hardware virtualization extensions using the native OS API.
3711 *
3712 * This is called after to entering raw-mode context.
3713 *
3714 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3715 */
3716SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3717{
3718#ifdef RT_OS_DARWIN
3719 supdrvOSResumeVTxOnCpu(fSuspended);
3720#else
3721 Assert(!fSuspended);
3722#endif
3723}
3724
3725
3726/**
3727 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3728 *
3729 * @returns VBox status code.
3730 * @retval VERR_VMX_NO_VMX
3731 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3732 * @retval VERR_VMX_MSR_VMXON_DISABLED
3733 * @retval VERR_VMX_MSR_LOCKING_FAILED
3734 * @retval VERR_SVM_NO_SVM
3735 * @retval VERR_SVM_DISABLED
3736 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3737 * (centaur) CPU.
3738 *
3739 * @param pSession The session handle.
3740 * @param pfCaps Where to store the capabilities.
3741 */
3742SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3743{
3744 int rc = VERR_UNSUPPORTED_CPU;
3745 bool fIsSmxModeAmbiguous = false;
3746 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3747
3748 /*
3749 * Input validation.
3750 */
3751 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3752 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3753
3754 *pfCaps = 0;
3755 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3756 RTThreadPreemptDisable(&PreemptState);
3757 if (ASMHasCpuId())
3758 {
3759 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3760 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3761
3762 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3763 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3764
3765 if ( ASMIsValidStdRange(uMaxId)
3766 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3767 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3768 )
3769 {
3770 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3771 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3772 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3773 )
3774 {
3775 /** @todo Unify code with hmR0InitIntelCpu(). */
3776 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3777 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3778 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3779 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3780 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3781
3782 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3783 if (fMsrLocked)
3784 {
3785 if (fVmxAllowed && fSmxVmxAllowed)
3786 rc = VINF_SUCCESS;
3787 else if (!fVmxAllowed && !fSmxVmxAllowed)
3788 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3789 else if (!fMaybeSmxMode)
3790 {
3791 if (fVmxAllowed)
3792 rc = VINF_SUCCESS;
3793 else
3794 rc = VERR_VMX_MSR_VMXON_DISABLED;
3795 }
3796 else
3797 {
3798 /*
3799 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3800 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3801 * See @bugref{6873}.
3802 */
3803 Assert(fMaybeSmxMode == true);
3804 fIsSmxModeAmbiguous = true;
3805 rc = VINF_SUCCESS;
3806 }
3807 }
3808 else
3809 {
3810 /*
3811 * MSR is not yet locked; we can change it ourselves here.
3812 * Once the lock bit is set, this MSR can no longer be modified.
3813 *
3814 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3815 * accurately. See @bugref{6873}.
3816 */
3817 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3818 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3819 | MSR_IA32_FEATURE_CONTROL_VMXON;
3820 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3821
3822 /* Verify. */
3823 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3824 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3825 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3826 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3827 if (fSmxVmxAllowed && fVmxAllowed)
3828 rc = VINF_SUCCESS;
3829 else
3830 rc = VERR_VMX_MSR_LOCKING_FAILED;
3831 }
3832
3833 if (rc == VINF_SUCCESS)
3834 {
3835 VMXCAPABILITY vtCaps;
3836
3837 *pfCaps |= SUPVTCAPS_VT_X;
3838
3839 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3840 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3841 {
3842 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3843 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3844 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3845 }
3846 }
3847 }
3848 else
3849 rc = VERR_VMX_NO_VMX;
3850 }
3851 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3852 && ASMIsValidStdRange(uMaxId))
3853 {
3854 uint32_t fExtFeaturesEcx, uExtMaxId;
3855 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3856 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3857 if ( ASMIsValidExtRange(uExtMaxId)
3858 && uExtMaxId >= 0x8000000a
3859 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3860 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3861 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3862 )
3863 {
3864 /* Check if SVM is disabled */
3865 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3866 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3867 {
3868 uint32_t fSvmFeatures;
3869 *pfCaps |= SUPVTCAPS_AMD_V;
3870
3871 /* Query AMD-V features. */
3872 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3873 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3874 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3875
3876 rc = VINF_SUCCESS;
3877 }
3878 else
3879 rc = VERR_SVM_DISABLED;
3880 }
3881 else
3882 rc = VERR_SVM_NO_SVM;
3883 }
3884 }
3885
3886 RTThreadPreemptRestore(&PreemptState);
3887 if (fIsSmxModeAmbiguous)
3888 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3889 return rc;
3890}
3891
3892
3893/**
3894 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3895 * updating.
3896 *
3897 * @param pGipCpu The per CPU structure for this CPU.
3898 * @param u64NanoTS The current time.
3899 */
3900static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3901{
3902 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3903 pGipCpu->u64NanoTS = u64NanoTS;
3904}
3905
3906
3907/**
3908 * Set the current TSC and NanoTS value for the CPU.
3909 *
3910 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3911 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3912 * @param pvUser2 Pointer to the variable holding the current time.
3913 */
3914static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3915{
3916 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3917 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3918
3919 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3920 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3921
3922 NOREF(pvUser2);
3923 NOREF(idCpu);
3924}
3925
3926
3927/**
3928 * Maps the GIP into userspace and/or get the physical address of the GIP.
3929 *
3930 * @returns IPRT status code.
3931 * @param pSession Session to which the GIP mapping should belong.
3932 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3933 * @param pHCPhysGip Where to store the physical address. (optional)
3934 *
3935 * @remark There is no reference counting on the mapping, so one call to this function
3936 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3937 * and remove the session as a GIP user.
3938 */
3939SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3940{
3941 int rc;
3942 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3943 RTR3PTR pGipR3 = NIL_RTR3PTR;
3944 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3945 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3946
3947 /*
3948 * Validate
3949 */
3950 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3951 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
3952 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
3953
3954#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3955 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
3956#else
3957 RTSemFastMutexRequest(pDevExt->mtxGip);
3958#endif
3959 if (pDevExt->pGip)
3960 {
3961 /*
3962 * Map it?
3963 */
3964 rc = VINF_SUCCESS;
3965 if (ppGipR3)
3966 {
3967 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
3968 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
3969 RTMEM_PROT_READ, RTR0ProcHandleSelf());
3970 if (RT_SUCCESS(rc))
3971 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
3972 }
3973
3974 /*
3975 * Get physical address.
3976 */
3977 if (pHCPhysGip && RT_SUCCESS(rc))
3978 HCPhys = pDevExt->HCPhysGip;
3979
3980 /*
3981 * Reference globally.
3982 */
3983 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
3984 {
3985 pSession->fGipReferenced = 1;
3986 pDevExt->cGipUsers++;
3987 if (pDevExt->cGipUsers == 1)
3988 {
3989 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
3990 uint64_t u64NanoTS;
3991 uint32_t u32SystemResolution;
3992 unsigned i;
3993
3994 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
3995
3996 /*
3997 * Try bump up the system timer resolution.
3998 * The more interrupts the better...
3999 */
4000 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
4001 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
4002 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
4003 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
4004 )
4005 {
4006 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
4007 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
4008 }
4009
4010 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4011 {
4012 for (i = 0; i < pGipR0->cCpus; i++)
4013 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4014 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4015 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4016 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4017 }
4018
4019 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4020 if ( pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4021 || RTMpGetOnlineCount() == 1)
4022 supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
4023 else
4024 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4025
4026#ifndef DO_NOT_START_GIP
4027 rc = RTTimerStart(pDevExt->pGipTimer, 0); AssertRC(rc);
4028#endif
4029 rc = VINF_SUCCESS;
4030 }
4031 }
4032 }
4033 else
4034 {
4035 rc = VERR_GENERAL_FAILURE;
4036 Log(("SUPR0GipMap: GIP is not available!\n"));
4037 }
4038#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4039 RTSemMutexRelease(pDevExt->mtxGip);
4040#else
4041 RTSemFastMutexRelease(pDevExt->mtxGip);
4042#endif
4043
4044 /*
4045 * Write returns.
4046 */
4047 if (pHCPhysGip)
4048 *pHCPhysGip = HCPhys;
4049 if (ppGipR3)
4050 *ppGipR3 = pGipR3;
4051
4052#ifdef DEBUG_DARWIN_GIP
4053 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4054#else
4055 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4056#endif
4057 return rc;
4058}
4059
4060
4061/**
4062 * Unmaps any user mapping of the GIP and terminates all GIP access
4063 * from this session.
4064 *
4065 * @returns IPRT status code.
4066 * @param pSession Session to which the GIP mapping should belong.
4067 */
4068SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4069{
4070 int rc = VINF_SUCCESS;
4071 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4072#ifdef DEBUG_DARWIN_GIP
4073 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4074 pSession,
4075 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4076 pSession->GipMapObjR3));
4077#else
4078 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4079#endif
4080 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4081
4082#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4083 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4084#else
4085 RTSemFastMutexRequest(pDevExt->mtxGip);
4086#endif
4087
4088 /*
4089 * Unmap anything?
4090 */
4091 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4092 {
4093 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4094 AssertRC(rc);
4095 if (RT_SUCCESS(rc))
4096 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4097 }
4098
4099 /*
4100 * Dereference global GIP.
4101 */
4102 if (pSession->fGipReferenced && !rc)
4103 {
4104 pSession->fGipReferenced = 0;
4105 if ( pDevExt->cGipUsers > 0
4106 && !--pDevExt->cGipUsers)
4107 {
4108 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4109#ifndef DO_NOT_START_GIP
4110 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4111#endif
4112
4113 if (pDevExt->u32SystemTimerGranularityGrant)
4114 {
4115 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4116 AssertRC(rc2);
4117 pDevExt->u32SystemTimerGranularityGrant = 0;
4118 }
4119 }
4120 }
4121
4122#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4123 RTSemMutexRelease(pDevExt->mtxGip);
4124#else
4125 RTSemFastMutexRelease(pDevExt->mtxGip);
4126#endif
4127
4128 return rc;
4129}
4130
4131
4132/**
4133 * Gets the GIP pointer.
4134 *
4135 * @returns Pointer to the GIP or NULL.
4136 */
4137SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4138{
4139 return g_pSUPGlobalInfoPage;
4140}
4141
4142
4143/**
4144 * Register a component factory with the support driver.
4145 *
4146 * This is currently restricted to kernel sessions only.
4147 *
4148 * @returns VBox status code.
4149 * @retval VINF_SUCCESS on success.
4150 * @retval VERR_NO_MEMORY if we're out of memory.
4151 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4152 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4153 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4154 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4155 *
4156 * @param pSession The SUPDRV session (must be a ring-0 session).
4157 * @param pFactory Pointer to the component factory registration structure.
4158 *
4159 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4160 */
4161SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4162{
4163 PSUPDRVFACTORYREG pNewReg;
4164 const char *psz;
4165 int rc;
4166
4167 /*
4168 * Validate parameters.
4169 */
4170 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4171 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4172 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4173 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4174 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4175 AssertReturn(psz, VERR_INVALID_PARAMETER);
4176
4177 /*
4178 * Allocate and initialize a new registration structure.
4179 */
4180 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4181 if (pNewReg)
4182 {
4183 pNewReg->pNext = NULL;
4184 pNewReg->pFactory = pFactory;
4185 pNewReg->pSession = pSession;
4186 pNewReg->cchName = psz - &pFactory->szName[0];
4187
4188 /*
4189 * Add it to the tail of the list after checking for prior registration.
4190 */
4191 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4192 if (RT_SUCCESS(rc))
4193 {
4194 PSUPDRVFACTORYREG pPrev = NULL;
4195 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4196 while (pCur && pCur->pFactory != pFactory)
4197 {
4198 pPrev = pCur;
4199 pCur = pCur->pNext;
4200 }
4201 if (!pCur)
4202 {
4203 if (pPrev)
4204 pPrev->pNext = pNewReg;
4205 else
4206 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4207 rc = VINF_SUCCESS;
4208 }
4209 else
4210 rc = VERR_ALREADY_EXISTS;
4211
4212 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4213 }
4214
4215 if (RT_FAILURE(rc))
4216 RTMemFree(pNewReg);
4217 }
4218 else
4219 rc = VERR_NO_MEMORY;
4220 return rc;
4221}
4222
4223
4224/**
4225 * Deregister a component factory.
4226 *
4227 * @returns VBox status code.
4228 * @retval VINF_SUCCESS on success.
4229 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4230 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4231 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4232 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4233 *
4234 * @param pSession The SUPDRV session (must be a ring-0 session).
4235 * @param pFactory Pointer to the component factory registration structure
4236 * previously passed SUPR0ComponentRegisterFactory().
4237 *
4238 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4239 */
4240SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4241{
4242 int rc;
4243
4244 /*
4245 * Validate parameters.
4246 */
4247 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4248 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4249 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4250
4251 /*
4252 * Take the lock and look for the registration record.
4253 */
4254 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4255 if (RT_SUCCESS(rc))
4256 {
4257 PSUPDRVFACTORYREG pPrev = NULL;
4258 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4259 while (pCur && pCur->pFactory != pFactory)
4260 {
4261 pPrev = pCur;
4262 pCur = pCur->pNext;
4263 }
4264 if (pCur)
4265 {
4266 if (!pPrev)
4267 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4268 else
4269 pPrev->pNext = pCur->pNext;
4270
4271 pCur->pNext = NULL;
4272 pCur->pFactory = NULL;
4273 pCur->pSession = NULL;
4274 rc = VINF_SUCCESS;
4275 }
4276 else
4277 rc = VERR_NOT_FOUND;
4278
4279 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4280
4281 RTMemFree(pCur);
4282 }
4283 return rc;
4284}
4285
4286
4287/**
4288 * Queries a component factory.
4289 *
4290 * @returns VBox status code.
4291 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4292 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4293 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4294 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4295 *
4296 * @param pSession The SUPDRV session.
4297 * @param pszName The name of the component factory.
4298 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4299 * @param ppvFactoryIf Where to store the factory interface.
4300 */
4301SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4302{
4303 const char *pszEnd;
4304 size_t cchName;
4305 int rc;
4306
4307 /*
4308 * Validate parameters.
4309 */
4310 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4311
4312 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4313 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4314 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4315 cchName = pszEnd - pszName;
4316
4317 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4318 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4319 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4320
4321 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4322 *ppvFactoryIf = NULL;
4323
4324 /*
4325 * Take the lock and try all factories by this name.
4326 */
4327 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4328 if (RT_SUCCESS(rc))
4329 {
4330 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4331 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4332 while (pCur)
4333 {
4334 if ( pCur->cchName == cchName
4335 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4336 {
4337 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4338 if (pvFactory)
4339 {
4340 *ppvFactoryIf = pvFactory;
4341 rc = VINF_SUCCESS;
4342 break;
4343 }
4344 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4345 }
4346
4347 /* next */
4348 pCur = pCur->pNext;
4349 }
4350
4351 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4352 }
4353 return rc;
4354}
4355
4356
4357/**
4358 * Adds a memory object to the session.
4359 *
4360 * @returns IPRT status code.
4361 * @param pMem Memory tracking structure containing the
4362 * information to track.
4363 * @param pSession The session.
4364 */
4365static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4366{
4367 PSUPDRVBUNDLE pBundle;
4368
4369 /*
4370 * Find free entry and record the allocation.
4371 */
4372 RTSpinlockAcquire(pSession->Spinlock);
4373 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4374 {
4375 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4376 {
4377 unsigned i;
4378 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4379 {
4380 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4381 {
4382 pBundle->cUsed++;
4383 pBundle->aMem[i] = *pMem;
4384 RTSpinlockRelease(pSession->Spinlock);
4385 return VINF_SUCCESS;
4386 }
4387 }
4388 AssertFailed(); /* !!this can't be happening!!! */
4389 }
4390 }
4391 RTSpinlockRelease(pSession->Spinlock);
4392
4393 /*
4394 * Need to allocate a new bundle.
4395 * Insert into the last entry in the bundle.
4396 */
4397 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4398 if (!pBundle)
4399 return VERR_NO_MEMORY;
4400
4401 /* take last entry. */
4402 pBundle->cUsed++;
4403 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4404
4405 /* insert into list. */
4406 RTSpinlockAcquire(pSession->Spinlock);
4407 pBundle->pNext = pSession->Bundle.pNext;
4408 pSession->Bundle.pNext = pBundle;
4409 RTSpinlockRelease(pSession->Spinlock);
4410
4411 return VINF_SUCCESS;
4412}
4413
4414
4415/**
4416 * Releases a memory object referenced by pointer and type.
4417 *
4418 * @returns IPRT status code.
4419 * @param pSession Session data.
4420 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4421 * @param eType Memory type.
4422 */
4423static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4424{
4425 PSUPDRVBUNDLE pBundle;
4426
4427 /*
4428 * Validate input.
4429 */
4430 if (!uPtr)
4431 {
4432 Log(("Illegal address %p\n", (void *)uPtr));
4433 return VERR_INVALID_PARAMETER;
4434 }
4435
4436 /*
4437 * Search for the address.
4438 */
4439 RTSpinlockAcquire(pSession->Spinlock);
4440 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4441 {
4442 if (pBundle->cUsed > 0)
4443 {
4444 unsigned i;
4445 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4446 {
4447 if ( pBundle->aMem[i].eType == eType
4448 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4449 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4450 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4451 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4452 )
4453 {
4454 /* Make a copy of it and release it outside the spinlock. */
4455 SUPDRVMEMREF Mem = pBundle->aMem[i];
4456 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4457 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4458 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4459 RTSpinlockRelease(pSession->Spinlock);
4460
4461 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4462 {
4463 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4464 AssertRC(rc); /** @todo figure out how to handle this. */
4465 }
4466 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4467 {
4468 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4469 AssertRC(rc); /** @todo figure out how to handle this. */
4470 }
4471 return VINF_SUCCESS;
4472 }
4473 }
4474 }
4475 }
4476 RTSpinlockRelease(pSession->Spinlock);
4477 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4478 return VERR_INVALID_PARAMETER;
4479}
4480
4481
4482/**
4483 * Opens an image. If it's the first time it's opened the call must upload
4484 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4485 *
4486 * This is the 1st step of the loading.
4487 *
4488 * @returns IPRT status code.
4489 * @param pDevExt Device globals.
4490 * @param pSession Session data.
4491 * @param pReq The open request.
4492 */
4493static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4494{
4495 int rc;
4496 PSUPDRVLDRIMAGE pImage;
4497 void *pv;
4498 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4499 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4500
4501 /*
4502 * Check if we got an instance of the image already.
4503 */
4504 supdrvLdrLock(pDevExt);
4505 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4506 {
4507 if ( pImage->szName[cchName] == '\0'
4508 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4509 {
4510 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4511 {
4512 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4513 pImage->cUsage++;
4514 pReq->u.Out.pvImageBase = pImage->pvImage;
4515 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4516 pReq->u.Out.fNativeLoader = pImage->fNative;
4517 supdrvLdrAddUsage(pSession, pImage);
4518 supdrvLdrUnlock(pDevExt);
4519 return VINF_SUCCESS;
4520 }
4521 supdrvLdrUnlock(pDevExt);
4522 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4523 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4524 }
4525 }
4526 /* (not found - add it!) */
4527
4528 /*
4529 * Allocate memory.
4530 */
4531 Assert(cchName < sizeof(pImage->szName));
4532 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4533 if (!pv)
4534 {
4535 supdrvLdrUnlock(pDevExt);
4536 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4537 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4538 }
4539
4540 /*
4541 * Setup and link in the LDR stuff.
4542 */
4543 pImage = (PSUPDRVLDRIMAGE)pv;
4544 pImage->pvImage = NULL;
4545 pImage->pvImageAlloc = NULL;
4546 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4547 pImage->cbImageBits = pReq->u.In.cbImageBits;
4548 pImage->cSymbols = 0;
4549 pImage->paSymbols = NULL;
4550 pImage->pachStrTab = NULL;
4551 pImage->cbStrTab = 0;
4552 pImage->pfnModuleInit = NULL;
4553 pImage->pfnModuleTerm = NULL;
4554 pImage->pfnServiceReqHandler = NULL;
4555 pImage->uState = SUP_IOCTL_LDR_OPEN;
4556 pImage->cUsage = 1;
4557 pImage->pDevExt = pDevExt;
4558 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4559
4560 /*
4561 * Try load it using the native loader, if that isn't supported, fall back
4562 * on the older method.
4563 */
4564 pImage->fNative = true;
4565 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4566 if (rc == VERR_NOT_SUPPORTED)
4567 {
4568 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4569 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4570 pImage->fNative = false;
4571 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4572 }
4573 if (RT_FAILURE(rc))
4574 {
4575 supdrvLdrUnlock(pDevExt);
4576 RTMemFree(pImage);
4577 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4578 return rc;
4579 }
4580 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4581
4582 /*
4583 * Link it.
4584 */
4585 pImage->pNext = pDevExt->pLdrImages;
4586 pDevExt->pLdrImages = pImage;
4587
4588 supdrvLdrAddUsage(pSession, pImage);
4589
4590 pReq->u.Out.pvImageBase = pImage->pvImage;
4591 pReq->u.Out.fNeedsLoading = true;
4592 pReq->u.Out.fNativeLoader = pImage->fNative;
4593 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4594
4595 supdrvLdrUnlock(pDevExt);
4596 return VINF_SUCCESS;
4597}
4598
4599
4600/**
4601 * Worker that validates a pointer to an image entrypoint.
4602 *
4603 * @returns IPRT status code.
4604 * @param pDevExt The device globals.
4605 * @param pImage The loader image.
4606 * @param pv The pointer into the image.
4607 * @param fMayBeNull Whether it may be NULL.
4608 * @param pszWhat What is this entrypoint? (for logging)
4609 * @param pbImageBits The image bits prepared by ring-3.
4610 *
4611 * @remarks Will leave the lock on failure.
4612 */
4613static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4614 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4615{
4616 if (!fMayBeNull || pv)
4617 {
4618 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4619 {
4620 supdrvLdrUnlock(pDevExt);
4621 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4622 return VERR_INVALID_PARAMETER;
4623 }
4624
4625 if (pImage->fNative)
4626 {
4627 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4628 if (RT_FAILURE(rc))
4629 {
4630 supdrvLdrUnlock(pDevExt);
4631 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4632 return rc;
4633 }
4634 }
4635 }
4636 return VINF_SUCCESS;
4637}
4638
4639
4640/**
4641 * Loads the image bits.
4642 *
4643 * This is the 2nd step of the loading.
4644 *
4645 * @returns IPRT status code.
4646 * @param pDevExt Device globals.
4647 * @param pSession Session data.
4648 * @param pReq The request.
4649 */
4650static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4651{
4652 PSUPDRVLDRUSAGE pUsage;
4653 PSUPDRVLDRIMAGE pImage;
4654 int rc;
4655 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4656
4657 /*
4658 * Find the ldr image.
4659 */
4660 supdrvLdrLock(pDevExt);
4661 pUsage = pSession->pLdrUsage;
4662 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4663 pUsage = pUsage->pNext;
4664 if (!pUsage)
4665 {
4666 supdrvLdrUnlock(pDevExt);
4667 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4668 return VERR_INVALID_HANDLE;
4669 }
4670 pImage = pUsage->pImage;
4671
4672 /*
4673 * Validate input.
4674 */
4675 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4676 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4677 {
4678 supdrvLdrUnlock(pDevExt);
4679 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4680 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4681 return VERR_INVALID_HANDLE;
4682 }
4683
4684 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4685 {
4686 unsigned uState = pImage->uState;
4687 supdrvLdrUnlock(pDevExt);
4688 if (uState != SUP_IOCTL_LDR_LOAD)
4689 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4690 return VERR_ALREADY_LOADED;
4691 }
4692
4693 switch (pReq->u.In.eEPType)
4694 {
4695 case SUPLDRLOADEP_NOTHING:
4696 break;
4697
4698 case SUPLDRLOADEP_VMMR0:
4699 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4700 if (RT_SUCCESS(rc))
4701 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4702 if (RT_SUCCESS(rc))
4703 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4704 if (RT_SUCCESS(rc))
4705 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4706 if (RT_FAILURE(rc))
4707 return rc;
4708 break;
4709
4710 case SUPLDRLOADEP_SERVICE:
4711 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4712 if (RT_FAILURE(rc))
4713 return rc;
4714 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4715 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4716 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4717 {
4718 supdrvLdrUnlock(pDevExt);
4719 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4720 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4721 pReq->u.In.EP.Service.apvReserved[0],
4722 pReq->u.In.EP.Service.apvReserved[1],
4723 pReq->u.In.EP.Service.apvReserved[2]));
4724 return VERR_INVALID_PARAMETER;
4725 }
4726 break;
4727
4728 default:
4729 supdrvLdrUnlock(pDevExt);
4730 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4731 return VERR_INVALID_PARAMETER;
4732 }
4733
4734 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4735 if (RT_FAILURE(rc))
4736 return rc;
4737 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4738 if (RT_FAILURE(rc))
4739 return rc;
4740
4741 /*
4742 * Allocate and copy the tables.
4743 * (No need to do try/except as this is a buffered request.)
4744 */
4745 pImage->cbStrTab = pReq->u.In.cbStrTab;
4746 if (pImage->cbStrTab)
4747 {
4748 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4749 if (pImage->pachStrTab)
4750 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4751 else
4752 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4753 }
4754
4755 pImage->cSymbols = pReq->u.In.cSymbols;
4756 if (RT_SUCCESS(rc) && pImage->cSymbols)
4757 {
4758 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4759 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4760 if (pImage->paSymbols)
4761 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4762 else
4763 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4764 }
4765
4766 /*
4767 * Copy the bits / complete native loading.
4768 */
4769 if (RT_SUCCESS(rc))
4770 {
4771 pImage->uState = SUP_IOCTL_LDR_LOAD;
4772 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4773 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4774
4775 if (pImage->fNative)
4776 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4777 else
4778 {
4779 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4780 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4781 }
4782 }
4783
4784 /*
4785 * Update any entry points.
4786 */
4787 if (RT_SUCCESS(rc))
4788 {
4789 switch (pReq->u.In.eEPType)
4790 {
4791 default:
4792 case SUPLDRLOADEP_NOTHING:
4793 rc = VINF_SUCCESS;
4794 break;
4795 case SUPLDRLOADEP_VMMR0:
4796 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4797 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4798 break;
4799 case SUPLDRLOADEP_SERVICE:
4800 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4801 rc = VINF_SUCCESS;
4802 break;
4803 }
4804 }
4805
4806 /*
4807 * On success call the module initialization.
4808 */
4809 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4810 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4811 {
4812 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4813 pDevExt->pLdrInitImage = pImage;
4814 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4815 rc = pImage->pfnModuleInit(pImage);
4816 pDevExt->pLdrInitImage = NULL;
4817 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4818 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4819 supdrvLdrUnsetVMMR0EPs(pDevExt);
4820 }
4821 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4822
4823 if (RT_FAILURE(rc))
4824 {
4825 /* Inform the tracing component in case ModuleInit registered TPs. */
4826 supdrvTracerModuleUnloading(pDevExt, pImage);
4827
4828 pImage->uState = SUP_IOCTL_LDR_OPEN;
4829 pImage->pfnModuleInit = NULL;
4830 pImage->pfnModuleTerm = NULL;
4831 pImage->pfnServiceReqHandler= NULL;
4832 pImage->cbStrTab = 0;
4833 RTMemFree(pImage->pachStrTab);
4834 pImage->pachStrTab = NULL;
4835 RTMemFree(pImage->paSymbols);
4836 pImage->paSymbols = NULL;
4837 pImage->cSymbols = 0;
4838 }
4839
4840 supdrvLdrUnlock(pDevExt);
4841 return rc;
4842}
4843
4844
4845/**
4846 * Frees a previously loaded (prep'ed) image.
4847 *
4848 * @returns IPRT status code.
4849 * @param pDevExt Device globals.
4850 * @param pSession Session data.
4851 * @param pReq The request.
4852 */
4853static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4854{
4855 int rc;
4856 PSUPDRVLDRUSAGE pUsagePrev;
4857 PSUPDRVLDRUSAGE pUsage;
4858 PSUPDRVLDRIMAGE pImage;
4859 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4860
4861 /*
4862 * Find the ldr image.
4863 */
4864 supdrvLdrLock(pDevExt);
4865 pUsagePrev = NULL;
4866 pUsage = pSession->pLdrUsage;
4867 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4868 {
4869 pUsagePrev = pUsage;
4870 pUsage = pUsage->pNext;
4871 }
4872 if (!pUsage)
4873 {
4874 supdrvLdrUnlock(pDevExt);
4875 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4876 return VERR_INVALID_HANDLE;
4877 }
4878
4879 /*
4880 * Check if we can remove anything.
4881 */
4882 rc = VINF_SUCCESS;
4883 pImage = pUsage->pImage;
4884 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4885 {
4886 /*
4887 * Check if there are any objects with destructors in the image, if
4888 * so leave it for the session cleanup routine so we get a chance to
4889 * clean things up in the right order and not leave them all dangling.
4890 */
4891 RTSpinlockAcquire(pDevExt->Spinlock);
4892 if (pImage->cUsage <= 1)
4893 {
4894 PSUPDRVOBJ pObj;
4895 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4896 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4897 {
4898 rc = VERR_DANGLING_OBJECTS;
4899 break;
4900 }
4901 }
4902 else
4903 {
4904 PSUPDRVUSAGE pGenUsage;
4905 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4906 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4907 {
4908 rc = VERR_DANGLING_OBJECTS;
4909 break;
4910 }
4911 }
4912 RTSpinlockRelease(pDevExt->Spinlock);
4913 if (rc == VINF_SUCCESS)
4914 {
4915 /* unlink it */
4916 if (pUsagePrev)
4917 pUsagePrev->pNext = pUsage->pNext;
4918 else
4919 pSession->pLdrUsage = pUsage->pNext;
4920
4921 /* free it */
4922 pUsage->pImage = NULL;
4923 pUsage->pNext = NULL;
4924 RTMemFree(pUsage);
4925
4926 /*
4927 * Dereference the image.
4928 */
4929 if (pImage->cUsage <= 1)
4930 supdrvLdrFree(pDevExt, pImage);
4931 else
4932 pImage->cUsage--;
4933 }
4934 else
4935 {
4936 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4937 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4938 }
4939 }
4940 else
4941 {
4942 /*
4943 * Dereference both image and usage.
4944 */
4945 pImage->cUsage--;
4946 pUsage->cUsage--;
4947 }
4948
4949 supdrvLdrUnlock(pDevExt);
4950 return rc;
4951}
4952
4953
4954/**
4955 * Gets the address of a symbol in an open image.
4956 *
4957 * @returns IPRT status code.
4958 * @param pDevExt Device globals.
4959 * @param pSession Session data.
4960 * @param pReq The request buffer.
4961 */
4962static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4963{
4964 PSUPDRVLDRIMAGE pImage;
4965 PSUPDRVLDRUSAGE pUsage;
4966 uint32_t i;
4967 PSUPLDRSYM paSyms;
4968 const char *pchStrings;
4969 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
4970 void *pvSymbol = NULL;
4971 int rc = VERR_GENERAL_FAILURE;
4972 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
4973
4974 /*
4975 * Find the ldr image.
4976 */
4977 supdrvLdrLock(pDevExt);
4978 pUsage = pSession->pLdrUsage;
4979 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4980 pUsage = pUsage->pNext;
4981 if (!pUsage)
4982 {
4983 supdrvLdrUnlock(pDevExt);
4984 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
4985 return VERR_INVALID_HANDLE;
4986 }
4987 pImage = pUsage->pImage;
4988 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
4989 {
4990 unsigned uState = pImage->uState;
4991 supdrvLdrUnlock(pDevExt);
4992 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
4993 return VERR_ALREADY_LOADED;
4994 }
4995
4996 /*
4997 * Search the symbol strings.
4998 *
4999 * Note! The int32_t is for native loading on solaris where the data
5000 * and text segments are in very different places.
5001 */
5002 pchStrings = pImage->pachStrTab;
5003 paSyms = pImage->paSymbols;
5004 for (i = 0; i < pImage->cSymbols; i++)
5005 {
5006 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5007 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5008 {
5009 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5010 rc = VINF_SUCCESS;
5011 break;
5012 }
5013 }
5014 supdrvLdrUnlock(pDevExt);
5015 pReq->u.Out.pvSymbol = pvSymbol;
5016 return rc;
5017}
5018
5019
5020/**
5021 * Gets the address of a symbol in an open image or the support driver.
5022 *
5023 * @returns VINF_SUCCESS on success.
5024 * @returns
5025 * @param pDevExt Device globals.
5026 * @param pSession Session data.
5027 * @param pReq The request buffer.
5028 */
5029static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5030{
5031 int rc = VINF_SUCCESS;
5032 const char *pszSymbol = pReq->u.In.pszSymbol;
5033 const char *pszModule = pReq->u.In.pszModule;
5034 size_t cbSymbol;
5035 char const *pszEnd;
5036 uint32_t i;
5037
5038 /*
5039 * Input validation.
5040 */
5041 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5042 pszEnd = RTStrEnd(pszSymbol, 512);
5043 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5044 cbSymbol = pszEnd - pszSymbol + 1;
5045
5046 if (pszModule)
5047 {
5048 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5049 pszEnd = RTStrEnd(pszModule, 64);
5050 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5051 }
5052 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5053
5054
5055 if ( !pszModule
5056 || !strcmp(pszModule, "SupDrv"))
5057 {
5058 /*
5059 * Search the support driver export table.
5060 */
5061 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5062 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5063 {
5064 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5065 break;
5066 }
5067 }
5068 else
5069 {
5070 /*
5071 * Find the loader image.
5072 */
5073 PSUPDRVLDRIMAGE pImage;
5074
5075 supdrvLdrLock(pDevExt);
5076
5077 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5078 if (!strcmp(pImage->szName, pszModule))
5079 break;
5080 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5081 {
5082 /*
5083 * Search the symbol strings.
5084 */
5085 const char *pchStrings = pImage->pachStrTab;
5086 PCSUPLDRSYM paSyms = pImage->paSymbols;
5087 for (i = 0; i < pImage->cSymbols; i++)
5088 {
5089 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5090 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5091 {
5092 /*
5093 * Found it! Calc the symbol address and add a reference to the module.
5094 */
5095 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5096 rc = supdrvLdrAddUsage(pSession, pImage);
5097 break;
5098 }
5099 }
5100 }
5101 else
5102 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5103
5104 supdrvLdrUnlock(pDevExt);
5105 }
5106 return rc;
5107}
5108
5109
5110/**
5111 * Updates the VMMR0 entry point pointers.
5112 *
5113 * @returns IPRT status code.
5114 * @param pDevExt Device globals.
5115 * @param pSession Session data.
5116 * @param pVMMR0 VMMR0 image handle.
5117 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5118 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5119 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5120 * @remark Caller must own the loader mutex.
5121 */
5122static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5123{
5124 int rc = VINF_SUCCESS;
5125 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5126
5127
5128 /*
5129 * Check if not yet set.
5130 */
5131 if (!pDevExt->pvVMMR0)
5132 {
5133 pDevExt->pvVMMR0 = pvVMMR0;
5134 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5135 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5136 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5137 }
5138 else
5139 {
5140 /*
5141 * Return failure or success depending on whether the values match or not.
5142 */
5143 if ( pDevExt->pvVMMR0 != pvVMMR0
5144 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5145 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5146 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5147 {
5148 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5149 rc = VERR_INVALID_PARAMETER;
5150 }
5151 }
5152 return rc;
5153}
5154
5155
5156/**
5157 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5158 *
5159 * @param pDevExt Device globals.
5160 */
5161static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5162{
5163 pDevExt->pvVMMR0 = NULL;
5164 pDevExt->pfnVMMR0EntryInt = NULL;
5165 pDevExt->pfnVMMR0EntryFast = NULL;
5166 pDevExt->pfnVMMR0EntryEx = NULL;
5167}
5168
5169
5170/**
5171 * Adds a usage reference in the specified session of an image.
5172 *
5173 * Called while owning the loader semaphore.
5174 *
5175 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5176 * @param pSession Session in question.
5177 * @param pImage Image which the session is using.
5178 */
5179static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5180{
5181 PSUPDRVLDRUSAGE pUsage;
5182 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5183
5184 /*
5185 * Referenced it already?
5186 */
5187 pUsage = pSession->pLdrUsage;
5188 while (pUsage)
5189 {
5190 if (pUsage->pImage == pImage)
5191 {
5192 pUsage->cUsage++;
5193 return VINF_SUCCESS;
5194 }
5195 pUsage = pUsage->pNext;
5196 }
5197
5198 /*
5199 * Allocate new usage record.
5200 */
5201 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5202 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5203 pUsage->cUsage = 1;
5204 pUsage->pImage = pImage;
5205 pUsage->pNext = pSession->pLdrUsage;
5206 pSession->pLdrUsage = pUsage;
5207 return VINF_SUCCESS;
5208}
5209
5210
5211/**
5212 * Frees a load image.
5213 *
5214 * @param pDevExt Pointer to device extension.
5215 * @param pImage Pointer to the image we're gonna free.
5216 * This image must exit!
5217 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5218 */
5219static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5220{
5221 PSUPDRVLDRIMAGE pImagePrev;
5222 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5223
5224 /* find it - arg. should've used doubly linked list. */
5225 Assert(pDevExt->pLdrImages);
5226 pImagePrev = NULL;
5227 if (pDevExt->pLdrImages != pImage)
5228 {
5229 pImagePrev = pDevExt->pLdrImages;
5230 while (pImagePrev->pNext != pImage)
5231 pImagePrev = pImagePrev->pNext;
5232 Assert(pImagePrev->pNext == pImage);
5233 }
5234
5235 /* unlink */
5236 if (pImagePrev)
5237 pImagePrev->pNext = pImage->pNext;
5238 else
5239 pDevExt->pLdrImages = pImage->pNext;
5240
5241 /* check if this is VMMR0.r0 unset its entry point pointers. */
5242 if (pDevExt->pvVMMR0 == pImage->pvImage)
5243 supdrvLdrUnsetVMMR0EPs(pDevExt);
5244
5245 /* check for objects with destructors in this image. (Shouldn't happen.) */
5246 if (pDevExt->pObjs)
5247 {
5248 unsigned cObjs = 0;
5249 PSUPDRVOBJ pObj;
5250 RTSpinlockAcquire(pDevExt->Spinlock);
5251 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5252 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5253 {
5254 pObj->pfnDestructor = NULL;
5255 cObjs++;
5256 }
5257 RTSpinlockRelease(pDevExt->Spinlock);
5258 if (cObjs)
5259 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5260 }
5261
5262 /* call termination function if fully loaded. */
5263 if ( pImage->pfnModuleTerm
5264 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5265 {
5266 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5267 pImage->pfnModuleTerm(pImage);
5268 }
5269
5270 /* Inform the tracing component. */
5271 supdrvTracerModuleUnloading(pDevExt, pImage);
5272
5273 /* do native unload if appropriate. */
5274 if (pImage->fNative)
5275 supdrvOSLdrUnload(pDevExt, pImage);
5276
5277 /* free the image */
5278 pImage->cUsage = 0;
5279 pImage->pDevExt = NULL;
5280 pImage->pNext = NULL;
5281 pImage->uState = SUP_IOCTL_LDR_FREE;
5282 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5283 pImage->pvImageAlloc = NULL;
5284 RTMemFree(pImage->pachStrTab);
5285 pImage->pachStrTab = NULL;
5286 RTMemFree(pImage->paSymbols);
5287 pImage->paSymbols = NULL;
5288 RTMemFree(pImage);
5289}
5290
5291
5292/**
5293 * Acquires the loader lock.
5294 *
5295 * @returns IPRT status code.
5296 * @param pDevExt The device extension.
5297 */
5298DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5299{
5300#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5301 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5302#else
5303 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5304#endif
5305 AssertRC(rc);
5306 return rc;
5307}
5308
5309
5310/**
5311 * Releases the loader lock.
5312 *
5313 * @returns IPRT status code.
5314 * @param pDevExt The device extension.
5315 */
5316DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5317{
5318#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5319 return RTSemMutexRelease(pDevExt->mtxLdr);
5320#else
5321 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5322#endif
5323}
5324
5325
5326/**
5327 * Implements the service call request.
5328 *
5329 * @returns VBox status code.
5330 * @param pDevExt The device extension.
5331 * @param pSession The calling session.
5332 * @param pReq The request packet, valid.
5333 */
5334static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5335{
5336#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5337 int rc;
5338
5339 /*
5340 * Find the module first in the module referenced by the calling session.
5341 */
5342 rc = supdrvLdrLock(pDevExt);
5343 if (RT_SUCCESS(rc))
5344 {
5345 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5346 PSUPDRVLDRUSAGE pUsage;
5347
5348 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5349 if ( pUsage->pImage->pfnServiceReqHandler
5350 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5351 {
5352 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5353 break;
5354 }
5355 supdrvLdrUnlock(pDevExt);
5356
5357 if (pfnServiceReqHandler)
5358 {
5359 /*
5360 * Call it.
5361 */
5362 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5363 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5364 else
5365 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5366 }
5367 else
5368 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5369 }
5370
5371 /* log it */
5372 if ( RT_FAILURE(rc)
5373 && rc != VERR_INTERRUPTED
5374 && rc != VERR_TIMEOUT)
5375 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5376 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5377 else
5378 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5379 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5380 return rc;
5381#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5382 return VERR_NOT_IMPLEMENTED;
5383#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5384}
5385
5386
5387/**
5388 * Implements the logger settings request.
5389 *
5390 * @returns VBox status code.
5391 * @param pDevExt The device extension.
5392 * @param pSession The caller's session.
5393 * @param pReq The request.
5394 */
5395static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5396{
5397 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5398 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5399 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5400 PRTLOGGER pLogger = NULL;
5401 int rc;
5402
5403 /*
5404 * Some further validation.
5405 */
5406 switch (pReq->u.In.fWhat)
5407 {
5408 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5409 case SUPLOGGERSETTINGS_WHAT_CREATE:
5410 break;
5411
5412 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5413 if (*pszGroup || *pszFlags || *pszDest)
5414 return VERR_INVALID_PARAMETER;
5415 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5416 return VERR_ACCESS_DENIED;
5417 break;
5418
5419 default:
5420 return VERR_INTERNAL_ERROR;
5421 }
5422
5423 /*
5424 * Get the logger.
5425 */
5426 switch (pReq->u.In.fWhich)
5427 {
5428 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5429 pLogger = RTLogGetDefaultInstance();
5430 break;
5431
5432 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5433 pLogger = RTLogRelDefaultInstance();
5434 break;
5435
5436 default:
5437 return VERR_INTERNAL_ERROR;
5438 }
5439
5440 /*
5441 * Do the job.
5442 */
5443 switch (pReq->u.In.fWhat)
5444 {
5445 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5446 if (pLogger)
5447 {
5448 rc = RTLogFlags(pLogger, pszFlags);
5449 if (RT_SUCCESS(rc))
5450 rc = RTLogGroupSettings(pLogger, pszGroup);
5451 NOREF(pszDest);
5452 }
5453 else
5454 rc = VERR_NOT_FOUND;
5455 break;
5456
5457 case SUPLOGGERSETTINGS_WHAT_CREATE:
5458 {
5459 if (pLogger)
5460 rc = VERR_ALREADY_EXISTS;
5461 else
5462 {
5463 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5464
5465 rc = RTLogCreate(&pLogger,
5466 0 /* fFlags */,
5467 pszGroup,
5468 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5469 ? "VBOX_LOG"
5470 : "VBOX_RELEASE_LOG",
5471 RT_ELEMENTS(s_apszGroups),
5472 s_apszGroups,
5473 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5474 NULL);
5475 if (RT_SUCCESS(rc))
5476 {
5477 rc = RTLogFlags(pLogger, pszFlags);
5478 NOREF(pszDest);
5479 if (RT_SUCCESS(rc))
5480 {
5481 switch (pReq->u.In.fWhich)
5482 {
5483 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5484 pLogger = RTLogSetDefaultInstance(pLogger);
5485 break;
5486 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5487 pLogger = RTLogRelSetDefaultInstance(pLogger);
5488 break;
5489 }
5490 }
5491 RTLogDestroy(pLogger);
5492 }
5493 }
5494 break;
5495 }
5496
5497 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5498 switch (pReq->u.In.fWhich)
5499 {
5500 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5501 pLogger = RTLogSetDefaultInstance(NULL);
5502 break;
5503 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5504 pLogger = RTLogRelSetDefaultInstance(NULL);
5505 break;
5506 }
5507 rc = RTLogDestroy(pLogger);
5508 break;
5509
5510 default:
5511 {
5512 rc = VERR_INTERNAL_ERROR;
5513 break;
5514 }
5515 }
5516
5517 return rc;
5518}
5519
5520
5521/**
5522 * Implements the MSR prober operations.
5523 *
5524 * @returns VBox status code.
5525 * @param pDevExt The device extension.
5526 * @param pReq The request.
5527 */
5528static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5529{
5530#ifdef SUPDRV_WITH_MSR_PROBER
5531 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5532 int rc;
5533
5534 switch (pReq->u.In.enmOp)
5535 {
5536 case SUPMSRPROBEROP_READ:
5537 {
5538 uint64_t uValue;
5539 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5540 if (RT_SUCCESS(rc))
5541 {
5542 pReq->u.Out.uResults.Read.uValue = uValue;
5543 pReq->u.Out.uResults.Read.fGp = false;
5544 }
5545 else if (rc == VERR_ACCESS_DENIED)
5546 {
5547 pReq->u.Out.uResults.Read.uValue = 0;
5548 pReq->u.Out.uResults.Read.fGp = true;
5549 rc = VINF_SUCCESS;
5550 }
5551 break;
5552 }
5553
5554 case SUPMSRPROBEROP_WRITE:
5555 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5556 if (RT_SUCCESS(rc))
5557 pReq->u.Out.uResults.Write.fGp = false;
5558 else if (rc == VERR_ACCESS_DENIED)
5559 {
5560 pReq->u.Out.uResults.Write.fGp = true;
5561 rc = VINF_SUCCESS;
5562 }
5563 break;
5564
5565 case SUPMSRPROBEROP_MODIFY:
5566 case SUPMSRPROBEROP_MODIFY_FASTER:
5567 rc = supdrvOSMsrProberModify(idCpu, pReq);
5568 break;
5569
5570 default:
5571 return VERR_INVALID_FUNCTION;
5572 }
5573 return rc;
5574#else
5575 return VERR_NOT_IMPLEMENTED;
5576#endif
5577}
5578
5579
5580#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5581/**
5582 * Switches the TSC-delta measurement thread into the butchered state.
5583 *
5584 * @returns VBox status code.
5585 * @param pDevExt Pointer to the device instance data.
5586 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5587 * @param pszFailed An error message to log.
5588 * @param rcFailed The error code to exit the thread with.
5589 */
5590static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5591{
5592 if (!fSpinlockHeld)
5593 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5594
5595 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5596 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5597 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5598 return rcFailed;
5599}
5600
5601
5602/**
5603 * The TSC-delta measurement thread.
5604 *
5605 * @returns VBox status code.
5606 * @param hThread The thread handle.
5607 * @param pvUser Opaque pointer to the device instance data.
5608 */
5609static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5610{
5611 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5612 static uint32_t cTimesMeasured = 0;
5613 uint32_t cConsecutiveTimeouts = 0;
5614 int rc = VERR_INTERNAL_ERROR_2;
5615 for (;;)
5616 {
5617 /*
5618 * Switch on the current state.
5619 */
5620 SUPDRVTSCDELTASTATE enmState;
5621 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5622 enmState = pDevExt->enmTscDeltaState;
5623 switch (enmState)
5624 {
5625 case kSupDrvTscDeltaState_Creating:
5626 {
5627 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5628 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5629 if (RT_FAILURE(rc))
5630 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5631 /* fall thru */
5632 }
5633
5634 case kSupDrvTscDeltaState_Listening:
5635 {
5636 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5637
5638 /* Simple adaptive timeout. */
5639 if (cConsecutiveTimeouts++ == 10)
5640 {
5641 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5642 pDevExt->cMsTscDeltaTimeout = 10;
5643 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5644 pDevExt->cMsTscDeltaTimeout = 100;
5645 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5646 pDevExt->cMsTscDeltaTimeout = 500;
5647 cConsecutiveTimeouts = 0;
5648 }
5649 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5650 if ( RT_FAILURE(rc)
5651 && rc != VERR_TIMEOUT)
5652 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5653 break;
5654 }
5655
5656 case kSupDrvTscDeltaState_WaitAndMeasure:
5657 {
5658 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5659 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5660 if (RT_FAILURE(rc))
5661 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5662 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5663 pDevExt->cMsTscDeltaTimeout = 1;
5664 RTThreadSleep(10);
5665 /* fall thru */
5666 }
5667
5668 case kSupDrvTscDeltaState_Measuring:
5669 {
5670 cConsecutiveTimeouts = 0;
5671 if (!cTimesMeasured++)
5672 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5673 else
5674 {
5675 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5676 unsigned iCpu;
5677
5678 if (cTimesMeasured == UINT32_MAX)
5679 cTimesMeasured = 1;
5680
5681 /* Measure TSC-deltas only for the CPUs that are in the set. */
5682 rc = VINF_SUCCESS;
5683 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5684 {
5685 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5686 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5687 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5688 {
5689 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5690 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5691 }
5692 }
5693 }
5694 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5695 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5696 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5697 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5698 pDevExt->rcTscDelta = rc;
5699 break;
5700 }
5701
5702 case kSupDrvTscDeltaState_Terminating:
5703 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5704 return VINF_SUCCESS;
5705
5706 case kSupDrvTscDeltaState_Butchered:
5707 default:
5708 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5709 }
5710 }
5711
5712 return rc;
5713}
5714
5715
5716/**
5717 * Waits for the TSC-delta measurement thread to respond to a state change.
5718 *
5719 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5720 * other error code on internal error.
5721 *
5722 * @param pThis Pointer to the grant service instance data.
5723 * @param enmCurState The current state.
5724 * @param enmNewState The new state we're waiting for it to enter.
5725 */
5726static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5727{
5728 /*
5729 * Wait a short while for the expected state transition.
5730 */
5731 int rc;
5732 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5733 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5734 if (pDevExt->enmTscDeltaState == enmNewState)
5735 {
5736 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5737 rc = VINF_SUCCESS;
5738 }
5739 else if (pDevExt->enmTscDeltaState == enmCurState)
5740 {
5741 /*
5742 * Wait longer if the state has not yet transitioned to the one we want.
5743 */
5744 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5745 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5746 if ( RT_SUCCESS(rc)
5747 || rc == VERR_TIMEOUT)
5748 {
5749 /*
5750 * Check the state whether we've succeeded.
5751 */
5752 SUPDRVTSCDELTASTATE enmState;
5753 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5754 enmState = pDevExt->enmTscDeltaState;
5755 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5756 if (enmState == enmNewState)
5757 rc = VINF_SUCCESS;
5758 else if (enmState == enmCurState)
5759 {
5760 rc = VERR_TIMEOUT;
5761 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5762 enmNewState));
5763 }
5764 else
5765 {
5766 rc = VERR_INTERNAL_ERROR;
5767 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5768 enmState, enmNewState));
5769 }
5770 }
5771 else
5772 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5773 }
5774 else
5775 {
5776 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5777 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5778 rc = VERR_INTERNAL_ERROR;
5779 }
5780
5781 return rc;
5782}
5783
5784
5785/**
5786 * Terminates the TSC-delta measurement thread.
5787 *
5788 * @param pDevExt Pointer to the device instance data.
5789 */
5790static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5791{
5792 int rc;
5793 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5794 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5795 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5796 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5797 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5798 if (RT_FAILURE(rc))
5799 {
5800 /* Signal a few more times before giving up. */
5801 int cTries = 5;
5802 while (--cTries > 0)
5803 {
5804 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5805 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5806 if (rc != VERR_TIMEOUT)
5807 break;
5808 }
5809 }
5810}
5811
5812
5813/**
5814 * Initializes and spawns the TSC-delta measurement thread.
5815 *
5816 * A thread is required for servicing re-measurement requests from events like
5817 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5818 * under all contexts on all OSs.
5819 *
5820 * @returns VBox status code.
5821 * @param pDevExt Pointer to the device instance data.
5822 *
5823 * @remarks Must only be called -after- initializing GIP and setting up MP
5824 * notifications!
5825 */
5826static int supdrvTscDeltaInit(PSUPDRVDEVEXT pDevExt)
5827{
5828 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5829 if (RT_SUCCESS(rc))
5830 {
5831 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5832 if (RT_SUCCESS(rc))
5833 {
5834 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5835 pDevExt->cMsTscDeltaTimeout = 1;
5836 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5837 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5838 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5839 if (RT_SUCCESS(rc))
5840 {
5841 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5842 if (RT_SUCCESS(rc))
5843 {
5844 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5845 return rc;
5846 }
5847
5848 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5849 supdrvTscDeltaThreadTerminate(pDevExt);
5850 }
5851 else
5852 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5853 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5854 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5855 }
5856 else
5857 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5858 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5859 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5860 }
5861 else
5862 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5863
5864 return rc;
5865}
5866
5867
5868/**
5869 * Terminates the TSC-delta measurement thread and cleanup.
5870 *
5871 * @param pDevExt Pointer to the device instance data.
5872 */
5873static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5874{
5875 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5876 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5877 {
5878 supdrvTscDeltaThreadTerminate(pDevExt);
5879 }
5880
5881 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5882 {
5883 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5884 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5885 }
5886
5887 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5888 {
5889 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5890 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5891 }
5892
5893 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5894}
5895#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5896
5897
5898/**
5899 * Creates the GIP.
5900 *
5901 * @returns VBox status code.
5902 * @param pDevExt Instance data. GIP stuff may be updated.
5903 */
5904static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
5905{
5906 PSUPGLOBALINFOPAGE pGip;
5907 RTHCPHYS HCPhysGip;
5908 uint32_t u32SystemResolution;
5909 uint32_t u32Interval;
5910 unsigned cCpus;
5911 int rc;
5912
5913 LogFlow(("supdrvGipCreate:\n"));
5914
5915 /* Assert order. */
5916 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
5917 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
5918 Assert(!pDevExt->pGipTimer);
5919
5920 /*
5921 * Check the CPU count.
5922 */
5923 cCpus = RTMpGetArraySize();
5924 if ( cCpus > RTCPUSET_MAX_CPUS
5925 || cCpus > 256 /*ApicId is used for the mappings*/)
5926 {
5927 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
5928 return VERR_TOO_MANY_CPUS;
5929 }
5930
5931 /*
5932 * Allocate a contiguous set of pages with a default kernel mapping.
5933 */
5934 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
5935 if (RT_FAILURE(rc))
5936 {
5937 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
5938 return rc;
5939 }
5940 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
5941 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
5942
5943 /*
5944 * Find a reasonable update interval and initialize the structure.
5945 */
5946 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
5947 while (u32Interval < 10000000 /* 10 ms */)
5948 u32Interval += u32SystemResolution;
5949
5950 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/, cCpus);
5951
5952#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5953 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
5954 rc = supdrvTscDeltaInit(pDevExt);
5955#endif
5956 if (RT_SUCCESS(rc))
5957 {
5958 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
5959 if (RT_SUCCESS(rc))
5960 {
5961 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
5962 if (RT_SUCCESS(rc))
5963 {
5964#ifndef SUPDRV_USE_TSC_DELTA_THREAD
5965 /*
5966 * Measure the TSC deltas now that we have MP notifications.
5967 */
5968 int cTries = 5;
5969 uint16_t iCpu;
5970 do
5971 {
5972 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5973 if (rc != VERR_TRY_AGAIN)
5974 break;
5975 } while (--cTries > 0);
5976 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5977 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
5978#endif
5979
5980 /*
5981 * Create the timer.
5982 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
5983 */
5984 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
5985 {
5986 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
5987 if (rc == VERR_NOT_SUPPORTED)
5988 {
5989 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
5990 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
5991 }
5992 }
5993 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
5994 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
5995 if (RT_SUCCESS(rc))
5996 {
5997 /*
5998 * We're good.
5999 */
6000 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6001 g_pSUPGlobalInfoPage = pGip;
6002 return VINF_SUCCESS;
6003 }
6004 else
6005 {
6006 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6007 Assert(!pDevExt->pGipTimer);
6008 }
6009 }
6010 else
6011 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6012 }
6013 else
6014 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6015 }
6016 else
6017 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6018
6019 supdrvGipDestroy(pDevExt);
6020 return rc;
6021}
6022
6023
6024/**
6025 * Terminates the GIP.
6026 *
6027 * @param pDevExt Instance data. GIP stuff may be updated.
6028 */
6029static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6030{
6031 int rc;
6032#ifdef DEBUG_DARWIN_GIP
6033 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6034 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6035 pDevExt->pGipTimer, pDevExt->GipMemObj));
6036#endif
6037
6038 /*
6039 * Stop receiving MP notifications before tearing anything else down.
6040 */
6041 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6042
6043#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6044 /*
6045 * Terminate the TSC-delta measurement thread and resources.
6046 */
6047 supdrvTscDeltaTerm(pDevExt);
6048#endif
6049
6050 /*
6051 * Invalid the GIP data.
6052 */
6053 if (pDevExt->pGip)
6054 {
6055 supdrvGipTerm(pDevExt->pGip);
6056 pDevExt->pGip = NULL;
6057 }
6058 g_pSUPGlobalInfoPage = NULL;
6059
6060 /*
6061 * Destroy the timer and free the GIP memory object.
6062 */
6063 if (pDevExt->pGipTimer)
6064 {
6065 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6066 pDevExt->pGipTimer = NULL;
6067 }
6068
6069 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6070 {
6071 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6072 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6073 }
6074
6075 /*
6076 * Finally, make sure we've release the system timer resolution request
6077 * if one actually succeeded and is still pending.
6078 */
6079 if (pDevExt->u32SystemTimerGranularityGrant)
6080 {
6081 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
6082 pDevExt->u32SystemTimerGranularityGrant = 0;
6083 }
6084}
6085
6086
6087/**
6088 * Timer callback function sync GIP mode.
6089 * @param pTimer The timer.
6090 * @param pvUser The device extension.
6091 */
6092static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6093{
6094 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6095 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6096 uint64_t u64TSC = ASMReadTSC();
6097 uint64_t NanoTS = RTTimeSystemNanoTS();
6098
6099 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, NIL_RTCPUID, iTick);
6100
6101 ASMSetFlags(fOldFlags);
6102}
6103
6104
6105/**
6106 * Timer callback function for async GIP mode.
6107 * @param pTimer The timer.
6108 * @param pvUser The device extension.
6109 */
6110static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6111{
6112 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6113 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6114 RTCPUID idCpu = RTMpCpuId();
6115 uint64_t u64TSC = ASMReadTSC();
6116 uint64_t NanoTS = RTTimeSystemNanoTS();
6117
6118 /** @todo reset the transaction number and whatnot when iTick == 1. */
6119 if (pDevExt->idGipMaster == idCpu)
6120 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6121 else
6122 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6123
6124 ASMSetFlags(fOldFlags);
6125}
6126
6127
6128/**
6129 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6130 *
6131 * @returns Index of the CPU in the cache set.
6132 * @param pGip The GIP.
6133 * @param idCpu The CPU ID.
6134 */
6135static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6136{
6137 uint32_t i, cTries;
6138
6139 /*
6140 * ASSUMES that CPU IDs are constant.
6141 */
6142 for (i = 0; i < pGip->cCpus; i++)
6143 if (pGip->aCPUs[i].idCpu == idCpu)
6144 return i;
6145
6146 cTries = 0;
6147 do
6148 {
6149 for (i = 0; i < pGip->cCpus; i++)
6150 {
6151 bool fRc;
6152 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6153 if (fRc)
6154 return i;
6155 }
6156 } while (cTries++ < 32);
6157 AssertReleaseFailed();
6158 return i - 1;
6159}
6160
6161
6162/**
6163 * The calling CPU should be accounted as online, update GIP accordingly.
6164 *
6165 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6166 *
6167 * @param pDevExt The device extension.
6168 * @param idCpu The CPU ID.
6169 */
6170static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6171{
6172 int iCpuSet = 0;
6173 uint16_t idApic = UINT16_MAX;
6174 uint32_t i = 0;
6175 uint64_t u64NanoTS = 0;
6176 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6177
6178 AssertPtrReturnVoid(pGip);
6179 AssertRelease(idCpu == RTMpCpuId());
6180 Assert(pGip->cPossibleCpus == RTMpGetCount());
6181
6182 /*
6183 * Do this behind a spinlock with interrupts disabled as this can fire
6184 * on all CPUs simultaneously, see @bugref{6110}.
6185 */
6186 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6187
6188 /*
6189 * Update the globals.
6190 */
6191 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6192 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6193 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6194 if (iCpuSet >= 0)
6195 {
6196 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6197 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6198 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6199 }
6200
6201 /*
6202 * Update the entry.
6203 */
6204 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6205 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6206 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
6207 idApic = ASMGetApicId();
6208 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6209 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6210 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6211
6212 /*
6213 * Update the APIC ID and CPU set index mappings.
6214 */
6215 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6216 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6217
6218 /* Update the Mp online/offline counter. */
6219 ASMAtomicIncU32(&g_cMpOnOffEvents);
6220
6221#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6222 /*
6223 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6224 *
6225 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6226 * update the state and it'll get serviced when the thread's listening interval times out.
6227 */
6228 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6229 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6230 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6231 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6232 {
6233 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6234 }
6235 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6236#endif
6237
6238 /* commit it */
6239 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6240
6241 RTSpinlockRelease(pDevExt->hGipSpinlock);
6242}
6243
6244
6245/**
6246 * The CPU should be accounted as offline, update the GIP accordingly.
6247 *
6248 * This is used by supdrvGipMpEvent.
6249 *
6250 * @param pDevExt The device extension.
6251 * @param idCpu The CPU ID.
6252 */
6253static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6254{
6255 int iCpuSet;
6256 unsigned i;
6257
6258 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6259
6260 AssertPtrReturnVoid(pGip);
6261 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6262
6263 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6264 AssertReturnVoid(iCpuSet >= 0);
6265
6266 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6267 AssertReturnVoid(i < pGip->cCpus);
6268 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6269
6270 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6271 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6272
6273 /* Update the Mp online/offline counter. */
6274 ASMAtomicIncU32(&g_cMpOnOffEvents);
6275
6276 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6277 if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
6278 {
6279 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6280 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6281 }
6282
6283 /* Reset the TSC delta, we will recalculate it lazily. */
6284 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6285
6286 /* commit it */
6287 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6288
6289 RTSpinlockRelease(pDevExt->hGipSpinlock);
6290}
6291
6292
6293/**
6294 * Multiprocessor event notification callback.
6295 *
6296 * This is used to make sure that the GIP master gets passed on to
6297 * another CPU. It also updates the associated CPU data.
6298 *
6299 * @param enmEvent The event.
6300 * @param idCpu The cpu it applies to.
6301 * @param pvUser Pointer to the device extension.
6302 *
6303 * @remarks This function -must- fire on the newly online'd CPU for the
6304 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6305 * RTMPEVENT_OFFLINE case.
6306 */
6307static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6308{
6309 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6310 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6311
6312 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6313
6314 /*
6315 * Update the GIP CPU data.
6316 */
6317 if (pGip)
6318 {
6319 switch (enmEvent)
6320 {
6321 case RTMPEVENT_ONLINE:
6322 AssertRelease(idCpu == RTMpCpuId());
6323 supdrvGipMpEventOnline(pDevExt, idCpu);
6324 break;
6325 case RTMPEVENT_OFFLINE:
6326 supdrvGipMpEventOffline(pDevExt, idCpu);
6327 break;
6328 }
6329 }
6330
6331 /*
6332 * Make sure there is a master GIP.
6333 */
6334 if (enmEvent == RTMPEVENT_OFFLINE)
6335 {
6336 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6337 if (idGipMaster == idCpu)
6338 {
6339 /*
6340 * Find a new GIP master.
6341 */
6342 bool fIgnored;
6343 unsigned i;
6344 int64_t iTSCDelta;
6345 uint32_t idxNewGipMaster;
6346 RTCPUID idNewGipMaster = NIL_RTCPUID;
6347 RTCPUSET OnlineCpus;
6348 RTMpGetOnlineSet(&OnlineCpus);
6349
6350 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6351 {
6352 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6353 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6354 && idCurCpu != idGipMaster)
6355 {
6356 idNewGipMaster = idCurCpu;
6357 break;
6358 }
6359 }
6360
6361 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6362 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6363 NOREF(fIgnored);
6364
6365 /*
6366 * Adjust all the TSC deltas against the new GIP master.
6367 */
6368 if (pGip)
6369 {
6370 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6371 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6372 Assert(iTSCDelta != UINT64_MAX);
6373 for (i = 0; i < pGip->cCpus; i++)
6374 {
6375 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6376 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6377 if (iWorkerDelta != INT64_MAX)
6378 iWorkerDelta -= iTSCDelta;
6379 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6380 }
6381 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6382 }
6383 }
6384 }
6385}
6386
6387
6388/**
6389 * Returns whether the host CPU sports an invariant TSC or not.
6390 *
6391 * @returns true if invariant TSC is supported, false otherwise.
6392 */
6393static bool supdrvIsInvariantTsc(void)
6394{
6395 static bool s_fQueried = false;
6396 static bool s_fIsInvariantTsc = false;
6397 if (!s_fQueried)
6398 {
6399 uint32_t uEax, uEbx, uEcx, uEdx;
6400 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
6401 if (uEax >= 0x80000007)
6402 {
6403 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
6404 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
6405 s_fIsInvariantTsc = true;
6406 }
6407 s_fQueried = true;
6408 }
6409
6410 return s_fIsInvariantTsc;
6411}
6412
6413
/**
 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
 * compute the delta between them.
 *
 * The callback returns immediately on any CPU that is neither the master
 * (g_idTscDeltaInitiator) nor the worker.  The master and the worker then run
 * a handshake through g_pTscDeltaSync->u for each measurement round; the
 * master computes the delta (worker TSC - master TSC) and keeps the smallest
 * value seen in the worker's i64TSCDelta.
 *
 * @param   idCpu       The CPU we are current scheduled on.
 * @param   pvUser1     Opaque pointer to the GIP.
 * @param   pvUser2     Opaque pointer to the worker Cpu Id.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *     read the TSC at exactly the same time on both the master and the worker
 *     CPUs. Due to DMA, bus arbitration, cache locality, contention, pipelining
 *     etc. there is no guaranteed way of doing this on x86 CPUs. We try to
 *     minimize the measurement error by computing the minimum read time of the
 *     compare statement in the worker by taking TSC measurements across it.
 *
 *     We ignore the first few runs of the loop in order to prime the cache.
 *     Also, be careful about using 'pause' instruction in critical busy-wait
 *     loops in this code - it can cause undesired behaviour with
 *     hyperthreading.
 *
 *     It must be noted that the computed minimum read time is mostly to
 *     eliminate huge deltas when the worker is too early and doesn't by itself
 *     help produce more accurate deltas. We allow two times the computed
 *     minimum as an arbitrary acceptable threshold. Therefore, it is still
 *     possible to get negative deltas where there are none when the worker is
 *     earlier. As long as these occasional negative deltas are lower than the
 *     time it takes to exit guest-context and the OS to reschedule EMT on a
 *     different CPU we won't expose a TSC that jumped backwards. It is because
 *     of the existence of the negative deltas we don't recompute the delta with
 *     the master and worker interchanged to eliminate the remaining measurement
 *     error.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    uint32_t *pidWorker = (uint32_t *)pvUser2;
    RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
    unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
    unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
    PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
    PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
    int cTriesLeft = 12;

    /* Only the master and the worker take part in the measurement. */
    if (   idCpu != idMaster
        && idCpu != *pidWorker)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        uint64_t uTscNow;
        uint64_t uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master: announce ourselves, then wait for the worker's answer or the timeout. */
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker: wait for the master's announcement or the timeout, then answer. */
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    /* Repeat the whole measurement series until a delta has been recorded or we run out of tries. */
    while (cTriesLeft-- > 0)
    {
        unsigned i;
        uint64_t uMinCmpReadTime = UINT64_MAX;
        for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
        {
            RTCCUINTREG uFlags = ASMIntDisableFlags(); /* Disable interrupts per-iteration, see @bugref{6710} comment #38. */
            if (idCpu == idMaster)
            {
                /*
                 * The master.
                 */
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                /* Start the round and spin until the sync variable changes (the worker sets WORKER_READY). */
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
                    ;

                /* Publish our TSC reading; retry should it happen to equal the reserved marker value. */
                do
                {
                    ASMSerializeInstruction();
                    ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

                /* Wait for the worker to finish its part of the round. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ;

                /* Past the primer and read-time loops: record the delta if the worker accepted
                   its sample, keeping the smallest delta seen. */
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                    {
                        int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
                        if (iDelta < pGipCpuWorker->i64TSCDelta)
                            pGipCpuWorker->i64TSCDelta = iDelta;
                    }
                }

                /* Reset our sample for the next round and release the worker from its final spin. */
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
            }
            else
            {
                /*
                 * The worker.
                 */
                uint64_t uTscWorker;
                uint64_t uTscWorkerFlushed;
                uint64_t uCmpReadTime;

                ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
                /* Wait for the master to start the round, then tell it we're ready. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
                    ;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);

                /*
                 * Keep reading the TSC until we notice that the master has read his. Reading
                 * the TSC -after- the master has updated the memory is way too late. We thus
                 * compensate by trying to measure how long it took for the worker to notice
                 * the memory flushed from the master.
                 */
                do
                {
                    ASMSerializeInstruction();
                    uTscWorker = ASMReadTSC();
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMSerializeInstruction();
                uTscWorkerFlushed = ASMReadTSC();

                /* Time between the last TSC read of the loop above and the read taken after leaving it. */
                uCmpReadTime = uTscWorkerFlushed - uTscWorker;
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                    if (uCmpReadTime < (uMinCmpReadTime << 1))
                    {
                        /* Within twice the minimum read time: hand the sample to the master. */
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                        if (uCmpReadTime < uMinCmpReadTime)
                            uMinCmpReadTime = uCmpReadTime;
                    }
                    else
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD); /* Reject this round's sample. */
                }
                else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
                {
                    /* Read-time loops: only establish the minimum read time, no sample is published. */
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }

                /* Signal completion and wait for the master to move the sync variable on. */
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ASMNopPause();
            }

            ASMSetFlags(uFlags);
        }

        /* Stop retrying as soon as a delta has been recorded for the worker. */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
            break;
    }
}
6606
6607
6608/**
6609 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
6610 * synchronization variable. Optionally also clears the deltas on the per-CPU
6611 * GIP struct. as well.
6612 *
6613 * @param pGip Pointer to the GIP.
6614 * @param fClearDeltas Whether the deltas are also to be cleared.
6615 */
6616DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
6617{
6618 unsigned iCpu;
6619 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6620 {
6621 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6622 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
6623 if (fClearDeltas)
6624 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
6625 }
6626 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6627}
6628
6629
6630/**
6631 * Measures the TSC delta between the master GIP CPU and one specified worker
6632 * CPU.
6633 *
6634 * @returns VBox status code.
6635 * @param pDevExt Pointer to the device instance data.
6636 * @param idxWorker The index of the worker CPU from the GIP's array of
6637 * CPUs.
6638 *
6639 * @remarks This can be called with preemption disabled!
6640 */
6641static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
6642{
6643 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6644 RTCPUID idMaster = pDevExt->idGipMaster;
6645 uint32_t idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6646 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6647 int rc = VERR_CPU_OFFLINE;
6648
6649 if (idxWorker == idxMaster)
6650 {
6651 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
6652 return VINF_SUCCESS;
6653 }
6654
6655 /* Set the master TSC as the initiator. */
6656 while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
6657 ASMNopPause();
6658
6659 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6660 {
6661 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
6662 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6663 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6664 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
6665 if (RT_SUCCESS(rc))
6666 {
6667 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
6668 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
6669 }
6670 }
6671
6672 ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
6673 return rc;
6674}
6675
6676
6677/**
6678 * Measures the TSC deltas between CPUs.
6679 *
6680 * @param pDevExt Pointer to the device instance data.
6681 * @param pidxMaster Where to store the index of the chosen master TSC if we
6682 * managed to determine the TSC deltas successfully.
6683 * Optional, can be NULL.
6684 *
6685 * @returns VBox status code.
6686 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
6687 * idCpu, GIP's online CPU set which are populated in
6688 * supdrvGipInitOnCpu().
6689 */
6690static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
6691{
6692 PSUPGIPCPU pGipCpuMaster;
6693 unsigned iCpu;
6694 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6695 uint32_t idxMaster = UINT32_MAX;
6696 int rc = VINF_SUCCESS;
6697 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
6698 uint32_t cOnlineCpus = pGip->cOnlineCpus;
6699
6700 /*
6701 * If we determined the TSC is async., don't bother with measuring deltas.
6702 */
6703 if (RT_UNLIKELY(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC))
6704 return VINF_SUCCESS;
6705
6706 /*
6707 * Pick the first CPU online as the master TSC and make it the new GIP master based
6708 * on the APIC ID.
6709 *
6710 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
6711 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
6712 * master as this point since the sync/async timer isn't created yet.
6713 */
6714 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
6715 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
6716 {
6717 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
6718 if (idxCpu != UINT16_MAX)
6719 {
6720 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
6721 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
6722 {
6723 idxMaster = idxCpu;
6724 pGipCpu->i64TSCDelta = 0;
6725 break;
6726 }
6727 }
6728 }
6729 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
6730 pGipCpuMaster = &pGip->aCPUs[idxMaster];
6731 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6732
6733 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
6734 if (pGip->cOnlineCpus <= 1)
6735 {
6736 if (pidxMaster)
6737 *pidxMaster = idxMaster;
6738 return VINF_SUCCESS;
6739 }
6740
6741 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6742 {
6743 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6744 if ( iCpu != idxMaster
6745 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6746 {
6747 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6748 if (RT_FAILURE(rc))
6749 {
6750 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
6751 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6752 break;
6753 }
6754
6755 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
6756 {
6757 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
6758 rc = VERR_TRY_AGAIN;
6759 break;
6760 }
6761 }
6762 }
6763
6764 if ( RT_SUCCESS(rc)
6765 && !pGipCpuMaster->i64TSCDelta
6766 && pidxMaster)
6767 {
6768 *pidxMaster = idxMaster;
6769 }
6770 return rc;
6771}
6772
6773
6774/**
6775 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
6776 *
6777 * @param idCpu Ignored.
6778 * @param pvUser1 Where to put the TSC.
6779 * @param pvUser2 Ignored.
6780 */
6781static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6782{
6783 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
6784}
6785
6786
6787/**
6788 * Determine if Async GIP mode is required because of TSC drift.
6789 *
6790 * When using the default/normal timer code it is essential that the time stamp counter
6791 * (TSC) runs never backwards, that is, a read operation to the counter should return
6792 * a bigger value than any previous read operation. This is guaranteed by the latest
6793 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
6794 * case we have to choose the asynchronous timer mode.
6795 *
6796 * @param poffMin Pointer to the determined difference between different cores.
6797 * @return false if the time stamp counters appear to be synchronized, true otherwise.
6798 */
6799static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
6800{
6801 /*
6802 * Just iterate all the cpus 8 times and make sure that the TSC is
6803 * ever increasing. We don't bother taking TSC rollover into account.
6804 */
6805 int iEndCpu = RTMpGetArraySize();
6806 int iCpu;
6807 int cLoops = 8;
6808 bool fAsync = false;
6809 int rc = VINF_SUCCESS;
6810 uint64_t offMax = 0;
6811 uint64_t offMin = ~(uint64_t)0;
6812 uint64_t PrevTsc = ASMReadTSC();
6813
6814 while (cLoops-- > 0)
6815 {
6816 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
6817 {
6818 uint64_t CurTsc;
6819 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
6820 if (RT_SUCCESS(rc))
6821 {
6822 if (CurTsc <= PrevTsc)
6823 {
6824 fAsync = true;
6825 offMin = offMax = PrevTsc - CurTsc;
6826 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
6827 iCpu, cLoops, CurTsc, PrevTsc));
6828 break;
6829 }
6830
6831 /* Gather statistics (except the first time). */
6832 if (iCpu != 0 || cLoops != 7)
6833 {
6834 uint64_t off = CurTsc - PrevTsc;
6835 if (off < offMin)
6836 offMin = off;
6837 if (off > offMax)
6838 offMax = off;
6839 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
6840 }
6841
6842 /* Next */
6843 PrevTsc = CurTsc;
6844 }
6845 else if (rc == VERR_NOT_SUPPORTED)
6846 break;
6847 else
6848 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
6849 }
6850
6851 /* broke out of the loop. */
6852 if (iCpu < iEndCpu)
6853 break;
6854 }
6855
6856 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
6857 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
6858 fAsync, iEndCpu, rc, offMin, offMax));
6859#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
6860 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
6861#endif
6862 return fAsync;
6863}
6864
6865
6866/**
6867 * Determine the GIP TSC mode.
6868 *
6869 * @returns The most suitable TSC mode.
6870 * @param pDevExt Pointer to the device instance data.
6871 */
6872static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
6873{
6874 /*
6875 * On SMP we're faced with two problems:
6876 * (1) There might be a skew between the CPU, so that cpu0
6877 * returns a TSC that is slightly different from cpu1.
6878 * (2) Power management (and other things) may cause the TSC
6879 * to run at a non-constant speed, and cause the speed
6880 * to be different on the cpus. This will result in (1).
6881 *
6882 * So, on SMP systems we'll have to select the ASYNC update method
6883 * if there are symptoms of these problems.
6884 */
6885 if (RTMpGetCount() > 1)
6886 {
6887 uint32_t uEAX, uEBX, uECX, uEDX;
6888 uint64_t u64DiffCoresIgnored;
6889
6890 /* Permit the user and/or the OS specific bits to force async mode. */
6891 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
6892 return SUPGIPMODE_ASYNC_TSC;
6893
6894 /* Try check for current differences between the cpus. */
6895 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
6896 return SUPGIPMODE_ASYNC_TSC;
6897
6898 /*
6899 * If the CPU supports power management and is an AMD one we
6900 * won't trust it unless it has the TscInvariant bit is set.
6901 */
6902 /* Check for "AuthenticAMD" */
6903 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
6904 if ( uEAX >= 1
6905 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
6906 {
6907 /* Check for APM support and that TscInvariant is cleared. */
6908 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
6909 if (uEAX >= 0x80000007)
6910 {
6911 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
6912 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
6913 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
6914 return SUPGIPMODE_ASYNC_TSC;
6915 }
6916 }
6917 }
6918 return SUPGIPMODE_SYNC_TSC;
6919}
6920
6921
6922/**
6923 * Initializes per-CPU GIP information.
6924 *
6925 * @param pGip Pointer to the read-write kernel mapping of the GIP.
6926 * @param pCpu Pointer to which GIP CPU to initalize.
6927 * @param u64NanoTS The current nanosecond timestamp.
6928 */
6929static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
6930{
6931 pCpu->u32TransactionId = 2;
6932 pCpu->u64NanoTS = u64NanoTS;
6933 pCpu->u64TSC = ASMReadTSC();
6934 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
6935 pCpu->i64TSCDelta = INT64_MAX;
6936
6937 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
6938 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
6939 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
6940 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
6941
6942 /*
6943 * We don't know the following values until we've executed updates.
6944 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
6945 * the 2nd timer callout.
6946 */
6947 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
6948 pCpu->u32UpdateIntervalTSC
6949 = pCpu->au32TSCHistory[0]
6950 = pCpu->au32TSCHistory[1]
6951 = pCpu->au32TSCHistory[2]
6952 = pCpu->au32TSCHistory[3]
6953 = pCpu->au32TSCHistory[4]
6954 = pCpu->au32TSCHistory[5]
6955 = pCpu->au32TSCHistory[6]
6956 = pCpu->au32TSCHistory[7]
6957 = (uint32_t)(_4G / pGip->u32UpdateHz);
6958}
6959
6960
6961/**
6962 * Initializes the GIP data.
6963 *
6964 * @param pDevExt Pointer to the device instance data.
6965 * @param pGip Pointer to the read-write kernel mapping of the GIP.
6966 * @param HCPhys The physical address of the GIP.
6967 * @param u64NanoTS The current nanosecond timestamp.
6968 * @param uUpdateHz The update frequency.
6969 * @param cCpus The CPU count.
6970 */
6971static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
6972 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned cCpus)
6973{
6974 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
6975 unsigned i;
6976#ifdef DEBUG_DARWIN_GIP
6977 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
6978#else
6979 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
6980#endif
6981
6982 /*
6983 * Initialize the structure.
6984 */
6985 memset(pGip, 0, cbGip);
6986 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
6987 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
6988 pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
6989 pGip->cCpus = (uint16_t)cCpus;
6990 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
6991 pGip->u32UpdateHz = uUpdateHz;
6992 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
6993 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
6994 RTCpuSetEmpty(&pGip->OnlineCpuSet);
6995 RTCpuSetEmpty(&pGip->PresentCpuSet);
6996 RTMpGetSet(&pGip->PossibleCpuSet);
6997 pGip->cOnlineCpus = RTMpGetOnlineCount();
6998 pGip->cPresentCpus = RTMpGetPresentCount();
6999 pGip->cPossibleCpus = RTMpGetCount();
7000 pGip->idCpuMax = RTMpGetMaxCpuId();
7001 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7002 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7003 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7004 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7005
7006 for (i = 0; i < cCpus; i++)
7007 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
7008
7009 /*
7010 * Link it to the device extension.
7011 */
7012 pDevExt->pGip = pGip;
7013 pDevExt->HCPhysGip = HCPhys;
7014 pDevExt->cGipUsers = 0;
7015
7016 /*
7017 * Allocate the TSC delta sync. struct. on a separate cache line.
7018 */
7019 g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
7020 g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
7021 Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
7022}
7023
7024
7025/**
7026 * On CPU initialization callback for RTMpOnAll.
7027 *
7028 * @param idCpu The CPU ID.
7029 * @param pvUser1 The device extension.
7030 * @param pvUser2 The GIP.
7031 */
7032static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7033{
7034 /* This is good enough, even though it will update some of the globals a
7035 bit to much. */
7036 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7037}
7038
7039
7040/**
7041 * Invalidates the GIP data upon termination.
7042 *
7043 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7044 */
7045static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7046{
7047 unsigned i;
7048 pGip->u32Magic = 0;
7049 for (i = 0; i < pGip->cCpus; i++)
7050 {
7051 pGip->aCPUs[i].u64NanoTS = 0;
7052 pGip->aCPUs[i].u64TSC = 0;
7053 pGip->aCPUs[i].iTSCHistoryHead = 0;
7054 pGip->aCPUs[i].u64TSCSample = 0;
7055 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7056 }
7057
7058 if (g_pvTscDeltaSync)
7059 {
7060 RTMemFree(g_pvTscDeltaSync);
7061 g_pTscDeltaSync = NULL;
7062 g_pvTscDeltaSync = NULL;
7063 }
7064}
7065
7066
7067/**
7068 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7069 * updates all the per cpu data except the transaction id.
7070 *
7071 * @param pDevExt The device extension.
7072 * @param pGipCpu Pointer to the per cpu data.
7073 * @param u64NanoTS The current time stamp.
7074 * @param u64TSC The current TSC.
7075 * @param iTick The current timer tick.
7076 */
7077static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7078{
7079 uint64_t u64TSCDelta;
7080 uint32_t u32UpdateIntervalTSC;
7081 uint32_t u32UpdateIntervalTSCSlack;
7082 unsigned iTSCHistoryHead;
7083 uint64_t u64CpuHz;
7084 uint32_t u32TransactionId;
7085
7086 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7087 AssertPtrReturnVoid(pGip);
7088
7089 /* Delta between this and the previous update. */
7090 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7091
7092 /*
7093 * Update the NanoTS.
7094 */
7095 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7096
7097 /*
7098 * Calc TSC delta.
7099 */
7100 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
7101 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7102 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7103
7104 if (u64TSCDelta >> 32)
7105 {
7106 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7107 pGipCpu->cErrors++;
7108 }
7109
7110 /*
7111 * On the 2nd and 3rd callout, reset the history with the current TSC
7112 * interval since the values entered by supdrvGipInit are totally off.
7113 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7114 * better, while the 3rd should be most reliable.
7115 */
7116 u32TransactionId = pGipCpu->u32TransactionId;
7117 if (RT_UNLIKELY( ( u32TransactionId == 5
7118 || u32TransactionId == 7)
7119 && ( iTick == 2
7120 || iTick == 3) ))
7121 {
7122 unsigned i;
7123 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7124 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7125 }
7126
7127 /*
7128 * TSC History.
7129 */
7130 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7131 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7132 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7133 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7134
7135 /*
7136 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7137 */
7138 if (pGip->u32UpdateHz >= 1000)
7139 {
7140 uint32_t u32;
7141 u32 = pGipCpu->au32TSCHistory[0];
7142 u32 += pGipCpu->au32TSCHistory[1];
7143 u32 += pGipCpu->au32TSCHistory[2];
7144 u32 += pGipCpu->au32TSCHistory[3];
7145 u32 >>= 2;
7146 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7147 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7148 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7149 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7150 u32UpdateIntervalTSC >>= 2;
7151 u32UpdateIntervalTSC += u32;
7152 u32UpdateIntervalTSC >>= 1;
7153
7154 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
7155 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7156 }
7157 else if (pGip->u32UpdateHz >= 90)
7158 {
7159 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7160 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7161 u32UpdateIntervalTSC >>= 1;
7162
7163 /* value chosen on a 2GHz thinkpad running windows */
7164 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7165 }
7166 else
7167 {
7168 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7169
7170 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7171 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7172 }
7173 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7174
7175 /*
7176 * CpuHz.
7177 */
7178 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
7179 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7180}
7181
7182
7183/**
7184 * Updates the GIP.
7185 *
7186 * @param pDevExt The device extension.
7187 * @param u64NanoTS The current nanosecond timesamp.
7188 * @param u64TSC The current TSC timesamp.
7189 * @param idCpu The CPU ID.
7190 * @param iTick The current timer tick.
7191 */
7192static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7193{
7194 /*
7195 * Determine the relevant CPU data.
7196 */
7197 PSUPGIPCPU pGipCpu;
7198 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7199 AssertPtrReturnVoid(pGip);
7200
7201 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7202 pGipCpu = &pGip->aCPUs[0];
7203 else
7204 {
7205 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7206 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7207 return;
7208 pGipCpu = &pGip->aCPUs[iCpu];
7209 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7210 return;
7211 }
7212
7213 /*
7214 * Start update transaction.
7215 */
7216 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7217 {
7218 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7219 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7220 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7221 pGipCpu->cErrors++;
7222 return;
7223 }
7224
7225 /*
7226 * Recalc the update frequency every 0x800th time.
7227 */
7228 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7229 {
7230 if (pGip->u64NanoTSLastUpdateHz)
7231 {
7232#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7233 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7234 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7235 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7236 {
7237 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7238 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
7239 }
7240#endif
7241 }
7242 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
7243 }
7244
7245 /*
7246 * Update the data.
7247 */
7248 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7249
7250 /*
7251 * Complete transaction.
7252 */
7253 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7254}
7255
7256
7257/**
7258 * Updates the per cpu GIP data for the calling cpu.
7259 *
7260 * @param pDevExt The device extension.
7261 * @param u64NanoTS The current nanosecond timesamp.
7262 * @param u64TSC The current TSC timesamp.
7263 * @param idCpu The CPU ID.
7264 * @param idApic The APIC id for the CPU index.
7265 * @param iTick The current timer tick.
7266 */
7267static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7268 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7269{
7270 uint32_t iCpu;
7271 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7272
7273 /*
7274 * Avoid a potential race when a CPU online notification doesn't fire on
7275 * the onlined CPU but the tick creeps in before the event notification is
7276 * run.
7277 */
7278 if (RT_UNLIKELY(iTick == 1))
7279 {
7280 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7281 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7282 supdrvGipMpEventOnline(pDevExt, idCpu);
7283 }
7284
7285 iCpu = pGip->aiCpuFromApicId[idApic];
7286 if (RT_LIKELY(iCpu < pGip->cCpus))
7287 {
7288 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7289 if (pGipCpu->idCpu == idCpu)
7290 {
7291 /*
7292 * Start update transaction.
7293 */
7294 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7295 {
7296 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7297 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7298 pGipCpu->cErrors++;
7299 return;
7300 }
7301
7302 /*
7303 * Update the data.
7304 */
7305 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7306
7307 /*
7308 * Complete transaction.
7309 */
7310 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7311 }
7312 }
7313}
7314
7315/**
7316 * Resume built-in keyboard on MacBook Air and Pro hosts.
7317 * If there is no built-in keyboard device, return success anyway.
7318 *
7319 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7320 */
7321static int supdrvIOCtl_ResumeSuspendedKbds(void)
7322{
7323#if defined(RT_OS_DARWIN)
7324 return supdrvDarwinResumeSuspendedKbds();
7325#else
7326 return VERR_NOT_IMPLEMENTED;
7327#endif
7328}
7329
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette