VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53226

Last change on this file since 53226 was 53226, checked in by vboxsync, 10 years ago

HostDrivers/Support: Recalc freq. casting.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 284.2 KB
Line 
1/* $Id: SUPDrv.c 53226 2014-11-05 10:56:59Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
 *
 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
 */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD                  UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS                 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS          4
/** The number of loops during which we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS       24
/** Stop measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_STOP             0
/** Start measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_START            1
/** Worker thread is ready for reading the TSC. */
#define GIP_TSC_DELTA_SYNC_WORKER_READY     2
/** Worker thread is done updating TSC delta info. */
#define GIP_TSC_DELTA_SYNC_WORKER_DONE      3
/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
 * with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER  4
/** When IPRT isn't concurrent safe: Worker is ready after waiting for
 * master with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER  5

/* Sanity: the loop-count relationships the measurement code depends on. */
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);

/** @def VBOX_SVN_REV
 * The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif
136
137
138/*******************************************************************************
139* Internal Functions *
140*******************************************************************************/
141static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
142static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
143static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
144static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
145static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
146static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
147static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
148static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
149static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
150static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
151static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
152static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
153static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
154DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
155DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
156static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
157static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
158static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
159static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
160static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
161static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
162static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
163static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
164static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
165static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
166static bool supdrvIsInvariantTsc(void);
167static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
168 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
169static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
170static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
171static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
172static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
173 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
174static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
175static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
176static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
177static int supdrvIOCtl_ResumeSuspendedKbds(void);
178
179
180/*******************************************************************************
181* Global Variables *
182*******************************************************************************/
/** Pointer to the Global Information Page (GIP), exported so other kernel
 * modules linking against this driver can reach it directly. */
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
184
/**
 * The TSC delta synchronization struct, rounded up to cache line size.
 *
 * The union guarantees the variable occupies a full 64-byte cache line so the
 * master and worker CPUs spinning on it do not false-share with other data.
 */
typedef union SUPTSCDELTASYNC
{
    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t   u;
    /** Padding to cache line size. */
    uint8_t             u8Padding[64];
} SUPTSCDELTASYNC;
AssertCompileSize(SUPTSCDELTASYNC, 64);
typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;
197
/** Pointer to the TSC delta sync. struct as allocated (possibly unaligned;
 * kept so the allocation can be freed). */
static void             *g_pvTscDeltaSync;
/** Aligned pointer to the TSC delta sync. struct (the view actually used). */
static PSUPTSCDELTASYNC  g_pTscDeltaSync;
/** The TSC delta measurement initiator Cpu Id. */
static volatile RTCPUID  g_idTscDeltaInitiator = NIL_RTCPUID;
/** Number of online/offline events, incremented each time a CPU goes online
 * or offline. */
static volatile uint32_t g_cMpOnOffEvents;
207
/**
 * Array of the R0 SUP API exported to loaded modules (resolved by name by the
 * loader code).  NOTE: the first ten entries are position-sensitive, see the
 * fixup comment below.
 */
static SUPFUNC g_aFunctions[] =
{
/* SED: START */
    /* name                                     function */
    /* Entries with absolute addresses determined at runtime, fixup
       code makes ugly ASSUMPTIONS about the order here: */
    { "SUPR0AbsIs64bit", (void *)0 },
    { "SUPR0Abs64bitKernelCS", (void *)0 },
    { "SUPR0Abs64bitKernelSS", (void *)0 },
    { "SUPR0Abs64bitKernelDS", (void *)0 },
    { "SUPR0AbsKernelCS", (void *)0 },
    { "SUPR0AbsKernelSS", (void *)0 },
    { "SUPR0AbsKernelDS", (void *)0 },
    { "SUPR0AbsKernelES", (void *)0 },
    { "SUPR0AbsKernelFS", (void *)0 },
    { "SUPR0AbsKernelGS", (void *)0 },
    /* Normal function pointers: */
    { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage },            /* SED: DATA */
    { "SUPGetGIP", (void *)SUPGetGIP },
    { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
    { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
    { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
    { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
    { "SUPR0ContFree", (void *)SUPR0ContFree },
    { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
    { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
    { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
    { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
    { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
    { "SUPR0LockMem", (void *)SUPR0LockMem },
    { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
    { "SUPR0LowFree", (void *)SUPR0LowFree },
    { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
    { "SUPR0MemFree", (void *)SUPR0MemFree },
    { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
    { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
    { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
    { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
    { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
    { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
    { "SUPR0PageFree", (void *)SUPR0PageFree },
    { "SUPR0Printf", (void *)SUPR0Printf },
    { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
    { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
    { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
    { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
    { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
    { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
    { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
    { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
    { "SUPSemEventClose", (void *)SUPSemEventClose },
    { "SUPSemEventCreate", (void *)SUPSemEventCreate },
    { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
    { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
    { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
    { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
    { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
    { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
    { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
    { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
    { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
    { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
    { "SUPSemEventSignal", (void *)SUPSemEventSignal },
    { "SUPSemEventWait", (void *)SUPSemEventWait },
    { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
    { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
    { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },

    { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
    { "RTAssertMayPanic", (void *)RTAssertMayPanic },
    { "RTAssertMsg1", (void *)RTAssertMsg1 },
    { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
    { "RTAssertMsg2V", (void *)RTAssertMsg2V },
    { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
    { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
    { "RTCrc32", (void *)RTCrc32 },
    { "RTCrc32Finish", (void *)RTCrc32Finish },
    { "RTCrc32Process", (void *)RTCrc32Process },
    { "RTCrc32Start", (void *)RTCrc32Start },
    { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
    { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
    { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
    { "RTHandleTableCreate", (void *)RTHandleTableCreate },
    { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
    { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
    { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
    { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
    { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
    { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
    { "RTLogLoggerExV", (void *)RTLogLoggerExV },
    { "RTLogPrintfV", (void *)RTLogPrintfV },
    { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
    { "RTMemAllocExTag", (void *)RTMemAllocExTag },
    { "RTMemAllocTag", (void *)RTMemAllocTag },
    { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
    { "RTMemAllocZTag", (void *)RTMemAllocZTag },
    { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
    { "RTMemDupExTag", (void *)RTMemDupExTag },
    { "RTMemDupTag", (void *)RTMemDupTag },
    { "RTMemFree", (void *)RTMemFree },
    { "RTMemFreeEx", (void *)RTMemFreeEx },
    { "RTMemReallocTag", (void *)RTMemReallocTag },
    { "RTMpCpuId", (void *)RTMpCpuId },
    { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
    { "RTMpGetArraySize", (void *)RTMpGetArraySize },
    { "RTMpGetCount", (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
    { "RTMpGetSet", (void *)RTMpGetSet },
    { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
    { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
    { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
    { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
    { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
    { "RTMpOnAll", (void *)RTMpOnAll },
    { "RTMpOnOthers", (void *)RTMpOnOthers },
    { "RTMpOnSpecific", (void *)RTMpOnSpecific },
    { "RTMpPokeCpu", (void *)RTMpPokeCpu },
    { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
    { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
    { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
    { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
    { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
    { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
    { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
    { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
    { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
    { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
    { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
    { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
    { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
    { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
    { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
    { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
    { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
    { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
    { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
    { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
    { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
    { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
    { "RTProcSelf", (void *)RTProcSelf },
    { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
    { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
    { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
    { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
    { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
    { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
    { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
    { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
    { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
    { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
    { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
    { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
    { "RTR0MemObjFree", (void *)RTR0MemObjFree },
    { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
    { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
    { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
    { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
    { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
    { "RTR0MemObjSize", (void *)RTR0MemObjSize },
    { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
    { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
    { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
    { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
    { "RTSemEventCreate", (void *)RTSemEventCreate },
    { "RTSemEventDestroy", (void *)RTSemEventDestroy },
    { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
    { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
    { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
    { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
    { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
    { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
    { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventSignal", (void *)RTSemEventSignal },
    { "RTSemEventWait", (void *)RTSemEventWait },
    { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
    { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
    { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
    { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
    { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
    { "RTSemMutexCreate", (void *)RTSemMutexCreate },
    { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
    { "RTSemMutexRelease", (void *)RTSemMutexRelease },
    { "RTSemMutexRequest", (void *)RTSemMutexRequest },
    { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
    { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
    { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
    { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
    { "RTSpinlockCreate", (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
    { "RTSpinlockRelease", (void *)RTSpinlockRelease },
    { "RTStrCopy", (void *)RTStrCopy },
    { "RTStrDupTag", (void *)RTStrDupTag },
    { "RTStrFormat", (void *)RTStrFormat },
    { "RTStrFormatNumber", (void *)RTStrFormatNumber },
    { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
    { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
    { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
    { "RTStrFormatV", (void *)RTStrFormatV },
    { "RTStrFree", (void *)RTStrFree },
    { "RTStrNCmp", (void *)RTStrNCmp },
    { "RTStrPrintf", (void *)RTStrPrintf },
    { "RTStrPrintfEx", (void *)RTStrPrintfEx },
    { "RTStrPrintfExV", (void *)RTStrPrintfExV },
    { "RTStrPrintfV", (void *)RTStrPrintfV },
    { "RTThreadCreate", (void *)RTThreadCreate },
    { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
    { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
    { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
    { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
    { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
    { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
    { "RTThreadGetName", (void *)RTThreadGetName },
    { "RTThreadGetNative", (void *)RTThreadGetNative },
    { "RTThreadGetType", (void *)RTThreadGetType },
    { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
    { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
    { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
    { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
    { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
    { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
    { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
    { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
    { "RTThreadSelf", (void *)RTThreadSelf },
    { "RTThreadSelfName", (void *)RTThreadSelfName },
    { "RTThreadSleep", (void *)RTThreadSleep },
    { "RTThreadUserReset", (void *)RTThreadUserReset },
    { "RTThreadUserSignal", (void *)RTThreadUserSignal },
    { "RTThreadUserWait", (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
    { "RTThreadWait", (void *)RTThreadWait },
    { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
    { "RTThreadYield", (void *)RTThreadYield },
    { "RTTimeMilliTS", (void *)RTTimeMilliTS },
    { "RTTimeNanoTS", (void *)RTTimeNanoTS },
    { "RTTimeNow", (void *)RTTimeNow },
    { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
    { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
    { "RTTimerCreate", (void *)RTTimerCreate },
    { "RTTimerCreateEx", (void *)RTTimerCreateEx },
    { "RTTimerDestroy", (void *)RTTimerDestroy },
    { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
    { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
    { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
    { "RTTimerStart", (void *)RTTimerStart },
    { "RTTimerStop", (void *)RTTimerStop },
    { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
    { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
    { "RTUuidCompare", (void *)RTUuidCompare },
    { "RTUuidCompareStr", (void *)RTUuidCompareStr },
    { "RTUuidFromStr", (void *)RTUuidFromStr },
/* SED: END */
};
477
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
/**
 * Drag in the rest of IPRT since we share it with the
 * rest of the kernel modules on darwin.
 */
PFNRT g_apfnVBoxDrvIPRTDeps[] =
{
    /* VBoxNetAdp */
    (PFNRT)RTRandBytes,
    /* VBoxUSB */
    (PFNRT)RTPathStripFilename,
    NULL
};
#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
492
493
494/**
495 * Initializes the device extentsion structure.
496 *
497 * @returns IPRT status code.
498 * @param pDevExt The device extension to initialize.
499 * @param cbSession The size of the session structure. The size of
500 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
501 * defined because we're skipping the OS specific members
502 * then.
503 */
504int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
505{
506 int rc;
507
508#ifdef SUPDRV_WITH_RELEASE_LOGGER
509 /*
510 * Create the release log.
511 */
512 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
513 PRTLOGGER pRelLogger;
514 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
515 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
516 if (RT_SUCCESS(rc))
517 RTLogRelSetDefaultInstance(pRelLogger);
518 /** @todo Add native hook for getting logger config parameters and setting
519 * them. On linux we should use the module parameter stuff... */
520#endif
521
522 /*
523 * Initialize it.
524 */
525 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
526 pDevExt->Spinlock = NIL_RTSPINLOCK;
527 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
528 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
529 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
530 if (RT_SUCCESS(rc))
531 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
532 if (RT_SUCCESS(rc))
533 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
534
535 if (RT_SUCCESS(rc))
536#ifdef SUPDRV_USE_MUTEX_FOR_LDR
537 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
538#else
539 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
540#endif
541 if (RT_SUCCESS(rc))
542 {
543 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
544 if (RT_SUCCESS(rc))
545 {
546#ifdef SUPDRV_USE_MUTEX_FOR_LDR
547 rc = RTSemMutexCreate(&pDevExt->mtxGip);
548#else
549 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
550#endif
551 if (RT_SUCCESS(rc))
552 {
553 rc = supdrvGipCreate(pDevExt);
554 if (RT_SUCCESS(rc))
555 {
556 rc = supdrvTracerInit(pDevExt);
557 if (RT_SUCCESS(rc))
558 {
559 pDevExt->pLdrInitImage = NULL;
560 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
561 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
562 pDevExt->cbSession = (uint32_t)cbSession;
563
564 /*
565 * Fixup the absolute symbols.
566 *
567 * Because of the table indexing assumptions we'll have a little #ifdef orgy
568 * here rather than distributing this to OS specific files. At least for now.
569 */
570#ifdef RT_OS_DARWIN
571# if ARCH_BITS == 32
572 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
573 {
574 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
575 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
576 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
577 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
578 }
579 else
580 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
581 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
582 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
583 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
584 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
585 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
586 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
587# else /* 64-bit darwin: */
588 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
589 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
590 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
591 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
592 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
593 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
594 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
595 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
596 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
597 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
598
599# endif
600#else /* !RT_OS_DARWIN */
601# if ARCH_BITS == 64
602 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
603 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
604 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
605 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
606# else
607 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
608# endif
609 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
610 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
611 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
612 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
613 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
614 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
615#endif /* !RT_OS_DARWIN */
616 return VINF_SUCCESS;
617 }
618
619 supdrvGipDestroy(pDevExt);
620 }
621
622#ifdef SUPDRV_USE_MUTEX_FOR_GIP
623 RTSemMutexDestroy(pDevExt->mtxGip);
624 pDevExt->mtxGip = NIL_RTSEMMUTEX;
625#else
626 RTSemFastMutexDestroy(pDevExt->mtxGip);
627 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
628#endif
629 }
630 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
631 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
632 }
633#ifdef SUPDRV_USE_MUTEX_FOR_LDR
634 RTSemMutexDestroy(pDevExt->mtxLdr);
635 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
636#else
637 RTSemFastMutexDestroy(pDevExt->mtxLdr);
638 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
639#endif
640 }
641
642 RTSpinlockDestroy(pDevExt->Spinlock);
643 pDevExt->Spinlock = NIL_RTSPINLOCK;
644 RTSpinlockDestroy(pDevExt->hGipSpinlock);
645 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
646 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
647 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
648
649#ifdef SUPDRV_WITH_RELEASE_LOGGER
650 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
651 RTLogDestroy(RTLogSetDefaultInstance(NULL));
652#endif
653
654 return rc;
655}
656
657
658/**
659 * Delete the device extension (e.g. cleanup members).
660 *
661 * @param pDevExt The device extension to delete.
662 */
663void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
664{
665 PSUPDRVOBJ pObj;
666 PSUPDRVUSAGE pUsage;
667
668 /*
669 * Kill mutexes and spinlocks.
670 */
671#ifdef SUPDRV_USE_MUTEX_FOR_GIP
672 RTSemMutexDestroy(pDevExt->mtxGip);
673 pDevExt->mtxGip = NIL_RTSEMMUTEX;
674#else
675 RTSemFastMutexDestroy(pDevExt->mtxGip);
676 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
677#endif
678#ifdef SUPDRV_USE_MUTEX_FOR_LDR
679 RTSemMutexDestroy(pDevExt->mtxLdr);
680 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
681#else
682 RTSemFastMutexDestroy(pDevExt->mtxLdr);
683 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
684#endif
685 RTSpinlockDestroy(pDevExt->Spinlock);
686 pDevExt->Spinlock = NIL_RTSPINLOCK;
687 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
688 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
689 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
690 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
691
692 /*
693 * Free lists.
694 */
695 /* objects. */
696 pObj = pDevExt->pObjs;
697 Assert(!pObj); /* (can trigger on forced unloads) */
698 pDevExt->pObjs = NULL;
699 while (pObj)
700 {
701 void *pvFree = pObj;
702 pObj = pObj->pNext;
703 RTMemFree(pvFree);
704 }
705
706 /* usage records. */
707 pUsage = pDevExt->pUsageFree;
708 pDevExt->pUsageFree = NULL;
709 while (pUsage)
710 {
711 void *pvFree = pUsage;
712 pUsage = pUsage->pNext;
713 RTMemFree(pvFree);
714 }
715
716 /* kill the GIP. */
717 supdrvGipDestroy(pDevExt);
718 RTSpinlockDestroy(pDevExt->hGipSpinlock);
719 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
720
721 supdrvTracerTerm(pDevExt);
722
723#ifdef SUPDRV_WITH_RELEASE_LOGGER
724 /* destroy the loggers. */
725 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
726 RTLogDestroy(RTLogSetDefaultInstance(NULL));
727#endif
728}
729
730
731/**
732 * Create session.
733 *
734 * @returns IPRT status code.
735 * @param pDevExt Device extension.
736 * @param fUser Flag indicating whether this is a user or kernel
737 * session.
738 * @param fUnrestricted Unrestricted access (system) or restricted access
739 * (user)?
740 * @param ppSession Where to store the pointer to the session data.
741 */
742int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
743{
744 int rc;
745 PSUPDRVSESSION pSession;
746
747 if (!SUP_IS_DEVEXT_VALID(pDevExt))
748 return VERR_INVALID_PARAMETER;
749
750 /*
751 * Allocate memory for the session data.
752 */
753 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
754 if (pSession)
755 {
756 /* Initialize session data. */
757 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
758 if (!rc)
759 {
760 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
761 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
762 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
763 if (RT_SUCCESS(rc))
764 {
765 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
766 pSession->pDevExt = pDevExt;
767 pSession->u32Cookie = BIRD_INV;
768 pSession->fUnrestricted = fUnrestricted;
769 /*pSession->fInHashTable = false; */
770 pSession->cRefs = 1;
771 /*pSession->pCommonNextHash = NULL;
772 pSession->ppOsSessionPtr = NULL; */
773 if (fUser)
774 {
775 pSession->Process = RTProcSelf();
776 pSession->R0Process = RTR0ProcHandleSelf();
777 }
778 else
779 {
780 pSession->Process = NIL_RTPROCESS;
781 pSession->R0Process = NIL_RTR0PROCESS;
782 }
783 /*pSession->pLdrUsage = NULL;
784 pSession->pVM = NULL;
785 pSession->pUsage = NULL;
786 pSession->pGip = NULL;
787 pSession->fGipReferenced = false;
788 pSession->Bundle.cUsed = 0; */
789 pSession->Uid = NIL_RTUID;
790 pSession->Gid = NIL_RTGID;
791 /*pSession->uTracerData = 0;*/
792 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
793 RTListInit(&pSession->TpProviders);
794 /*pSession->cTpProviders = 0;*/
795 /*pSession->cTpProbesFiring = 0;*/
796 RTListInit(&pSession->TpUmods);
797 /*RT_ZERO(pSession->apTpLookupTable);*/
798
799 VBOXDRV_SESSION_CREATE(pSession, fUser);
800 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
801 return VINF_SUCCESS;
802 }
803
804 RTSpinlockDestroy(pSession->Spinlock);
805 }
806 RTMemFree(pSession);
807 *ppSession = NULL;
808 Log(("Failed to create spinlock, rc=%d!\n", rc));
809 }
810 else
811 rc = VERR_NO_MEMORY;
812
813 return rc;
814}
815
816
/**
 * Cleans up the session in the context of the process to which it belongs, the
 * caller will free the session and the session spinlock.
 *
 * This should normally occur when the session is closed or as the process
 * exits.  Careful reference counting in the OS specific code makes sure that
 * there cannot be any races between process/handle cleanup callbacks and
 * threads doing I/O control calls.
 *
 * Cleanup order: loggers, handle table, object references, tracer state,
 * memory bundles, component factories, loaded images, and finally the GIP
 * mapping.
 *
 * @param   pDevExt     The device extension.
 * @param   pSession    Session data.
 */
static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    int                 rc;
    PSUPDRVBUNDLE       pBundle;
    LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));

    Assert(!pSession->fInHashTable);
    Assert(!pSession->ppOsSessionPtr);
    /* NOTE(review): the message format string below has four specifiers but
       only two arguments; pSession->R0Process and pSession->Process appear
       to be missing from the argument list -- confirm and fix separately. */
    AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
                     ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));

    /*
     * Remove logger instances related to this session.
     */
    RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);

    /*
     * Destroy the handle table.  Each live handle releases its object
     * reference via the supdrvSessionObjHandleDelete callback.
     */
    rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
    AssertRC(rc);
    pSession->hHandleTable = NIL_RTHANDLETABLE;

    /*
     * Release object references made in this session.
     * In theory there should be noone racing us in this session.
     */
    Log2(("release objects - start\n"));
    if (pSession->pUsage)
    {
        PSUPDRVUSAGE    pUsage;
        RTSpinlockAcquire(pDevExt->Spinlock);

        while ((pUsage = pSession->pUsage) != NULL)
        {
            PSUPDRVOBJ  pObj = pUsage->pObj;
            pSession->pUsage = pUsage->pNext;

            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage < pObj->cUsage)
            {
                /* Other sessions still reference the object; just subtract ours. */
                pObj->cUsage -= pUsage->cUsage;
                RTSpinlockRelease(pDevExt->Spinlock);
            }
            else
            {
                /* Destroy the object and free the record. */
                if (pDevExt->pObjs == pObj)
                    pDevExt->pObjs = pObj->pNext;
                else
                {
                    PSUPDRVOBJ pObjPrev;
                    for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                        if (pObjPrev->pNext == pObj)
                        {
                            pObjPrev->pNext = pObj->pNext;
                            break;
                        }
                    Assert(pObjPrev);
                }
                RTSpinlockRelease(pDevExt->Spinlock);

                Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
                     pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
                /* The destructor is invoked with the spinlock released (see above). */
                if (pObj->pfnDestructor)
                    pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
                RTMemFree(pObj);
            }

            /* free it and continue. */
            RTMemFree(pUsage);

            /* Re-acquire for the next iteration of the list walk. */
            RTSpinlockAcquire(pDevExt->Spinlock);
        }

        RTSpinlockRelease(pDevExt->Spinlock);
        AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
    }
    Log2(("release objects - done\n"));

    /*
     * Do tracer cleanups related to this session.
     */
    Log2(("release tracer stuff - start\n"));
    supdrvTracerCleanupSession(pDevExt, pSession);
    Log2(("release tracer stuff - end\n"));

    /*
     * Release memory allocated in the session.
     *
     * We do not serialize this as we assume that the application will
     * not allocate memory while closing the file handle object.
     */
    Log2(("freeing memory:\n"));
    pBundle = &pSession->Bundle;
    while (pBundle)
    {
        PSUPDRVBUNDLE   pToFree;
        unsigned        i;

        /*
         * Check and unlock all entries in the bundle.
         */
        for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
        {
            if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
            {
                Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
                      (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
                /* Free the ring-3 mapping first, then the backing object. */
                if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
                {
                    rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
                    AssertRC(rc); /** @todo figure out how to handle this. */
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                }
                rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
                AssertRC(rc); /** @todo figure out how to handle this. */
                pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
            }
        }

        /*
         * Advance and free previous bundle.  The first bundle is embedded in
         * the session structure and must not be freed here.
         */
        pToFree = pBundle;
        pBundle = pBundle->pNext;

        pToFree->pNext = NULL;
        pToFree->cUsed = 0;
        if (pToFree != &pSession->Bundle)
            RTMemFree(pToFree);
    }
    Log2(("freeing memory - done\n"));

    /*
     * Deregister component factories.
     */
    RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
    Log2(("deregistering component factories:\n"));
    if (pDevExt->pComponentFactoryHead)
    {
        PSUPDRVFACTORYREG pPrev = NULL;
        PSUPDRVFACTORYREG pCur  = pDevExt->pComponentFactoryHead;
        while (pCur)
        {
            if (pCur->pSession == pSession)
            {
                /* unlink it */
                PSUPDRVFACTORYREG pNext = pCur->pNext;
                if (pPrev)
                    pPrev->pNext = pNext;
                else
                    pDevExt->pComponentFactoryHead = pNext;

                /* free it */
                pCur->pNext    = NULL;
                pCur->pSession = NULL;
                pCur->pFactory = NULL;
                RTMemFree(pCur);

                /* next */
                pCur = pNext;
            }
            else
            {
                /* next */
                pPrev = pCur;
                pCur  = pCur->pNext;
            }
        }
    }
    RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
    Log2(("deregistering component factories - done\n"));

    /*
     * Loaded images needs to be dereferenced and possibly freed up.
     */
    supdrvLdrLock(pDevExt);
    Log2(("freeing images:\n"));
    if (pSession->pLdrUsage)
    {
        PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
        pSession->pLdrUsage = NULL;
        while (pUsage)
        {
            void           *pvFree = pUsage;
            PSUPDRVLDRIMAGE pImage = pUsage->pImage;
            /* Drop this session's references; free the image when ours were the last. */
            if (pImage->cUsage > pUsage->cUsage)
                pImage->cUsage -= pUsage->cUsage;
            else
                supdrvLdrFree(pDevExt, pImage);
            pUsage->pImage = NULL;
            pUsage = pUsage->pNext;
            RTMemFree(pvFree);
        }
    }
    supdrvLdrUnlock(pDevExt);
    Log2(("freeing images - done\n"));

    /*
     * Unmap the GIP.
     */
    Log2(("umapping GIP:\n"));
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        SUPR0GipUnmap(pSession);
        pSession->fGipReferenced = 0;
    }
    Log2(("umapping GIP - done\n"));
}
1040
1041
/**
 * Common code for freeing a session when the reference count reaches zero.
 *
 * Performs the generic and OS specific cleanup, then destroys the session
 * spinlock and frees the session structure itself.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /* Fire the session-close probe/trace point (presumably a tracepoint macro). */
    VBOXDRV_SESSION_CLOSE(pSession);

    /*
     * Cleanup the session first.
     */
    supdrvCleanupSession(pDevExt, pSession);
    supdrvOSCleanupSession(pDevExt, pSession);

    /*
     * Free the rest of the session stuff.
     */
    RTSpinlockDestroy(pSession->Spinlock);
    pSession->Spinlock = NIL_RTSPINLOCK;
    pSession->pDevExt = NULL;
    RTMemFree(pSession);
    LogFlow(("supdrvDestroySession: returns\n"));
}
1068
1069
/**
 * Inserts the session into the global hash table.
 *
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_WRONG_ORDER if the session was already inserted (asserted).
 * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
 *          session (asserted).
 * @retval  VERR_RESOURCE_IN_USE if there is already a session for that process
 *          with the same ring-0 process handle.
 * @retval  VERR_DUPLICATE if there is already a session for that pid.
 *
 * @param   pDevExt         The device extension.
 * @param   pSession        The session.
 * @param   ppOsSessionPtr  Pointer to the OS session pointer, if any is
 *                          available and used.  This will set to point to the
 *                          session while under the protection of the session
 *                          hash table spinlock.  It will also be kept in
 *                          PSUPDRVSESSION::ppOsSessionPtr for lookup and
 *                          cleanup use.
 * @param   pvUser          Argument for supdrvOSSessionHashTabInserted.
 */
int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
                                        void *pvUser)
{
    PSUPDRVSESSION  pCur;
    unsigned        iHash;

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);

    /*
     * Calculate the hash table index and acquire the spinlock.
     */
    iHash = SUPDRV_SESSION_HASH(pSession->Process);

    RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);

    /*
     * If there are a collisions, we need to carefully check if we got a
     * duplicate. There can only be one open session per process.
     */
    pCur = pDevExt->apSessionHashTab[iHash];
    if (pCur)
    {
        /* Walk the collision chain looking for the same process ID. */
        while (pCur && pCur->Process != pSession->Process)
            pCur = pCur->pCommonNextHash;

        if (pCur)
        {
            RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
            if (pCur == pSession)
            {
                /* This session is already in the table -- caller bug. */
                Assert(pSession->fInHashTable);
                AssertFailed();
                return VERR_WRONG_ORDER;
            }
            Assert(!pSession->fInHashTable);
            if (pCur->R0Process == pSession->R0Process)
                return VERR_RESOURCE_IN_USE;
            return VERR_DUPLICATE;
        }
    }
    Assert(!pSession->fInHashTable);
    Assert(!pSession->ppOsSessionPtr);

    /*
     * Insert it, doing a callout to the OS specific code in case it has
     * anything it wishes to do while we're holding the spinlock.
     */
    pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
    pDevExt->apSessionHashTab[iHash] = pSession;
    pSession->fInHashTable    = true;
    ASMAtomicIncS32(&pDevExt->cSessions);

    /* Publish the session via the OS session pointer while still locked. */
    pSession->ppOsSessionPtr = ppOsSessionPtr;
    if (ppOsSessionPtr)
        ASMAtomicWritePtr(ppOsSessionPtr, pSession);

    supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);

    /*
     * Retain a reference for the pointer in the session table.
     */
    ASMAtomicIncU32(&pSession->cRefs);

    RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    return VINF_SUCCESS;
}
1159
1160
/**
 * Removes the session from the global hash table.
 *
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_NOT_FOUND if the session was already removed (asserted).
 * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
 *          session (asserted).
 *
 * @param   pDevExt     The device extension.
 * @param   pSession    The session. The caller is expected to have a reference
 *                      to this so it won't croak on us when we release the hash
 *                      table reference.
 * @param   pvUser      OS specific context value for the
 *                      supdrvOSSessionHashTabRemoved callback.
 */
int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
{
    PSUPDRVSESSION  pCur;
    unsigned        iHash;
    int32_t         cRefs;

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);

    /*
     * Calculate the hash table index and acquire the spinlock.
     */
    iHash = SUPDRV_SESSION_HASH(pSession->Process);

    RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);

    /*
     * Unlink it.
     */
    pCur = pDevExt->apSessionHashTab[iHash];
    if (pCur == pSession)
        pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
    else
    {
        /* Not the chain head; walk the chain keeping a trailing pointer. */
        PSUPDRVSESSION pPrev = pCur;
        while (pCur && pCur != pSession)
        {
            pPrev = pCur;
            pCur  = pCur->pCommonNextHash;
        }
        if (pCur)
            pPrev->pCommonNextHash = pCur->pCommonNextHash;
        else
        {
            /* Already removed -- nothing to unlink. */
            Assert(!pSession->fInHashTable);
            RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
            return VERR_NOT_FOUND;
        }
    }

    pSession->pCommonNextHash = NULL;
    pSession->fInHashTable    = false;

    ASMAtomicDecS32(&pDevExt->cSessions);

    /*
     * Clear OS specific session pointer if available and do the OS callback.
     */
    if (pSession->ppOsSessionPtr)
    {
        /* Only clear it if it still points at this session. */
        ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
        pSession->ppOsSessionPtr = NULL;
    }

    supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);

    RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);

    /*
     * Drop the reference the hash table had to the session.  This shouldn't
     * be the last reference!
     */
    /* NOTE(review): cRefs is int32_t while ASMAtomicDecU32 returns uint32_t;
       the destroy-on-zero check below is defensive -- the assertion above it
       documents that zero is not expected here. */
    cRefs = ASMAtomicDecU32(&pSession->cRefs);
    Assert(cRefs > 0 && cRefs < _1M);
    if (cRefs == 0)
        supdrvDestroySession(pDevExt, pSession);

    return VINF_SUCCESS;
}
1248
1249
/**
 * Looks up the session for the current process in the global hash table or in
 * OS specific pointer.
 *
 * @returns Pointer to the session with a reference that the caller must
 *          release.  If no valid session was found, NULL is returned.
 *
 * @param   pDevExt         The device extension.
 * @param   Process         The process ID.
 * @param   R0Process       The ring-0 process handle.
 * @param   ppOsSessionPtr  The OS session pointer if available.  If not NULL,
 *                          this is used instead of the hash table.  For
 *                          additional safety it must then be equal to the
 *                          SUPDRVSESSION::ppOsSessionPtr member.
 *                          This can be NULL even if the OS has a session
 *                          pointer.
 */
PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
                                                   PSUPDRVSESSION *ppOsSessionPtr)
{
    PSUPDRVSESSION  pCur;
    unsigned        iHash;

    /*
     * Validate input.
     */
    AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);

    /*
     * Calculate the hash table index and acquire the spinlock.
     * Note that the spinlock is taken for both lookup paths below.
     */
    iHash = SUPDRV_SESSION_HASH(Process);

    RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);

    /*
     * If an OS session pointer is provided, always use it.
     */
    if (ppOsSessionPtr)
    {
        /* Cross-check the back-pointer and process identity before trusting it. */
        pCur = *ppOsSessionPtr;
        if (    pCur
            &&  (   pCur->ppOsSessionPtr != ppOsSessionPtr
                 || pCur->Process        != Process
                 || pCur->R0Process      != R0Process) )
            pCur = NULL;
    }
    else
    {
        /*
         * Otherwise, do the hash table lookup.
         */
        pCur = pDevExt->apSessionHashTab[iHash];
        while (     pCur
               &&   (   pCur->Process   != Process
                     || pCur->R0Process != R0Process) )
            pCur = pCur->pCommonNextHash;
    }

    /*
     * Retain the session.
     */
    if (pCur)
    {
        uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
        NOREF(cRefs);
        Assert(cRefs > 1 && cRefs < _1M);
    }

    RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);

    return pCur;
}
1323
1324
1325/**
1326 * Retain a session to make sure it doesn't go away while it is in use.
1327 *
1328 * @returns New reference count on success, UINT32_MAX on failure.
1329 * @param pSession Session data.
1330 */
1331uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1332{
1333 uint32_t cRefs;
1334 AssertPtrReturn(pSession, UINT32_MAX);
1335 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1336
1337 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1338 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1339 return cRefs;
1340}
1341
1342
1343/**
1344 * Releases a given session.
1345 *
1346 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1347 * @param pSession Session data.
1348 */
1349uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1350{
1351 uint32_t cRefs;
1352 AssertPtrReturn(pSession, UINT32_MAX);
1353 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1354
1355 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1356 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1357 if (cRefs == 0)
1358 supdrvDestroySession(pSession->pDevExt, pSession);
1359 return cRefs;
1360}
1361
1362
1363/**
1364 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1365 *
1366 * @returns IPRT status code, see SUPR0ObjAddRef.
1367 * @param hHandleTable The handle table handle. Ignored.
1368 * @param pvObj The object pointer.
1369 * @param pvCtx Context, the handle type. Ignored.
1370 * @param pvUser Session pointer.
1371 */
1372static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1373{
1374 NOREF(pvCtx);
1375 NOREF(hHandleTable);
1376 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1377}
1378
1379
1380/**
1381 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1382 *
1383 * @param hHandleTable The handle table handle. Ignored.
1384 * @param h The handle value. Ignored.
1385 * @param pvObj The object pointer.
1386 * @param pvCtx Context, the handle type. Ignored.
1387 * @param pvUser Session pointer.
1388 */
1389static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1390{
1391 NOREF(pvCtx);
1392 NOREF(h);
1393 NOREF(hHandleTable);
1394 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1395}
1396
1397
1398/**
1399 * Fast path I/O Control worker.
1400 *
1401 * @returns VBox status code that should be passed down to ring-3 unchanged.
1402 * @param uIOCtl Function number.
1403 * @param idCpu VMCPU id.
1404 * @param pDevExt Device extention.
1405 * @param pSession Session data.
1406 */
1407int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1408{
1409 /*
1410 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1411 */
1412 if (RT_LIKELY( RT_VALID_PTR(pSession)
1413 && pSession->pVM
1414 && pDevExt->pfnVMMR0EntryFast))
1415 {
1416 switch (uIOCtl)
1417 {
1418 case SUP_IOCTL_FAST_DO_RAW_RUN:
1419 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1420 break;
1421 case SUP_IOCTL_FAST_DO_HM_RUN:
1422 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1423 break;
1424 case SUP_IOCTL_FAST_DO_NOP:
1425 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1426 break;
1427 default:
1428 return VERR_INTERNAL_ERROR;
1429 }
1430 return VINF_SUCCESS;
1431 }
1432 return VERR_INTERNAL_ERROR;
1433}
1434
1435
/**
 * Helper for supdrvIOCtl.  Check if pszStr contains any character of pszChars.
 * We would use strpbrk here if this function would be contained in the RedHat kABI white
 * list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *psz;
    for (psz = pszStr; *psz != '\0'; psz++)
    {
        /* Scan the character set for the current string character. */
        const char *pszSet;
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *psz)
                return 1;
    }
    return 0;
}
1459
1460
1461
1462/**
1463 * I/O Control inner worker (tracing reasons).
1464 *
1465 * @returns IPRT status code.
1466 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1467 *
1468 * @param uIOCtl Function number.
1469 * @param pDevExt Device extention.
1470 * @param pSession Session data.
1471 * @param pReqHdr The request header.
1472 */
1473static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1474{
1475 /*
1476 * Validation macros
1477 */
1478#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1479 do { \
1480 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1481 { \
1482 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1483 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1484 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1485 } \
1486 } while (0)
1487
1488#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1489
1490#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1491 do { \
1492 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1493 { \
1494 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1495 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1496 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1497 } \
1498 } while (0)
1499
1500#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1501 do { \
1502 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1503 { \
1504 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1505 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1506 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1507 } \
1508 } while (0)
1509
1510#define REQ_CHECK_EXPR(Name, expr) \
1511 do { \
1512 if (RT_UNLIKELY(!(expr))) \
1513 { \
1514 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1515 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1516 } \
1517 } while (0)
1518
1519#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1520 do { \
1521 if (RT_UNLIKELY(!(expr))) \
1522 { \
1523 OSDBGPRINT( fmt ); \
1524 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1525 } \
1526 } while (0)
1527
1528 /*
1529 * The switch.
1530 */
1531 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1532 {
1533 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1534 {
1535 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1536 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1537 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1538 {
1539 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1540 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1541 return 0;
1542 }
1543
1544#if 0
1545 /*
1546 * Call out to the OS specific code and let it do permission checks on the
1547 * client process.
1548 */
1549 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1550 {
1551 pReq->u.Out.u32Cookie = 0xffffffff;
1552 pReq->u.Out.u32SessionCookie = 0xffffffff;
1553 pReq->u.Out.u32SessionVersion = 0xffffffff;
1554 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1555 pReq->u.Out.pSession = NULL;
1556 pReq->u.Out.cFunctions = 0;
1557 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1558 return 0;
1559 }
1560#endif
1561
1562 /*
1563 * Match the version.
1564 * The current logic is very simple, match the major interface version.
1565 */
1566 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1567 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1568 {
1569 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1570 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1571 pReq->u.Out.u32Cookie = 0xffffffff;
1572 pReq->u.Out.u32SessionCookie = 0xffffffff;
1573 pReq->u.Out.u32SessionVersion = 0xffffffff;
1574 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1575 pReq->u.Out.pSession = NULL;
1576 pReq->u.Out.cFunctions = 0;
1577 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1578 return 0;
1579 }
1580
1581 /*
1582 * Fill in return data and be gone.
1583 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1584 * u32SessionVersion <= u32ReqVersion!
1585 */
1586 /** @todo Somehow validate the client and negotiate a secure cookie... */
1587 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1588 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1589 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1590 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1591 pReq->u.Out.pSession = pSession;
1592 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1593 pReq->Hdr.rc = VINF_SUCCESS;
1594 return 0;
1595 }
1596
1597 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1598 {
1599 /* validate */
1600 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1601 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1602
1603 /* execute */
1604 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1605 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1606 pReq->Hdr.rc = VINF_SUCCESS;
1607 return 0;
1608 }
1609
1610 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1611 {
1612 /* validate */
1613 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1614 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1615 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1616 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1617 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1618
1619 /* execute */
1620 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1621 if (RT_FAILURE(pReq->Hdr.rc))
1622 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1623 return 0;
1624 }
1625
1626 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1627 {
1628 /* validate */
1629 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1630 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1631
1632 /* execute */
1633 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1634 return 0;
1635 }
1636
1637 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1638 {
1639 /* validate */
1640 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1641 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1642
1643 /* execute */
1644 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1645 if (RT_FAILURE(pReq->Hdr.rc))
1646 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1647 return 0;
1648 }
1649
1650 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1651 {
1652 /* validate */
1653 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1654 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1655
1656 /* execute */
1657 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1658 return 0;
1659 }
1660
1661 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1662 {
1663 /* validate */
1664 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1665 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1666 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1668 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1669 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1670 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1671 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1672 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1673 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1674 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1675
1676 /* execute */
1677 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1678 return 0;
1679 }
1680
1681 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1682 {
1683 /* validate */
1684 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1685 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1686 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1687 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1688 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1689 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1690 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1691 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1692 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1693 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1694 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1695 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1696 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1697 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1698 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1699
1700 if (pReq->u.In.cSymbols)
1701 {
1702 uint32_t i;
1703 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1704 for (i = 0; i < pReq->u.In.cSymbols; i++)
1705 {
1706 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1707 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1708 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1709 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1710 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1711 pReq->u.In.cbStrTab - paSyms[i].offName),
1712 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1713 }
1714 }
1715
1716 /* execute */
1717 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1718 return 0;
1719 }
1720
1721 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1722 {
1723 /* validate */
1724 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1725 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1726
1727 /* execute */
1728 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1729 return 0;
1730 }
1731
1732 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1733 {
1734 /* validate */
1735 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1736 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1737 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1738
1739 /* execute */
1740 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1741 return 0;
1742 }
1743
1744 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1745 {
1746 /* validate */
1747 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1748 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1749 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1750
1751 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1752 {
1753 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1754
1755 /* execute */
1756 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1757 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1758 else
1759 pReq->Hdr.rc = VERR_WRONG_ORDER;
1760 }
1761 else
1762 {
1763 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1764 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1765 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1766 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1767 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1768
1769 /* execute */
1770 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1771 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1772 else
1773 pReq->Hdr.rc = VERR_WRONG_ORDER;
1774 }
1775
1776 if ( RT_FAILURE(pReq->Hdr.rc)
1777 && pReq->Hdr.rc != VERR_INTERRUPTED
1778 && pReq->Hdr.rc != VERR_TIMEOUT)
1779 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1780 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1781 else
1782 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1783 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1784 return 0;
1785 }
1786
1787 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1788 {
1789 /* validate */
1790 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1791 PSUPVMMR0REQHDR pVMMReq;
1792 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1793 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1794
1795 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1796 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1797 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1798 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1799 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1800
1801 /* execute */
1802 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1803 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1804 else
1805 pReq->Hdr.rc = VERR_WRONG_ORDER;
1806
1807 if ( RT_FAILURE(pReq->Hdr.rc)
1808 && pReq->Hdr.rc != VERR_INTERRUPTED
1809 && pReq->Hdr.rc != VERR_TIMEOUT)
1810 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1811 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1812 else
1813 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1814 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1815 return 0;
1816 }
1817
1818 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1819 {
1820 /* validate */
1821 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1822 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1823
1824 /* execute */
1825 pReq->Hdr.rc = VINF_SUCCESS;
1826 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1827 return 0;
1828 }
1829
1830 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1831 {
1832 /* validate */
1833 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1834 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1835 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1836
1837 /* execute */
1838 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1839 if (RT_FAILURE(pReq->Hdr.rc))
1840 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1841 return 0;
1842 }
1843
1844 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1845 {
1846 /* validate */
1847 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1848 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1849
1850 /* execute */
1851 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1852 return 0;
1853 }
1854
1855 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1856 {
1857 /* validate */
1858 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1859 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1860
1861 /* execute */
1862 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1863 if (RT_SUCCESS(pReq->Hdr.rc))
1864 pReq->u.Out.pGipR0 = pDevExt->pGip;
1865 return 0;
1866 }
1867
1868 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1869 {
1870 /* validate */
1871 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1872 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1873
1874 /* execute */
1875 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1876 return 0;
1877 }
1878
1879 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1880 {
1881 /* validate */
1882 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1883 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1884 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1885 || ( VALID_PTR(pReq->u.In.pVMR0)
1886 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1887 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1888 /* execute */
1889 pSession->pVM = pReq->u.In.pVMR0;
1890 pReq->Hdr.rc = VINF_SUCCESS;
1891 return 0;
1892 }
1893
1894 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1895 {
1896 /* validate */
1897 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1898 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1899 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1900 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1901 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1902 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1903 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1904 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1905 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1906
1907 /* execute */
1908 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1909 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1910 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1911 &pReq->u.Out.aPages[0]);
1912 if (RT_FAILURE(pReq->Hdr.rc))
1913 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1914 return 0;
1915 }
1916
1917 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1918 {
1919 /* validate */
1920 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1921 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1922 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1923 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1924 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1925 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1926
1927 /* execute */
1928 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1929 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1930 if (RT_FAILURE(pReq->Hdr.rc))
1931 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1932 return 0;
1933 }
1934
1935 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1936 {
1937 /* validate */
1938 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1939 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1940 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1941 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1942 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1943 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1944 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1945
1946 /* execute */
1947 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1948 return 0;
1949 }
1950
1951 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1952 {
1953 /* validate */
1954 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1955 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1956
1957 /* execute */
1958 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1959 return 0;
1960 }
1961
1962 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1963 {
1964 /* validate */
1965 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1966 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1967 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1968
1969 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1970 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1971 else
1972 {
1973 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1974 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1975 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1976 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1977 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1978 }
1979 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1980
1981 /* execute */
1982 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1983 return 0;
1984 }
1985
1986 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1987 {
1988 /* validate */
1989 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1990 size_t cbStrTab;
1991 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1992 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1993 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1994 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1995 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1996 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1997 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1998 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1999 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2000 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2001 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2002
2003 /* execute */
2004 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2005 return 0;
2006 }
2007
2008 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2009 {
2010 /* validate */
2011 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2012 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2013 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2014
2015 /* execute */
2016 switch (pReq->u.In.uType)
2017 {
2018 case SUP_SEM_TYPE_EVENT:
2019 {
2020 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2021 switch (pReq->u.In.uOp)
2022 {
2023 case SUPSEMOP2_WAIT_MS_REL:
2024 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2025 break;
2026 case SUPSEMOP2_WAIT_NS_ABS:
2027 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2028 break;
2029 case SUPSEMOP2_WAIT_NS_REL:
2030 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2031 break;
2032 case SUPSEMOP2_SIGNAL:
2033 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2034 break;
2035 case SUPSEMOP2_CLOSE:
2036 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2037 break;
2038 case SUPSEMOP2_RESET:
2039 default:
2040 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2041 break;
2042 }
2043 break;
2044 }
2045
2046 case SUP_SEM_TYPE_EVENT_MULTI:
2047 {
2048 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2049 switch (pReq->u.In.uOp)
2050 {
2051 case SUPSEMOP2_WAIT_MS_REL:
2052 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2053 break;
2054 case SUPSEMOP2_WAIT_NS_ABS:
2055 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2056 break;
2057 case SUPSEMOP2_WAIT_NS_REL:
2058 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2059 break;
2060 case SUPSEMOP2_SIGNAL:
2061 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2062 break;
2063 case SUPSEMOP2_CLOSE:
2064 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2065 break;
2066 case SUPSEMOP2_RESET:
2067 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2068 break;
2069 default:
2070 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2071 break;
2072 }
2073 break;
2074 }
2075
2076 default:
2077 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2078 break;
2079 }
2080 return 0;
2081 }
2082
2083 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2084 {
2085 /* validate */
2086 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2087 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2088 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2089
2090 /* execute */
2091 switch (pReq->u.In.uType)
2092 {
2093 case SUP_SEM_TYPE_EVENT:
2094 {
2095 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2096 switch (pReq->u.In.uOp)
2097 {
2098 case SUPSEMOP3_CREATE:
2099 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2100 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2101 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2102 break;
2103 case SUPSEMOP3_GET_RESOLUTION:
2104 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2105 pReq->Hdr.rc = VINF_SUCCESS;
2106 pReq->Hdr.cbOut = sizeof(*pReq);
2107 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2108 break;
2109 default:
2110 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2111 break;
2112 }
2113 break;
2114 }
2115
2116 case SUP_SEM_TYPE_EVENT_MULTI:
2117 {
2118 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2119 switch (pReq->u.In.uOp)
2120 {
2121 case SUPSEMOP3_CREATE:
2122 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2123 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2124 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2125 break;
2126 case SUPSEMOP3_GET_RESOLUTION:
2127 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2128 pReq->Hdr.rc = VINF_SUCCESS;
2129 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2130 break;
2131 default:
2132 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2133 break;
2134 }
2135 break;
2136 }
2137
2138 default:
2139 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2140 break;
2141 }
2142 return 0;
2143 }
2144
2145 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2146 {
2147 /* validate */
2148 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2149 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2150
2151 /* execute */
2152 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2153 if (RT_FAILURE(pReq->Hdr.rc))
2154 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2155 return 0;
2156 }
2157
2158 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2159 {
2160 /* validate */
2161 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2162 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2163
2164 /* execute */
2165 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2166 return 0;
2167 }
2168
2169 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2170 {
2171 /* validate */
2172 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2173
2174 /* execute */
2175 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2176 return 0;
2177 }
2178
2179 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2180 {
2181 /* validate */
2182 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2183 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2184
2185 /* execute */
2186 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2187 return 0;
2188 }
2189
2190 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2191 {
2192 /* validate */
2193 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2194 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2195 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2196 return VERR_INVALID_PARAMETER;
2197
2198 /* execute */
2199 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2200 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2201 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2202 pReq->u.In.szName, pReq->u.In.fFlags);
2203 return 0;
2204 }
2205
2206 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2207 {
2208 /* validate */
2209 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2210 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2211
2212 /* execute */
2213 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2214 return 0;
2215 }
2216
2217 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2218 {
2219 /* validate */
2220 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2221 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2222
2223 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2224 pReqHdr->rc = VINF_SUCCESS;
2225 return 0;
2226 }
2227
2228 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2229 {
2230 /* validate */
2231 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2232 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2233 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2234 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2235
2236 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2237 return 0;
2238 }
2239
2240 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2241 {
2242 /* validate */
2243 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2244
2245 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2246 return 0;
2247 }
2248
2249 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2250 {
2251 /* validate */
2252 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2253 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2254
2255 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2256 return 0;
2257 }
2258
2259 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2260 {
2261 /* validate */
2262 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2263 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2264
2265 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2266 return 0;
2267 }
2268
2269 default:
2270 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2271 break;
2272 }
2273 return VERR_GENERAL_FAILURE;
2274}
2275
2276
/**
 * I/O Control inner worker for the restricted operations.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    /*
     * The switch.  Only the small subset of I/O controls permitted for
     * restricted sessions is handled here; anything else falls out of the
     * switch and fails with VERR_GENERAL_FAILURE.
     */
    switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    {
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
        {
            PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
            /* The magic string guards against a client talking to the wrong driver. */
            if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
                pReq->Hdr.rc = VERR_INVALID_MAGIC;
                return 0;
            }

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
                ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
                            pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
                /* Poison the outputs so a sloppy client cannot mistake this for success. */
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return 0;
            }

            /*
             * Fill in return data and be gone.
             * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
             *      u32SessionVersion <= u32ReqVersion!
             */
            /** @todo Somehow validate the client and negotiate a secure cookie... */
            pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
            pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
            pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
            pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
            pReq->u.Out.pSession          = pSession;
            /* No function table is exported to restricted sessions. */
            pReq->u.Out.cFunctions        = 0;
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
        {
            /* validate */
            PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);

            /* execute */
            pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
            /* On failure, shrink the output to just the header so no stale
               capability bits are copied back to the caller. */
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
    }
    /* Unknown or disallowed function code for a restricted session. */
    return VERR_GENERAL_FAILURE;
}
2360
2361
/**
 * I/O Control worker.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 * @param   cbReq       The size of the request buffer.
 */
int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
{
    int rc;
    VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr); /* tracepoint: ioctl entry */

    /*
     * Validate the request.
     */
    /* The caller supplied buffer must at least hold a request header. */
    if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    /* The header must carry the magic flag bits and declare in/out sizes that
       both cover the header and fit inside the supplied buffer. */
    if (RT_UNLIKELY(    (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
                    ||  pReqHdr->cbIn < sizeof(*pReqHdr)
                    ||  pReqHdr->cbIn > cbReq
                    ||  pReqHdr->cbOut < sizeof(*pReqHdr)
                    ||  pReqHdr->cbOut > cbReq))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
                    (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
    {
        OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    {
        /* The cookie request is the handshake; it must use the well-known
           initial cookie since no per-session cookie has been negotiated yet. */
        if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
        {
            OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
            VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
            return VERR_INVALID_PARAMETER;
        }
    }
    /* All other requests must present the negotiated driver and session cookies. */
    else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
                         ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    {
        OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
     */
    if (pSession->fUnrestricted)
        rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
    else
        rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);

    VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc); /* tracepoint: ioctl return */
    return rc;
}
2432
2433
/**
 * Inter-Driver Communication (IDC) worker.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 * @retval  VERR_NOT_SUPPORTED if the request isn't supported.
 *
 * @param   uReq        The request (function) code.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
{
    /*
     * The OS specific code has already validated the pSession
     * pointer, and the request size being greater or equal to
     * size of the header.
     *
     * So, just check that pSession is a kernel context session.
     */
    if (RT_UNLIKELY(    pSession
                    &&  pSession->R0Process != NIL_RTR0PROCESS))
        return VERR_INVALID_PARAMETER;

/*
 * Validation macro.
 *
 * NOTE: this contains an early 'return' on size mismatch, setting
 * pReqHdr->rc as a side effect before leaving the function.
 */
#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
                        (long)pReqHdr->cb, (long)(cbExpect))); \
            return pReqHdr->rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

    switch (uReq)
    {
        case SUPDRV_IDC_REQ_CONNECT:
        {
            PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));

            /*
             * Validate the cookie and other input.
             */
            /* A connect request must not already be associated with a session. */
            if (pReq->Hdr.pSession != NULL)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
                            (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            /* The requested version range itself must be sane: min <= req and
               both within the same major interface version. */
            if (    pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
                ||  (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
                            pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            /* Connect creates the session; the caller must not pass one in. */
            if (pSession != NULL)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
                ||  (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
                            pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
                pReq->u.Out.pSession        = NULL;
                pReq->u.Out.uSessionVersion = 0xffffffff;
                pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
                pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                /* NOTE: the ioctl itself succeeded; the error is reported via Hdr.rc. */
                return VINF_SUCCESS;
            }

            /* Prefill the outputs so they are consistent even if session creation fails. */
            pReq->u.Out.pSession        = NULL;
            pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
            pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
            pReq->u.Out.uDriverRevision = VBOX_SVN_REV;

            /* IDC clients are kernel-mode peers: no user process, unrestricted. */
            pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
            if (RT_FAILURE(pReq->Hdr.rc))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
                return VINF_SUCCESS;
            }

            /* Hand the new session back both in the payload and the header. */
            pReq->u.Out.pSession = pSession;
            pReq->Hdr.pSession = pSession;

            return VINF_SUCCESS;
        }

        case SUPDRV_IDC_REQ_DISCONNECT:
        {
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));

            /* Drop the reference taken at connect time. */
            supdrvSessionRelease(pSession);
            return pReqHdr->rc = VINF_SUCCESS;
        }

        case SUPDRV_IDC_REQ_GET_SYMBOL:
        {
            PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));

            pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
            return VINF_SUCCESS;
        }

        case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
        {
            PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));

            pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
            return VINF_SUCCESS;
        }

        case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
        {
            PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));

            pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
            return VINF_SUCCESS;
        }

        default:
            Log(("Unknown IDC %#lx\n", (long)uReq));
            break;
    }

#undef REQ_CHECK_IDC_SIZE
    return VERR_NOT_SUPPORTED;
}
2585
2586
/**
 * Registers an object for reference counting.
 *
 * The object is registered with one reference charged to the specified
 * session.
 *
 * @returns Unique identifier on success (pointer).
 *          All future references must use this identifier.
 * @returns NULL on failure (invalid input or out of memory).
 * @param   pSession        The session the initial reference is charged to.
 * @param   enmType         The object type; must lie strictly between
 *                          SUPDRVOBJTYPE_INVALID and SUPDRVOBJTYPE_END.
 * @param   pfnDestructor   The destructor function which will be called when the reference count reaches 0.
 * @param   pvUser1         The first user argument.
 * @param   pvUser2         The second user argument.
 */
SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    PSUPDRVOBJ      pObj;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    AssertPtrReturn(pfnDestructor, NULL);

    /*
     * Allocate and initialize the object.
     */
    pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    if (!pObj)
        return NULL;
    pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    pObj->enmType       = enmType;
    pObj->pNext         = NULL;
    pObj->cUsage        = 1;
    pObj->pfnDestructor = pfnDestructor;
    pObj->pvUser1       = pvUser1;
    pObj->pvUser2       = pvUser2;
    pObj->CreatorUid    = pSession->Uid;
    pObj->CreatorGid    = pSession->Gid;
    pObj->CreatorProcess= pSession->Process;
    supdrvOSObjInitCreator(pObj, pSession);

    /*
     * Allocate the usage record.
     * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
     */
    RTSpinlockAcquire(pDevExt->Spinlock);

    pUsage = pDevExt->pUsageFree;
    if (pUsage)
        pDevExt->pUsageFree = pUsage->pNext;
    else
    {
        /* Free list is empty: drop the spinlock while allocating so we
           never block inside it, then reacquire for the list insertions. */
        RTSpinlockRelease(pDevExt->Spinlock);
        pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
        if (!pUsage)
        {
            RTMemFree(pObj);
            return NULL;
        }
        RTSpinlockAcquire(pDevExt->Spinlock);
    }

    /*
     * Insert the object and create the session usage record.
     */
    /* The object. */
    pObj->pNext         = pDevExt->pObjs;
    pDevExt->pObjs      = pObj;

    /* The session record. */
    pUsage->cUsage      = 1;
    pUsage->pObj        = pObj;
    pUsage->pNext       = pSession->pUsage;
    /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
    pSession->pUsage    = pUsage;

    RTSpinlockRelease(pDevExt->Spinlock);

    Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
    return pObj;
}
2669
2670
/**
 * Increments the reference counter for the object, associating the reference
 * with the specified session.
 *
 * Convenience wrapper around SUPR0ObjAddRefEx() with fNoBlocking set to
 * false, i.e. the call may block to allocate a new usage record.
 *
 * @returns IPRT status code.
 * @param   pvObj           The identifier returned by SUPR0ObjRegister().
 * @param   pSession        The session which is referencing the object.
 *
 * @remarks The caller should not own any spinlocks and must carefully protect
 *          itself against potential race with the destructor so freed memory
 *          isn't accessed here.
 */
SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
{
    return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
}
2687
2688
/**
 * Increments the reference counter for the object, associating the reference
 * with the specified session.
 *
 * @returns IPRT status code.
 * @retval  VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
 *          couldn't be allocated. (If you see this you're not doing the right
 *          thing and it won't ever work reliably.)
 *
 * @param   pvObj           The identifier returned by SUPR0ObjRegister().
 * @param   pSession        The session which is referencing the object.
 * @param   fNoBlocking     Set if it's not OK to block. Never try to make the
 *                          first reference to an object in a session with this
 *                          argument set.
 *
 * @remarks The caller should not own any spinlocks and must carefully protect
 *          itself against potential race with the destructor so freed memory
 *          isn't accessed here.
 */
SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
{
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    int             rc          = VINF_SUCCESS;
    PSUPDRVUSAGE    pUsagePre;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     * Be ready for the destruction race (someone might be stuck in the
     * destructor waiting a lock we own).  A dead magic is therefore accepted
     * here and only rejected below, under the spinlock.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pObj, VERR_INVALID_POINTER);
    AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
                    ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
                    VERR_INVALID_PARAMETER);

    RTSpinlockAcquire(pDevExt->Spinlock);

    /* Recheck under the lock: VERR_WRONG_ORDER means we raced the destructor
       (SUPR0ObjRelease marks the magic dead before destroying the object). */
    if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
    {
        RTSpinlockRelease(pDevExt->Spinlock);

        AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
        return VERR_WRONG_ORDER;
    }

    /*
     * Preallocate the usage record if we can.
     */
    pUsagePre = pDevExt->pUsageFree;
    if (pUsagePre)
        pDevExt->pUsageFree = pUsagePre->pNext;
    else if (!fNoBlocking)
    {
        /* Free list empty: drop the spinlock to allocate, then revalidate the
           magic since the object may have died while we were unlocked. */
        RTSpinlockRelease(pDevExt->Spinlock);
        pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
        if (!pUsagePre)
            return VERR_NO_MEMORY;

        RTSpinlockAcquire(pDevExt->Spinlock);
        if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
        {
            RTSpinlockRelease(pDevExt->Spinlock);

            AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
            return VERR_WRONG_ORDER;
        }
    }

    /*
     * Reference the object.
     */
    pObj->cUsage++;

    /*
     * Look for the session record.
     */
    for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
    {
        /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
        if (pUsage->pObj == pObj)
            break;
    }
    if (pUsage)
        pUsage->cUsage++;
    else if (pUsagePre)
    {
        /* create a new session record. */
        pUsagePre->cUsage   = 1;
        pUsagePre->pObj     = pObj;
        pUsagePre->pNext    = pSession->pUsage;
        pSession->pUsage    = pUsagePre;
        /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/

        pUsagePre = NULL;
    }
    else
    {
        /* No session record and fNoBlocking forbade allocating one: undo the
           global reference and tell the caller to retry (see @retval above). */
        pObj->cUsage--;
        rc = VERR_TRY_AGAIN;
    }

    /*
     * Put any unused usage record into the free list..
     */
    if (pUsagePre)
    {
        pUsagePre->pNext = pDevExt->pUsageFree;
        pDevExt->pUsageFree = pUsagePre;
    }

    RTSpinlockRelease(pDevExt->Spinlock);

    return rc;
}
2806
2807
/**
 * Decrements / destroys a reference counter record for an object.
 *
 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
 *
 * @returns IPRT status code.
 * @retval  VINF_SUCCESS if not destroyed.
 * @retval  VINF_OBJECT_DESTROYED if it's destroyed by this release call.
 * @retval  VERR_INVALID_PARAMETER if the object isn't valid or the session
 *          holds no reference to it. Will assert in strict builds.
 *
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which is referencing the object.
 */
SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
{
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    int             rc          = VERR_INVALID_PARAMETER;
    PSUPDRVUSAGE    pUsage;
    PSUPDRVUSAGE    pUsagePrev;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Acquire the spinlock and look for the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock);

    for (pUsagePrev = NULL, pUsage = pSession->pUsage;
         pUsage;
         pUsagePrev = pUsage, pUsage = pUsage->pNext)
    {
        /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
        if (pUsage->pObj == pObj)
        {
            rc = VINF_SUCCESS;
            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage > 1)
            {
                /* The session still holds further references: just drop one
                   from both the session and the global counter. */
                pObj->cUsage--;
                pUsage->cUsage--;
            }
            else
            {
                /*
                 * Free the session record.
                 * (Freed records are recycled via the device extension free list.)
                 */
                if (pUsagePrev)
                    pUsagePrev->pNext = pUsage->pNext;
                else
                    pSession->pUsage = pUsage->pNext;
                pUsage->pNext = pDevExt->pUsageFree;
                pDevExt->pUsageFree = pUsage;

                /* What about the object? */
                if (pObj->cUsage > 1)
                    pObj->cUsage--;
                else
                {
                    /*
                     * Object is to be destroyed, unlink it.
                     * Marking the magic dead makes racing SUPR0ObjAddRefEx()
                     * calls fail with VERR_WRONG_ORDER instead of touching a
                     * half-destroyed object.
                     */
                    pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
                    rc = VINF_OBJECT_DESTROYED;
                    if (pDevExt->pObjs == pObj)
                        pDevExt->pObjs = pObj->pNext;
                    else
                    {
                        PSUPDRVOBJ pObjPrev;
                        for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                            if (pObjPrev->pNext == pObj)
                            {
                                pObjPrev->pNext = pObj->pNext;
                                break;
                            }
                        Assert(pObjPrev);
                    }
                }
            }
            break;
        }
    }

    RTSpinlockRelease(pDevExt->Spinlock);

    /*
     * Call the destructor and free the object if required.
     * This happens after dropping the spinlock, since the destructor may take
     * locks of its own (see the remarks on SUPR0ObjAddRefEx).
     */
    if (rc == VINF_OBJECT_DESTROYED)
    {
        Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
             pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
        if (pObj->pfnDestructor)
            pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
        RTMemFree(pObj);
    }

    /* pUsage is NULL here iff the session held no reference to the object. */
    AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
    return rc;
}
2915
2916
2917/**
2918 * Verifies that the current process can access the specified object.
2919 *
2920 * @returns The following IPRT status code:
2921 * @retval VINF_SUCCESS if access was granted.
2922 * @retval VERR_PERMISSION_DENIED if denied access.
2923 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2924 *
2925 * @param pvObj The identifier returned by SUPR0ObjRegister().
2926 * @param pSession The session which wishes to access the object.
2927 * @param pszObjName Object string name. This is optional and depends on the object type.
2928 *
2929 * @remark The caller is responsible for making sure the object isn't removed while
2930 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2931 */
2932SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2933{
2934 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2935 int rc;
2936
2937 /*
2938 * Validate the input.
2939 */
2940 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2941 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2942 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2943 VERR_INVALID_PARAMETER);
2944
2945 /*
2946 * Check access. (returns true if a decision has been made.)
2947 */
2948 rc = VERR_INTERNAL_ERROR;
2949 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2950 return rc;
2951
2952 /*
2953 * Default policy is to allow the user to access his own
2954 * stuff but nothing else.
2955 */
2956 if (pObj->CreatorUid == pSession->Uid)
2957 return VINF_SUCCESS;
2958 return VERR_PERMISSION_DENIED;
2959}
2960
2961
/**
 * Locks a range of user memory pages.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the locked memory should be associated.
 * @param   pvR3        Start of the memory range to lock.
 *                      This must be page aligned.
 * @param   cPages      Number of pages to lock.
 * @param   paPages     Where to put the physical addresses of locked memory;
 *                      the caller must provide room for cPages entries.
 */
SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
{
    int             rc;
    SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    const size_t    cb = (size_t)cPages << PAGE_SHIFT;  /* cast before shifting so the byte count can't wrap in 32-bit arithmetic */
    LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));

    /*
     * Verify input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    if (    RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
        ||  !pvR3)
    {
        Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Let IPRT do the job.
     */
    Mem.eType = MEMREF_TYPE_LOCKED;
    rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
    if (RT_SUCCESS(rc))
    {
        uint32_t iPage = cPages;
        AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
        /* NOTE(review): %x with a size_t argument is a format/argument size
           mismatch on 64-bit hosts; assert-message only — confirm. */
        AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));

        /* Fill the physical address array back to front. */
        while (iPage-- > 0)
        {
            paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
            /* NOTE(review): compares an RTHCPHYS against NIL_RTCCPHYS;
               presumably the NIL values coincide — confirm. */
            if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
            {
                AssertMsgFailed(("iPage=%d\n", iPage));
                rc = VERR_INTERNAL_ERROR;
                break;
            }
        }
        if (RT_SUCCESS(rc))
            rc = supdrvMemAdd(&Mem, pSession);
        if (RT_FAILURE(rc))
        {
            /* Undo the locking on any failure path. */
            int rc2 = RTR0MemObjFree(Mem.MemObj, false);
            AssertRC(rc2);
        }
    }

    return rc;
}
3023
3024
/**
 * Unlocks the memory pointed to by pvR3.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the memory was locked.
 * @param   pvR3        Memory to unlock; the ring-3 address previously passed
 *                      to SUPR0LockMem().
 */
SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
{
    LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    /* The lookup, unlock and bookkeeping are done by the common release worker. */
    return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
}
3038
3039
3040/**
3041 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3042 * backing.
3043 *
3044 * @returns IPRT status code.
3045 * @param pSession Session data.
3046 * @param cPages Number of pages to allocate.
3047 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3048 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3049 * @param pHCPhys Where to put the physical address of allocated memory.
3050 */
3051SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3052{
3053 int rc;
3054 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3055 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3056
3057 /*
3058 * Validate input.
3059 */
3060 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3061 if (!ppvR3 || !ppvR0 || !pHCPhys)
3062 {
3063 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3064 pSession, ppvR0, ppvR3, pHCPhys));
3065 return VERR_INVALID_PARAMETER;
3066
3067 }
3068 if (cPages < 1 || cPages >= 256)
3069 {
3070 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3071 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3072 }
3073
3074 /*
3075 * Let IPRT do the job.
3076 */
3077 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3078 if (RT_SUCCESS(rc))
3079 {
3080 int rc2;
3081 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3082 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3083 if (RT_SUCCESS(rc))
3084 {
3085 Mem.eType = MEMREF_TYPE_CONT;
3086 rc = supdrvMemAdd(&Mem, pSession);
3087 if (!rc)
3088 {
3089 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3090 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3091 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3092 return 0;
3093 }
3094
3095 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3096 AssertRC(rc2);
3097 }
3098 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3099 AssertRC(rc2);
3100 }
3101
3102 return rc;
3103}
3104
3105
/**
 * Frees memory allocated using SUPR0ContAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session to which the memory was allocated.
 * @param   uPtr        Pointer to the memory (ring-3 or ring-0).
 */
SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
{
    LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    /* Common release worker; matches on either the ring-0 or ring-3 address. */
    return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
}
3119
3120
3121/**
3122 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3123 *
3124 * The memory isn't zeroed.
3125 *
3126 * @returns IPRT status code.
3127 * @param pSession Session data.
3128 * @param cPages Number of pages to allocate.
3129 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3130 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3131 * @param paPages Where to put the physical addresses of allocated memory.
3132 */
3133SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3134{
3135 unsigned iPage;
3136 int rc;
3137 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3138 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3139
3140 /*
3141 * Validate input.
3142 */
3143 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3144 if (!ppvR3 || !ppvR0 || !paPages)
3145 {
3146 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3147 pSession, ppvR3, ppvR0, paPages));
3148 return VERR_INVALID_PARAMETER;
3149
3150 }
3151 if (cPages < 1 || cPages >= 256)
3152 {
3153 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3154 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3155 }
3156
3157 /*
3158 * Let IPRT do the work.
3159 */
3160 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3161 if (RT_SUCCESS(rc))
3162 {
3163 int rc2;
3164 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3165 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3166 if (RT_SUCCESS(rc))
3167 {
3168 Mem.eType = MEMREF_TYPE_LOW;
3169 rc = supdrvMemAdd(&Mem, pSession);
3170 if (!rc)
3171 {
3172 for (iPage = 0; iPage < cPages; iPage++)
3173 {
3174 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3175 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3176 }
3177 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3178 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3179 return 0;
3180 }
3181
3182 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3183 AssertRC(rc2);
3184 }
3185
3186 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3187 AssertRC(rc2);
3188 }
3189
3190 return rc;
3191}
3192
3193
/**
 * Frees memory allocated using SUPR0LowAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session to which the memory was allocated.
 * @param   uPtr        Pointer to the memory (ring-3 or ring-0).
 */
SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
{
    LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    /* Common release worker; matches on either the ring-0 or ring-3 address. */
    return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
}
3207
3208
3209
3210/**
3211 * Allocates a chunk of memory with both R0 and R3 mappings.
3212 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3213 *
3214 * @returns IPRT status code.
3215 * @param pSession The session to associated the allocation with.
3216 * @param cb Number of bytes to allocate.
3217 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3218 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3219 */
3220SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3221{
3222 int rc;
3223 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3224 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3225
3226 /*
3227 * Validate input.
3228 */
3229 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3230 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3231 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3232 if (cb < 1 || cb >= _4M)
3233 {
3234 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3235 return VERR_INVALID_PARAMETER;
3236 }
3237
3238 /*
3239 * Let IPRT do the work.
3240 */
3241 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3242 if (RT_SUCCESS(rc))
3243 {
3244 int rc2;
3245 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3246 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3247 if (RT_SUCCESS(rc))
3248 {
3249 Mem.eType = MEMREF_TYPE_MEM;
3250 rc = supdrvMemAdd(&Mem, pSession);
3251 if (!rc)
3252 {
3253 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3254 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3255 return VINF_SUCCESS;
3256 }
3257
3258 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3259 AssertRC(rc2);
3260 }
3261
3262 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3263 AssertRC(rc2);
3264 }
3265
3266 return rc;
3267}
3268
3269
/**
 * Gets the physical addresses of memory allocated using SUPR0MemAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session to which the memory was allocated.
 * @param   uPtr        The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
 * @param   paPages     Where to store the physical addresses; the caller must
 *                      provide room for one entry per allocated page.
 */
SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
{
    PSUPDRVBUNDLE pBundle;
    LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_POINTER);
    AssertReturn(uPtr, VERR_INVALID_PARAMETER);

    /*
     * Search the session's bundles for the address.
     * A MEMREF_TYPE_MEM record matches on either its ring-0 address or, when
     * a ring-3 mapping exists, the ring-3 address.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    pBundle->aMem[i].eType == MEMREF_TYPE_MEM
                    &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                         || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                             && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
                        )
                   )
                {
                    /* Found it: fill in one entry per page and return while
                       still inside the loop. */
                    const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
                    size_t iPage;
                    for (iPage = 0; iPage < cPages; iPage++)
                    {
                        paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
                        paPages[iPage].uReserved = 0;
                    }
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!!\n", (void *)uPtr));
    return VERR_INVALID_PARAMETER;
}
3326
3327
/**
 * Frees memory allocated by SUPR0MemAlloc().
 *
 * @returns IPRT status code.
 * @param   pSession    The session owning the allocation.
 * @param   uPtr        The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
 */
SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
{
    LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    /* Common release worker; matches on either the ring-0 or ring-3 address. */
    return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
}
3341
3342
3343/**
3344 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3345 *
3346 * The memory is fixed and it's possible to query the physical addresses using
3347 * SUPR0MemGetPhys().
3348 *
3349 * @returns IPRT status code.
3350 * @param pSession The session to associated the allocation with.
3351 * @param cPages The number of pages to allocate.
3352 * @param fFlags Flags, reserved for the future. Must be zero.
3353 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3354 * NULL if no ring-3 mapping.
3355 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3356 * NULL if no ring-0 mapping.
3357 * @param paPages Where to store the addresses of the pages. Optional.
3358 */
3359SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3360{
3361 int rc;
3362 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3363 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3364
3365 /*
3366 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3367 */
3368 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3369 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3370 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3371 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3372 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3373 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3374 {
3375 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3376 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3377 }
3378
3379 /*
3380 * Let IPRT do the work.
3381 */
3382 if (ppvR0)
3383 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3384 else
3385 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3386 if (RT_SUCCESS(rc))
3387 {
3388 int rc2;
3389 if (ppvR3)
3390 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3391 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3392 else
3393 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3394 if (RT_SUCCESS(rc))
3395 {
3396 Mem.eType = MEMREF_TYPE_PAGE;
3397 rc = supdrvMemAdd(&Mem, pSession);
3398 if (!rc)
3399 {
3400 if (ppvR3)
3401 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3402 if (ppvR0)
3403 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3404 if (paPages)
3405 {
3406 uint32_t iPage = cPages;
3407 while (iPage-- > 0)
3408 {
3409 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3410 Assert(paPages[iPage] != NIL_RTHCPHYS);
3411 }
3412 }
3413 return VINF_SUCCESS;
3414 }
3415
3416 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3417 AssertRC(rc2);
3418 }
3419
3420 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3421 AssertRC(rc2);
3422 }
3423 return rc;
3424}
3425
3426
/**
 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
 * space.
 *
 * @returns IPRT status code.
 * @param   pSession    The session to associated the allocation with.
 * @param   pvR3        The ring-3 address returned by SUPR0PageAllocEx.
 * @param   offSub      Where to start mapping. Must be page aligned.
 * @param   cbSub       How much to map. Must be page aligned and non-zero.
 * @param   fFlags      Flags, MBZ.
 * @param   ppvR0       Where to return the address of the ring-0 mapping on
 *                      success.
 */
SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
                                  uint32_t fFlags, PRTR0PTR ppvR0)
{
    int             rc;
    PSUPDRVBUNDLE   pBundle;
    RTR0MEMOBJ      hMemObj = NIL_RTR0MEMOBJ;
    LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));

    /*
     * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
    AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(cbSub, VERR_INVALID_PARAMETER);

    /*
     * Find the memory object among the session's bundles.  Two kinds of
     * records qualify: a SUPR0PageAllocEx allocation with a ring-3 mapping,
     * or user memory locked by SUPR0LockMem (no separate ring-3 map object).
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    (   pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
                    ||  (   pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
                {
                    /* NOTE(review): only the inner loop breaks here; the outer
                       loop keeps scanning remaining bundles after a hit. */
                    hMemObj = pBundle->aMem[i].MemObj;
                    break;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    rc = VERR_INVALID_PARAMETER;
    if (hMemObj != NIL_RTR0MEMOBJ)
    {
        /*
         * Do some further input validations before calling IPRT.
         * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
         */
        size_t cbMemObj = RTR0MemObjSize(hMemObj);
        /* NOTE(review): offSub + cbSub is added in 32-bit arithmetic; a wrap
           needs a ~4GB object, but strictly the sum check could be defeated on
           such objects — confirm whether that range is reachable. */
        if (    offSub < cbMemObj
            &&  cbSub <= cbMemObj
            &&  offSub + cbSub <= cbMemObj)
        {
            RTR0MEMOBJ hMapObj;
            /* (void *)-1: no fixed address — presumably the same "map anywhere"
               convention as the (RTR3PTR)-1 ring-3 mappings above — confirm. */
            rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
                                       RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
            if (RT_SUCCESS(rc))
                *ppvR0 = RTR0MemObjAddress(hMapObj);
        }
        else
            SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);

    }
    return rc;
}
3510
3511
3512/**
3513 * Changes the page level protection of one or more pages previously allocated
3514 * by SUPR0PageAllocEx.
3515 *
3516 * @returns IPRT status code.
3517 * @param pSession The session to associated the allocation with.
3518 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3519 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3520 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3521 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3522 * @param offSub Where to start changing. Must be page aligned.
3523 * @param cbSub How much to change. Must be page aligned.
3524 * @param fProt The new page level protection, see RTMEM_PROT_*.
3525 */
3526SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3527{
3528 int rc;
3529 PSUPDRVBUNDLE pBundle;
3530 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3531 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3532 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3533
3534 /*
3535 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3536 */
3537 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3538 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3539 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3540 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3541 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3542
3543 /*
3544 * Find the memory object.
3545 */
3546 RTSpinlockAcquire(pSession->Spinlock);
3547 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3548 {
3549 if (pBundle->cUsed > 0)
3550 {
3551 unsigned i;
3552 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3553 {
3554 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3555 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3556 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3557 || pvR3 == NIL_RTR3PTR)
3558 && ( pvR0 == NIL_RTR0PTR
3559 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3560 && ( pvR3 == NIL_RTR3PTR
3561 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3562 {
3563 if (pvR0 != NIL_RTR0PTR)
3564 hMemObjR0 = pBundle->aMem[i].MemObj;
3565 if (pvR3 != NIL_RTR3PTR)
3566 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3567 break;
3568 }
3569 }
3570 }
3571 }
3572 RTSpinlockRelease(pSession->Spinlock);
3573
3574 rc = VERR_INVALID_PARAMETER;
3575 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3576 || hMemObjR3 != NIL_RTR0MEMOBJ)
3577 {
3578 /*
3579 * Do some further input validations before calling IPRT.
3580 */
3581 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3582 if ( offSub < cbMemObj
3583 && cbSub <= cbMemObj
3584 && offSub + cbSub <= cbMemObj)
3585 {
3586 rc = VINF_SUCCESS;
3587 if (hMemObjR3 != NIL_RTR0PTR)
3588 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3589 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3590 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3591 }
3592 else
3593 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3594
3595 }
3596 return rc;
3597
3598}
3599
3600
3601/**
3602 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3603 *
3604 * @returns IPRT status code.
3605 * @param pSession The session owning the allocation.
3606 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3607 * SUPR0PageAllocEx().
3608 */
3609SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3610{
3611 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3612 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3613 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3614}
3615
3616
/**
 * Gets the paging mode of the current CPU.
 *
 * @returns Paging mode, SUPPAGINGMODE_INVALID on error.
 */
SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
{
    SUPPAGINGMODE enmMode;

    RTR0UINTREG cr0 = ASMGetCR0();
    /* Paging (PG) and protected mode (PE) must both be on, otherwise there is no paging mode to report. */
    if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
        enmMode = SUPPAGINGMODE_INVALID;
    else
    {
        RTR0UINTREG cr4 = ASMGetCR4();
        uint32_t fNXEPlusLMA = 0;       /* bit 0 = EFER.NXE active, bit 1 = EFER.LMA (long mode active). */
        if (cr4 & X86_CR4_PAE)
        {
            uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
            if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
            {
                /* Only read EFER when CPUID says NX/long-mode exists (MSR may not otherwise). */
                uint64_t efer = ASMRdMsr(MSR_K6_EFER);
                if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
                    fNXEPlusLMA |= RT_BIT(0);
                if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
                    fNXEPlusLMA |= RT_BIT(1);
            }
        }

        /* Combine CR4.PAE/PGE with the NXE/LMA bits to select the mode. */
        switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
        {
            case 0:
                enmMode = SUPPAGINGMODE_32_BIT;
                break;

            case X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
                break;

            case X86_CR4_PAE:
                enmMode = SUPPAGINGMODE_PAE;
                break;

            case X86_CR4_PAE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_PAE_NX;
                break;

            case X86_CR4_PAE | X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_PAE_GLOBAL;
                break;

            case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
                /* NOTE(review): yields PAE_GLOBAL rather than a PAE_GLOBAL_NX variant,
                   unlike the parallel AMD64 case below -- confirm this is intentional. */
                enmMode = SUPPAGINGMODE_PAE_GLOBAL;
                break;

            case RT_BIT(1) | X86_CR4_PAE:
                enmMode = SUPPAGINGMODE_AMD64;
                break;

            case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_AMD64_NX;
                break;

            case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
                break;

            case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
                break;

            default:
                AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
                enmMode = SUPPAGINGMODE_INVALID;
                break;
        }
    }
    return enmMode;
}
3696
3697
/**
 * Enables or disables hardware virtualization extensions using native OS APIs.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_NOT_SUPPORTED if not supported by the native OS.
 *
 * @param   fEnable         Whether to enable or disable.
 */
SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
{
#ifdef RT_OS_DARWIN
    /* Darwin is the only host with an OS-specific implementation of this. */
    return supdrvOSEnableVTx(fEnable);
#else
    return VERR_NOT_SUPPORTED;
#endif
}
3715
3716
/**
 * Suspends hardware virtualization extensions using the native OS API.
 *
 * This is called prior to entering raw-mode context.
 *
 * @returns @c true if suspended, @c false if not.
 */
SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
{
#ifdef RT_OS_DARWIN
    return supdrvOSSuspendVTxOnCpu();
#else
    /* Nothing to suspend on other hosts. */
    return false;
#endif
}
3732
3733
/**
 * Resumes hardware virtualization extensions using the native OS API.
 *
 * This is called after leaving raw-mode context.
 *
 * @param   fSuspended      The return value of SUPR0SuspendVTxOnCpu.
 */
SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
{
#ifdef RT_OS_DARWIN
    supdrvOSResumeVTxOnCpu(fSuspended);
#else
    /* SUPR0SuspendVTxOnCpu always returns false on non-Darwin hosts. */
    Assert(!fSuspended);
#endif
}
3749
3750
/**
 * Queries the AMD-V and VT-x capabilities of the calling CPU.
 *
 * @returns VBox status code.
 * @retval  VERR_VMX_NO_VMX
 * @retval  VERR_VMX_MSR_ALL_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_LOCKING_FAILED
 * @retval  VERR_SVM_NO_SVM
 * @retval  VERR_SVM_DISABLED
 * @retval  VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
 *          (centaur) CPU.
 *
 * @param   pSession        The session handle.
 * @param   pfCaps          Where to store the capabilities (SUPVTCAPS_XXX).
 */
SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
{
    int  rc = VERR_UNSUPPORTED_CPU;
    bool fIsSmxModeAmbiguous = false;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    /*
     * Input validation.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);

    *pfCaps = 0;
    /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    RTThreadPreemptDisable(&PreemptState);
    if (ASMHasCpuId())
    {
        uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
        uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;

        ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
        ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);

        /* Intel / VIA (Centaur): probe for VT-x. */
        if (   ASMIsValidStdRange(uMaxId)
            && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
                || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
           )
        {
            /* VT-x additionally requires MSR and FXSR support. */
            if (   (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /** @todo Unify code with hmR0InitIntelCpu(). */
                uint64_t   u64FeatMsr    = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
                bool fMsrLocked          = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                bool fSmxVmxAllowed      = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                bool fVmxAllowed         = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);

                /* Check if the LOCK bit is set but excludes the required VMXON bit. */
                if (fMsrLocked)
                {
                    if (fVmxAllowed && fSmxVmxAllowed)
                        rc = VINF_SUCCESS;
                    else if (!fVmxAllowed && !fSmxVmxAllowed)
                        rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
                    else if (!fMaybeSmxMode)
                    {
                        /* CR4.SMXE clear: only the non-SMX VMXON bit matters. */
                        if (fVmxAllowed)
                            rc = VINF_SUCCESS;
                        else
                            rc = VERR_VMX_MSR_VMXON_DISABLED;
                    }
                    else
                    {
                        /*
                         * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
                         * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
                         * See @bugref{6873}.
                         */
                        Assert(fMaybeSmxMode == true);
                        fIsSmxModeAmbiguous = true;
                        rc = VINF_SUCCESS;
                    }
                }
                else
                {
                    /*
                     * MSR is not yet locked; we can change it ourselves here.
                     * Once the lock bit is set, this MSR can no longer be modified.
                     *
                     * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
                     * accurately. See @bugref{6873}.
                     */
                    u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
                                | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                                | MSR_IA32_FEATURE_CONTROL_VMXON;
                    ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);

                    /* Verify. */
                    u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                    fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                    fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                    fVmxAllowed    = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
                    if (fSmxVmxAllowed && fVmxAllowed)
                        rc = VINF_SUCCESS;
                    else
                        rc = VERR_VMX_MSR_LOCKING_FAILED;
                }

                if (rc == VINF_SUCCESS)
                {
                    VMXCAPABILITY vtCaps;

                    *pfCaps |= SUPVTCAPS_VT_X;

                    /* Probe for EPT (nested paging) via the secondary processor-based controls. */
                    vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
                    if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
                    {
                        vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
                        if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
                            *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                    }
                }
            }
            else
                rc = VERR_VMX_NO_VMX;
        }
        /* AMD: probe for AMD-V (SVM). */
        else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
                 && ASMIsValidStdRange(uMaxId))
        {
            uint32_t fExtFeaturesEcx, uExtMaxId;
            ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
            ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
            /* SVM requires the SVM CPUID bit, leaf 0x8000000a, plus MSR and FXSR support. */
            if (   ASMIsValidExtRange(uExtMaxId)
                && uExtMaxId >= 0x8000000a
                && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /* Check if SVM is disabled */
                uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
                if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
                {
                    uint32_t fSvmFeatures;
                    *pfCaps |= SUPVTCAPS_AMD_V;

                    /* Query AMD-V features. */
                    ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                        *pfCaps |= SUPVTCAPS_NESTED_PAGING;

                    rc = VINF_SUCCESS;
                }
                else
                    rc = VERR_SVM_DISABLED;
            }
            else
                rc = VERR_SVM_NO_SVM;
        }
    }

    RTThreadPreemptRestore(&PreemptState);
    if (fIsSmxModeAmbiguous)
        SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    return rc;
}
3916
3917
3918/**
3919 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3920 * updating.
3921 *
3922 * @param pGipCpu The per CPU structure for this CPU.
3923 * @param u64NanoTS The current time.
3924 */
3925static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3926{
3927 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3928 pGipCpu->u64NanoTS = u64NanoTS;
3929}
3930
3931
3932/**
3933 * Set the current TSC and NanoTS value for the CPU.
3934 *
3935 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3936 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3937 * @param pvUser2 Pointer to the variable holding the current time.
3938 */
3939static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3940{
3941 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3942 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3943
3944 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3945 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3946
3947 NOREF(pvUser2);
3948 NOREF(idCpu);
3949}
3950
3951
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession        Session to which the GIP mapping should belong.
 * @param   ppGipR3         Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip      Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this function
 *          counts globally as one reference. One call to SUPR0GipUnmap() will unmap GIP
 *          and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?  The ring-3 mapping is created at most once per session.
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  The first user (re)starts GIP updating.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;
                uint32_t u32SystemResolution;
                unsigned i;

                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                /*
                 * Try bump up the system timer resolution.
                 * The more interrupts the better...
                 */
                if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /*  512 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /*  500 HZ */, &u32SystemResolution))
                   )
                {
                    Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
                    pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
                }

                /* On resume (not the very first start) round each CPU's transaction ID up
                   to the next GIP_UPDATEHZ_RECALC_FREQ*2 boundary and clear the last
                   update-Hz timestamp -- presumably to force an early frequency
                   recalculation; confirm against the GIP timer code. */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /* Re-seed the per-CPU TSC/NanoTS values; all CPUs unless sync-TSC or uniprocessor. */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

#ifndef DO_NOT_START_GIP
                rc = RTTimerStart(pDevExt->pGipTimer, 0); AssertRC(rc);
#endif
                rc = VINF_SUCCESS;
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
4084
4085
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession        Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int                     rc = VINF_SUCCESS;
    PSUPDRVDEVEXT           pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  The last user suspends GIP updating and
     * releases any timer granularity grant we hold.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (    pDevExt->cGipUsers > 0
            &&  !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif

            if (pDevExt->u32SystemTimerGranularityGrant)
            {
                int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
                AssertRC(rc2);
                pDevExt->u32SystemTimerGranularityGrant = 0;
            }
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
4155
4156
/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL.
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    /* Just expose the global GIP mapping pointer; may be NULL. */
    return g_pSUPGlobalInfoPage;
}
4166
4167
4168/**
4169 * Register a component factory with the support driver.
4170 *
4171 * This is currently restricted to kernel sessions only.
4172 *
4173 * @returns VBox status code.
4174 * @retval VINF_SUCCESS on success.
4175 * @retval VERR_NO_MEMORY if we're out of memory.
4176 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4177 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4178 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4179 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4180 *
4181 * @param pSession The SUPDRV session (must be a ring-0 session).
4182 * @param pFactory Pointer to the component factory registration structure.
4183 *
4184 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4185 */
4186SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4187{
4188 PSUPDRVFACTORYREG pNewReg;
4189 const char *psz;
4190 int rc;
4191
4192 /*
4193 * Validate parameters.
4194 */
4195 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4196 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4197 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4198 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4199 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4200 AssertReturn(psz, VERR_INVALID_PARAMETER);
4201
4202 /*
4203 * Allocate and initialize a new registration structure.
4204 */
4205 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4206 if (pNewReg)
4207 {
4208 pNewReg->pNext = NULL;
4209 pNewReg->pFactory = pFactory;
4210 pNewReg->pSession = pSession;
4211 pNewReg->cchName = psz - &pFactory->szName[0];
4212
4213 /*
4214 * Add it to the tail of the list after checking for prior registration.
4215 */
4216 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4217 if (RT_SUCCESS(rc))
4218 {
4219 PSUPDRVFACTORYREG pPrev = NULL;
4220 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4221 while (pCur && pCur->pFactory != pFactory)
4222 {
4223 pPrev = pCur;
4224 pCur = pCur->pNext;
4225 }
4226 if (!pCur)
4227 {
4228 if (pPrev)
4229 pPrev->pNext = pNewReg;
4230 else
4231 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4232 rc = VINF_SUCCESS;
4233 }
4234 else
4235 rc = VERR_ALREADY_EXISTS;
4236
4237 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4238 }
4239
4240 if (RT_FAILURE(rc))
4241 RTMemFree(pNewReg);
4242 }
4243 else
4244 rc = VERR_NO_MEMORY;
4245 return rc;
4246}
4247
4248
4249/**
4250 * Deregister a component factory.
4251 *
4252 * @returns VBox status code.
4253 * @retval VINF_SUCCESS on success.
4254 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4255 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4256 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4257 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4258 *
4259 * @param pSession The SUPDRV session (must be a ring-0 session).
4260 * @param pFactory Pointer to the component factory registration structure
4261 * previously passed SUPR0ComponentRegisterFactory().
4262 *
4263 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4264 */
4265SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4266{
4267 int rc;
4268
4269 /*
4270 * Validate parameters.
4271 */
4272 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4273 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4274 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4275
4276 /*
4277 * Take the lock and look for the registration record.
4278 */
4279 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4280 if (RT_SUCCESS(rc))
4281 {
4282 PSUPDRVFACTORYREG pPrev = NULL;
4283 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4284 while (pCur && pCur->pFactory != pFactory)
4285 {
4286 pPrev = pCur;
4287 pCur = pCur->pNext;
4288 }
4289 if (pCur)
4290 {
4291 if (!pPrev)
4292 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4293 else
4294 pPrev->pNext = pCur->pNext;
4295
4296 pCur->pNext = NULL;
4297 pCur->pFactory = NULL;
4298 pCur->pSession = NULL;
4299 rc = VINF_SUCCESS;
4300 }
4301 else
4302 rc = VERR_NOT_FOUND;
4303
4304 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4305
4306 RTMemFree(pCur);
4307 }
4308 return rc;
4309}
4310
4311
4312/**
4313 * Queries a component factory.
4314 *
4315 * @returns VBox status code.
4316 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4317 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4318 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4319 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4320 *
4321 * @param pSession The SUPDRV session.
4322 * @param pszName The name of the component factory.
4323 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4324 * @param ppvFactoryIf Where to store the factory interface.
4325 */
4326SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4327{
4328 const char *pszEnd;
4329 size_t cchName;
4330 int rc;
4331
4332 /*
4333 * Validate parameters.
4334 */
4335 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4336
4337 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4338 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4339 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4340 cchName = pszEnd - pszName;
4341
4342 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4343 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4344 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4345
4346 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4347 *ppvFactoryIf = NULL;
4348
4349 /*
4350 * Take the lock and try all factories by this name.
4351 */
4352 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4353 if (RT_SUCCESS(rc))
4354 {
4355 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4356 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4357 while (pCur)
4358 {
4359 if ( pCur->cchName == cchName
4360 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4361 {
4362 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4363 if (pvFactory)
4364 {
4365 *ppvFactoryIf = pvFactory;
4366 rc = VINF_SUCCESS;
4367 break;
4368 }
4369 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4370 }
4371
4372 /* next */
4373 pCur = pCur->pNext;
4374 }
4375
4376 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4377 }
4378 return rc;
4379}
4380
4381
/**
 * Adds a memory object to the session.
 *
 * @returns IPRT status code.
 * @param   pMem        Memory tracking structure containing the
 *                      information to track.
 * @param   pSession    The session.
 */
static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Find free entry and record the allocation.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                /* A NIL MemObj marks a free slot. */
                if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
                {
                    pBundle->cUsed++;
                    pBundle->aMem[i] = *pMem;
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
            /* cUsed said there was room, yet no free slot was found. */
            AssertFailed(); /* !!this can't be happening!!! */
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    /*
     * Need to allocate a new bundle.  Done outside the spinlock since
     * allocation may not be done while holding it.
     * Insert into the last entry in the bundle.
     */
    pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
    if (!pBundle)
        return VERR_NO_MEMORY;

    /* take last entry. */
    pBundle->cUsed++;
    pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;

    /* insert into list. */
    RTSpinlockAcquire(pSession->Spinlock);
    pBundle->pNext = pSession->Bundle.pNext;
    pSession->Bundle.pNext = pBundle;
    RTSpinlockRelease(pSession->Spinlock);

    return VINF_SUCCESS;
}
4438
4439
/**
 * Releases a memory object referenced by pointer and type.
 *
 * @returns IPRT status code.
 * @param   pSession    Session data.
 * @param   uPtr        Pointer to memory. This is matched against both the R0 and R3 addresses.
 * @param   eType       Memory type.
 */
static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Validate input.
     */
    if (!uPtr)
    {
        Log(("Illegal address %p\n", (void *)uPtr));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Search for the address.  Match either the ring-0 address of the memory
     * object or the ring-3 address of its user mapping.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (   pBundle->aMem[i].eType == eType
                    && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    && (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                        || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                            && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
                   )
                {
                    /* Make a copy of it and release it outside the spinlock. */
                    SUPDRVMEMREF Mem = pBundle->aMem[i];
                    pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
                    pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                    RTSpinlockRelease(pSession->Spinlock);

                    /* Free the ring-3 mapping first, then the object itself. */
                    if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MapObjR3, false);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    if (Mem.MemObj != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
    return VERR_INVALID_PARAMETER;
}
4505
4506
4507/**
4508 * Opens an image. If it's the first time it's opened the call must upload
4509 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4510 *
4511 * This is the 1st step of the loading.
4512 *
4513 * @returns IPRT status code.
4514 * @param pDevExt Device globals.
4515 * @param pSession Session data.
4516 * @param pReq The open request.
4517 */
4518static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4519{
4520 int rc;
4521 PSUPDRVLDRIMAGE pImage;
4522 void *pv;
4523 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4524 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4525
4526 /*
4527 * Check if we got an instance of the image already.
4528 */
4529 supdrvLdrLock(pDevExt);
4530 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4531 {
4532 if ( pImage->szName[cchName] == '\0'
4533 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4534 {
4535 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4536 {
4537 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4538 pImage->cUsage++;
4539 pReq->u.Out.pvImageBase = pImage->pvImage;
4540 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4541 pReq->u.Out.fNativeLoader = pImage->fNative;
4542 supdrvLdrAddUsage(pSession, pImage);
4543 supdrvLdrUnlock(pDevExt);
4544 return VINF_SUCCESS;
4545 }
4546 supdrvLdrUnlock(pDevExt);
4547 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4548 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4549 }
4550 }
4551 /* (not found - add it!) */
4552
4553 /*
4554 * Allocate memory.
4555 */
4556 Assert(cchName < sizeof(pImage->szName));
4557 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4558 if (!pv)
4559 {
4560 supdrvLdrUnlock(pDevExt);
4561 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4562 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4563 }
4564
4565 /*
4566 * Setup and link in the LDR stuff.
4567 */
4568 pImage = (PSUPDRVLDRIMAGE)pv;
4569 pImage->pvImage = NULL;
4570 pImage->pvImageAlloc = NULL;
4571 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4572 pImage->cbImageBits = pReq->u.In.cbImageBits;
4573 pImage->cSymbols = 0;
4574 pImage->paSymbols = NULL;
4575 pImage->pachStrTab = NULL;
4576 pImage->cbStrTab = 0;
4577 pImage->pfnModuleInit = NULL;
4578 pImage->pfnModuleTerm = NULL;
4579 pImage->pfnServiceReqHandler = NULL;
4580 pImage->uState = SUP_IOCTL_LDR_OPEN;
4581 pImage->cUsage = 1;
4582 pImage->pDevExt = pDevExt;
4583 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4584
4585 /*
4586 * Try load it using the native loader, if that isn't supported, fall back
4587 * on the older method.
4588 */
4589 pImage->fNative = true;
4590 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4591 if (rc == VERR_NOT_SUPPORTED)
4592 {
4593 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4594 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4595 pImage->fNative = false;
4596 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4597 }
4598 if (RT_FAILURE(rc))
4599 {
4600 supdrvLdrUnlock(pDevExt);
4601 RTMemFree(pImage);
4602 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4603 return rc;
4604 }
4605 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4606
4607 /*
4608 * Link it.
4609 */
4610 pImage->pNext = pDevExt->pLdrImages;
4611 pDevExt->pLdrImages = pImage;
4612
4613 supdrvLdrAddUsage(pSession, pImage);
4614
4615 pReq->u.Out.pvImageBase = pImage->pvImage;
4616 pReq->u.Out.fNeedsLoading = true;
4617 pReq->u.Out.fNativeLoader = pImage->fNative;
4618 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4619
4620 supdrvLdrUnlock(pDevExt);
4621 return VINF_SUCCESS;
4622}
4623
4624
4625/**
4626 * Worker that validates a pointer to an image entrypoint.
4627 *
4628 * @returns IPRT status code.
4629 * @param pDevExt The device globals.
4630 * @param pImage The loader image.
4631 * @param pv The pointer into the image.
4632 * @param fMayBeNull Whether it may be NULL.
4633 * @param pszWhat What is this entrypoint? (for logging)
4634 * @param pbImageBits The image bits prepared by ring-3.
4635 *
4636 * @remarks Will leave the lock on failure.
4637 */
4638static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4639 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4640{
4641 if (!fMayBeNull || pv)
4642 {
4643 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4644 {
4645 supdrvLdrUnlock(pDevExt);
4646 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4647 return VERR_INVALID_PARAMETER;
4648 }
4649
4650 if (pImage->fNative)
4651 {
4652 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4653 if (RT_FAILURE(rc))
4654 {
4655 supdrvLdrUnlock(pDevExt);
4656 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4657 return rc;
4658 }
4659 }
4660 }
4661 return VINF_SUCCESS;
4662}
4663
4664
4665/**
4666 * Loads the image bits.
4667 *
4668 * This is the 2nd step of the loading.
4669 *
4670 * @returns IPRT status code.
4671 * @param pDevExt Device globals.
4672 * @param pSession Session data.
4673 * @param pReq The request.
4674 */
4675static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4676{
4677 PSUPDRVLDRUSAGE pUsage;
4678 PSUPDRVLDRIMAGE pImage;
4679 int rc;
4680 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4681
4682 /*
4683 * Find the ldr image.
4684 */
4685 supdrvLdrLock(pDevExt);
4686 pUsage = pSession->pLdrUsage;
4687 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4688 pUsage = pUsage->pNext;
4689 if (!pUsage)
4690 {
4691 supdrvLdrUnlock(pDevExt);
4692 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4693 return VERR_INVALID_HANDLE;
4694 }
4695 pImage = pUsage->pImage;
4696
4697 /*
4698 * Validate input.
4699 */
4700 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4701 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4702 {
4703 supdrvLdrUnlock(pDevExt);
4704 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4705 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4706 return VERR_INVALID_HANDLE;
4707 }
4708
4709 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4710 {
4711 unsigned uState = pImage->uState;
4712 supdrvLdrUnlock(pDevExt);
4713 if (uState != SUP_IOCTL_LDR_LOAD)
4714 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4715 return VERR_ALREADY_LOADED;
4716 }
4717
4718 switch (pReq->u.In.eEPType)
4719 {
4720 case SUPLDRLOADEP_NOTHING:
4721 break;
4722
4723 case SUPLDRLOADEP_VMMR0:
4724 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4725 if (RT_SUCCESS(rc))
4726 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4727 if (RT_SUCCESS(rc))
4728 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4729 if (RT_SUCCESS(rc))
4730 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4731 if (RT_FAILURE(rc))
4732 return rc;
4733 break;
4734
4735 case SUPLDRLOADEP_SERVICE:
4736 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4737 if (RT_FAILURE(rc))
4738 return rc;
4739 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4740 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4741 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4742 {
4743 supdrvLdrUnlock(pDevExt);
4744 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4745 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4746 pReq->u.In.EP.Service.apvReserved[0],
4747 pReq->u.In.EP.Service.apvReserved[1],
4748 pReq->u.In.EP.Service.apvReserved[2]));
4749 return VERR_INVALID_PARAMETER;
4750 }
4751 break;
4752
4753 default:
4754 supdrvLdrUnlock(pDevExt);
4755 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4756 return VERR_INVALID_PARAMETER;
4757 }
4758
4759 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4760 if (RT_FAILURE(rc))
4761 return rc;
4762 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4763 if (RT_FAILURE(rc))
4764 return rc;
4765
4766 /*
4767 * Allocate and copy the tables.
4768 * (No need to do try/except as this is a buffered request.)
4769 */
4770 pImage->cbStrTab = pReq->u.In.cbStrTab;
4771 if (pImage->cbStrTab)
4772 {
4773 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4774 if (pImage->pachStrTab)
4775 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4776 else
4777 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4778 }
4779
4780 pImage->cSymbols = pReq->u.In.cSymbols;
4781 if (RT_SUCCESS(rc) && pImage->cSymbols)
4782 {
4783 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4784 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4785 if (pImage->paSymbols)
4786 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4787 else
4788 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4789 }
4790
4791 /*
4792 * Copy the bits / complete native loading.
4793 */
4794 if (RT_SUCCESS(rc))
4795 {
4796 pImage->uState = SUP_IOCTL_LDR_LOAD;
4797 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4798 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4799
4800 if (pImage->fNative)
4801 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4802 else
4803 {
4804 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4805 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4806 }
4807 }
4808
4809 /*
4810 * Update any entry points.
4811 */
4812 if (RT_SUCCESS(rc))
4813 {
4814 switch (pReq->u.In.eEPType)
4815 {
4816 default:
4817 case SUPLDRLOADEP_NOTHING:
4818 rc = VINF_SUCCESS;
4819 break;
4820 case SUPLDRLOADEP_VMMR0:
4821 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4822 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4823 break;
4824 case SUPLDRLOADEP_SERVICE:
4825 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4826 rc = VINF_SUCCESS;
4827 break;
4828 }
4829 }
4830
4831 /*
4832 * On success call the module initialization.
4833 */
4834 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4835 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4836 {
4837 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4838 pDevExt->pLdrInitImage = pImage;
4839 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4840 rc = pImage->pfnModuleInit(pImage);
4841 pDevExt->pLdrInitImage = NULL;
4842 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4843 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4844 supdrvLdrUnsetVMMR0EPs(pDevExt);
4845 }
4846 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4847
4848 if (RT_FAILURE(rc))
4849 {
4850 /* Inform the tracing component in case ModuleInit registered TPs. */
4851 supdrvTracerModuleUnloading(pDevExt, pImage);
4852
4853 pImage->uState = SUP_IOCTL_LDR_OPEN;
4854 pImage->pfnModuleInit = NULL;
4855 pImage->pfnModuleTerm = NULL;
4856 pImage->pfnServiceReqHandler= NULL;
4857 pImage->cbStrTab = 0;
4858 RTMemFree(pImage->pachStrTab);
4859 pImage->pachStrTab = NULL;
4860 RTMemFree(pImage->paSymbols);
4861 pImage->paSymbols = NULL;
4862 pImage->cSymbols = 0;
4863 }
4864
4865 supdrvLdrUnlock(pDevExt);
4866 return rc;
4867}
4868
4869
4870/**
4871 * Frees a previously loaded (prep'ed) image.
4872 *
4873 * @returns IPRT status code.
4874 * @param pDevExt Device globals.
4875 * @param pSession Session data.
4876 * @param pReq The request.
4877 */
4878static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4879{
4880 int rc;
4881 PSUPDRVLDRUSAGE pUsagePrev;
4882 PSUPDRVLDRUSAGE pUsage;
4883 PSUPDRVLDRIMAGE pImage;
4884 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4885
4886 /*
4887 * Find the ldr image.
4888 */
4889 supdrvLdrLock(pDevExt);
4890 pUsagePrev = NULL;
4891 pUsage = pSession->pLdrUsage;
4892 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4893 {
4894 pUsagePrev = pUsage;
4895 pUsage = pUsage->pNext;
4896 }
4897 if (!pUsage)
4898 {
4899 supdrvLdrUnlock(pDevExt);
4900 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4901 return VERR_INVALID_HANDLE;
4902 }
4903
4904 /*
4905 * Check if we can remove anything.
4906 */
4907 rc = VINF_SUCCESS;
4908 pImage = pUsage->pImage;
4909 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4910 {
4911 /*
4912 * Check if there are any objects with destructors in the image, if
4913 * so leave it for the session cleanup routine so we get a chance to
4914 * clean things up in the right order and not leave them all dangling.
4915 */
4916 RTSpinlockAcquire(pDevExt->Spinlock);
4917 if (pImage->cUsage <= 1)
4918 {
4919 PSUPDRVOBJ pObj;
4920 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4921 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4922 {
4923 rc = VERR_DANGLING_OBJECTS;
4924 break;
4925 }
4926 }
4927 else
4928 {
4929 PSUPDRVUSAGE pGenUsage;
4930 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4931 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4932 {
4933 rc = VERR_DANGLING_OBJECTS;
4934 break;
4935 }
4936 }
4937 RTSpinlockRelease(pDevExt->Spinlock);
4938 if (rc == VINF_SUCCESS)
4939 {
4940 /* unlink it */
4941 if (pUsagePrev)
4942 pUsagePrev->pNext = pUsage->pNext;
4943 else
4944 pSession->pLdrUsage = pUsage->pNext;
4945
4946 /* free it */
4947 pUsage->pImage = NULL;
4948 pUsage->pNext = NULL;
4949 RTMemFree(pUsage);
4950
4951 /*
4952 * Dereference the image.
4953 */
4954 if (pImage->cUsage <= 1)
4955 supdrvLdrFree(pDevExt, pImage);
4956 else
4957 pImage->cUsage--;
4958 }
4959 else
4960 {
4961 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4962 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4963 }
4964 }
4965 else
4966 {
4967 /*
4968 * Dereference both image and usage.
4969 */
4970 pImage->cUsage--;
4971 pUsage->cUsage--;
4972 }
4973
4974 supdrvLdrUnlock(pDevExt);
4975 return rc;
4976}
4977
4978
4979/**
4980 * Gets the address of a symbol in an open image.
4981 *
4982 * @returns IPRT status code.
4983 * @param pDevExt Device globals.
4984 * @param pSession Session data.
4985 * @param pReq The request buffer.
4986 */
4987static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4988{
4989 PSUPDRVLDRIMAGE pImage;
4990 PSUPDRVLDRUSAGE pUsage;
4991 uint32_t i;
4992 PSUPLDRSYM paSyms;
4993 const char *pchStrings;
4994 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
4995 void *pvSymbol = NULL;
4996 int rc = VERR_GENERAL_FAILURE;
4997 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
4998
4999 /*
5000 * Find the ldr image.
5001 */
5002 supdrvLdrLock(pDevExt);
5003 pUsage = pSession->pLdrUsage;
5004 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5005 pUsage = pUsage->pNext;
5006 if (!pUsage)
5007 {
5008 supdrvLdrUnlock(pDevExt);
5009 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5010 return VERR_INVALID_HANDLE;
5011 }
5012 pImage = pUsage->pImage;
5013 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5014 {
5015 unsigned uState = pImage->uState;
5016 supdrvLdrUnlock(pDevExt);
5017 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5018 return VERR_ALREADY_LOADED;
5019 }
5020
5021 /*
5022 * Search the symbol strings.
5023 *
5024 * Note! The int32_t is for native loading on solaris where the data
5025 * and text segments are in very different places.
5026 */
5027 pchStrings = pImage->pachStrTab;
5028 paSyms = pImage->paSymbols;
5029 for (i = 0; i < pImage->cSymbols; i++)
5030 {
5031 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5032 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5033 {
5034 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5035 rc = VINF_SUCCESS;
5036 break;
5037 }
5038 }
5039 supdrvLdrUnlock(pDevExt);
5040 pReq->u.Out.pvSymbol = pvSymbol;
5041 return rc;
5042}
5043
5044
5045/**
5046 * Gets the address of a symbol in an open image or the support driver.
5047 *
5048 * @returns VINF_SUCCESS on success.
5049 * @returns
5050 * @param pDevExt Device globals.
5051 * @param pSession Session data.
5052 * @param pReq The request buffer.
5053 */
5054static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5055{
5056 int rc = VINF_SUCCESS;
5057 const char *pszSymbol = pReq->u.In.pszSymbol;
5058 const char *pszModule = pReq->u.In.pszModule;
5059 size_t cbSymbol;
5060 char const *pszEnd;
5061 uint32_t i;
5062
5063 /*
5064 * Input validation.
5065 */
5066 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5067 pszEnd = RTStrEnd(pszSymbol, 512);
5068 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5069 cbSymbol = pszEnd - pszSymbol + 1;
5070
5071 if (pszModule)
5072 {
5073 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5074 pszEnd = RTStrEnd(pszModule, 64);
5075 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5076 }
5077 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5078
5079
5080 if ( !pszModule
5081 || !strcmp(pszModule, "SupDrv"))
5082 {
5083 /*
5084 * Search the support driver export table.
5085 */
5086 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5087 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5088 {
5089 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5090 break;
5091 }
5092 }
5093 else
5094 {
5095 /*
5096 * Find the loader image.
5097 */
5098 PSUPDRVLDRIMAGE pImage;
5099
5100 supdrvLdrLock(pDevExt);
5101
5102 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5103 if (!strcmp(pImage->szName, pszModule))
5104 break;
5105 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5106 {
5107 /*
5108 * Search the symbol strings.
5109 */
5110 const char *pchStrings = pImage->pachStrTab;
5111 PCSUPLDRSYM paSyms = pImage->paSymbols;
5112 for (i = 0; i < pImage->cSymbols; i++)
5113 {
5114 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5115 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5116 {
5117 /*
5118 * Found it! Calc the symbol address and add a reference to the module.
5119 */
5120 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5121 rc = supdrvLdrAddUsage(pSession, pImage);
5122 break;
5123 }
5124 }
5125 }
5126 else
5127 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5128
5129 supdrvLdrUnlock(pDevExt);
5130 }
5131 return rc;
5132}
5133
5134
5135/**
5136 * Updates the VMMR0 entry point pointers.
5137 *
5138 * @returns IPRT status code.
5139 * @param pDevExt Device globals.
5140 * @param pSession Session data.
5141 * @param pVMMR0 VMMR0 image handle.
5142 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5143 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5144 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5145 * @remark Caller must own the loader mutex.
5146 */
5147static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5148{
5149 int rc = VINF_SUCCESS;
5150 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5151
5152
5153 /*
5154 * Check if not yet set.
5155 */
5156 if (!pDevExt->pvVMMR0)
5157 {
5158 pDevExt->pvVMMR0 = pvVMMR0;
5159 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5160 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5161 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5162 }
5163 else
5164 {
5165 /*
5166 * Return failure or success depending on whether the values match or not.
5167 */
5168 if ( pDevExt->pvVMMR0 != pvVMMR0
5169 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5170 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5171 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5172 {
5173 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5174 rc = VERR_INVALID_PARAMETER;
5175 }
5176 }
5177 return rc;
5178}
5179
5180
5181/**
5182 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5183 *
5184 * @param pDevExt Device globals.
5185 */
5186static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5187{
5188 pDevExt->pvVMMR0 = NULL;
5189 pDevExt->pfnVMMR0EntryInt = NULL;
5190 pDevExt->pfnVMMR0EntryFast = NULL;
5191 pDevExt->pfnVMMR0EntryEx = NULL;
5192}
5193
5194
5195/**
5196 * Adds a usage reference in the specified session of an image.
5197 *
5198 * Called while owning the loader semaphore.
5199 *
5200 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5201 * @param pSession Session in question.
5202 * @param pImage Image which the session is using.
5203 */
5204static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5205{
5206 PSUPDRVLDRUSAGE pUsage;
5207 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5208
5209 /*
5210 * Referenced it already?
5211 */
5212 pUsage = pSession->pLdrUsage;
5213 while (pUsage)
5214 {
5215 if (pUsage->pImage == pImage)
5216 {
5217 pUsage->cUsage++;
5218 return VINF_SUCCESS;
5219 }
5220 pUsage = pUsage->pNext;
5221 }
5222
5223 /*
5224 * Allocate new usage record.
5225 */
5226 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5227 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5228 pUsage->cUsage = 1;
5229 pUsage->pImage = pImage;
5230 pUsage->pNext = pSession->pLdrUsage;
5231 pSession->pLdrUsage = pUsage;
5232 return VINF_SUCCESS;
5233}
5234
5235
5236/**
5237 * Frees a load image.
5238 *
5239 * @param pDevExt Pointer to device extension.
5240 * @param pImage Pointer to the image we're gonna free.
5241 * This image must exit!
5242 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5243 */
5244static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5245{
5246 PSUPDRVLDRIMAGE pImagePrev;
5247 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5248
5249 /* find it - arg. should've used doubly linked list. */
5250 Assert(pDevExt->pLdrImages);
5251 pImagePrev = NULL;
5252 if (pDevExt->pLdrImages != pImage)
5253 {
5254 pImagePrev = pDevExt->pLdrImages;
5255 while (pImagePrev->pNext != pImage)
5256 pImagePrev = pImagePrev->pNext;
5257 Assert(pImagePrev->pNext == pImage);
5258 }
5259
5260 /* unlink */
5261 if (pImagePrev)
5262 pImagePrev->pNext = pImage->pNext;
5263 else
5264 pDevExt->pLdrImages = pImage->pNext;
5265
5266 /* check if this is VMMR0.r0 unset its entry point pointers. */
5267 if (pDevExt->pvVMMR0 == pImage->pvImage)
5268 supdrvLdrUnsetVMMR0EPs(pDevExt);
5269
5270 /* check for objects with destructors in this image. (Shouldn't happen.) */
5271 if (pDevExt->pObjs)
5272 {
5273 unsigned cObjs = 0;
5274 PSUPDRVOBJ pObj;
5275 RTSpinlockAcquire(pDevExt->Spinlock);
5276 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5277 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5278 {
5279 pObj->pfnDestructor = NULL;
5280 cObjs++;
5281 }
5282 RTSpinlockRelease(pDevExt->Spinlock);
5283 if (cObjs)
5284 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5285 }
5286
5287 /* call termination function if fully loaded. */
5288 if ( pImage->pfnModuleTerm
5289 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5290 {
5291 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5292 pImage->pfnModuleTerm(pImage);
5293 }
5294
5295 /* Inform the tracing component. */
5296 supdrvTracerModuleUnloading(pDevExt, pImage);
5297
5298 /* do native unload if appropriate. */
5299 if (pImage->fNative)
5300 supdrvOSLdrUnload(pDevExt, pImage);
5301
5302 /* free the image */
5303 pImage->cUsage = 0;
5304 pImage->pDevExt = NULL;
5305 pImage->pNext = NULL;
5306 pImage->uState = SUP_IOCTL_LDR_FREE;
5307 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5308 pImage->pvImageAlloc = NULL;
5309 RTMemFree(pImage->pachStrTab);
5310 pImage->pachStrTab = NULL;
5311 RTMemFree(pImage->paSymbols);
5312 pImage->paSymbols = NULL;
5313 RTMemFree(pImage);
5314}
5315
5316
5317/**
5318 * Acquires the loader lock.
5319 *
5320 * @returns IPRT status code.
5321 * @param pDevExt The device extension.
5322 */
5323DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5324{
5325#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5326 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5327#else
5328 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5329#endif
5330 AssertRC(rc);
5331 return rc;
5332}
5333
5334
5335/**
5336 * Releases the loader lock.
5337 *
5338 * @returns IPRT status code.
5339 * @param pDevExt The device extension.
5340 */
5341DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5342{
5343#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5344 return RTSemMutexRelease(pDevExt->mtxLdr);
5345#else
5346 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5347#endif
5348}
5349
5350
5351/**
5352 * Implements the service call request.
5353 *
5354 * @returns VBox status code.
5355 * @param pDevExt The device extension.
5356 * @param pSession The calling session.
5357 * @param pReq The request packet, valid.
5358 */
5359static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5360{
5361#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5362 int rc;
5363
5364 /*
5365 * Find the module first in the module referenced by the calling session.
5366 */
5367 rc = supdrvLdrLock(pDevExt);
5368 if (RT_SUCCESS(rc))
5369 {
5370 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5371 PSUPDRVLDRUSAGE pUsage;
5372
5373 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5374 if ( pUsage->pImage->pfnServiceReqHandler
5375 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5376 {
5377 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5378 break;
5379 }
5380 supdrvLdrUnlock(pDevExt);
5381
5382 if (pfnServiceReqHandler)
5383 {
5384 /*
5385 * Call it.
5386 */
5387 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5388 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5389 else
5390 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5391 }
5392 else
5393 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5394 }
5395
5396 /* log it */
5397 if ( RT_FAILURE(rc)
5398 && rc != VERR_INTERRUPTED
5399 && rc != VERR_TIMEOUT)
5400 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5401 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5402 else
5403 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5404 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5405 return rc;
5406#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5407 return VERR_NOT_IMPLEMENTED;
5408#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5409}
5410
5411
5412/**
5413 * Implements the logger settings request.
5414 *
5415 * @returns VBox status code.
5416 * @param pDevExt The device extension.
5417 * @param pSession The caller's session.
5418 * @param pReq The request.
5419 */
5420static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5421{
5422 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5423 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5424 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5425 PRTLOGGER pLogger = NULL;
5426 int rc;
5427
5428 /*
5429 * Some further validation.
5430 */
5431 switch (pReq->u.In.fWhat)
5432 {
5433 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5434 case SUPLOGGERSETTINGS_WHAT_CREATE:
5435 break;
5436
5437 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5438 if (*pszGroup || *pszFlags || *pszDest)
5439 return VERR_INVALID_PARAMETER;
5440 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5441 return VERR_ACCESS_DENIED;
5442 break;
5443
5444 default:
5445 return VERR_INTERNAL_ERROR;
5446 }
5447
5448 /*
5449 * Get the logger.
5450 */
5451 switch (pReq->u.In.fWhich)
5452 {
5453 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5454 pLogger = RTLogGetDefaultInstance();
5455 break;
5456
5457 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5458 pLogger = RTLogRelDefaultInstance();
5459 break;
5460
5461 default:
5462 return VERR_INTERNAL_ERROR;
5463 }
5464
5465 /*
5466 * Do the job.
5467 */
5468 switch (pReq->u.In.fWhat)
5469 {
5470 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5471 if (pLogger)
5472 {
5473 rc = RTLogFlags(pLogger, pszFlags);
5474 if (RT_SUCCESS(rc))
5475 rc = RTLogGroupSettings(pLogger, pszGroup);
5476 NOREF(pszDest);
5477 }
5478 else
5479 rc = VERR_NOT_FOUND;
5480 break;
5481
5482 case SUPLOGGERSETTINGS_WHAT_CREATE:
5483 {
5484 if (pLogger)
5485 rc = VERR_ALREADY_EXISTS;
5486 else
5487 {
5488 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5489
5490 rc = RTLogCreate(&pLogger,
5491 0 /* fFlags */,
5492 pszGroup,
5493 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5494 ? "VBOX_LOG"
5495 : "VBOX_RELEASE_LOG",
5496 RT_ELEMENTS(s_apszGroups),
5497 s_apszGroups,
5498 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5499 NULL);
5500 if (RT_SUCCESS(rc))
5501 {
5502 rc = RTLogFlags(pLogger, pszFlags);
5503 NOREF(pszDest);
5504 if (RT_SUCCESS(rc))
5505 {
5506 switch (pReq->u.In.fWhich)
5507 {
5508 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5509 pLogger = RTLogSetDefaultInstance(pLogger);
5510 break;
5511 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5512 pLogger = RTLogRelSetDefaultInstance(pLogger);
5513 break;
5514 }
5515 }
5516 RTLogDestroy(pLogger);
5517 }
5518 }
5519 break;
5520 }
5521
5522 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5523 switch (pReq->u.In.fWhich)
5524 {
5525 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5526 pLogger = RTLogSetDefaultInstance(NULL);
5527 break;
5528 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5529 pLogger = RTLogRelSetDefaultInstance(NULL);
5530 break;
5531 }
5532 rc = RTLogDestroy(pLogger);
5533 break;
5534
5535 default:
5536 {
5537 rc = VERR_INTERNAL_ERROR;
5538 break;
5539 }
5540 }
5541
5542 return rc;
5543}
5544
5545
5546/**
5547 * Implements the MSR prober operations.
5548 *
5549 * @returns VBox status code.
5550 * @param pDevExt The device extension.
5551 * @param pReq The request.
5552 */
5553static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5554{
5555#ifdef SUPDRV_WITH_MSR_PROBER
5556 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5557 int rc;
5558
5559 switch (pReq->u.In.enmOp)
5560 {
5561 case SUPMSRPROBEROP_READ:
5562 {
5563 uint64_t uValue;
5564 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5565 if (RT_SUCCESS(rc))
5566 {
5567 pReq->u.Out.uResults.Read.uValue = uValue;
5568 pReq->u.Out.uResults.Read.fGp = false;
5569 }
5570 else if (rc == VERR_ACCESS_DENIED)
5571 {
5572 pReq->u.Out.uResults.Read.uValue = 0;
5573 pReq->u.Out.uResults.Read.fGp = true;
5574 rc = VINF_SUCCESS;
5575 }
5576 break;
5577 }
5578
5579 case SUPMSRPROBEROP_WRITE:
5580 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5581 if (RT_SUCCESS(rc))
5582 pReq->u.Out.uResults.Write.fGp = false;
5583 else if (rc == VERR_ACCESS_DENIED)
5584 {
5585 pReq->u.Out.uResults.Write.fGp = true;
5586 rc = VINF_SUCCESS;
5587 }
5588 break;
5589
5590 case SUPMSRPROBEROP_MODIFY:
5591 case SUPMSRPROBEROP_MODIFY_FASTER:
5592 rc = supdrvOSMsrProberModify(idCpu, pReq);
5593 break;
5594
5595 default:
5596 return VERR_INVALID_FUNCTION;
5597 }
5598 return rc;
5599#else
5600 return VERR_NOT_IMPLEMENTED;
5601#endif
5602}
5603
5604
5605#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5606/**
5607 * Switches the TSC-delta measurement thread into the butchered state.
5608 *
5609 * @returns VBox status code.
5610 * @param pDevExt Pointer to the device instance data.
5611 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5612 * @param pszFailed An error message to log.
5613 * @param rcFailed The error code to exit the thread with.
5614 */
5615static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5616{
5617 if (!fSpinlockHeld)
5618 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5619
5620 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5621 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5622 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5623 return rcFailed;
5624}
5625
5626
5627/**
5628 * The TSC-delta measurement thread.
5629 *
5630 * @returns VBox status code.
5631 * @param hThread The thread handle.
5632 * @param pvUser Opaque pointer to the device instance data.
5633 */
5634static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5635{
5636 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5637 static uint32_t cTimesMeasured = 0;
5638 uint32_t cConsecutiveTimeouts = 0;
5639 int rc = VERR_INTERNAL_ERROR_2;
5640 for (;;)
5641 {
5642 /*
5643 * Switch on the current state.
5644 */
5645 SUPDRVTSCDELTASTATE enmState;
5646 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5647 enmState = pDevExt->enmTscDeltaState;
5648 switch (enmState)
5649 {
5650 case kSupDrvTscDeltaState_Creating:
5651 {
5652 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5653 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5654 if (RT_FAILURE(rc))
5655 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5656 /* fall thru */
5657 }
5658
5659 case kSupDrvTscDeltaState_Listening:
5660 {
5661 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5662
5663 /* Simple adaptive timeout. */
5664 if (cConsecutiveTimeouts++ == 10)
5665 {
5666 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5667 pDevExt->cMsTscDeltaTimeout = 10;
5668 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5669 pDevExt->cMsTscDeltaTimeout = 100;
5670 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5671 pDevExt->cMsTscDeltaTimeout = 500;
5672 cConsecutiveTimeouts = 0;
5673 }
5674 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5675 if ( RT_FAILURE(rc)
5676 && rc != VERR_TIMEOUT)
5677 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5678 break;
5679 }
5680
5681 case kSupDrvTscDeltaState_WaitAndMeasure:
5682 {
5683 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5684 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5685 if (RT_FAILURE(rc))
5686 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5687 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5688 pDevExt->cMsTscDeltaTimeout = 1;
5689 RTThreadSleep(10);
5690 /* fall thru */
5691 }
5692
5693 case kSupDrvTscDeltaState_Measuring:
5694 {
5695 cConsecutiveTimeouts = 0;
5696 if (!cTimesMeasured++)
5697 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5698 else
5699 {
5700 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5701 unsigned iCpu;
5702
5703 if (cTimesMeasured == UINT32_MAX)
5704 cTimesMeasured = 1;
5705
5706 /* Measure TSC-deltas only for the CPUs that are in the set. */
5707 rc = VINF_SUCCESS;
5708 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5709 {
5710 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5711 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5712 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5713 {
5714 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5715 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5716 }
5717 }
5718 }
5719 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5720 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5721 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5722 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5723 pDevExt->rcTscDelta = rc;
5724 break;
5725 }
5726
5727 case kSupDrvTscDeltaState_Terminating:
5728 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5729 return VINF_SUCCESS;
5730
5731 case kSupDrvTscDeltaState_Butchered:
5732 default:
5733 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5734 }
5735 }
5736
5737 return rc;
5738}
5739
5740
5741/**
5742 * Waits for the TSC-delta measurement thread to respond to a state change.
5743 *
5744 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5745 * other error code on internal error.
5746 *
5747 * @param pThis Pointer to the grant service instance data.
5748 * @param enmCurState The current state.
5749 * @param enmNewState The new state we're waiting for it to enter.
5750 */
5751static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5752{
5753 /*
5754 * Wait a short while for the expected state transition.
5755 */
5756 int rc;
5757 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5758 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5759 if (pDevExt->enmTscDeltaState == enmNewState)
5760 {
5761 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5762 rc = VINF_SUCCESS;
5763 }
5764 else if (pDevExt->enmTscDeltaState == enmCurState)
5765 {
5766 /*
5767 * Wait longer if the state has not yet transitioned to the one we want.
5768 */
5769 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5770 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5771 if ( RT_SUCCESS(rc)
5772 || rc == VERR_TIMEOUT)
5773 {
5774 /*
5775 * Check the state whether we've succeeded.
5776 */
5777 SUPDRVTSCDELTASTATE enmState;
5778 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5779 enmState = pDevExt->enmTscDeltaState;
5780 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5781 if (enmState == enmNewState)
5782 rc = VINF_SUCCESS;
5783 else if (enmState == enmCurState)
5784 {
5785 rc = VERR_TIMEOUT;
5786 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5787 enmNewState));
5788 }
5789 else
5790 {
5791 rc = VERR_INTERNAL_ERROR;
5792 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5793 enmState, enmNewState));
5794 }
5795 }
5796 else
5797 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5798 }
5799 else
5800 {
5801 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5802 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5803 rc = VERR_INTERNAL_ERROR;
5804 }
5805
5806 return rc;
5807}
5808
5809
5810/**
5811 * Terminates the TSC-delta measurement thread.
5812 *
5813 * @param pDevExt Pointer to the device instance data.
5814 */
5815static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5816{
5817 int rc;
5818 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5819 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5820 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5821 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5822 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5823 if (RT_FAILURE(rc))
5824 {
5825 /* Signal a few more times before giving up. */
5826 int cTries = 5;
5827 while (--cTries > 0)
5828 {
5829 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5830 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5831 if (rc != VERR_TIMEOUT)
5832 break;
5833 }
5834 }
5835}
5836
5837
5838/**
5839 * Initializes and spawns the TSC-delta measurement thread.
5840 *
5841 * A thread is required for servicing re-measurement requests from events like
5842 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5843 * under all contexts on all OSs.
5844 *
5845 * @returns VBox status code.
5846 * @param pDevExt Pointer to the device instance data.
5847 *
5848 * @remarks Must only be called -after- initializing GIP and setting up MP
5849 * notifications!
5850 */
5851static int supdrvTscDeltaInit(PSUPDRVDEVEXT pDevExt)
5852{
5853 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5854 if (RT_SUCCESS(rc))
5855 {
5856 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5857 if (RT_SUCCESS(rc))
5858 {
5859 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5860 pDevExt->cMsTscDeltaTimeout = 1;
5861 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5862 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5863 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5864 if (RT_SUCCESS(rc))
5865 {
5866 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5867 if (RT_SUCCESS(rc))
5868 {
5869 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5870 return rc;
5871 }
5872
5873 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5874 supdrvTscDeltaThreadTerminate(pDevExt);
5875 }
5876 else
5877 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5878 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5879 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5880 }
5881 else
5882 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5883 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5884 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5885 }
5886 else
5887 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5888
5889 return rc;
5890}
5891
5892
5893/**
5894 * Terminates the TSC-delta measurement thread and cleanup.
5895 *
5896 * @param pDevExt Pointer to the device instance data.
5897 */
5898static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5899{
5900 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5901 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5902 {
5903 supdrvTscDeltaThreadTerminate(pDevExt);
5904 }
5905
5906 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5907 {
5908 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5909 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5910 }
5911
5912 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5913 {
5914 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5915 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5916 }
5917
5918 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5919}
5920#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5921
5922
5923#if 0
5924/**
5925 * Measures the nominal TSC frequency.
5926 *
5927 * Uses a busy-wait method for the async. case as it is intended to help push
5928 * the CPU frequency up, while for the invariant cases using a sleeping method.
5929 *
5930 * @returns VBox status code.
5931 * @param pGip Pointer to the GIP.
5932 *
5933 * @remarks Must be called only after measuring the TSC deltas.
5934 */
5935static int supdrvGipMeasureNominalTscFreq(PSUPGLOBALINFOPAGE pGip)
5936{
5937 int cTriesLeft = 4;
5938
5939 /* Assert order. */
5940 AssertReturn(pGip, VERR_INVALID_PARAMETER);
5941 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
5942
5943 while (cTriesLeft-- > 0)
5944 {
5945 RTCCUINTREG uFlags;
5946 uint64_t u64NanoTs;
5947 uint64_t u64NanoTsAfter;
5948 uint64_t u64TscBefore;
5949 uint64_t u64TscAfter;
5950 uint8_t idApicBefore;
5951 uint8_t idApicAfter;
5952
5953 /*
5954 * Synchronize with the host OS clock tick before reading the TSC.
5955 * Especially important on Windows where the granularity is terrible.
5956 */
5957 u64NanoTs = RTTimeSystemNanoTS();
5958 while (RTTimeSystemNanoTS() == u64NanoTs)
5959 ASMNopPause();
5960
5961 uFlags = ASMIntDisableFlags();
5962 idApicBefore = ASMGetApicId();
5963 u64TscBefore = ASMReadTSC();
5964 u64NanoTs = RTTimeSystemNanoTS();
5965 ASMSetFlags(uFlags);
5966
5967 if (supdrvIsInvariantTsc())
5968 {
5969 /*
5970 * Sleep wait since the TSC frequency is constant, eases host load.
5971 * Shorter interval produces more variance in the frequency (esp. Windows).
5972 */
5973 RTThreadSleep(200); /* Sleeping shorter produces a tad more variance in the frequency than I'd like. */
5974 u64NanoTsAfter = RTTimeSystemNanoTS();
5975 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
5976 ASMNopPause();
5977 u64NanoTsAfter = RTTimeSystemNanoTS();
5978 }
5979 else
5980 {
5981 /* Busy wait, ramps up the CPU frequency on async systems. */
5982 for (;;)
5983 {
5984 u64NanoTsAfter = RTTimeSystemNanoTS();
5985 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTs)
5986 ASMNopPause();
5987 else
5988 break;
5989 }
5990 }
5991
5992 uFlags = ASMIntDisableFlags();
5993 u64TscAfter = ASMReadTSC();
5994 idApicAfter = ASMGetApicId();
5995 ASMSetFlags(uFlags);
5996
5997 /** @todo replace with enum check. */
5998 if (supdrvIsInvariantTsc())
5999 {
6000 PSUPGIPCPU pGipCpuBefore;
6001 PSUPGIPCPU pGipCpuAfter;
6002
6003 uint16_t iCpuBefore = pGip->aiCpuFromApicId[idApicBefore];
6004 uint16_t iCpuAfter = pGip->aiCpuFromApicId[idApicAfter];
6005 AssertMsgReturn(iCpuBefore < pGip->cCpus, ("iCpuBefore=%u cCpus=%u\n", iCpuBefore, pGip->cCpus), VERR_INVALID_CPU_INDEX);
6006 AssertMsgReturn(iCpuAfter < pGip->cCpus, ("iCpuAfter=%u cCpus=%u\n", iCpuAfter, pGip->cCpus), VERR_INVALID_CPU_INDEX);
6007 pGipCpuBefore = &pGip->aCPUs[iCpuBefore];
6008 pGipCpuAfter = &pGip->aCPUs[iCpuAfter];
6009
6010 if ( pGipCpuBefore->i64TSCDelta != INT64_MAX
6011 && pGipCpuAfter->i64TSCDelta != INT64_MAX)
6012 {
6013 u64TscBefore -= pGipCpuBefore->i64TSCDelta;
6014 u64TscAfter -= pGipCpuAfter->i64TSCDelta;
6015
6016 SUPR0Printf("vboxdrv: TSC frequency is %lu Hz - invariant, kernel timer granularity is %lu Ns\n",
6017 ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTs),
6018 RTTimerGetSystemGranularity());
6019 return VINF_SUCCESS;
6020 }
6021 else
6022 {
6023 SUPR0Printf("vboxdrv: supdrvGipMeasureNominalTscFreq: iCpuBefore=%u iCpuAfter=%u cTriesLeft=%u\n", iCpuBefore,
6024 iCpuAfter, cTriesLeft);
6025 }
6026 }
6027 else
6028 {
6029 SUPR0Printf("vboxdrv: TSC frequency is %lu Hz - maybe variant, kernel timer granularity is %lu Ns\n",
6030 ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTs),
6031 RTTimerGetSystemGranularity());
6032 return VINF_SUCCESS;
6033 }
6034 }
6035
6036 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6037}
6038#endif
6039
6040
6041/**
6042 * Creates the GIP.
6043 *
6044 * @returns VBox status code.
6045 * @param pDevExt Instance data. GIP stuff may be updated.
6046 */
6047static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6048{
6049 PSUPGLOBALINFOPAGE pGip;
6050 RTHCPHYS HCPhysGip;
6051 uint32_t u32SystemResolution;
6052 uint32_t u32Interval;
6053 uint32_t u32MinInterval;
6054 uint32_t uMod;
6055 unsigned cCpus;
6056 int rc;
6057
6058 LogFlow(("supdrvGipCreate:\n"));
6059
6060 /* Assert order. */
6061 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6062 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6063 Assert(!pDevExt->pGipTimer);
6064
6065 /*
6066 * Check the CPU count.
6067 */
6068 cCpus = RTMpGetArraySize();
6069 if ( cCpus > RTCPUSET_MAX_CPUS
6070 || cCpus > 256 /*ApicId is used for the mappings*/)
6071 {
6072 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6073 return VERR_TOO_MANY_CPUS;
6074 }
6075
6076 /*
6077 * Allocate a contiguous set of pages with a default kernel mapping.
6078 */
6079 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6080 if (RT_FAILURE(rc))
6081 {
6082 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6083 return rc;
6084 }
6085 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6086 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6087
6088 /*
6089 * Find a reasonable update interval and initialize the structure.
6090 */
6091 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6092 * See @bugref{6710}. */
6093 u32MinInterval = RT_NS_10MS;
6094 u32SystemResolution = RTTimerGetSystemGranularity();
6095 u32Interval = u32MinInterval;
6096 uMod = u32MinInterval % u32SystemResolution;
6097 if (uMod)
6098 u32Interval += u32SystemResolution - uMod;
6099
6100 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6101
6102#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6103 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6104 rc = supdrvTscDeltaInit(pDevExt);
6105#endif
6106 if (RT_SUCCESS(rc))
6107 {
6108 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6109 if (RT_SUCCESS(rc))
6110 {
6111 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6112 if (RT_SUCCESS(rc))
6113 {
6114#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6115 /*
6116 * Measure the TSC deltas now that we have MP notifications.
6117 */
6118 int cTries = 5;
6119 uint16_t iCpu;
6120 do
6121 {
6122 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6123 if (rc != VERR_TRY_AGAIN)
6124 break;
6125 } while (--cTries > 0);
6126 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6127 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6128#endif
6129
6130#if 0
6131 /** @todo refactor later and use the nominal TSC rate for invariant case as
6132 * the real and constant TSC rate. */
6133 supdrvGipMeasureNominalTscFreq(pGip);
6134#endif
6135
6136 /*
6137 * Create the timer.
6138 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6139 */
6140 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6141 {
6142 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
6143 if (rc == VERR_NOT_SUPPORTED)
6144 {
6145 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6146 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6147 }
6148 }
6149 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6150 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6151 if (RT_SUCCESS(rc))
6152 {
6153 /*
6154 * We're good.
6155 */
6156 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6157 g_pSUPGlobalInfoPage = pGip;
6158 return VINF_SUCCESS;
6159 }
6160 else
6161 {
6162 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6163 Assert(!pDevExt->pGipTimer);
6164 }
6165 }
6166 else
6167 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6168 }
6169 else
6170 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6171 }
6172 else
6173 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6174
6175 supdrvGipDestroy(pDevExt);
6176 return rc;
6177}
6178
6179
6180/**
6181 * Terminates the GIP.
6182 *
6183 * @param pDevExt Instance data. GIP stuff may be updated.
6184 */
6185static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6186{
6187 int rc;
6188#ifdef DEBUG_DARWIN_GIP
6189 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6190 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6191 pDevExt->pGipTimer, pDevExt->GipMemObj));
6192#endif
6193
6194 /*
6195 * Stop receiving MP notifications before tearing anything else down.
6196 */
6197 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6198
6199#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6200 /*
6201 * Terminate the TSC-delta measurement thread and resources.
6202 */
6203 supdrvTscDeltaTerm(pDevExt);
6204#endif
6205
6206 /*
6207 * Invalid the GIP data.
6208 */
6209 if (pDevExt->pGip)
6210 {
6211 supdrvGipTerm(pDevExt->pGip);
6212 pDevExt->pGip = NULL;
6213 }
6214 g_pSUPGlobalInfoPage = NULL;
6215
6216 /*
6217 * Destroy the timer and free the GIP memory object.
6218 */
6219 if (pDevExt->pGipTimer)
6220 {
6221 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6222 pDevExt->pGipTimer = NULL;
6223 }
6224
6225 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6226 {
6227 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6228 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6229 }
6230
6231 /*
6232 * Finally, make sure we've release the system timer resolution request
6233 * if one actually succeeded and is still pending.
6234 */
6235 if (pDevExt->u32SystemTimerGranularityGrant)
6236 {
6237 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
6238 pDevExt->u32SystemTimerGranularityGrant = 0;
6239 }
6240}
6241
6242
6243/**
6244 * Timer callback function sync GIP mode.
6245 * @param pTimer The timer.
6246 * @param pvUser The device extension.
6247 */
6248static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6249{
6250 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6251 uint64_t u64TSC = ASMReadTSC();
6252 uint64_t NanoTS = RTTimeSystemNanoTS();
6253 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6254
6255 if (supdrvIsInvariantTsc())
6256 {
6257 PSUPGIPCPU pGipCpu;
6258 unsigned iCpu;
6259 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6260 uint8_t idApic = ASMGetApicId();
6261
6262 Assert(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId));
6263 iCpu = pGip->aiCpuFromApicId[idApic];
6264 Assert(iCpu < pGip->cCpus);
6265 pGipCpu = &pGip->aCPUs[iCpu];
6266 Assert(pGipCpu->idCpu == RTMpCpuId());
6267
6268 /*
6269 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6270 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6271 * affected a bit until we get proper TSC deltas than implementing options like
6272 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6273 *
6274 * The likely hood of this happening is really low. On Windows, Linux timers
6275 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6276 */
6277 if (pGipCpu->i64TSCDelta != INT64_MAX)
6278 u64TSC -= pGipCpu->i64TSCDelta;
6279 }
6280
6281 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, NIL_RTCPUID, iTick);
6282
6283 ASMSetFlags(fOldFlags);
6284}
6285
6286
6287/**
6288 * Timer callback function for async GIP mode.
6289 * @param pTimer The timer.
6290 * @param pvUser The device extension.
6291 */
6292static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6293{
6294 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6295 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6296 RTCPUID idCpu = RTMpCpuId();
6297 uint64_t u64TSC = ASMReadTSC();
6298 uint64_t NanoTS = RTTimeSystemNanoTS();
6299
6300 /** @todo reset the transaction number and whatnot when iTick == 1. */
6301 if (pDevExt->idGipMaster == idCpu)
6302 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6303 else
6304 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6305
6306 ASMSetFlags(fOldFlags);
6307}
6308
6309
6310/**
6311 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6312 *
6313 * @returns Index of the CPU in the cache set.
6314 * @param pGip The GIP.
6315 * @param idCpu The CPU ID.
6316 */
6317static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6318{
6319 uint32_t i, cTries;
6320
6321 /*
6322 * ASSUMES that CPU IDs are constant.
6323 */
6324 for (i = 0; i < pGip->cCpus; i++)
6325 if (pGip->aCPUs[i].idCpu == idCpu)
6326 return i;
6327
6328 cTries = 0;
6329 do
6330 {
6331 for (i = 0; i < pGip->cCpus; i++)
6332 {
6333 bool fRc;
6334 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6335 if (fRc)
6336 return i;
6337 }
6338 } while (cTries++ < 32);
6339 AssertReleaseFailed();
6340 return i - 1;
6341}
6342
6343
6344/**
6345 * The calling CPU should be accounted as online, update GIP accordingly.
6346 *
6347 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6348 *
6349 * @param pDevExt The device extension.
6350 * @param idCpu The CPU ID.
6351 */
6352static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6353{
6354 int iCpuSet = 0;
6355 uint16_t idApic = UINT16_MAX;
6356 uint32_t i = 0;
6357 uint64_t u64NanoTS = 0;
6358 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6359
6360 AssertPtrReturnVoid(pGip);
6361 AssertRelease(idCpu == RTMpCpuId());
6362 Assert(pGip->cPossibleCpus == RTMpGetCount());
6363
6364 /*
6365 * Do this behind a spinlock with interrupts disabled as this can fire
6366 * on all CPUs simultaneously, see @bugref{6110}.
6367 */
6368 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6369
6370 /*
6371 * Update the globals.
6372 */
6373 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6374 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6375 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6376 if (iCpuSet >= 0)
6377 {
6378 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6379 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6380 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6381 }
6382
6383 /*
6384 * Update the entry.
6385 */
6386 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6387 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6388 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
6389 idApic = ASMGetApicId();
6390 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6391 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6392 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6393
6394 /*
6395 * Update the APIC ID and CPU set index mappings.
6396 */
6397 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6398 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6399
6400 /* Update the Mp online/offline counter. */
6401 ASMAtomicIncU32(&g_cMpOnOffEvents);
6402
6403#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6404 /*
6405 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6406 *
6407 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6408 * update the state and it'll get serviced when the thread's listening interval times out.
6409 */
6410 if (supdrvIsInvariantTsc())
6411 {
6412 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6413 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6414 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6415 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6416 {
6417 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6418 }
6419 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6420 }
6421#endif
6422
6423 /* commit it */
6424 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6425
6426 RTSpinlockRelease(pDevExt->hGipSpinlock);
6427}
6428
6429
6430/**
6431 * The CPU should be accounted as offline, update the GIP accordingly.
6432 *
6433 * This is used by supdrvGipMpEvent.
6434 *
6435 * @param pDevExt The device extension.
6436 * @param idCpu The CPU ID.
6437 */
6438static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6439{
6440 int iCpuSet;
6441 unsigned i;
6442
6443 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6444
6445 AssertPtrReturnVoid(pGip);
6446 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6447
6448 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6449 AssertReturnVoid(iCpuSet >= 0);
6450
6451 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6452 AssertReturnVoid(i < pGip->cCpus);
6453 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6454
6455 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6456 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6457
6458 /* Update the Mp online/offline counter. */
6459 ASMAtomicIncU32(&g_cMpOnOffEvents);
6460
6461 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6462 if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
6463 {
6464 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6465 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6466 }
6467
6468 /* Reset the TSC delta, we will recalculate it lazily. */
6469 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6470
6471 /* commit it */
6472 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6473
6474 RTSpinlockRelease(pDevExt->hGipSpinlock);
6475}
6476
6477
6478/**
6479 * Multiprocessor event notification callback.
6480 *
6481 * This is used to make sure that the GIP master gets passed on to
6482 * another CPU. It also updates the associated CPU data.
6483 *
6484 * @param enmEvent The event.
6485 * @param idCpu The cpu it applies to.
6486 * @param pvUser Pointer to the device extension.
6487 *
6488 * @remarks This function -must- fire on the newly online'd CPU for the
6489 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6490 * RTMPEVENT_OFFLINE case.
6491 */
6492static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6493{
6494 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6495 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6496
6497 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6498
6499 /*
6500 * Update the GIP CPU data.
6501 */
6502 if (pGip)
6503 {
6504 switch (enmEvent)
6505 {
6506 case RTMPEVENT_ONLINE:
6507 AssertRelease(idCpu == RTMpCpuId());
6508 supdrvGipMpEventOnline(pDevExt, idCpu);
6509 break;
6510 case RTMPEVENT_OFFLINE:
6511 supdrvGipMpEventOffline(pDevExt, idCpu);
6512 break;
6513 }
6514 }
6515
6516 /*
6517 * Make sure there is a master GIP.
6518 */
6519 if (enmEvent == RTMPEVENT_OFFLINE)
6520 {
6521 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6522 if (idGipMaster == idCpu)
6523 {
6524 /*
6525 * Find a new GIP master.
6526 */
6527 bool fIgnored;
6528 unsigned i;
6529 int64_t iTSCDelta;
6530 uint32_t idxNewGipMaster;
6531 RTCPUID idNewGipMaster = NIL_RTCPUID;
6532 RTCPUSET OnlineCpus;
6533 RTMpGetOnlineSet(&OnlineCpus);
6534
6535 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6536 {
6537 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6538 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6539 && idCurCpu != idGipMaster)
6540 {
6541 idNewGipMaster = idCurCpu;
6542 break;
6543 }
6544 }
6545
6546 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6547 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6548 NOREF(fIgnored);
6549
6550 /*
6551 * Adjust all the TSC deltas against the new GIP master.
6552 */
6553 if (pGip)
6554 {
6555 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6556 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6557 Assert(iTSCDelta != UINT64_MAX);
6558 for (i = 0; i < pGip->cCpus; i++)
6559 {
6560 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6561 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6562 if (iWorkerDelta != INT64_MAX)
6563 iWorkerDelta -= iTSCDelta;
6564 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6565 }
6566 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6567 }
6568 }
6569 }
6570}
6571
6572
6573/**
6574 * Returns whether the host CPU sports an invariant TSC or not.
6575 *
6576 * @returns true if invariant TSC is supported, false otherwise.
6577 */
6578static bool supdrvIsInvariantTsc(void)
6579{
6580 static bool s_fQueried = false;
6581 static bool s_fIsInvariantTsc = false;
6582 if (!s_fQueried)
6583 {
6584 uint32_t uEax, uEbx, uEcx, uEdx;
6585 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
6586 if (uEax >= 0x80000007)
6587 {
6588 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
6589 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
6590 s_fIsInvariantTsc = true;
6591 }
6592 s_fQueried = true;
6593 }
6594
6595 return s_fIsInvariantTsc;
6596}
6597
6598
/**
 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
 * compute the delta between them.
 *
 * @param   idCpu       The CPU we are current scheduled on.
 * @param   pvUser1     Opaque pointer to the GIP.
 * @param   pvUser2     Opaque pointer to the worker Cpu Id.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *     read the TSC at exactly the same time on both the master and the worker
 *     CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
 *     pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
 *     try to minimize the measurement error by computing the minimum read time
 *     of the compare statement in the worker by taking TSC measurements across
 *     it.
 *
 *     We ignore the first few runs of the loop in order to prime the cache.
 *     Also, be careful about using 'pause' instruction in critical busy-wait
 *     loops in this code - it can cause undesired behaviour with
 *     hyperthreading.
 *
 *     It must be noted that the computed minimum read time is mostly to
 *     eliminate huge deltas when the worker is too early and doesn't by itself
 *     help produce more accurate deltas. We allow two times the computed
 *     minimum as an arbitrary acceptable threshold. Therefore, it is still
 *     possible to get negative deltas where there are none when the worker is
 *     earlier. As long as these occasional negative deltas are lower than the
 *     time it takes to exit guest-context and the OS to reschedule EMT on a
 *     different CPU we won't expose a TSC that jumped backwards. It is because
 *     of the existence of the negative deltas we don't recompute the delta with
 *     the master and worker interchanged to eliminate the remaining measurement
 *     error.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    uint32_t *pidWorker = (uint32_t *)pvUser2;
    /* The master CPU id was published by supdrvMeasureTscDeltaOne() before firing this callback. */
    RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
    unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
    unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
    PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
    PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
    int cTriesLeft = 12;

    /* This callback fires on every online CPU; only the master and the chosen
       worker participate, everyone else bails out right away. */
    if (   idCpu != idMaster
        && idCpu != *pidWorker)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        uint64_t uTscNow;
        uint64_t uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master: announce itself, then wait (bounded by cWaitTicks) for the worker to check in. */
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker: wait (bounded) for the master's announcement, then acknowledge. */
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Timed out: flag measurement failure on the worker's delta. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    /* The delta must have been reset to "unmeasured" by the initiator before we got here. */
    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    while (cTriesLeft-- > 0)
    {
        unsigned i;
        uint64_t uMinCmpReadTime = UINT64_MAX;
        for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
        {
            RTCCUINTREG uFlags = ASMIntDisableFlags(); /* Disable interrupts per-iteration, see @bugref{6710} comment #38. */
            if (idCpu == idMaster)
            {
                /*
                 * The master.
                 */
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
                /* Busy-wait (deliberately without 'pause', see @remarks) for the worker's ready ack. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
                    ;

                /* Publish the master's TSC sample; loop guards against the (theoretical)
                   case of the TSC reading exactly the reserved marker value. */
                do
                {
                    ASMSerializeInstruction();
                    ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ;

                /* Only use samples taken after the cache-priming and read-time calibration phases. */
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                    {
                        /* Keep the smallest (most negative / least positive) delta observed. */
                        int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
                        if (iDelta < pGipCpuWorker->i64TSCDelta)
                            pGipCpuWorker->i64TSCDelta = iDelta;
                    }
                }

                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
            }
            else
            {
                /*
                 * The worker.
                 */
                uint64_t uTscWorker;
                uint64_t uTscWorkerFlushed;
                uint64_t uCmpReadTime;

                ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
                    ;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);

                /*
                 * Keep reading the TSC until we notice that the master has read his. Reading
                 * the TSC -after- the master has updated the memory is way too late. We thus
                 * compensate by trying to measure how long it took for the worker to notice
                 * the memory flushed from the master.
                 */
                do
                {
                    ASMSerializeInstruction();
                    uTscWorker = ASMReadTSC();
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMSerializeInstruction();
                uTscWorkerFlushed = ASMReadTSC();

                uCmpReadTime = uTscWorkerFlushed - uTscWorker;
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                    if (uCmpReadTime < (uMinCmpReadTime << 1))
                    {
                        /* The read looks fast enough to trust; publish the worker sample. */
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                        if (uCmpReadTime < uMinCmpReadTime)
                            uMinCmpReadTime = uCmpReadTime;
                    }
                    else
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
                }
                else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
                {
                    /* Calibration phase: just learn the minimum compare/read time. */
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }

                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ASMNopPause();
            }

            ASMSetFlags(uFlags);
        }

        /* A usable delta was recorded (by the master); no need for further retries. */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
            break;
    }
}
6792
6793
6794/**
6795 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
6796 * synchronization variable. Optionally also clears the deltas on the per-CPU
6797 * GIP struct. as well.
6798 *
6799 * @param pGip Pointer to the GIP.
6800 * @param fClearDeltas Whether the deltas are also to be cleared.
6801 */
6802DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
6803{
6804 unsigned iCpu;
6805 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6806 {
6807 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6808 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
6809 if (fClearDeltas)
6810 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
6811 }
6812 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6813}
6814
6815
/**
 * Measures the TSC delta between the master GIP CPU and one specified worker
 * CPU.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   idxWorker       The index of the worker CPU from the GIP's array of
 *                          CPUs.
 *
 * @remarks This can be called with preemption disabled!
 *
 * NOTE(review): the initiator acquisition loop below calls RTThreadSleep(),
 * which looks inconsistent with the "can be called with preemption disabled"
 * remark — confirm the callers' context, or that the contended path cannot be
 * hit with preemption disabled.
 */
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
{
    int rc;
    PSUPGLOBALINFOPAGE pGip;
    PSUPGIPCPU pGipCpuWorker;
    RTCPUID idMaster;

    AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
    AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);

    pGip = pDevExt->pGip;
    idMaster = pDevExt->idGipMaster;
    pGipCpuWorker = &pGip->aCPUs[idxWorker];

    /* Measuring the master against itself trivially yields a zero delta. */
    if (pGipCpuWorker->idCpu == idMaster)
    {
        ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
        return VINF_SUCCESS;
    }

    /* Set the master TSC as the initiator.  This also serializes concurrent
       measurements: only one master/worker pair runs at a time. */
    while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
    {
        /*
         * Sleep here rather than spin as there is a parallel measurement
         * being executed and that can take a good while to be done.
         */
        RTThreadSleep(1);
    }

    if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
    {
        /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
        ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
        ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
        rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
        if (RT_SUCCESS(rc))
        {
            /* The callback leaves the delta at INT64_MAX when it could not get a sample. */
            if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
                rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
        }
    }
    else
        rc = VERR_CPU_OFFLINE;

    /* Release the initiator slot for the next measurement. */
    ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
    return rc;
}
6875
6876
6877/**
6878 * Measures the TSC deltas between CPUs.
6879 *
6880 * @param pDevExt Pointer to the device instance data.
6881 * @param pidxMaster Where to store the index of the chosen master TSC if we
6882 * managed to determine the TSC deltas successfully.
6883 * Optional, can be NULL.
6884 *
6885 * @returns VBox status code.
6886 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
6887 * idCpu, GIP's online CPU set which are populated in
6888 * supdrvGipInitOnCpu().
6889 */
6890static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
6891{
6892 PSUPGIPCPU pGipCpuMaster;
6893 unsigned iCpu;
6894 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6895 uint32_t idxMaster = UINT32_MAX;
6896 int rc = VINF_SUCCESS;
6897 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
6898 uint32_t cOnlineCpus = pGip->cOnlineCpus;
6899
6900 /*
6901 * If we determined the TSC is async., don't bother with measuring deltas.
6902 */
6903 if (RT_UNLIKELY(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC))
6904 return VINF_SUCCESS;
6905
6906 /*
6907 * Pick the first CPU online as the master TSC and make it the new GIP master based
6908 * on the APIC ID.
6909 *
6910 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
6911 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
6912 * master as this point since the sync/async timer isn't created yet.
6913 */
6914 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
6915 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
6916 {
6917 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
6918 if (idxCpu != UINT16_MAX)
6919 {
6920 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
6921 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
6922 {
6923 idxMaster = idxCpu;
6924 pGipCpu->i64TSCDelta = 0;
6925 break;
6926 }
6927 }
6928 }
6929 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
6930 pGipCpuMaster = &pGip->aCPUs[idxMaster];
6931 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6932
6933 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
6934 if (pGip->cOnlineCpus <= 1)
6935 {
6936 if (pidxMaster)
6937 *pidxMaster = idxMaster;
6938 return VINF_SUCCESS;
6939 }
6940
6941 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6942 {
6943 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6944 if ( iCpu != idxMaster
6945 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6946 {
6947 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6948 if (RT_FAILURE(rc))
6949 {
6950 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
6951 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6952 break;
6953 }
6954
6955 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
6956 {
6957 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
6958 rc = VERR_TRY_AGAIN;
6959 break;
6960 }
6961 }
6962 }
6963
6964 if ( RT_SUCCESS(rc)
6965 && !pGipCpuMaster->i64TSCDelta
6966 && pidxMaster)
6967 {
6968 *pidxMaster = idxMaster;
6969 }
6970 return rc;
6971}
6972
6973
6974/**
6975 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
6976 *
6977 * @param idCpu Ignored.
6978 * @param pvUser1 Where to put the TSC.
6979 * @param pvUser2 Ignored.
6980 */
6981static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6982{
6983 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
6984}
6985
6986
6987/**
6988 * Determine if Async GIP mode is required because of TSC drift.
6989 *
6990 * When using the default/normal timer code it is essential that the time stamp counter
6991 * (TSC) runs never backwards, that is, a read operation to the counter should return
6992 * a bigger value than any previous read operation. This is guaranteed by the latest
6993 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
6994 * case we have to choose the asynchronous timer mode.
6995 *
6996 * @param poffMin Pointer to the determined difference between different cores.
6997 * @return false if the time stamp counters appear to be synchronized, true otherwise.
6998 */
6999static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7000{
7001 /*
7002 * Just iterate all the cpus 8 times and make sure that the TSC is
7003 * ever increasing. We don't bother taking TSC rollover into account.
7004 */
7005 int iEndCpu = RTMpGetArraySize();
7006 int iCpu;
7007 int cLoops = 8;
7008 bool fAsync = false;
7009 int rc = VINF_SUCCESS;
7010 uint64_t offMax = 0;
7011 uint64_t offMin = ~(uint64_t)0;
7012 uint64_t PrevTsc = ASMReadTSC();
7013
7014 while (cLoops-- > 0)
7015 {
7016 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7017 {
7018 uint64_t CurTsc;
7019 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7020 if (RT_SUCCESS(rc))
7021 {
7022 if (CurTsc <= PrevTsc)
7023 {
7024 fAsync = true;
7025 offMin = offMax = PrevTsc - CurTsc;
7026 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7027 iCpu, cLoops, CurTsc, PrevTsc));
7028 break;
7029 }
7030
7031 /* Gather statistics (except the first time). */
7032 if (iCpu != 0 || cLoops != 7)
7033 {
7034 uint64_t off = CurTsc - PrevTsc;
7035 if (off < offMin)
7036 offMin = off;
7037 if (off > offMax)
7038 offMax = off;
7039 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7040 }
7041
7042 /* Next */
7043 PrevTsc = CurTsc;
7044 }
7045 else if (rc == VERR_NOT_SUPPORTED)
7046 break;
7047 else
7048 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7049 }
7050
7051 /* broke out of the loop. */
7052 if (iCpu < iEndCpu)
7053 break;
7054 }
7055
7056 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7057 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7058 fAsync, iEndCpu, rc, offMin, offMax));
7059#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7060 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7061#endif
7062 return fAsync;
7063}
7064
7065
/**
 * Determine the GIP TSC mode.
 *
 * Returns SUPGIPMODE_ASYNC_TSC when per-CPU TSC skew is detected or likely
 * (forced by the OS/user, observed drift, or an AMD CPU with power management
 * but without the TscInvariant feature), otherwise SUPGIPMODE_SYNC_TSC.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
{
#if 0
    if (supdrvIsInvariantTsc())
        return SUPGIPMODE_SYNC_TSC; /** @todo Switch to SUPGIPMODE_INVARIANT_TSC later. */
#endif

    /*
     * On SMP we're faced with two problems:
     * (1) There might be a skew between the CPU, so that cpu0
     * returns a TSC that is slightly different from cpu1.
     * (2) Power management (and other things) may cause the TSC
     * to run at a non-constant speed, and cause the speed
     * to be different on the cpus. This will result in (1).
     *
     * So, on SMP systems we'll have to select the ASYNC update method
     * if there are symptoms of these problems.
     */
    if (RTMpGetCount() > 1)
    {
        uint32_t uEAX, uEBX, uECX, uEDX;
        uint64_t u64DiffCoresIgnored;

        /* Permit the user and/or the OS specific bits to force async mode. */
        if (supdrvOSGetForcedAsyncTscMode(pDevExt))
            return SUPGIPMODE_ASYNC_TSC;

        /* Try check for current differences between the cpus. */
        if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
            return SUPGIPMODE_ASYNC_TSC;

        /*
         * If the CPU supports power management and is an AMD one we
         * won't trust it unless it has the TscInvariant bit is set.
         */
        /* Check for "AuthenticAMD" */
        ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
        if (   uEAX >= 1
            && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
        {
            /* Check for APM support and that TscInvariant is cleared. */
            ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
            if (uEAX >= 0x80000007)
            {
                ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
                /* CPUID Fn8000_0007 EDX: bit 8 = TscInvariant, bits 1-5 = power mgmt features. */
                if (   !(uEDX & RT_BIT(8))/* TscInvariant */
                    && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                    return SUPGIPMODE_ASYNC_TSC;
            }
        }
    }
    return SUPGIPMODE_SYNC_TSC;
}
7125
7126
7127/**
7128 * Initializes per-CPU GIP information.
7129 *
7130 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7131 * @param pCpu Pointer to which GIP CPU to initalize.
7132 * @param u64NanoTS The current nanosecond timestamp.
7133 */
7134static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7135{
7136 pCpu->u32TransactionId = 2;
7137 pCpu->u64NanoTS = u64NanoTS;
7138 pCpu->u64TSC = ASMReadTSC();
7139 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7140 pCpu->i64TSCDelta = INT64_MAX;
7141
7142 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7143 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7144 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7145 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7146
7147 /*
7148 * We don't know the following values until we've executed updates.
7149 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7150 * the 2nd timer callout.
7151 */
7152 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7153 pCpu->u32UpdateIntervalTSC
7154 = pCpu->au32TSCHistory[0]
7155 = pCpu->au32TSCHistory[1]
7156 = pCpu->au32TSCHistory[2]
7157 = pCpu->au32TSCHistory[3]
7158 = pCpu->au32TSCHistory[4]
7159 = pCpu->au32TSCHistory[5]
7160 = pCpu->au32TSCHistory[6]
7161 = pCpu->au32TSCHistory[7]
7162 = (uint32_t)(_4G / pGip->u32UpdateHz);
7163}
7164
7165
/**
 * Initializes the GIP data.
 *
 * Zeroes and fills in the GIP header, populates the CPU set information,
 * initializes every per-CPU entry, links the GIP to the device extension and
 * allocates the TSC-delta synchronization variable.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    /* GIP size: header + per-CPU array, rounded up to whole pages. */
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);
    pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
    pGip->cCpus = (uint16_t)cCpus;
    pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz = uUpdateHz;
    pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus = RTMpGetOnlineCount();
    pGip->cPresentCpus = RTMpGetPresentCount();
    pGip->cPossibleCpus = RTMpGetCount();
    pGip->idCpuMax = RTMpGetMaxCpuId();
    /* UINT16_MAX marks "no GIP CPU entry" in both lookup tables. */
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i] = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;

    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;

    /*
     * Allocate the TSC delta sync. struct. on a separate cache line.
     *
     * NOTE(review): the RTMemAllocZ() result is not checked for NULL here;
     * later dereferences of g_pTscDeltaSync would fault on allocation failure.
     * Confirm whether callers guarantee this cannot fail, or whether the init
     * path should be able to report the error.
     */
    g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
    g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
    Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
}
7228
7229
7230/**
7231 * On CPU initialization callback for RTMpOnAll.
7232 *
7233 * @param idCpu The CPU ID.
7234 * @param pvUser1 The device extension.
7235 * @param pvUser2 The GIP.
7236 */
7237static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7238{
7239 /* This is good enough, even though it will update some of the globals a
7240 bit to much. */
7241 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7242}
7243
7244
7245/**
7246 * Invalidates the GIP data upon termination.
7247 *
7248 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7249 */
7250static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7251{
7252 unsigned i;
7253 pGip->u32Magic = 0;
7254 for (i = 0; i < pGip->cCpus; i++)
7255 {
7256 pGip->aCPUs[i].u64NanoTS = 0;
7257 pGip->aCPUs[i].u64TSC = 0;
7258 pGip->aCPUs[i].iTSCHistoryHead = 0;
7259 pGip->aCPUs[i].u64TSCSample = 0;
7260 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7261 }
7262
7263 if (g_pvTscDeltaSync)
7264 {
7265 RTMemFree(g_pvTscDeltaSync);
7266 g_pTscDeltaSync = NULL;
7267 g_pvTscDeltaSync = NULL;
7268 }
7269}
7270
7271
/**
 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
 * updates all the per cpu data except the transaction id.
 *
 * @param   pDevExt     The device extension.
 * @param   pGipCpu     Pointer to the per cpu data.
 * @param   u64NanoTS   The current time stamp.
 * @param   u64TSC      The current TSC.
 * @param   iTick       The current timer tick.
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t u64TSCDelta;
    uint32_t u32UpdateIntervalTSC;
    uint32_t u32UpdateIntervalTSCSlack;
    unsigned iTSCHistoryHead;
    uint64_t u64CpuHz;
    uint32_t u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* A delta that doesn't fit in 32 bits is bogus (missed ticks, suspend, ...);
       substitute the current average interval and count it as an error. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * On the 2nd and 3rd callout, reset the history with the current TSC
     * interval since the values entered by supdrvGipInit are totally off.
     * The interval on the 1st callout completely unreliable, the 2nd is a bit
     * better, while the 3rd should be most reliable.
     */
    /* Transaction ids 5/7 correspond to the 2nd/3rd completed updates (init sets 2,
       each update increments twice); the iTick check guards against reuse on CPU re-online. */
    u32TransactionId = pGipCpu->u32TransactionId;
    if (RT_UNLIKELY(   (   u32TransactionId == 5
                        || u32TransactionId == 7)
                    && (   iTick == 2
                        || iTick == 3) ))
    {
        unsigned i;
        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    }

    /*
     * TSC History.
     */
    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     *
     * On Windows, we have an occasional (but recurring) sour value that messed up
     * the history but taking only 1 interval reduces the precision overall.
     * However, this problem existed before the invariant mode was introduced.
     */
    if (   supdrvIsInvariantTsc()
        || pGip->u32UpdateHz >= 1000)
    {
        /* Average all 8 history entries (two partial sums to limit overflow). */
        uint32_t u32;
        u32 = pGipCpu->au32TSCHistory[0];
        u32 += pGipCpu->au32TSCHistory[1];
        u32 += pGipCpu->au32TSCHistory[2];
        u32 += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        /* Average of the current and previous intervals only. */
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* value chosen on a 2GHz thinkpad running windows */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        /* Low update rate: just use the current interval. */
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC_64);
    u64CpuHz /= pGip->u32UpdateIntervalNS;
    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
7392
7393
/**
 * Updates the GIP.
 *
 * Selects the per-CPU entry (entry 0 in sync mode, the APIC-id-mapped entry in
 * async mode), performs the update inside an odd/even transaction-id bracket,
 * and periodically recalculates the effective update frequency.
 *
 * @param   pDevExt     The device extension.
 * @param   u64NanoTS   The current nanosecond timesamp.
 * @param   u64TSC      The current TSC timesamp.
 * @param   idCpu       The CPU ID.
 * @param   iTick       The current timer tick.
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        /* Async mode: look up the entry for the CPU we are running on and
           bail out quietly if it isn't (yet) valid. */
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    /* An odd id after increment means we own the update bracket; readers treat
       odd ids as "update in progress". */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            /* Only accept a sane frequency (30..2000 Hz); keep the old one otherwise. */
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / UINT64_C(GIP_UPDATEHZ_RECALC_FREQ);
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS + 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
7470
7471
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * Used in async TSC mode where each CPU maintains its own GIP entry; the
 * update is bracketed by the same odd/even transaction-id protocol as
 * supdrvGipUpdate().
 *
 * @param   pDevExt     The device extension.
 * @param   u64NanoTS   The current nanosecond timesamp.
 * @param   u64TSC      The current TSC timesamp.
 * @param   idCpu       The CPU ID.
 * @param   idApic      The APIC id for the CPU index.
 * @param   iTick       The current timer tick.
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnline(pDevExt, idCpu);
    }

    /* Map the APIC id to the GIP CPU entry; silently skip the update when the
       mapping is missing or stale (entry belongs to a different CPU id). */
    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            /* Odd id after increment == we own the bracket; see supdrvGipUpdate(). */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
7529
7530
7531/**
7532 * Resume built-in keyboard on MacBook Air and Pro hosts.
7533 * If there is no built-in keyboard device, return success anyway.
7534 *
7535 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7536 */
7537static int supdrvIOCtl_ResumeSuspendedKbds(void)
7538{
7539#if defined(RT_OS_DARWIN)
7540 return supdrvDarwinResumeSuspendedKbds();
7541#else
7542 return VERR_NOT_IMPLEMENTED;
7543#endif
7544}
7545
7546
7547/**
7548 * Service a TSC-delta measurement request.
7549 *
7550 * @returns VBox status code.
7551 * @param pDevExt Pointer to the device instance data.
7552 * @param pReq Pointer to the TSC-delta measurement request.
7553 */
7554static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7555{
7556 PSUPGLOBALINFOPAGE pGip;
7557 RTCPUID idCpuWorker;
7558 int rc = VERR_CPU_NOT_FOUND;
7559 int16_t cTries;
7560 RTMSINTERVAL cMsWaitRetry;
7561 uint16_t iCpu;
7562
7563 /*
7564 * Validate.
7565 */
7566 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7567 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7568 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7569 idCpuWorker = pReq->u.In.idCpu;
7570 if (idCpuWorker == NIL_RTCPUID)
7571 return VERR_INVALID_CPU_ID;
7572
7573 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7574 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7575 pGip = pDevExt->pGip;
7576 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7577 {
7578 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7579 if (pGipCpuWorker->idCpu == idCpuWorker)
7580 {
7581 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7582 && !pReq->u.In.fForce)
7583 return VINF_SUCCESS;
7584
7585#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7586 if (pReq->u.In.fAsync)
7587 {
7588 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7589 * to pass those options to the thread somehow and implement it in the
7590 * thread. Check if anyone uses/needs fAsync before implementing this. */
7591 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
7592 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7593 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7594 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7595 {
7596 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7597 }
7598 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7599 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7600 return VINF_SUCCESS;
7601 }
7602#endif
7603
7604 while (cTries--)
7605 {
7606 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7607 if (RT_SUCCESS(rc))
7608 {
7609 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7610 break;
7611 }
7612
7613 if (cMsWaitRetry)
7614 RTThreadSleep(cMsWaitRetry);
7615 }
7616
7617 break;
7618 }
7619 }
7620 return rc;
7621}
7622
7623
/**
 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
 *
 * On failure the TSC-delta for the CPU the read landed on is (re-)measured
 * and the read is retried a few times before giving up.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pReq            Pointer to the TSC-read request.
 */
static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
{
    uint64_t uTsc;
    uint16_t idApic;
    int16_t cTries;
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate.
     */
    AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
    AssertReturn(pReq, VERR_INVALID_PARAMETER);
    AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
    pGip = pDevExt->pGip;

    /* Up to 4 attempts: the first read plus up to 3 retries after
       re-measuring the delta. */
    cTries = 4;
    while (cTries-- > 0)
    {
        rc = SUPReadTsc(&uTsc, &idApic);
        if (RT_SUCCESS(rc))
        {
            pReq->u.Out.u64AdjustedTsc = uTsc;
            pReq->u.Out.idApic = idApic;
            return VINF_SUCCESS;
        }
        else
        {
            int rc2;
            uint16_t iCpu;

            /* If we failed to get a delta, measure the delta and retry. */
            AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
                            ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
            iCpu = pGip->aiCpuFromApicId[idApic];
            AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);

            /* A successful measurement must leave a valid (non-sentinel) delta. */
            rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
            if (RT_SUCCESS(rc2))
                AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
        }
    }

    /* All attempts exhausted; report the last read failure. */
    return rc;
}
7676
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette