VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53377

Last change on this file since 53377 was 53369, checked in by vboxsync, 10 years ago

HostDrivers/Support: minimize interrupt disabled time while measuring TSC deltas, thanks to Windows watchdog.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 286.9 KB
Line 
1/* $Id: SUPDrv.c 53369 2014-11-21 12:08:21Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
/*******************************************************************************
*   Defined Constants And Macros                                               *
*******************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
 *
 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
 */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD                  UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS                 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS          4
/** The number of loops until we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS       24
/** Stop measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_STOP             0
/** Start measurement of TSC delta. */
#define GIP_TSC_DELTA_SYNC_START            1
/** Worker thread is ready for reading the TSC. */
#define GIP_TSC_DELTA_SYNC_WORKER_READY     2
/** Worker thread is done updating TSC delta info. */
#define GIP_TSC_DELTA_SYNC_WORKER_DONE      3
/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
 * with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER  4
/** When IPRT isn't concurrent safe: Worker is ready after waiting for
 * master with a timeout. */
#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER  5

/* The primer loops must finish before the read-time loops do, and both
   together must fit inside the total loop count. */
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);

/** @def VBOX_SVN_REV
 * The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif
136
137
138/*******************************************************************************
139* Internal Functions *
140*******************************************************************************/
141static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
142static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
143static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
144static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
145static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
146static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
147static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
148static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
149static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
150static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
151static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
152static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
153static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
154DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
155DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
156static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
157static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
158static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
159static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
160static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
161static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
162static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
163static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
164static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
165static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
166static bool supdrvIsInvariantTsc(void);
167static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
168 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
169static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
170static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
171static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
172static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
173 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
174static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
175static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
176static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
177static int supdrvIOCtl_ResumeSuspendedKbds(void);
178
179
/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/** Pointer to the Global Information Page (GIP); NULL until created.
 * Exported to ring-0 client modules via the g_aFunctions table below. */
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
184
/**
 * The TSC delta synchronization struct, rounded to cache line size.
 *
 * Holds one of the GIP_TSC_DELTA_SYNC_* state values; padded so that the
 * master and worker CPUs spinning on it do not share the line with anything
 * else.
 */
typedef union SUPTSCDELTASYNC
{
    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t   u;
    /** Padding to cache line size. */
    uint8_t             u8Padding[64];
} SUPTSCDELTASYNC;
AssertCompileSize(SUPTSCDELTASYNC, 64);
/** Pointer to a TSC delta sync structure. */
typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;

/** Pointer to the TSC delta sync. struct. */
static void              *g_pvTscDeltaSync;
/** Aligned pointer to the TSC delta sync. struct.
 * (Presumably the cache-line aligned view of g_pvTscDeltaSync -- the
 * allocation/alignment code is not in this chunk; confirm at the alloc site.) */
static PSUPTSCDELTASYNC   g_pTscDeltaSync;
/** The TSC delta measurement initiator Cpu Id. */
static volatile RTCPUID   g_idTscDeltaInitiator = NIL_RTCPUID;
/** Number of online/offline events, incremented each time a CPU goes online
 * or offline. */
static volatile uint32_t  g_cMpOnOffEvents;
/** TSC reading during start of TSC frequency refinement phase. */
uint64_t                  g_u64TSCAnchor;
/** Timestamp (in nanosec) during start of TSC frequency refinement phase. */
uint64_t                  g_u64NanoTSAnchor;
211
/**
 * Array of the R0 SUP API.
 *
 * The loader resolves symbols requested by client modules against this table.
 * The first ten entries are absolute values patched at runtime by
 * supdrvInitDevExt(), which indexes them by position -- do NOT reorder them.
 * The SED markers delimit the region processed by the build scripts.
 */
static SUPFUNC g_aFunctions[] =
{
/* SED: START */
    /* name                                     function */
    /* Entries with absolute addresses determined at runtime, fixup
       code makes ugly ASSUMPTIONS about the order here: */
    { "SUPR0AbsIs64bit",                        (void *)0 },
    { "SUPR0Abs64bitKernelCS",                  (void *)0 },
    { "SUPR0Abs64bitKernelSS",                  (void *)0 },
    { "SUPR0Abs64bitKernelDS",                  (void *)0 },
    { "SUPR0AbsKernelCS",                       (void *)0 },
    { "SUPR0AbsKernelSS",                       (void *)0 },
    { "SUPR0AbsKernelDS",                       (void *)0 },
    { "SUPR0AbsKernelES",                       (void *)0 },
    { "SUPR0AbsKernelFS",                       (void *)0 },
    { "SUPR0AbsKernelGS",                       (void *)0 },
    /* Normal function pointers: */
    { "g_pSUPGlobalInfoPage",                   (void *)&g_pSUPGlobalInfoPage },            /* SED: DATA */
    { "SUPGetGIP",                              (void *)SUPGetGIP },
    { "SUPR0ComponentDeregisterFactory",        (void *)SUPR0ComponentDeregisterFactory },
    { "SUPR0ComponentQueryFactory",             (void *)SUPR0ComponentQueryFactory },
    { "SUPR0ComponentRegisterFactory",          (void *)SUPR0ComponentRegisterFactory },
    { "SUPR0ContAlloc",                         (void *)SUPR0ContAlloc },
    { "SUPR0ContFree",                          (void *)SUPR0ContFree },
    { "SUPR0EnableVTx",                         (void *)SUPR0EnableVTx },
    { "SUPR0SuspendVTxOnCpu",                   (void *)SUPR0SuspendVTxOnCpu },
    { "SUPR0ResumeVTxOnCpu",                    (void *)SUPR0ResumeVTxOnCpu },
    { "SUPR0GetPagingMode",                     (void *)SUPR0GetPagingMode },
    { "SUPR0GetKernelFeatures",                 (void *)SUPR0GetKernelFeatures },
    { "SUPR0LockMem",                           (void *)SUPR0LockMem },
    { "SUPR0LowAlloc",                          (void *)SUPR0LowAlloc },
    { "SUPR0LowFree",                           (void *)SUPR0LowFree },
    { "SUPR0MemAlloc",                          (void *)SUPR0MemAlloc },
    { "SUPR0MemFree",                           (void *)SUPR0MemFree },
    { "SUPR0MemGetPhys",                        (void *)SUPR0MemGetPhys },
    { "SUPR0ObjAddRef",                         (void *)SUPR0ObjAddRef },
    { "SUPR0ObjAddRefEx",                       (void *)SUPR0ObjAddRefEx },
    { "SUPR0ObjRegister",                       (void *)SUPR0ObjRegister },
    { "SUPR0ObjRelease",                        (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess",                   (void *)SUPR0ObjVerifyAccess },
    { "SUPR0PageAllocEx",                       (void *)SUPR0PageAllocEx },
    { "SUPR0PageFree",                          (void *)SUPR0PageFree },
    { "SUPR0Printf",                            (void *)SUPR0Printf },
    { "SUPR0TracerDeregisterDrv",               (void *)SUPR0TracerDeregisterDrv },
    { "SUPR0TracerDeregisterImpl",              (void *)SUPR0TracerDeregisterImpl },
    { "SUPR0TracerFireProbe",                   (void *)SUPR0TracerFireProbe },
    { "SUPR0TracerRegisterDrv",                 (void *)SUPR0TracerRegisterDrv },
    { "SUPR0TracerRegisterImpl",                (void *)SUPR0TracerRegisterImpl },
    { "SUPR0TracerRegisterModule",              (void *)SUPR0TracerRegisterModule },
    { "SUPR0TracerUmodProbeFire",               (void *)SUPR0TracerUmodProbeFire },
    { "SUPR0UnlockMem",                         (void *)SUPR0UnlockMem },
    { "SUPSemEventClose",                       (void *)SUPSemEventClose },
    { "SUPSemEventCreate",                      (void *)SUPSemEventCreate },
    { "SUPSemEventGetResolution",               (void *)SUPSemEventGetResolution },
    { "SUPSemEventMultiClose",                  (void *)SUPSemEventMultiClose },
    { "SUPSemEventMultiCreate",                 (void *)SUPSemEventMultiCreate },
    { "SUPSemEventMultiGetResolution",          (void *)SUPSemEventMultiGetResolution },
    { "SUPSemEventMultiReset",                  (void *)SUPSemEventMultiReset },
    { "SUPSemEventMultiSignal",                 (void *)SUPSemEventMultiSignal },
    { "SUPSemEventMultiWait",                   (void *)SUPSemEventMultiWait },
    { "SUPSemEventMultiWaitNoResume",           (void *)SUPSemEventMultiWaitNoResume },
    { "SUPSemEventMultiWaitNsAbsIntr",          (void *)SUPSemEventMultiWaitNsAbsIntr },
    { "SUPSemEventMultiWaitNsRelIntr",          (void *)SUPSemEventMultiWaitNsRelIntr },
    { "SUPSemEventSignal",                      (void *)SUPSemEventSignal },
    { "SUPSemEventWait",                        (void *)SUPSemEventWait },
    { "SUPSemEventWaitNoResume",                (void *)SUPSemEventWaitNoResume },
    { "SUPSemEventWaitNsAbsIntr",               (void *)SUPSemEventWaitNsAbsIntr },
    { "SUPSemEventWaitNsRelIntr",               (void *)SUPSemEventWaitNsRelIntr },

    { "RTAssertAreQuiet",                       (void *)RTAssertAreQuiet },
    { "RTAssertMayPanic",                       (void *)RTAssertMayPanic },
    { "RTAssertMsg1",                           (void *)RTAssertMsg1 },
    { "RTAssertMsg2AddV",                       (void *)RTAssertMsg2AddV },
    { "RTAssertMsg2V",                          (void *)RTAssertMsg2V },
    { "RTAssertSetMayPanic",                    (void *)RTAssertSetMayPanic },
    { "RTAssertSetQuiet",                       (void *)RTAssertSetQuiet },
    { "RTCrc32",                                (void *)RTCrc32 },
    { "RTCrc32Finish",                          (void *)RTCrc32Finish },
    { "RTCrc32Process",                         (void *)RTCrc32Process },
    { "RTCrc32Start",                           (void *)RTCrc32Start },
    { "RTErrConvertFromErrno",                  (void *)RTErrConvertFromErrno },
    { "RTErrConvertToErrno",                    (void *)RTErrConvertToErrno },
    { "RTHandleTableAllocWithCtx",              (void *)RTHandleTableAllocWithCtx },
    { "RTHandleTableCreate",                    (void *)RTHandleTableCreate },
    { "RTHandleTableCreateEx",                  (void *)RTHandleTableCreateEx },
    { "RTHandleTableDestroy",                   (void *)RTHandleTableDestroy },
    { "RTHandleTableFreeWithCtx",               (void *)RTHandleTableFreeWithCtx },
    { "RTHandleTableLookupWithCtx",             (void *)RTHandleTableLookupWithCtx },
    { "RTLogDefaultInstance",                   (void *)RTLogDefaultInstance },
    { "RTLogGetDefaultInstance",                (void *)RTLogGetDefaultInstance },
    { "RTLogLoggerExV",                         (void *)RTLogLoggerExV },
    { "RTLogPrintfV",                           (void *)RTLogPrintfV },
    { "RTLogRelDefaultInstance",                (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread",          (void *)RTLogSetDefaultInstanceThread },
    { "RTMemAllocExTag",                        (void *)RTMemAllocExTag },
    { "RTMemAllocTag",                          (void *)RTMemAllocTag },
    { "RTMemAllocVarTag",                       (void *)RTMemAllocVarTag },
    { "RTMemAllocZTag",                         (void *)RTMemAllocZTag },
    { "RTMemAllocZVarTag",                      (void *)RTMemAllocZVarTag },
    { "RTMemDupExTag",                          (void *)RTMemDupExTag },
    { "RTMemDupTag",                            (void *)RTMemDupTag },
    { "RTMemFree",                              (void *)RTMemFree },
    { "RTMemFreeEx",                            (void *)RTMemFreeEx },
    { "RTMemReallocTag",                        (void *)RTMemReallocTag },
    { "RTMpCpuId",                              (void *)RTMpCpuId },
    { "RTMpCpuIdFromSetIndex",                  (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex",                    (void *)RTMpCpuIdToSetIndex },
    { "RTMpGetArraySize",                       (void *)RTMpGetArraySize },
    { "RTMpGetCount",                           (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId",                        (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount",                     (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet",                       (void *)RTMpGetOnlineSet },
    { "RTMpGetSet",                             (void *)RTMpGetSet },
    { "RTMpIsCpuOnline",                        (void *)RTMpIsCpuOnline },
    { "RTMpIsCpuPossible",                      (void *)RTMpIsCpuPossible },
    { "RTMpIsCpuWorkPending",                   (void *)RTMpIsCpuWorkPending },
    { "RTMpNotificationDeregister",             (void *)RTMpNotificationDeregister },
    { "RTMpNotificationRegister",               (void *)RTMpNotificationRegister },
    { "RTMpOnAll",                              (void *)RTMpOnAll },
    { "RTMpOnOthers",                           (void *)RTMpOnOthers },
    { "RTMpOnSpecific",                         (void *)RTMpOnSpecific },
    { "RTMpPokeCpu",                            (void *)RTMpPokeCpu },
    { "RTNetIPv4AddDataChecksum",               (void *)RTNetIPv4AddDataChecksum },
    { "RTNetIPv4AddTCPChecksum",                (void *)RTNetIPv4AddTCPChecksum },
    { "RTNetIPv4AddUDPChecksum",                (void *)RTNetIPv4AddUDPChecksum },
    { "RTNetIPv4FinalizeChecksum",              (void *)RTNetIPv4FinalizeChecksum },
    { "RTNetIPv4HdrChecksum",                   (void *)RTNetIPv4HdrChecksum },
    { "RTNetIPv4IsDHCPValid",                   (void *)RTNetIPv4IsDHCPValid },
    { "RTNetIPv4IsHdrValid",                    (void *)RTNetIPv4IsHdrValid },
    { "RTNetIPv4IsTCPSizeValid",                (void *)RTNetIPv4IsTCPSizeValid },
    { "RTNetIPv4IsTCPValid",                    (void *)RTNetIPv4IsTCPValid },
    { "RTNetIPv4IsUDPSizeValid",                (void *)RTNetIPv4IsUDPSizeValid },
    { "RTNetIPv4IsUDPValid",                    (void *)RTNetIPv4IsUDPValid },
    { "RTNetIPv4PseudoChecksum",                (void *)RTNetIPv4PseudoChecksum },
    { "RTNetIPv4PseudoChecksumBits",            (void *)RTNetIPv4PseudoChecksumBits },
    { "RTNetIPv4TCPChecksum",                   (void *)RTNetIPv4TCPChecksum },
    { "RTNetIPv4UDPChecksum",                   (void *)RTNetIPv4UDPChecksum },
    { "RTNetIPv6PseudoChecksum",                (void *)RTNetIPv6PseudoChecksum },
    { "RTNetIPv6PseudoChecksumBits",            (void *)RTNetIPv6PseudoChecksumBits },
    { "RTNetIPv6PseudoChecksumEx",              (void *)RTNetIPv6PseudoChecksumEx },
    { "RTNetTCPChecksum",                       (void *)RTNetTCPChecksum },
    { "RTNetUDPChecksum",                       (void *)RTNetUDPChecksum },
    { "RTPowerNotificationDeregister",          (void *)RTPowerNotificationDeregister },
    { "RTPowerNotificationRegister",            (void *)RTPowerNotificationRegister },
    { "RTProcSelf",                             (void *)RTProcSelf },
    { "RTR0AssertPanicSystem",                  (void *)RTR0AssertPanicSystem },
    { "RTR0MemAreKrnlAndUsrDifferent",          (void *)RTR0MemAreKrnlAndUsrDifferent },
    { "RTR0MemKernelIsValidAddr",               (void *)RTR0MemKernelIsValidAddr },
    { "RTR0MemKernelCopyFrom",                  (void *)RTR0MemKernelCopyFrom },
    { "RTR0MemKernelCopyTo",                    (void *)RTR0MemKernelCopyTo },
    { "RTR0MemObjAddress",                      (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3",                    (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjAllocContTag",                 (void *)RTR0MemObjAllocContTag },
    { "RTR0MemObjAllocLowTag",                  (void *)RTR0MemObjAllocLowTag },
    { "RTR0MemObjAllocPageTag",                 (void *)RTR0MemObjAllocPageTag },
    { "RTR0MemObjAllocPhysExTag",               (void *)RTR0MemObjAllocPhysExTag },
    { "RTR0MemObjAllocPhysNCTag",               (void *)RTR0MemObjAllocPhysNCTag },
    { "RTR0MemObjAllocPhysTag",                 (void *)RTR0MemObjAllocPhysTag },
    { "RTR0MemObjEnterPhysTag",                 (void *)RTR0MemObjEnterPhysTag },
    { "RTR0MemObjFree",                         (void *)RTR0MemObjFree },
    { "RTR0MemObjGetPagePhysAddr",              (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjIsMapping",                    (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjLockUserTag",                  (void *)RTR0MemObjLockUserTag },
    { "RTR0MemObjMapKernelExTag",               (void *)RTR0MemObjMapKernelExTag },
    { "RTR0MemObjMapKernelTag",                 (void *)RTR0MemObjMapKernelTag },
    { "RTR0MemObjMapUserTag",                   (void *)RTR0MemObjMapUserTag },
    { "RTR0MemObjProtect",                      (void *)RTR0MemObjProtect },
    { "RTR0MemObjSize",                         (void *)RTR0MemObjSize },
    { "RTR0MemUserCopyFrom",                    (void *)RTR0MemUserCopyFrom },
    { "RTR0MemUserCopyTo",                      (void *)RTR0MemUserCopyTo },
    { "RTR0MemUserIsValidAddr",                 (void *)RTR0MemUserIsValidAddr },
    { "RTR0ProcHandleSelf",                     (void *)RTR0ProcHandleSelf },
    { "RTSemEventCreate",                       (void *)RTSemEventCreate },
    { "RTSemEventDestroy",                      (void *)RTSemEventDestroy },
    { "RTSemEventGetResolution",                (void *)RTSemEventGetResolution },
    { "RTSemEventMultiCreate",                  (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiDestroy",                 (void *)RTSemEventMultiDestroy },
    { "RTSemEventMultiGetResolution",           (void *)RTSemEventMultiGetResolution },
    { "RTSemEventMultiReset",                   (void *)RTSemEventMultiReset },
    { "RTSemEventMultiSignal",                  (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiWait",                    (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitEx",                  (void *)RTSemEventMultiWaitEx },
    { "RTSemEventMultiWaitExDebug",             (void *)RTSemEventMultiWaitExDebug },
    { "RTSemEventMultiWaitNoResume",            (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventSignal",                       (void *)RTSemEventSignal },
    { "RTSemEventWait",                         (void *)RTSemEventWait },
    { "RTSemEventWaitEx",                       (void *)RTSemEventWaitEx },
    { "RTSemEventWaitExDebug",                  (void *)RTSemEventWaitExDebug },
    { "RTSemEventWaitNoResume",                 (void *)RTSemEventWaitNoResume },
    { "RTSemFastMutexCreate",                   (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy",                  (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRelease",                  (void *)RTSemFastMutexRelease },
    { "RTSemFastMutexRequest",                  (void *)RTSemFastMutexRequest },
    { "RTSemMutexCreate",                       (void *)RTSemMutexCreate },
    { "RTSemMutexDestroy",                      (void *)RTSemMutexDestroy },
    { "RTSemMutexRelease",                      (void *)RTSemMutexRelease },
    { "RTSemMutexRequest",                      (void *)RTSemMutexRequest },
    { "RTSemMutexRequestDebug",                 (void *)RTSemMutexRequestDebug },
    { "RTSemMutexRequestNoResume",              (void *)RTSemMutexRequestNoResume },
    { "RTSemMutexRequestNoResumeDebug",         (void *)RTSemMutexRequestNoResumeDebug },
    { "RTSpinlockAcquire",                      (void *)RTSpinlockAcquire },
    { "RTSpinlockCreate",                       (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy",                      (void *)RTSpinlockDestroy },
    { "RTSpinlockRelease",                      (void *)RTSpinlockRelease },
    { "RTStrCopy",                              (void *)RTStrCopy },
    { "RTStrDupTag",                            (void *)RTStrDupTag },
    { "RTStrFormat",                            (void *)RTStrFormat },
    { "RTStrFormatNumber",                      (void *)RTStrFormatNumber },
    { "RTStrFormatTypeDeregister",              (void *)RTStrFormatTypeDeregister },
    { "RTStrFormatTypeRegister",                (void *)RTStrFormatTypeRegister },
    { "RTStrFormatTypeSetUser",                 (void *)RTStrFormatTypeSetUser },
    { "RTStrFormatV",                           (void *)RTStrFormatV },
    { "RTStrFree",                              (void *)RTStrFree },
    { "RTStrNCmp",                              (void *)RTStrNCmp },
    { "RTStrPrintf",                            (void *)RTStrPrintf },
    { "RTStrPrintfEx",                          (void *)RTStrPrintfEx },
    { "RTStrPrintfExV",                         (void *)RTStrPrintfExV },
    { "RTStrPrintfV",                           (void *)RTStrPrintfV },
    { "RTThreadCreate",                         (void *)RTThreadCreate },
    { "RTThreadCtxHooksAreRegistered",          (void *)RTThreadCtxHooksAreRegistered },
    { "RTThreadCtxHooksCreate",                 (void *)RTThreadCtxHooksCreate },
    { "RTThreadCtxHooksDeregister",             (void *)RTThreadCtxHooksDeregister },
    { "RTThreadCtxHooksRegister",               (void *)RTThreadCtxHooksRegister },
    { "RTThreadCtxHooksRelease",                (void *)RTThreadCtxHooksRelease },
    { "RTThreadCtxHooksRetain",                 (void *)RTThreadCtxHooksRetain },
    { "RTThreadGetName",                        (void *)RTThreadGetName },
    { "RTThreadGetNative",                      (void *)RTThreadGetNative },
    { "RTThreadGetType",                        (void *)RTThreadGetType },
    { "RTThreadIsInInterrupt",                  (void *)RTThreadIsInInterrupt },
    { "RTThreadNativeSelf",                     (void *)RTThreadNativeSelf },
    { "RTThreadPreemptDisable",                 (void *)RTThreadPreemptDisable },
    { "RTThreadPreemptIsEnabled",               (void *)RTThreadPreemptIsEnabled },
    { "RTThreadPreemptIsPending",               (void *)RTThreadPreemptIsPending },
    { "RTThreadPreemptIsPendingTrusty",         (void *)RTThreadPreemptIsPendingTrusty },
    { "RTThreadPreemptIsPossible",              (void *)RTThreadPreemptIsPossible },
    { "RTThreadPreemptRestore",                 (void *)RTThreadPreemptRestore },
    { "RTThreadSelf",                           (void *)RTThreadSelf },
    { "RTThreadSelfName",                       (void *)RTThreadSelfName },
    { "RTThreadSleep",                          (void *)RTThreadSleep },
    { "RTThreadUserReset",                      (void *)RTThreadUserReset },
    { "RTThreadUserSignal",                     (void *)RTThreadUserSignal },
    { "RTThreadUserWait",                       (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume",               (void *)RTThreadUserWaitNoResume },
    { "RTThreadWait",                           (void *)RTThreadWait },
    { "RTThreadWaitNoResume",                   (void *)RTThreadWaitNoResume },
    { "RTThreadYield",                          (void *)RTThreadYield },
    { "RTTimeMilliTS",                          (void *)RTTimeMilliTS },
    { "RTTimeNanoTS",                           (void *)RTTimeNanoTS },
    { "RTTimeNow",                              (void *)RTTimeNow },
    { "RTTimerCanDoHighResolution",             (void *)RTTimerCanDoHighResolution },
    { "RTTimerChangeInterval",                  (void *)RTTimerChangeInterval },
    { "RTTimerCreate",                          (void *)RTTimerCreate },
    { "RTTimerCreateEx",                        (void *)RTTimerCreateEx },
    { "RTTimerDestroy",                         (void *)RTTimerDestroy },
    { "RTTimerGetSystemGranularity",            (void *)RTTimerGetSystemGranularity },
    { "RTTimerReleaseSystemGranularity",        (void *)RTTimerReleaseSystemGranularity },
    { "RTTimerRequestSystemGranularity",        (void *)RTTimerRequestSystemGranularity },
    { "RTTimerStart",                           (void *)RTTimerStart },
    { "RTTimerStop",                            (void *)RTTimerStop },
    { "RTTimeSystemMilliTS",                    (void *)RTTimeSystemMilliTS },
    { "RTTimeSystemNanoTS",                     (void *)RTTimeSystemNanoTS },
    { "RTUuidCompare",                          (void *)RTUuidCompare },
    { "RTUuidCompareStr",                       (void *)RTUuidCompareStr },
    { "RTUuidFromStr",                          (void *)RTUuidFromStr },
/* SED: END */
};
481
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
/**
 * Drag in the rest of IPRT since we share it with the
 * rest of the kernel modules on darwin.
 *
 * Referencing these symbols keeps the linker from dropping IPRT code that
 * sibling modules (VBoxNetAdp, VBoxUSB) need but this driver never calls.
 */
PFNRT g_apfnVBoxDrvIPRTDeps[] =
{
    /* VBoxNetAdp */
    (PFNRT)RTRandBytes,
    /* VBoxUSB */
    (PFNRT)RTPathStripFilename,
    NULL
};
#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
496
497
498/**
499 * Initializes the device extentsion structure.
500 *
501 * @returns IPRT status code.
502 * @param pDevExt The device extension to initialize.
503 * @param cbSession The size of the session structure. The size of
504 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
505 * defined because we're skipping the OS specific members
506 * then.
507 */
508int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
509{
510 int rc;
511
512#ifdef SUPDRV_WITH_RELEASE_LOGGER
513 /*
514 * Create the release log.
515 */
516 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
517 PRTLOGGER pRelLogger;
518 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
519 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
520 if (RT_SUCCESS(rc))
521 RTLogRelSetDefaultInstance(pRelLogger);
522 /** @todo Add native hook for getting logger config parameters and setting
523 * them. On linux we should use the module parameter stuff... */
524#endif
525
526 /*
527 * Initialize it.
528 */
529 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
530 pDevExt->Spinlock = NIL_RTSPINLOCK;
531 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
532 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
533 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
534 if (RT_SUCCESS(rc))
535 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
536 if (RT_SUCCESS(rc))
537 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
538
539 if (RT_SUCCESS(rc))
540#ifdef SUPDRV_USE_MUTEX_FOR_LDR
541 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
542#else
543 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
544#endif
545 if (RT_SUCCESS(rc))
546 {
547 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
548 if (RT_SUCCESS(rc))
549 {
550#ifdef SUPDRV_USE_MUTEX_FOR_LDR
551 rc = RTSemMutexCreate(&pDevExt->mtxGip);
552#else
553 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
554#endif
555 if (RT_SUCCESS(rc))
556 {
557 rc = supdrvGipCreate(pDevExt);
558 if (RT_SUCCESS(rc))
559 {
560 rc = supdrvTracerInit(pDevExt);
561 if (RT_SUCCESS(rc))
562 {
563 pDevExt->pLdrInitImage = NULL;
564 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
565 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
566 pDevExt->cbSession = (uint32_t)cbSession;
567
568 /*
569 * Fixup the absolute symbols.
570 *
571 * Because of the table indexing assumptions we'll have a little #ifdef orgy
572 * here rather than distributing this to OS specific files. At least for now.
573 */
574#ifdef RT_OS_DARWIN
575# if ARCH_BITS == 32
576 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
577 {
578 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
579 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
580 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
581 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
582 }
583 else
584 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
585 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
586 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
587 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
588 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
589 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
590 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
591# else /* 64-bit darwin: */
592 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
593 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
594 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
595 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
596 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
597 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
598 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
599 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
600 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
601 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
602
603# endif
604#else /* !RT_OS_DARWIN */
605# if ARCH_BITS == 64
606 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
607 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
608 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
609 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
610# else
611 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
612# endif
613 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
614 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
615 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
616 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
617 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
618 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
619#endif /* !RT_OS_DARWIN */
620 return VINF_SUCCESS;
621 }
622
623 supdrvGipDestroy(pDevExt);
624 }
625
626#ifdef SUPDRV_USE_MUTEX_FOR_GIP
627 RTSemMutexDestroy(pDevExt->mtxGip);
628 pDevExt->mtxGip = NIL_RTSEMMUTEX;
629#else
630 RTSemFastMutexDestroy(pDevExt->mtxGip);
631 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
632#endif
633 }
634 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
635 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
636 }
637#ifdef SUPDRV_USE_MUTEX_FOR_LDR
638 RTSemMutexDestroy(pDevExt->mtxLdr);
639 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
640#else
641 RTSemFastMutexDestroy(pDevExt->mtxLdr);
642 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
643#endif
644 }
645
646 RTSpinlockDestroy(pDevExt->Spinlock);
647 pDevExt->Spinlock = NIL_RTSPINLOCK;
648 RTSpinlockDestroy(pDevExt->hGipSpinlock);
649 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
650 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
651 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
652
653#ifdef SUPDRV_WITH_RELEASE_LOGGER
654 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
655 RTLogDestroy(RTLogSetDefaultInstance(NULL));
656#endif
657
658 return rc;
659}
660
661
662/**
663 * Delete the device extension (e.g. cleanup members).
664 *
665 * @param pDevExt The device extension to delete.
666 */
667void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
668{
669 PSUPDRVOBJ pObj;
670 PSUPDRVUSAGE pUsage;
671
672 /*
673 * Kill mutexes and spinlocks.
674 */
675#ifdef SUPDRV_USE_MUTEX_FOR_GIP
676 RTSemMutexDestroy(pDevExt->mtxGip);
677 pDevExt->mtxGip = NIL_RTSEMMUTEX;
678#else
679 RTSemFastMutexDestroy(pDevExt->mtxGip);
680 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
681#endif
682#ifdef SUPDRV_USE_MUTEX_FOR_LDR
683 RTSemMutexDestroy(pDevExt->mtxLdr);
684 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
685#else
686 RTSemFastMutexDestroy(pDevExt->mtxLdr);
687 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
688#endif
689 RTSpinlockDestroy(pDevExt->Spinlock);
690 pDevExt->Spinlock = NIL_RTSPINLOCK;
691 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
692 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
693 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
694 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
695
696 /*
697 * Free lists.
698 */
699 /* objects. */
700 pObj = pDevExt->pObjs;
701 Assert(!pObj); /* (can trigger on forced unloads) */
702 pDevExt->pObjs = NULL;
703 while (pObj)
704 {
705 void *pvFree = pObj;
706 pObj = pObj->pNext;
707 RTMemFree(pvFree);
708 }
709
710 /* usage records. */
711 pUsage = pDevExt->pUsageFree;
712 pDevExt->pUsageFree = NULL;
713 while (pUsage)
714 {
715 void *pvFree = pUsage;
716 pUsage = pUsage->pNext;
717 RTMemFree(pvFree);
718 }
719
720 /* kill the GIP. */
721 supdrvGipDestroy(pDevExt);
722 RTSpinlockDestroy(pDevExt->hGipSpinlock);
723 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
724
725 supdrvTracerTerm(pDevExt);
726
727#ifdef SUPDRV_WITH_RELEASE_LOGGER
728 /* destroy the loggers. */
729 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
730 RTLogDestroy(RTLogSetDefaultInstance(NULL));
731#endif
732}
733
734
735/**
736 * Create session.
737 *
738 * @returns IPRT status code.
739 * @param pDevExt Device extension.
740 * @param fUser Flag indicating whether this is a user or kernel
741 * session.
742 * @param fUnrestricted Unrestricted access (system) or restricted access
743 * (user)?
744 * @param ppSession Where to store the pointer to the session data.
745 */
746int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
747{
748 int rc;
749 PSUPDRVSESSION pSession;
750
751 if (!SUP_IS_DEVEXT_VALID(pDevExt))
752 return VERR_INVALID_PARAMETER;
753
754 /*
755 * Allocate memory for the session data.
756 */
757 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
758 if (pSession)
759 {
760 /* Initialize session data. */
761 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
762 if (!rc)
763 {
764 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
765 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
766 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
767 if (RT_SUCCESS(rc))
768 {
769 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
770 pSession->pDevExt = pDevExt;
771 pSession->u32Cookie = BIRD_INV;
772 pSession->fUnrestricted = fUnrestricted;
773 /*pSession->fInHashTable = false; */
774 pSession->cRefs = 1;
775 /*pSession->pCommonNextHash = NULL;
776 pSession->ppOsSessionPtr = NULL; */
777 if (fUser)
778 {
779 pSession->Process = RTProcSelf();
780 pSession->R0Process = RTR0ProcHandleSelf();
781 }
782 else
783 {
784 pSession->Process = NIL_RTPROCESS;
785 pSession->R0Process = NIL_RTR0PROCESS;
786 }
787 /*pSession->pLdrUsage = NULL;
788 pSession->pVM = NULL;
789 pSession->pUsage = NULL;
790 pSession->pGip = NULL;
791 pSession->fGipReferenced = false;
792 pSession->Bundle.cUsed = 0; */
793 pSession->Uid = NIL_RTUID;
794 pSession->Gid = NIL_RTGID;
795 /*pSession->uTracerData = 0;*/
796 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
797 RTListInit(&pSession->TpProviders);
798 /*pSession->cTpProviders = 0;*/
799 /*pSession->cTpProbesFiring = 0;*/
800 RTListInit(&pSession->TpUmods);
801 /*RT_ZERO(pSession->apTpLookupTable);*/
802
803 VBOXDRV_SESSION_CREATE(pSession, fUser);
804 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
805 return VINF_SUCCESS;
806 }
807
808 RTSpinlockDestroy(pSession->Spinlock);
809 }
810 RTMemFree(pSession);
811 *ppSession = NULL;
812 Log(("Failed to create spinlock, rc=%d!\n", rc));
813 }
814 else
815 rc = VERR_NO_MEMORY;
816
817 return rc;
818}
819
820
821/**
822 * Cleans up the session in the context of the process to which it belongs, the
823 * caller will free the session and the session spinlock.
824 *
825 * This should normally occur when the session is closed or as the process
826 * exits. Careful reference counting in the OS specfic code makes sure that
827 * there cannot be any races between process/handle cleanup callbacks and
828 * threads doing I/O control calls.
829 *
830 * @param pDevExt The device extension.
831 * @param pSession Session data.
832 */
833static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
834{
835 int rc;
836 PSUPDRVBUNDLE pBundle;
837 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
838
839 Assert(!pSession->fInHashTable);
840 Assert(!pSession->ppOsSessionPtr);
841 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
842 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
843
844 /*
845 * Remove logger instances related to this session.
846 */
847 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
848
849 /*
850 * Destroy the handle table.
851 */
852 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
853 AssertRC(rc);
854 pSession->hHandleTable = NIL_RTHANDLETABLE;
855
856 /*
857 * Release object references made in this session.
858 * In theory there should be noone racing us in this session.
859 */
860 Log2(("release objects - start\n"));
861 if (pSession->pUsage)
862 {
863 PSUPDRVUSAGE pUsage;
864 RTSpinlockAcquire(pDevExt->Spinlock);
865
866 while ((pUsage = pSession->pUsage) != NULL)
867 {
868 PSUPDRVOBJ pObj = pUsage->pObj;
869 pSession->pUsage = pUsage->pNext;
870
871 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
872 if (pUsage->cUsage < pObj->cUsage)
873 {
874 pObj->cUsage -= pUsage->cUsage;
875 RTSpinlockRelease(pDevExt->Spinlock);
876 }
877 else
878 {
879 /* Destroy the object and free the record. */
880 if (pDevExt->pObjs == pObj)
881 pDevExt->pObjs = pObj->pNext;
882 else
883 {
884 PSUPDRVOBJ pObjPrev;
885 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
886 if (pObjPrev->pNext == pObj)
887 {
888 pObjPrev->pNext = pObj->pNext;
889 break;
890 }
891 Assert(pObjPrev);
892 }
893 RTSpinlockRelease(pDevExt->Spinlock);
894
895 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
896 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
897 if (pObj->pfnDestructor)
898 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
899 RTMemFree(pObj);
900 }
901
902 /* free it and continue. */
903 RTMemFree(pUsage);
904
905 RTSpinlockAcquire(pDevExt->Spinlock);
906 }
907
908 RTSpinlockRelease(pDevExt->Spinlock);
909 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
910 }
911 Log2(("release objects - done\n"));
912
913 /*
914 * Do tracer cleanups related to this session.
915 */
916 Log2(("release tracer stuff - start\n"));
917 supdrvTracerCleanupSession(pDevExt, pSession);
918 Log2(("release tracer stuff - end\n"));
919
920 /*
921 * Release memory allocated in the session.
922 *
923 * We do not serialize this as we assume that the application will
924 * not allocated memory while closing the file handle object.
925 */
926 Log2(("freeing memory:\n"));
927 pBundle = &pSession->Bundle;
928 while (pBundle)
929 {
930 PSUPDRVBUNDLE pToFree;
931 unsigned i;
932
933 /*
934 * Check and unlock all entries in the bundle.
935 */
936 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
937 {
938 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
939 {
940 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
941 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
942 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
943 {
944 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
945 AssertRC(rc); /** @todo figure out how to handle this. */
946 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
947 }
948 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
949 AssertRC(rc); /** @todo figure out how to handle this. */
950 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
951 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
952 }
953 }
954
955 /*
956 * Advance and free previous bundle.
957 */
958 pToFree = pBundle;
959 pBundle = pBundle->pNext;
960
961 pToFree->pNext = NULL;
962 pToFree->cUsed = 0;
963 if (pToFree != &pSession->Bundle)
964 RTMemFree(pToFree);
965 }
966 Log2(("freeing memory - done\n"));
967
968 /*
969 * Deregister component factories.
970 */
971 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
972 Log2(("deregistering component factories:\n"));
973 if (pDevExt->pComponentFactoryHead)
974 {
975 PSUPDRVFACTORYREG pPrev = NULL;
976 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
977 while (pCur)
978 {
979 if (pCur->pSession == pSession)
980 {
981 /* unlink it */
982 PSUPDRVFACTORYREG pNext = pCur->pNext;
983 if (pPrev)
984 pPrev->pNext = pNext;
985 else
986 pDevExt->pComponentFactoryHead = pNext;
987
988 /* free it */
989 pCur->pNext = NULL;
990 pCur->pSession = NULL;
991 pCur->pFactory = NULL;
992 RTMemFree(pCur);
993
994 /* next */
995 pCur = pNext;
996 }
997 else
998 {
999 /* next */
1000 pPrev = pCur;
1001 pCur = pCur->pNext;
1002 }
1003 }
1004 }
1005 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
1006 Log2(("deregistering component factories - done\n"));
1007
1008 /*
1009 * Loaded images needs to be dereferenced and possibly freed up.
1010 */
1011 supdrvLdrLock(pDevExt);
1012 Log2(("freeing images:\n"));
1013 if (pSession->pLdrUsage)
1014 {
1015 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1016 pSession->pLdrUsage = NULL;
1017 while (pUsage)
1018 {
1019 void *pvFree = pUsage;
1020 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1021 if (pImage->cUsage > pUsage->cUsage)
1022 pImage->cUsage -= pUsage->cUsage;
1023 else
1024 supdrvLdrFree(pDevExt, pImage);
1025 pUsage->pImage = NULL;
1026 pUsage = pUsage->pNext;
1027 RTMemFree(pvFree);
1028 }
1029 }
1030 supdrvLdrUnlock(pDevExt);
1031 Log2(("freeing images - done\n"));
1032
1033 /*
1034 * Unmap the GIP.
1035 */
1036 Log2(("umapping GIP:\n"));
1037 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1038 {
1039 SUPR0GipUnmap(pSession);
1040 pSession->fGipReferenced = 0;
1041 }
1042 Log2(("umapping GIP - done\n"));
1043}
1044
1045
1046/**
1047 * Common code for freeing a session when the reference count reaches zero.
1048 *
1049 * @param pDevExt Device extension.
1050 * @param pSession Session data.
1051 * This data will be freed by this routine.
1052 */
1053static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1054{
1055 VBOXDRV_SESSION_CLOSE(pSession);
1056
1057 /*
1058 * Cleanup the session first.
1059 */
1060 supdrvCleanupSession(pDevExt, pSession);
1061 supdrvOSCleanupSession(pDevExt, pSession);
1062
1063 /*
1064 * Free the rest of the session stuff.
1065 */
1066 RTSpinlockDestroy(pSession->Spinlock);
1067 pSession->Spinlock = NIL_RTSPINLOCK;
1068 pSession->pDevExt = NULL;
1069 RTMemFree(pSession);
1070 LogFlow(("supdrvDestroySession: returns\n"));
1071}
1072
1073
1074/**
1075 * Inserts the session into the global hash table.
1076 *
1077 * @retval VINF_SUCCESS on success.
1078 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1079 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1080 * session (asserted).
1081 * @retval VERR_DUPLICATE if there is already a session for that pid.
1082 *
1083 * @param pDevExt The device extension.
1084 * @param pSession The session.
1085 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1086 * available and used. This will set to point to the
1087 * session while under the protection of the session
1088 * hash table spinlock. It will also be kept in
1089 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1090 * cleanup use.
1091 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1092 */
1093int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1094 void *pvUser)
1095{
1096 PSUPDRVSESSION pCur;
1097 unsigned iHash;
1098
1099 /*
1100 * Validate input.
1101 */
1102 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1103 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1104
1105 /*
1106 * Calculate the hash table index and acquire the spinlock.
1107 */
1108 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1109
1110 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1111
1112 /*
1113 * If there are a collisions, we need to carefully check if we got a
1114 * duplicate. There can only be one open session per process.
1115 */
1116 pCur = pDevExt->apSessionHashTab[iHash];
1117 if (pCur)
1118 {
1119 while (pCur && pCur->Process != pSession->Process)
1120 pCur = pCur->pCommonNextHash;
1121
1122 if (pCur)
1123 {
1124 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1125 if (pCur == pSession)
1126 {
1127 Assert(pSession->fInHashTable);
1128 AssertFailed();
1129 return VERR_WRONG_ORDER;
1130 }
1131 Assert(!pSession->fInHashTable);
1132 if (pCur->R0Process == pSession->R0Process)
1133 return VERR_RESOURCE_IN_USE;
1134 return VERR_DUPLICATE;
1135 }
1136 }
1137 Assert(!pSession->fInHashTable);
1138 Assert(!pSession->ppOsSessionPtr);
1139
1140 /*
1141 * Insert it, doing a callout to the OS specific code in case it has
1142 * anything it wishes to do while we're holding the spinlock.
1143 */
1144 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1145 pDevExt->apSessionHashTab[iHash] = pSession;
1146 pSession->fInHashTable = true;
1147 ASMAtomicIncS32(&pDevExt->cSessions);
1148
1149 pSession->ppOsSessionPtr = ppOsSessionPtr;
1150 if (ppOsSessionPtr)
1151 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1152
1153 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1154
1155 /*
1156 * Retain a reference for the pointer in the session table.
1157 */
1158 ASMAtomicIncU32(&pSession->cRefs);
1159
1160 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1161 return VINF_SUCCESS;
1162}
1163
1164
1165/**
1166 * Removes the session from the global hash table.
1167 *
1168 * @retval VINF_SUCCESS on success.
1169 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1170 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1171 * session (asserted).
1172 *
1173 * @param pDevExt The device extension.
1174 * @param pSession The session. The caller is expected to have a reference
1175 * to this so it won't croak on us when we release the hash
1176 * table reference.
1177 * @param pvUser OS specific context value for the
1178 * supdrvOSSessionHashTabInserted callback.
1179 */
1180int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1181{
1182 PSUPDRVSESSION pCur;
1183 unsigned iHash;
1184 int32_t cRefs;
1185
1186 /*
1187 * Validate input.
1188 */
1189 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1190 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1191
1192 /*
1193 * Calculate the hash table index and acquire the spinlock.
1194 */
1195 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1196
1197 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1198
1199 /*
1200 * Unlink it.
1201 */
1202 pCur = pDevExt->apSessionHashTab[iHash];
1203 if (pCur == pSession)
1204 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1205 else
1206 {
1207 PSUPDRVSESSION pPrev = pCur;
1208 while (pCur && pCur != pSession)
1209 {
1210 pPrev = pCur;
1211 pCur = pCur->pCommonNextHash;
1212 }
1213 if (pCur)
1214 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1215 else
1216 {
1217 Assert(!pSession->fInHashTable);
1218 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1219 return VERR_NOT_FOUND;
1220 }
1221 }
1222
1223 pSession->pCommonNextHash = NULL;
1224 pSession->fInHashTable = false;
1225
1226 ASMAtomicDecS32(&pDevExt->cSessions);
1227
1228 /*
1229 * Clear OS specific session pointer if available and do the OS callback.
1230 */
1231 if (pSession->ppOsSessionPtr)
1232 {
1233 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1234 pSession->ppOsSessionPtr = NULL;
1235 }
1236
1237 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1238
1239 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1240
1241 /*
1242 * Drop the reference the hash table had to the session. This shouldn't
1243 * be the last reference!
1244 */
1245 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1246 Assert(cRefs > 0 && cRefs < _1M);
1247 if (cRefs == 0)
1248 supdrvDestroySession(pDevExt, pSession);
1249
1250 return VINF_SUCCESS;
1251}
1252
1253
1254/**
1255 * Looks up the session for the current process in the global hash table or in
1256 * OS specific pointer.
1257 *
1258 * @returns Pointer to the session with a reference that the caller must
1259 * release. If no valid session was found, NULL is returned.
1260 *
1261 * @param pDevExt The device extension.
1262 * @param Process The process ID.
1263 * @param R0Process The ring-0 process handle.
1264 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1265 * this is used instead of the hash table. For
1266 * additional safety it must then be equal to the
1267 * SUPDRVSESSION::ppOsSessionPtr member.
1268 * This can be NULL even if the OS has a session
1269 * pointer.
1270 */
1271PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1272 PSUPDRVSESSION *ppOsSessionPtr)
1273{
1274 PSUPDRVSESSION pCur;
1275 unsigned iHash;
1276
1277 /*
1278 * Validate input.
1279 */
1280 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1281
1282 /*
1283 * Calculate the hash table index and acquire the spinlock.
1284 */
1285 iHash = SUPDRV_SESSION_HASH(Process);
1286
1287 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1288
1289 /*
1290 * If an OS session pointer is provided, always use it.
1291 */
1292 if (ppOsSessionPtr)
1293 {
1294 pCur = *ppOsSessionPtr;
1295 if ( pCur
1296 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1297 || pCur->Process != Process
1298 || pCur->R0Process != R0Process) )
1299 pCur = NULL;
1300 }
1301 else
1302 {
1303 /*
1304 * Otherwise, do the hash table lookup.
1305 */
1306 pCur = pDevExt->apSessionHashTab[iHash];
1307 while ( pCur
1308 && ( pCur->Process != Process
1309 || pCur->R0Process != R0Process) )
1310 pCur = pCur->pCommonNextHash;
1311 }
1312
1313 /*
1314 * Retain the session.
1315 */
1316 if (pCur)
1317 {
1318 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1319 NOREF(cRefs);
1320 Assert(cRefs > 1 && cRefs < _1M);
1321 }
1322
1323 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1324
1325 return pCur;
1326}
1327
1328
1329/**
1330 * Retain a session to make sure it doesn't go away while it is in use.
1331 *
1332 * @returns New reference count on success, UINT32_MAX on failure.
1333 * @param pSession Session data.
1334 */
1335uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1336{
1337 uint32_t cRefs;
1338 AssertPtrReturn(pSession, UINT32_MAX);
1339 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1340
1341 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1342 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1343 return cRefs;
1344}
1345
1346
1347/**
1348 * Releases a given session.
1349 *
1350 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1351 * @param pSession Session data.
1352 */
1353uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1354{
1355 uint32_t cRefs;
1356 AssertPtrReturn(pSession, UINT32_MAX);
1357 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1358
1359 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1360 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1361 if (cRefs == 0)
1362 supdrvDestroySession(pSession->pDevExt, pSession);
1363 return cRefs;
1364}
1365
1366
1367/**
1368 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1369 *
1370 * @returns IPRT status code, see SUPR0ObjAddRef.
1371 * @param hHandleTable The handle table handle. Ignored.
1372 * @param pvObj The object pointer.
1373 * @param pvCtx Context, the handle type. Ignored.
1374 * @param pvUser Session pointer.
1375 */
1376static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1377{
1378 NOREF(pvCtx);
1379 NOREF(hHandleTable);
1380 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1381}
1382
1383
1384/**
1385 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1386 *
1387 * @param hHandleTable The handle table handle. Ignored.
1388 * @param h The handle value. Ignored.
1389 * @param pvObj The object pointer.
1390 * @param pvCtx Context, the handle type. Ignored.
1391 * @param pvUser Session pointer.
1392 */
1393static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1394{
1395 NOREF(pvCtx);
1396 NOREF(h);
1397 NOREF(hHandleTable);
1398 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1399}
1400
1401
1402/**
1403 * Fast path I/O Control worker.
1404 *
1405 * @returns VBox status code that should be passed down to ring-3 unchanged.
1406 * @param uIOCtl Function number.
1407 * @param idCpu VMCPU id.
1408 * @param pDevExt Device extention.
1409 * @param pSession Session data.
1410 */
1411int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1412{
1413 /*
1414 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1415 */
1416 if (RT_LIKELY( RT_VALID_PTR(pSession)
1417 && pSession->pVM
1418 && pDevExt->pfnVMMR0EntryFast))
1419 {
1420 switch (uIOCtl)
1421 {
1422 case SUP_IOCTL_FAST_DO_RAW_RUN:
1423 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1424 break;
1425 case SUP_IOCTL_FAST_DO_HM_RUN:
1426 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1427 break;
1428 case SUP_IOCTL_FAST_DO_NOP:
1429 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1430 break;
1431 default:
1432 return VERR_INTERNAL_ERROR;
1433 }
1434 return VINF_SUCCESS;
1435 }
1436 return VERR_INTERNAL_ERROR;
1437}
1438
1439
1440/**
1441 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
1442 * We would use strpbrk here if this function would be contained in the RedHat kABI white
1443 * list, see http://www.kerneldrivers.org/RHEL5.
1444 *
1445 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
1446 * @param pszStr String to check
1447 * @param pszChars Character set
1448 */
1449static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
1450{
1451 int chCur;
1452 while ((chCur = *pszStr++) != '\0')
1453 {
1454 int ch;
1455 const char *psz = pszChars;
1456 while ((ch = *psz++) != '\0')
1457 if (ch == chCur)
1458 return 1;
1459
1460 }
1461 return 0;
1462}
1463
1464
1465
1466/**
1467 * I/O Control inner worker (tracing reasons).
1468 *
1469 * @returns IPRT status code.
1470 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1471 *
1472 * @param uIOCtl Function number.
1473 * @param pDevExt Device extention.
1474 * @param pSession Session data.
1475 * @param pReqHdr The request header.
1476 */
1477static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1478{
1479 /*
1480 * Validation macros
1481 */
1482#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1483 do { \
1484 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1485 { \
1486 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1487 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1488 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1489 } \
1490 } while (0)
1491
1492#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1493
1494#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1495 do { \
1496 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1497 { \
1498 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1499 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1500 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1501 } \
1502 } while (0)
1503
1504#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1505 do { \
1506 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1507 { \
1508 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1509 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1510 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1511 } \
1512 } while (0)
1513
1514#define REQ_CHECK_EXPR(Name, expr) \
1515 do { \
1516 if (RT_UNLIKELY(!(expr))) \
1517 { \
1518 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1519 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1520 } \
1521 } while (0)
1522
1523#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1524 do { \
1525 if (RT_UNLIKELY(!(expr))) \
1526 { \
1527 OSDBGPRINT( fmt ); \
1528 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1529 } \
1530 } while (0)
1531
1532 /*
1533 * The switch.
1534 */
1535 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1536 {
1537 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1538 {
1539 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1540 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1541 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1542 {
1543 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1544 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1545 return 0;
1546 }
1547
1548#if 0
1549 /*
1550 * Call out to the OS specific code and let it do permission checks on the
1551 * client process.
1552 */
1553 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1554 {
1555 pReq->u.Out.u32Cookie = 0xffffffff;
1556 pReq->u.Out.u32SessionCookie = 0xffffffff;
1557 pReq->u.Out.u32SessionVersion = 0xffffffff;
1558 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1559 pReq->u.Out.pSession = NULL;
1560 pReq->u.Out.cFunctions = 0;
1561 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1562 return 0;
1563 }
1564#endif
1565
1566 /*
1567 * Match the version.
1568 * The current logic is very simple, match the major interface version.
1569 */
1570 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1571 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1572 {
1573 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1574 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1575 pReq->u.Out.u32Cookie = 0xffffffff;
1576 pReq->u.Out.u32SessionCookie = 0xffffffff;
1577 pReq->u.Out.u32SessionVersion = 0xffffffff;
1578 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1579 pReq->u.Out.pSession = NULL;
1580 pReq->u.Out.cFunctions = 0;
1581 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1582 return 0;
1583 }
1584
1585 /*
1586 * Fill in return data and be gone.
1587 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1588 * u32SessionVersion <= u32ReqVersion!
1589 */
1590 /** @todo Somehow validate the client and negotiate a secure cookie... */
1591 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1592 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1593 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1594 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1595 pReq->u.Out.pSession = pSession;
1596 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1597 pReq->Hdr.rc = VINF_SUCCESS;
1598 return 0;
1599 }
1600
1601 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1602 {
1603 /* validate */
1604 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1605 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1606
1607 /* execute */
1608 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1609 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1610 pReq->Hdr.rc = VINF_SUCCESS;
1611 return 0;
1612 }
1613
1614 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1615 {
1616 /* validate */
1617 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1618 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1619 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1620 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1621 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1622
1623 /* execute */
1624 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1625 if (RT_FAILURE(pReq->Hdr.rc))
1626 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1627 return 0;
1628 }
1629
1630 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1631 {
1632 /* validate */
1633 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1634 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1635
1636 /* execute */
1637 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1638 return 0;
1639 }
1640
1641 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1642 {
1643 /* validate */
1644 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1645 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1646
1647 /* execute */
1648 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1649 if (RT_FAILURE(pReq->Hdr.rc))
1650 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1651 return 0;
1652 }
1653
1654 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1655 {
1656 /* validate */
1657 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1658 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1659
1660 /* execute */
1661 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1662 return 0;
1663 }
1664
1665 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1666 {
1667 /* validate */
1668 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1669 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1670 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1671 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1672 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1673 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1674 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1675 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1676 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1677 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1678 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1679
1680 /* execute */
1681 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1682 return 0;
1683 }
1684
1685 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1686 {
1687 /* validate */
1688 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1689 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1690 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1691 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1692 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1693 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1694 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1695 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1696 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1697 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1698 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1699 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1700 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1701 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1702 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1703
1704 if (pReq->u.In.cSymbols)
1705 {
1706 uint32_t i;
1707 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1708 for (i = 0; i < pReq->u.In.cSymbols; i++)
1709 {
1710 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1711 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1712 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1713 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1714 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1715 pReq->u.In.cbStrTab - paSyms[i].offName),
1716 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1717 }
1718 }
1719
1720 /* execute */
1721 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1722 return 0;
1723 }
1724
1725 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1726 {
1727 /* validate */
1728 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1729 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1730
1731 /* execute */
1732 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1733 return 0;
1734 }
1735
1736 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1737 {
1738 /* validate */
1739 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1740 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1741 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1742
1743 /* execute */
1744 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1745 return 0;
1746 }
1747
1748 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1749 {
1750 /* validate */
1751 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1752 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1753 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1754
1755 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1756 {
1757 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1758
1759 /* execute */
1760 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1761 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1762 else
1763 pReq->Hdr.rc = VERR_WRONG_ORDER;
1764 }
1765 else
1766 {
1767 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1768 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1769 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1770 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1771 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1772
1773 /* execute */
1774 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1775 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1776 else
1777 pReq->Hdr.rc = VERR_WRONG_ORDER;
1778 }
1779
1780 if ( RT_FAILURE(pReq->Hdr.rc)
1781 && pReq->Hdr.rc != VERR_INTERRUPTED
1782 && pReq->Hdr.rc != VERR_TIMEOUT)
1783 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1784 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1785 else
1786 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1787 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1788 return 0;
1789 }
1790
1791 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1792 {
1793 /* validate */
1794 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1795 PSUPVMMR0REQHDR pVMMReq;
1796 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1797 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1798
1799 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1800 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1801 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1802 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1803 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1804
1805 /* execute */
1806 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1807 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1808 else
1809 pReq->Hdr.rc = VERR_WRONG_ORDER;
1810
1811 if ( RT_FAILURE(pReq->Hdr.rc)
1812 && pReq->Hdr.rc != VERR_INTERRUPTED
1813 && pReq->Hdr.rc != VERR_TIMEOUT)
1814 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1815 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1816 else
1817 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1818 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1819 return 0;
1820 }
1821
1822 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1823 {
1824 /* validate */
1825 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1826 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1827
1828 /* execute */
1829 pReq->Hdr.rc = VINF_SUCCESS;
1830 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1831 return 0;
1832 }
1833
1834 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1835 {
1836 /* validate */
1837 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1838 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1839 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1840
1841 /* execute */
1842 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1843 if (RT_FAILURE(pReq->Hdr.rc))
1844 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1845 return 0;
1846 }
1847
1848 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1849 {
1850 /* validate */
1851 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1852 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1853
1854 /* execute */
1855 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1856 return 0;
1857 }
1858
1859 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1860 {
1861 /* validate */
1862 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1863 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1864
1865 /* execute */
1866 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1867 if (RT_SUCCESS(pReq->Hdr.rc))
1868 pReq->u.Out.pGipR0 = pDevExt->pGip;
1869 return 0;
1870 }
1871
1872 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1873 {
1874 /* validate */
1875 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1876 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1877
1878 /* execute */
1879 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1880 return 0;
1881 }
1882
1883 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1884 {
1885 /* validate */
1886 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1887 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1888 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1889 || ( VALID_PTR(pReq->u.In.pVMR0)
1890 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1891 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1892 /* execute */
1893 pSession->pVM = pReq->u.In.pVMR0;
1894 pReq->Hdr.rc = VINF_SUCCESS;
1895 return 0;
1896 }
1897
1898 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1899 {
1900 /* validate */
1901 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1902 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1903 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1904 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1905 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1906 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1907 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1908 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1909 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1910
1911 /* execute */
1912 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1913 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1914 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1915 &pReq->u.Out.aPages[0]);
1916 if (RT_FAILURE(pReq->Hdr.rc))
1917 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1918 return 0;
1919 }
1920
1921 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1922 {
1923 /* validate */
1924 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1925 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1926 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1927 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1928 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1929 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1930
1931 /* execute */
1932 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1933 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1934 if (RT_FAILURE(pReq->Hdr.rc))
1935 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1936 return 0;
1937 }
1938
1939 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1940 {
1941 /* validate */
1942 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1943 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1944 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1945 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1946 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1947 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1948 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1949
1950 /* execute */
1951 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1952 return 0;
1953 }
1954
1955 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1956 {
1957 /* validate */
1958 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1959 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1960
1961 /* execute */
1962 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1963 return 0;
1964 }
1965
1966 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1967 {
1968 /* validate */
1969 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1970 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1971 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1972
1973 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1974 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1975 else
1976 {
1977 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1978 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1979 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1980 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1981 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1982 }
1983 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1984
1985 /* execute */
1986 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1987 return 0;
1988 }
1989
1990 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1991 {
1992 /* validate */
1993 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1994 size_t cbStrTab;
1995 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1996 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1997 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1998 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1999 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
2000 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
2001 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
2002 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
2003 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2004 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2005 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2006
2007 /* execute */
2008 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2009 return 0;
2010 }
2011
2012 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2013 {
2014 /* validate */
2015 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2016 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2017 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2018
2019 /* execute */
2020 switch (pReq->u.In.uType)
2021 {
2022 case SUP_SEM_TYPE_EVENT:
2023 {
2024 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2025 switch (pReq->u.In.uOp)
2026 {
2027 case SUPSEMOP2_WAIT_MS_REL:
2028 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2029 break;
2030 case SUPSEMOP2_WAIT_NS_ABS:
2031 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2032 break;
2033 case SUPSEMOP2_WAIT_NS_REL:
2034 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2035 break;
2036 case SUPSEMOP2_SIGNAL:
2037 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2038 break;
2039 case SUPSEMOP2_CLOSE:
2040 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2041 break;
2042 case SUPSEMOP2_RESET:
2043 default:
2044 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2045 break;
2046 }
2047 break;
2048 }
2049
2050 case SUP_SEM_TYPE_EVENT_MULTI:
2051 {
2052 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2053 switch (pReq->u.In.uOp)
2054 {
2055 case SUPSEMOP2_WAIT_MS_REL:
2056 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2057 break;
2058 case SUPSEMOP2_WAIT_NS_ABS:
2059 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2060 break;
2061 case SUPSEMOP2_WAIT_NS_REL:
2062 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2063 break;
2064 case SUPSEMOP2_SIGNAL:
2065 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2066 break;
2067 case SUPSEMOP2_CLOSE:
2068 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2069 break;
2070 case SUPSEMOP2_RESET:
2071 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2072 break;
2073 default:
2074 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2075 break;
2076 }
2077 break;
2078 }
2079
2080 default:
2081 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2082 break;
2083 }
2084 return 0;
2085 }
2086
2087 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2088 {
2089 /* validate */
2090 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2091 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2092 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2093
2094 /* execute */
2095 switch (pReq->u.In.uType)
2096 {
2097 case SUP_SEM_TYPE_EVENT:
2098 {
2099 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2100 switch (pReq->u.In.uOp)
2101 {
2102 case SUPSEMOP3_CREATE:
2103 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2104 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2105 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2106 break;
2107 case SUPSEMOP3_GET_RESOLUTION:
2108 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2109 pReq->Hdr.rc = VINF_SUCCESS;
2110 pReq->Hdr.cbOut = sizeof(*pReq);
2111 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2112 break;
2113 default:
2114 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2115 break;
2116 }
2117 break;
2118 }
2119
2120 case SUP_SEM_TYPE_EVENT_MULTI:
2121 {
2122 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2123 switch (pReq->u.In.uOp)
2124 {
2125 case SUPSEMOP3_CREATE:
2126 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2127 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2128 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2129 break;
2130 case SUPSEMOP3_GET_RESOLUTION:
2131 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2132 pReq->Hdr.rc = VINF_SUCCESS;
2133 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2134 break;
2135 default:
2136 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2137 break;
2138 }
2139 break;
2140 }
2141
2142 default:
2143 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2144 break;
2145 }
2146 return 0;
2147 }
2148
2149 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2150 {
2151 /* validate */
2152 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2153 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2154
2155 /* execute */
2156 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2157 if (RT_FAILURE(pReq->Hdr.rc))
2158 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2159 return 0;
2160 }
2161
2162 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2163 {
2164 /* validate */
2165 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2166 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2167
2168 /* execute */
2169 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2170 return 0;
2171 }
2172
2173 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2174 {
2175 /* validate */
2176 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2177
2178 /* execute */
2179 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2180 return 0;
2181 }
2182
2183 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2184 {
2185 /* validate */
2186 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2187 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2188
2189 /* execute */
2190 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2191 return 0;
2192 }
2193
2194 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2195 {
2196 /* validate */
2197 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2198 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2199 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2200 return VERR_INVALID_PARAMETER;
2201
2202 /* execute */
2203 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2204 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2205 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2206 pReq->u.In.szName, pReq->u.In.fFlags);
2207 return 0;
2208 }
2209
2210 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2211 {
2212 /* validate */
2213 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2214 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2215
2216 /* execute */
2217 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2218 return 0;
2219 }
2220
2221 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2222 {
2223 /* validate */
2224 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2225 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2226
2227 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2228 pReqHdr->rc = VINF_SUCCESS;
2229 return 0;
2230 }
2231
2232 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2233 {
2234 /* validate */
2235 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2236 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2237 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2238 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2239
2240 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2241 return 0;
2242 }
2243
2244 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2245 {
2246 /* validate */
2247 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2248
2249 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2250 return 0;
2251 }
2252
2253 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2254 {
2255 /* validate */
2256 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2257 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2258
2259 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2260 return 0;
2261 }
2262
2263 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2264 {
2265 /* validate */
2266 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2267 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2268
2269 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2270 return 0;
2271 }
2272
2273 default:
2274 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2275 break;
2276 }
2277 return VERR_GENERAL_FAILURE;
2278}
2279
2280
2281/**
2282 * I/O Control inner worker for the restricted operations.
2283 *
2284 * @returns IPRT status code.
2285 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2286 *
2287 * @param uIOCtl Function number.
2288 * @param pDevExt Device extention.
2289 * @param pSession Session data.
2290 * @param pReqHdr The request header.
2291 */
2292static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2293{
2294 /*
2295 * The switch.
2296 */
2297 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2298 {
2299 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2300 {
2301 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2302 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2303 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2304 {
2305 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2306 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2307 return 0;
2308 }
2309
2310 /*
2311 * Match the version.
2312 * The current logic is very simple, match the major interface version.
2313 */
2314 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2315 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2316 {
2317 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2318 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2319 pReq->u.Out.u32Cookie = 0xffffffff;
2320 pReq->u.Out.u32SessionCookie = 0xffffffff;
2321 pReq->u.Out.u32SessionVersion = 0xffffffff;
2322 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2323 pReq->u.Out.pSession = NULL;
2324 pReq->u.Out.cFunctions = 0;
2325 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2326 return 0;
2327 }
2328
2329 /*
2330 * Fill in return data and be gone.
2331 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2332 * u32SessionVersion <= u32ReqVersion!
2333 */
2334 /** @todo Somehow validate the client and negotiate a secure cookie... */
2335 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2336 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2337 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2338 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2339 pReq->u.Out.pSession = pSession;
2340 pReq->u.Out.cFunctions = 0;
2341 pReq->Hdr.rc = VINF_SUCCESS;
2342 return 0;
2343 }
2344
2345 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2346 {
2347 /* validate */
2348 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2349 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2350
2351 /* execute */
2352 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2353 if (RT_FAILURE(pReq->Hdr.rc))
2354 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2355 return 0;
2356 }
2357
2358 default:
2359 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2360 break;
2361 }
2362 return VERR_GENERAL_FAILURE;
2363}
2364
2365
2366/**
2367 * I/O Control worker.
2368 *
2369 * @returns IPRT status code.
2370 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2371 *
2372 * @param uIOCtl Function number.
2373 * @param pDevExt Device extention.
2374 * @param pSession Session data.
2375 * @param pReqHdr The request header.
2376 */
2377int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2378{
2379 int rc;
2380 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2381
2382 /*
2383 * Validate the request.
2384 */
2385 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2386 {
2387 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2388 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2389 return VERR_INVALID_PARAMETER;
2390 }
2391 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2392 || pReqHdr->cbIn < sizeof(*pReqHdr)
2393 || pReqHdr->cbIn > cbReq
2394 || pReqHdr->cbOut < sizeof(*pReqHdr)
2395 || pReqHdr->cbOut > cbReq))
2396 {
2397 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2398 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2399 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2400 return VERR_INVALID_PARAMETER;
2401 }
2402 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2403 {
2404 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2405 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2406 return VERR_INVALID_PARAMETER;
2407 }
2408 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2409 {
2410 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2411 {
2412 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2413 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2414 return VERR_INVALID_PARAMETER;
2415 }
2416 }
2417 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2418 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2419 {
2420 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2421 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2422 return VERR_INVALID_PARAMETER;
2423 }
2424
2425 /*
2426 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2427 */
2428 if (pSession->fUnrestricted)
2429 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2430 else
2431 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2432
2433 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2434 return rc;
2435}
2436
2437
2438/**
2439 * Inter-Driver Communication (IDC) worker.
2440 *
2441 * @returns VBox status code.
2442 * @retval VINF_SUCCESS on success.
2443 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2444 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2445 *
2446 * @param uReq The request (function) code.
2447 * @param pDevExt Device extention.
2448 * @param pSession Session data.
2449 * @param pReqHdr The request header.
2450 */
2451int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2452{
2453 /*
2454 * The OS specific code has already validated the pSession
2455 * pointer, and the request size being greater or equal to
2456 * size of the header.
2457 *
2458 * So, just check that pSession is a kernel context session.
2459 */
2460 if (RT_UNLIKELY( pSession
2461 && pSession->R0Process != NIL_RTR0PROCESS))
2462 return VERR_INVALID_PARAMETER;
2463
2464/*
2465 * Validation macro.
2466 */
2467#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2468 do { \
2469 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2470 { \
2471 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2472 (long)pReqHdr->cb, (long)(cbExpect))); \
2473 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2474 } \
2475 } while (0)
2476
2477 switch (uReq)
2478 {
2479 case SUPDRV_IDC_REQ_CONNECT:
2480 {
2481 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2482 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2483
2484 /*
2485 * Validate the cookie and other input.
2486 */
2487 if (pReq->Hdr.pSession != NULL)
2488 {
2489 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2490 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2491 }
2492 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2493 {
2494 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2495 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2496 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2497 }
2498 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2499 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2500 {
2501 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2502 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2503 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2504 }
2505 if (pSession != NULL)
2506 {
2507 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2508 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2509 }
2510
2511 /*
2512 * Match the version.
2513 * The current logic is very simple, match the major interface version.
2514 */
2515 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2516 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2517 {
2518 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2519 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2520 pReq->u.Out.pSession = NULL;
2521 pReq->u.Out.uSessionVersion = 0xffffffff;
2522 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2523 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2524 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2525 return VINF_SUCCESS;
2526 }
2527
2528 pReq->u.Out.pSession = NULL;
2529 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2530 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2531 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2532
2533 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2534 if (RT_FAILURE(pReq->Hdr.rc))
2535 {
2536 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2537 return VINF_SUCCESS;
2538 }
2539
2540 pReq->u.Out.pSession = pSession;
2541 pReq->Hdr.pSession = pSession;
2542
2543 return VINF_SUCCESS;
2544 }
2545
2546 case SUPDRV_IDC_REQ_DISCONNECT:
2547 {
2548 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2549
2550 supdrvSessionRelease(pSession);
2551 return pReqHdr->rc = VINF_SUCCESS;
2552 }
2553
2554 case SUPDRV_IDC_REQ_GET_SYMBOL:
2555 {
2556 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2557 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2558
2559 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2560 return VINF_SUCCESS;
2561 }
2562
2563 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2564 {
2565 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2566 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2567
2568 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2569 return VINF_SUCCESS;
2570 }
2571
2572 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2573 {
2574 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2575 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2576
2577 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2578 return VINF_SUCCESS;
2579 }
2580
2581 default:
2582 Log(("Unknown IDC %#lx\n", (long)uReq));
2583 break;
2584 }
2585
2586#undef REQ_CHECK_IDC_SIZE
2587 return VERR_NOT_SUPPORTED;
2588}
2589
2590
2591/**
2592 * Register a object for reference counting.
2593 * The object is registered with one reference in the specified session.
2594 *
2595 * @returns Unique identifier on success (pointer).
2596 * All future reference must use this identifier.
2597 * @returns NULL on failure.
2598 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2599 * @param pvUser1 The first user argument.
2600 * @param pvUser2 The second user argument.
2601 */
2602SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2603{
2604 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2605 PSUPDRVOBJ pObj;
2606 PSUPDRVUSAGE pUsage;
2607
2608 /*
2609 * Validate the input.
2610 */
2611 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2612 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2613 AssertPtrReturn(pfnDestructor, NULL);
2614
2615 /*
2616 * Allocate and initialize the object.
2617 */
2618 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2619 if (!pObj)
2620 return NULL;
2621 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2622 pObj->enmType = enmType;
2623 pObj->pNext = NULL;
2624 pObj->cUsage = 1;
2625 pObj->pfnDestructor = pfnDestructor;
2626 pObj->pvUser1 = pvUser1;
2627 pObj->pvUser2 = pvUser2;
2628 pObj->CreatorUid = pSession->Uid;
2629 pObj->CreatorGid = pSession->Gid;
2630 pObj->CreatorProcess= pSession->Process;
2631 supdrvOSObjInitCreator(pObj, pSession);
2632
2633 /*
2634 * Allocate the usage record.
2635 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2636 */
2637 RTSpinlockAcquire(pDevExt->Spinlock);
2638
2639 pUsage = pDevExt->pUsageFree;
2640 if (pUsage)
2641 pDevExt->pUsageFree = pUsage->pNext;
2642 else
2643 {
2644 RTSpinlockRelease(pDevExt->Spinlock);
2645 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2646 if (!pUsage)
2647 {
2648 RTMemFree(pObj);
2649 return NULL;
2650 }
2651 RTSpinlockAcquire(pDevExt->Spinlock);
2652 }
2653
2654 /*
2655 * Insert the object and create the session usage record.
2656 */
2657 /* The object. */
2658 pObj->pNext = pDevExt->pObjs;
2659 pDevExt->pObjs = pObj;
2660
2661 /* The session record. */
2662 pUsage->cUsage = 1;
2663 pUsage->pObj = pObj;
2664 pUsage->pNext = pSession->pUsage;
2665 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2666 pSession->pUsage = pUsage;
2667
2668 RTSpinlockRelease(pDevExt->Spinlock);
2669
2670 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2671 return pObj;
2672}
2673
2674
2675/**
2676 * Increment the reference counter for the object associating the reference
2677 * with the specified session.
2678 *
2679 * @returns IPRT status code.
2680 * @param pvObj The identifier returned by SUPR0ObjRegister().
2681 * @param pSession The session which is referencing the object.
2682 *
2683 * @remarks The caller should not own any spinlocks and must carefully protect
2684 * itself against potential race with the destructor so freed memory
2685 * isn't accessed here.
2686 */
2687SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2688{
2689 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2690}
2691
2692
2693/**
2694 * Increment the reference counter for the object associating the reference
2695 * with the specified session.
2696 *
2697 * @returns IPRT status code.
2698 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2699 * couldn't be allocated. (If you see this you're not doing the right
2700 * thing and it won't ever work reliably.)
2701 *
2702 * @param pvObj The identifier returned by SUPR0ObjRegister().
2703 * @param pSession The session which is referencing the object.
2704 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2705 * first reference to an object in a session with this
2706 * argument set.
2707 *
2708 * @remarks The caller should not own any spinlocks and must carefully protect
2709 * itself against potential race with the destructor so freed memory
2710 * isn't accessed here.
2711 */
2712SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2713{
2714 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2715 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2716 int rc = VINF_SUCCESS;
2717 PSUPDRVUSAGE pUsagePre;
2718 PSUPDRVUSAGE pUsage;
2719
2720 /*
2721 * Validate the input.
2722 * Be ready for the destruction race (someone might be stuck in the
2723 * destructor waiting a lock we own).
2724 */
2725 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2726 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2727 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2728 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2729 VERR_INVALID_PARAMETER);
2730
2731 RTSpinlockAcquire(pDevExt->Spinlock);
2732
2733 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2734 {
2735 RTSpinlockRelease(pDevExt->Spinlock);
2736
2737 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2738 return VERR_WRONG_ORDER;
2739 }
2740
2741 /*
2742 * Preallocate the usage record if we can.
2743 */
2744 pUsagePre = pDevExt->pUsageFree;
2745 if (pUsagePre)
2746 pDevExt->pUsageFree = pUsagePre->pNext;
2747 else if (!fNoBlocking)
2748 {
2749 RTSpinlockRelease(pDevExt->Spinlock);
2750 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2751 if (!pUsagePre)
2752 return VERR_NO_MEMORY;
2753
2754 RTSpinlockAcquire(pDevExt->Spinlock);
2755 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2756 {
2757 RTSpinlockRelease(pDevExt->Spinlock);
2758
2759 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2760 return VERR_WRONG_ORDER;
2761 }
2762 }
2763
2764 /*
2765 * Reference the object.
2766 */
2767 pObj->cUsage++;
2768
2769 /*
2770 * Look for the session record.
2771 */
2772 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2773 {
2774 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2775 if (pUsage->pObj == pObj)
2776 break;
2777 }
2778 if (pUsage)
2779 pUsage->cUsage++;
2780 else if (pUsagePre)
2781 {
2782 /* create a new session record. */
2783 pUsagePre->cUsage = 1;
2784 pUsagePre->pObj = pObj;
2785 pUsagePre->pNext = pSession->pUsage;
2786 pSession->pUsage = pUsagePre;
2787 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2788
2789 pUsagePre = NULL;
2790 }
2791 else
2792 {
2793 pObj->cUsage--;
2794 rc = VERR_TRY_AGAIN;
2795 }
2796
2797 /*
2798 * Put any unused usage record into the free list..
2799 */
2800 if (pUsagePre)
2801 {
2802 pUsagePre->pNext = pDevExt->pUsageFree;
2803 pDevExt->pUsageFree = pUsagePre;
2804 }
2805
2806 RTSpinlockRelease(pDevExt->Spinlock);
2807
2808 return rc;
2809}
2810
2811
2812/**
2813 * Decrement / destroy a reference counter record for an object.
2814 *
2815 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2816 *
2817 * @returns IPRT status code.
2818 * @retval VINF_SUCCESS if not destroyed.
2819 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2820 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2821 * string builds.
2822 *
2823 * @param pvObj The identifier returned by SUPR0ObjRegister().
2824 * @param pSession The session which is referencing the object.
2825 */
2826SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2827{
2828 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2829 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2830 int rc = VERR_INVALID_PARAMETER;
2831 PSUPDRVUSAGE pUsage;
2832 PSUPDRVUSAGE pUsagePrev;
2833
2834 /*
2835 * Validate the input.
2836 */
2837 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2838 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2839 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2840 VERR_INVALID_PARAMETER);
2841
2842 /*
2843 * Acquire the spinlock and look for the usage record.
2844 */
2845 RTSpinlockAcquire(pDevExt->Spinlock);
2846
2847 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2848 pUsage;
2849 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2850 {
2851 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2852 if (pUsage->pObj == pObj)
2853 {
2854 rc = VINF_SUCCESS;
2855 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2856 if (pUsage->cUsage > 1)
2857 {
2858 pObj->cUsage--;
2859 pUsage->cUsage--;
2860 }
2861 else
2862 {
2863 /*
2864 * Free the session record.
2865 */
2866 if (pUsagePrev)
2867 pUsagePrev->pNext = pUsage->pNext;
2868 else
2869 pSession->pUsage = pUsage->pNext;
2870 pUsage->pNext = pDevExt->pUsageFree;
2871 pDevExt->pUsageFree = pUsage;
2872
2873 /* What about the object? */
2874 if (pObj->cUsage > 1)
2875 pObj->cUsage--;
2876 else
2877 {
2878 /*
2879 * Object is to be destroyed, unlink it.
2880 */
2881 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2882 rc = VINF_OBJECT_DESTROYED;
2883 if (pDevExt->pObjs == pObj)
2884 pDevExt->pObjs = pObj->pNext;
2885 else
2886 {
2887 PSUPDRVOBJ pObjPrev;
2888 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2889 if (pObjPrev->pNext == pObj)
2890 {
2891 pObjPrev->pNext = pObj->pNext;
2892 break;
2893 }
2894 Assert(pObjPrev);
2895 }
2896 }
2897 }
2898 break;
2899 }
2900 }
2901
2902 RTSpinlockRelease(pDevExt->Spinlock);
2903
2904 /*
2905 * Call the destructor and free the object if required.
2906 */
2907 if (rc == VINF_OBJECT_DESTROYED)
2908 {
2909 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2910 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2911 if (pObj->pfnDestructor)
2912 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2913 RTMemFree(pObj);
2914 }
2915
2916 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2917 return rc;
2918}
2919
2920
2921/**
2922 * Verifies that the current process can access the specified object.
2923 *
2924 * @returns The following IPRT status code:
2925 * @retval VINF_SUCCESS if access was granted.
2926 * @retval VERR_PERMISSION_DENIED if denied access.
2927 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2928 *
2929 * @param pvObj The identifier returned by SUPR0ObjRegister().
2930 * @param pSession The session which wishes to access the object.
2931 * @param pszObjName Object string name. This is optional and depends on the object type.
2932 *
2933 * @remark The caller is responsible for making sure the object isn't removed while
2934 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2935 */
2936SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2937{
2938 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2939 int rc;
2940
2941 /*
2942 * Validate the input.
2943 */
2944 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2945 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2946 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2947 VERR_INVALID_PARAMETER);
2948
2949 /*
2950 * Check access. (returns true if a decision has been made.)
2951 */
2952 rc = VERR_INTERNAL_ERROR;
2953 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2954 return rc;
2955
2956 /*
2957 * Default policy is to allow the user to access his own
2958 * stuff but nothing else.
2959 */
2960 if (pObj->CreatorUid == pSession->Uid)
2961 return VINF_SUCCESS;
2962 return VERR_PERMISSION_DENIED;
2963}
2964
2965
2966/**
2967 * Lock pages.
2968 *
2969 * @returns IPRT status code.
2970 * @param pSession Session to which the locked memory should be associated.
2971 * @param pvR3 Start of the memory range to lock.
2972 * This must be page aligned.
2973 * @param cPages Number of pages to lock.
2974 * @param paPages Where to put the physical addresses of locked memory.
2975 */
2976SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2977{
2978 int rc;
2979 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2980 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2981 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2982
2983 /*
2984 * Verify input.
2985 */
2986 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2987 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2988 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2989 || !pvR3)
2990 {
2991 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2992 return VERR_INVALID_PARAMETER;
2993 }
2994
2995 /*
2996 * Let IPRT do the job.
2997 */
2998 Mem.eType = MEMREF_TYPE_LOCKED;
2999 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
3000 if (RT_SUCCESS(rc))
3001 {
3002 uint32_t iPage = cPages;
3003 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3004 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3005
3006 while (iPage-- > 0)
3007 {
3008 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3009 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3010 {
3011 AssertMsgFailed(("iPage=%d\n", iPage));
3012 rc = VERR_INTERNAL_ERROR;
3013 break;
3014 }
3015 }
3016 if (RT_SUCCESS(rc))
3017 rc = supdrvMemAdd(&Mem, pSession);
3018 if (RT_FAILURE(rc))
3019 {
3020 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3021 AssertRC(rc2);
3022 }
3023 }
3024
3025 return rc;
3026}
3027
3028
3029/**
3030 * Unlocks the memory pointed to by pv.
3031 *
3032 * @returns IPRT status code.
3033 * @param pSession Session to which the memory was locked.
3034 * @param pvR3 Memory to unlock.
3035 */
3036SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3037{
3038 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3039 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3040 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3041}
3042
3043
3044/**
3045 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3046 * backing.
3047 *
3048 * @returns IPRT status code.
3049 * @param pSession Session data.
3050 * @param cPages Number of pages to allocate.
3051 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3052 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3053 * @param pHCPhys Where to put the physical address of allocated memory.
3054 */
3055SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3056{
3057 int rc;
3058 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3059 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3060
3061 /*
3062 * Validate input.
3063 */
3064 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3065 if (!ppvR3 || !ppvR0 || !pHCPhys)
3066 {
3067 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3068 pSession, ppvR0, ppvR3, pHCPhys));
3069 return VERR_INVALID_PARAMETER;
3070
3071 }
3072 if (cPages < 1 || cPages >= 256)
3073 {
3074 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3075 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3076 }
3077
3078 /*
3079 * Let IPRT do the job.
3080 */
3081 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3082 if (RT_SUCCESS(rc))
3083 {
3084 int rc2;
3085 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3086 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3087 if (RT_SUCCESS(rc))
3088 {
3089 Mem.eType = MEMREF_TYPE_CONT;
3090 rc = supdrvMemAdd(&Mem, pSession);
3091 if (!rc)
3092 {
3093 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3094 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3095 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3096 return 0;
3097 }
3098
3099 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3100 AssertRC(rc2);
3101 }
3102 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3103 AssertRC(rc2);
3104 }
3105
3106 return rc;
3107}
3108
3109
3110/**
3111 * Frees memory allocated using SUPR0ContAlloc().
3112 *
3113 * @returns IPRT status code.
3114 * @param pSession The session to which the memory was allocated.
3115 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3116 */
3117SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3118{
3119 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3120 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3121 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3122}
3123
3124
3125/**
3126 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3127 *
3128 * The memory isn't zeroed.
3129 *
3130 * @returns IPRT status code.
3131 * @param pSession Session data.
3132 * @param cPages Number of pages to allocate.
3133 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3134 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3135 * @param paPages Where to put the physical addresses of allocated memory.
3136 */
3137SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3138{
3139 unsigned iPage;
3140 int rc;
3141 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3142 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3143
3144 /*
3145 * Validate input.
3146 */
3147 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3148 if (!ppvR3 || !ppvR0 || !paPages)
3149 {
3150 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3151 pSession, ppvR3, ppvR0, paPages));
3152 return VERR_INVALID_PARAMETER;
3153
3154 }
3155 if (cPages < 1 || cPages >= 256)
3156 {
3157 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3158 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3159 }
3160
3161 /*
3162 * Let IPRT do the work.
3163 */
3164 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3165 if (RT_SUCCESS(rc))
3166 {
3167 int rc2;
3168 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3169 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3170 if (RT_SUCCESS(rc))
3171 {
3172 Mem.eType = MEMREF_TYPE_LOW;
3173 rc = supdrvMemAdd(&Mem, pSession);
3174 if (!rc)
3175 {
3176 for (iPage = 0; iPage < cPages; iPage++)
3177 {
3178 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3179 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3180 }
3181 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3182 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3183 return 0;
3184 }
3185
3186 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3187 AssertRC(rc2);
3188 }
3189
3190 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3191 AssertRC(rc2);
3192 }
3193
3194 return rc;
3195}
3196
3197
3198/**
3199 * Frees memory allocated using SUPR0LowAlloc().
3200 *
3201 * @returns IPRT status code.
3202 * @param pSession The session to which the memory was allocated.
3203 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3204 */
3205SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3206{
3207 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3208 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3209 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3210}
3211
3212
3213
3214/**
3215 * Allocates a chunk of memory with both R0 and R3 mappings.
3216 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3217 *
3218 * @returns IPRT status code.
3219 * @param pSession The session to associated the allocation with.
3220 * @param cb Number of bytes to allocate.
3221 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3222 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3223 */
3224SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3225{
3226 int rc;
3227 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3228 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3229
3230 /*
3231 * Validate input.
3232 */
3233 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3234 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3235 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3236 if (cb < 1 || cb >= _4M)
3237 {
3238 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3239 return VERR_INVALID_PARAMETER;
3240 }
3241
3242 /*
3243 * Let IPRT do the work.
3244 */
3245 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3246 if (RT_SUCCESS(rc))
3247 {
3248 int rc2;
3249 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3250 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3251 if (RT_SUCCESS(rc))
3252 {
3253 Mem.eType = MEMREF_TYPE_MEM;
3254 rc = supdrvMemAdd(&Mem, pSession);
3255 if (!rc)
3256 {
3257 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3258 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3259 return VINF_SUCCESS;
3260 }
3261
3262 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3263 AssertRC(rc2);
3264 }
3265
3266 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3267 AssertRC(rc2);
3268 }
3269
3270 return rc;
3271}
3272
3273
3274/**
3275 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3276 *
3277 * @returns IPRT status code.
3278 * @param pSession The session to which the memory was allocated.
3279 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3280 * @param paPages Where to store the physical addresses.
3281 */
3282SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3283{
3284 PSUPDRVBUNDLE pBundle;
3285 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3286
3287 /*
3288 * Validate input.
3289 */
3290 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3291 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3292 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3293
3294 /*
3295 * Search for the address.
3296 */
3297 RTSpinlockAcquire(pSession->Spinlock);
3298 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3299 {
3300 if (pBundle->cUsed > 0)
3301 {
3302 unsigned i;
3303 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3304 {
3305 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3306 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3307 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3308 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3309 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3310 )
3311 )
3312 {
3313 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3314 size_t iPage;
3315 for (iPage = 0; iPage < cPages; iPage++)
3316 {
3317 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3318 paPages[iPage].uReserved = 0;
3319 }
3320 RTSpinlockRelease(pSession->Spinlock);
3321 return VINF_SUCCESS;
3322 }
3323 }
3324 }
3325 }
3326 RTSpinlockRelease(pSession->Spinlock);
3327 Log(("Failed to find %p!!!\n", (void *)uPtr));
3328 return VERR_INVALID_PARAMETER;
3329}
3330
3331
3332/**
3333 * Free memory allocated by SUPR0MemAlloc().
3334 *
3335 * @returns IPRT status code.
3336 * @param pSession The session owning the allocation.
3337 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3338 */
3339SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3340{
3341 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3342 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3343 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3344}
3345
3346
3347/**
3348 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3349 *
3350 * The memory is fixed and it's possible to query the physical addresses using
3351 * SUPR0MemGetPhys().
3352 *
3353 * @returns IPRT status code.
3354 * @param pSession The session to associated the allocation with.
3355 * @param cPages The number of pages to allocate.
3356 * @param fFlags Flags, reserved for the future. Must be zero.
3357 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3358 * NULL if no ring-3 mapping.
3359 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3360 * NULL if no ring-0 mapping.
3361 * @param paPages Where to store the addresses of the pages. Optional.
3362 */
3363SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3364{
3365 int rc;
3366 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3367 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3368
3369 /*
3370 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3371 */
3372 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3373 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3374 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3375 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3376 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3377 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3378 {
3379 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3380 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3381 }
3382
3383 /*
3384 * Let IPRT do the work.
3385 */
3386 if (ppvR0)
3387 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3388 else
3389 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3390 if (RT_SUCCESS(rc))
3391 {
3392 int rc2;
3393 if (ppvR3)
3394 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3395 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3396 else
3397 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3398 if (RT_SUCCESS(rc))
3399 {
3400 Mem.eType = MEMREF_TYPE_PAGE;
3401 rc = supdrvMemAdd(&Mem, pSession);
3402 if (!rc)
3403 {
3404 if (ppvR3)
3405 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3406 if (ppvR0)
3407 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3408 if (paPages)
3409 {
3410 uint32_t iPage = cPages;
3411 while (iPage-- > 0)
3412 {
3413 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3414 Assert(paPages[iPage] != NIL_RTHCPHYS);
3415 }
3416 }
3417 return VINF_SUCCESS;
3418 }
3419
3420 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3421 AssertRC(rc2);
3422 }
3423
3424 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3425 AssertRC(rc2);
3426 }
3427 return rc;
3428}
3429
3430
3431/**
3432 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3433 * space.
3434 *
3435 * @returns IPRT status code.
3436 * @param pSession The session to associated the allocation with.
3437 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3438 * @param offSub Where to start mapping. Must be page aligned.
3439 * @param cbSub How much to map. Must be page aligned.
3440 * @param fFlags Flags, MBZ.
3441 * @param ppvR0 Where to return the address of the ring-0 mapping on
3442 * success.
3443 */
3444SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3445 uint32_t fFlags, PRTR0PTR ppvR0)
3446{
3447 int rc;
3448 PSUPDRVBUNDLE pBundle;
3449 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3450 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3451
3452 /*
3453 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3454 */
3455 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3456 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3457 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3458 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3459 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3460 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3461
3462 /*
3463 * Find the memory object.
3464 */
3465 RTSpinlockAcquire(pSession->Spinlock);
3466 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3467 {
3468 if (pBundle->cUsed > 0)
3469 {
3470 unsigned i;
3471 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3472 {
3473 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3474 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3475 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3476 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3477 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3478 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3479 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3480 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3481 {
3482 hMemObj = pBundle->aMem[i].MemObj;
3483 break;
3484 }
3485 }
3486 }
3487 }
3488 RTSpinlockRelease(pSession->Spinlock);
3489
3490 rc = VERR_INVALID_PARAMETER;
3491 if (hMemObj != NIL_RTR0MEMOBJ)
3492 {
3493 /*
3494 * Do some further input validations before calling IPRT.
3495 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3496 */
3497 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3498 if ( offSub < cbMemObj
3499 && cbSub <= cbMemObj
3500 && offSub + cbSub <= cbMemObj)
3501 {
3502 RTR0MEMOBJ hMapObj;
3503 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3504 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3505 if (RT_SUCCESS(rc))
3506 *ppvR0 = RTR0MemObjAddress(hMapObj);
3507 }
3508 else
3509 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3510
3511 }
3512 return rc;
3513}
3514
3515
3516/**
3517 * Changes the page level protection of one or more pages previously allocated
3518 * by SUPR0PageAllocEx.
3519 *
3520 * @returns IPRT status code.
 * @param   pSession    The session to associate the allocation with.
3522 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3523 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3524 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3525 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3526 * @param offSub Where to start changing. Must be page aligned.
3527 * @param cbSub How much to change. Must be page aligned.
3528 * @param fProt The new page level protection, see RTMEM_PROT_*.
3529 */
3530SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3531{
3532 int rc;
3533 PSUPDRVBUNDLE pBundle;
3534 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3535 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3536 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3537
3538 /*
3539 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3540 */
3541 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3542 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3543 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3544 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3545 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3546
3547 /*
3548 * Find the memory object.
3549 */
3550 RTSpinlockAcquire(pSession->Spinlock);
3551 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3552 {
3553 if (pBundle->cUsed > 0)
3554 {
3555 unsigned i;
3556 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3557 {
3558 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3559 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3560 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3561 || pvR3 == NIL_RTR3PTR)
3562 && ( pvR0 == NIL_RTR0PTR
3563 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3564 && ( pvR3 == NIL_RTR3PTR
3565 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3566 {
3567 if (pvR0 != NIL_RTR0PTR)
3568 hMemObjR0 = pBundle->aMem[i].MemObj;
3569 if (pvR3 != NIL_RTR3PTR)
3570 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3571 break;
3572 }
3573 }
3574 }
3575 }
3576 RTSpinlockRelease(pSession->Spinlock);
3577
3578 rc = VERR_INVALID_PARAMETER;
3579 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3580 || hMemObjR3 != NIL_RTR0MEMOBJ)
3581 {
3582 /*
3583 * Do some further input validations before calling IPRT.
3584 */
3585 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3586 if ( offSub < cbMemObj
3587 && cbSub <= cbMemObj
3588 && offSub + cbSub <= cbMemObj)
3589 {
3590 rc = VINF_SUCCESS;
3591 if (hMemObjR3 != NIL_RTR0PTR)
3592 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3593 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3594 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3595 }
3596 else
3597 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3598
3599 }
3600 return rc;
3601
3602}
3603
3604
3605/**
3606 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3607 *
3608 * @returns IPRT status code.
3609 * @param pSession The session owning the allocation.
3610 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3611 * SUPR0PageAllocEx().
3612 */
SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
{
    LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    /* The lookup/free work is shared with the other memory types; only
       MEMREF_TYPE_PAGE entries match this call. */
    return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
}
3619
3620
3621/**
3622 * Gets the paging mode of the current CPU.
3623 *
 * @returns Paging mode, SUPPAGINGMODE_INVALID on error.
3625 */
SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
{
    SUPPAGINGMODE enmMode;

    /* Paging (PG) and protected mode (PE) must both be on, or the mode is bogus. */
    RTR0UINTREG cr0 = ASMGetCR0();
    if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
        enmMode = SUPPAGINGMODE_INVALID;
    else
    {
        RTR0UINTREG cr4 = ASMGetCR4();
        uint32_t fNXEPlusLMA = 0;   /* bit 0 = EFER.NXE set, bit 1 = EFER.LMA set */
        if (cr4 & X86_CR4_PAE)
        {
            /* Only consult EFER when CPUID says NX or long mode exists. */
            uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
            if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
            {
                uint64_t efer = ASMRdMsr(MSR_K6_EFER);
                if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
                    fNXEPlusLMA |= RT_BIT(0);
                if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
                    fNXEPlusLMA |= RT_BIT(1);
            }
        }

        /* Fold CR4.PAE, CR4.PGE and the two EFER-derived bits into one lookup value. */
        switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
        {
            case 0:
                enmMode = SUPPAGINGMODE_32_BIT;
                break;

            case X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
                break;

            case X86_CR4_PAE:
                enmMode = SUPPAGINGMODE_PAE;
                break;

            case X86_CR4_PAE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_PAE_NX;
                break;

            case X86_CR4_PAE | X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_PAE_GLOBAL;
                break;

            case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
                /* NOTE(review): PAE+PGE with NXE maps to PAE_GLOBAL rather than
                   PAE_GLOBAL_NX, unlike the non-global PAE_NX case above —
                   confirm this asymmetry is intentional before changing it. */
                enmMode = SUPPAGINGMODE_PAE_GLOBAL;
                break;

            case RT_BIT(1) | X86_CR4_PAE:
                enmMode = SUPPAGINGMODE_AMD64;
                break;

            case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_AMD64_NX;
                break;

            case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
                enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
                break;

            case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
                enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
                break;

            default:
                AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
                enmMode = SUPPAGINGMODE_INVALID;
                break;
        }
    }
    return enmMode;
}
3700
3701
3702/**
 * Enables or disables hardware virtualization extensions using native OS APIs.
3704 *
3705 * @returns VBox status code.
3706 * @retval VINF_SUCCESS on success.
3707 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3708 *
3709 * @param fEnable Whether to enable or disable.
3710 */
3711SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3712{
3713#ifdef RT_OS_DARWIN
3714 return supdrvOSEnableVTx(fEnable);
3715#else
3716 return VERR_NOT_SUPPORTED;
3717#endif
3718}
3719
3720
3721/**
3722 * Suspends hardware virtualization extensions using the native OS API.
3723 *
3724 * This is called prior to entering raw-mode context.
3725 *
3726 * @returns @c true if suspended, @c false if not.
3727 */
3728SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3729{
3730#ifdef RT_OS_DARWIN
3731 return supdrvOSSuspendVTxOnCpu();
3732#else
3733 return false;
3734#endif
3735}
3736
3737
3738/**
3739 * Resumes hardware virtualization extensions using the native OS API.
3740 *
 * This is called after leaving raw-mode context.
3742 *
3743 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3744 */
3745SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3746{
3747#ifdef RT_OS_DARWIN
3748 supdrvOSResumeVTxOnCpu(fSuspended);
3749#else
3750 Assert(!fSuspended);
3751#endif
3752}
3753
3754
3755/**
3756 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3757 *
3758 * @returns VBox status code.
3759 * @retval VERR_VMX_NO_VMX
3760 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3761 * @retval VERR_VMX_MSR_VMXON_DISABLED
3762 * @retval VERR_VMX_MSR_LOCKING_FAILED
3763 * @retval VERR_SVM_NO_SVM
3764 * @retval VERR_SVM_DISABLED
3765 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3766 * (centaur) CPU.
3767 *
3768 * @param pSession The session handle.
3769 * @param pfCaps Where to store the capabilities.
3770 */
SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
{
    int  rc = VERR_UNSUPPORTED_CPU;
    bool fIsSmxModeAmbiguous = false;   /* Set when CR4.SMXE is on but the MSR can't tell us the SMX state. */
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    /*
     * Input validation.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);

    *pfCaps = 0;
    /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    RTThreadPreemptDisable(&PreemptState);
    if (ASMHasCpuId())
    {
        uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
        uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;

        ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
        ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);

        /*
         * Intel / VIA branch: VT-x capability is gated by CPUID and the
         * IA32_FEATURE_CONTROL MSR lock/enable bits.
         */
        if (   ASMIsValidStdRange(uMaxId)
            && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
                || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
           )
        {
            if (    (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /** @todo Unify code with hmR0InitIntelCpu(). */
                uint64_t   u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                bool const fMaybeSmxMode  = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
                bool       fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                bool       fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                bool       fVmxAllowed    = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);

                /* Check if the LOCK bit is set but excludes the required VMXON bit. */
                if (fMsrLocked)
                {
                    if (fVmxAllowed && fSmxVmxAllowed)
                        rc = VINF_SUCCESS;
                    else if (!fVmxAllowed && !fSmxVmxAllowed)
                        rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
                    else if (!fMaybeSmxMode)
                    {
                        if (fVmxAllowed)
                            rc = VINF_SUCCESS;
                        else
                            rc = VERR_VMX_MSR_VMXON_DISABLED;
                    }
                    else
                    {
                        /*
                         * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
                         * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
                         * See @bugref{6873}.
                         */
                        Assert(fMaybeSmxMode == true);
                        fIsSmxModeAmbiguous = true;
                        rc = VINF_SUCCESS;
                    }
                }
                else
                {
                    /*
                     * MSR is not yet locked; we can change it ourselves here.
                     * Once the lock bit is set, this MSR can no longer be modified.
                     *
                     * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
                     * accurately. See @bugref{6873}.
                     */
                    u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
                                | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                                | MSR_IA32_FEATURE_CONTROL_VMXON;
                    ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);

                    /* Verify.  (The write may be silently ignored, e.g. under a hypervisor.) */
                    u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                    fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                    fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                    fVmxAllowed    = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
                    if (fSmxVmxAllowed && fVmxAllowed)
                        rc = VINF_SUCCESS;
                    else
                        rc = VERR_VMX_MSR_LOCKING_FAILED;
                }

                if (rc == VINF_SUCCESS)
                {
                    VMXCAPABILITY vtCaps;

                    *pfCaps |= SUPVTCAPS_VT_X;

                    /* EPT support lives in the secondary processor-based exec controls. */
                    vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
                    if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
                    {
                        vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
                        if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
                            *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                    }
                }
            }
            else
                rc = VERR_VMX_NO_VMX;
        }
        /*
         * AMD branch: AMD-V capability is gated by extended CPUID leaves and
         * the VM_CR MSR's SVMDIS bit.
         */
        else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
                 && ASMIsValidStdRange(uMaxId))
        {
            uint32_t fExtFeaturesEcx, uExtMaxId;
            ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
            ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
            if (   ASMIsValidExtRange(uExtMaxId)
                && uExtMaxId >= 0x8000000a
                && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /* Check if SVM is disabled */
                uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
                if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
                {
                    uint32_t fSvmFeatures;
                    *pfCaps |= SUPVTCAPS_AMD_V;

                    /* Query AMD-V features. */
                    ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                        *pfCaps |= SUPVTCAPS_NESTED_PAGING;

                    rc = VINF_SUCCESS;
                }
                else
                    rc = VERR_SVM_DISABLED;
            }
            else
                rc = VERR_SVM_NO_SVM;
        }
    }

    RTThreadPreemptRestore(&PreemptState);
    if (fIsSmxModeAmbiguous)
        SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    return rc;
}
3920
3921
3922/**
3923 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3924 * updating.
3925 *
3926 * @param pGipCpu The per CPU structure for this CPU.
3927 * @param u64NanoTS The current time.
3928 */
3929static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3930{
3931 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3932 pGipCpu->u64NanoTS = u64NanoTS;
3933}
3934
3935
3936/**
3937 * Set the current TSC and NanoTS value for the CPU.
3938 *
3939 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3940 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3941 * @param pvUser2 Pointer to the variable holding the current time.
3942 */
3943static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3944{
3945 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3946 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3947
3948 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3949 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3950
3951 NOREF(pvUser2);
3952 NOREF(idCpu);
3953}
3954
3955
3956/**
3957 * Maps the GIP into userspace and/or get the physical address of the GIP.
3958 *
3959 * @returns IPRT status code.
3960 * @param pSession Session to which the GIP mapping should belong.
3961 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3962 * @param pHCPhysGip Where to store the physical address. (optional)
3963 *
3964 * @remark There is no reference counting on the mapping, so one call to this function
 *        count globally as one reference. One call to SUPR0GipUnmap() will unmap GIP
3966 * and remove the session as a GIP user.
3967 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;      /* Ring-3 address to hand back (if requested). */
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;     /* Physical address to hand back (if requested). */
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate.  Both output parameters are optional.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?  An existing per-session ring-3 mapping is reused.
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  The first user (re)starts GIP updating.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;
                uint32_t u32SystemResolution;
                unsigned i;

                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                /*
                 * Try bump up the system timer resolution.
                 * The more interrupts the better...
                 */
                if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /*  512 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /*  500 HZ */, &u32SystemResolution))
                   )
                {
                    Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
                    pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
                }

                /* When resuming (not the very first start), round the per-CPU
                   transaction ids up to the next recalc boundary and force an
                   update-frequency recalculation. */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /* Re-seed the TSC/NanoTS pairs; only CPU 0 needs doing when the
                   TSCs are synchronous or there is a single online CPU. */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

#ifndef DO_NOT_START_GIP
                rc = RTTimerStart(pDevExt->pGipTimer, 0); AssertRC(rc);
#endif
                rc = VINF_SUCCESS;
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
4088
4089
4090/**
4091 * Unmaps any user mapping of the GIP and terminates all GIP access
4092 * from this session.
4093 *
4094 * @returns IPRT status code.
4095 * @param pSession Session to which the GIP mapping should belong.
4096 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?  Free this session's ring-3 mapping of the GIP, if any.
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  The last user stops the GIP timer and
     * releases any system-timer-granularity grant taken in SUPR0GipMap.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif

            if (pDevExt->u32SystemTimerGranularityGrant)
            {
                int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
                AssertRC(rc2);
                pDevExt->u32SystemTimerGranularityGrant = 0;
            }
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
4159
4160
4161/**
4162 * Gets the GIP pointer.
4163 *
4164 * @returns Pointer to the GIP or NULL.
4165 */
4166SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4167{
4168 return g_pSUPGlobalInfoPage;
4169}
4170
4171
4172/**
4173 * Register a component factory with the support driver.
4174 *
4175 * This is currently restricted to kernel sessions only.
4176 *
4177 * @returns VBox status code.
4178 * @retval VINF_SUCCESS on success.
4179 * @retval VERR_NO_MEMORY if we're out of memory.
4180 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4181 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4182 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4183 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4184 *
4185 * @param pSession The SUPDRV session (must be a ring-0 session).
4186 * @param pFactory Pointer to the component factory registration structure.
4187 *
4188 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4189 */
4190SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4191{
4192 PSUPDRVFACTORYREG pNewReg;
4193 const char *psz;
4194 int rc;
4195
4196 /*
4197 * Validate parameters.
4198 */
4199 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4200 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4201 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4202 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4203 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4204 AssertReturn(psz, VERR_INVALID_PARAMETER);
4205
4206 /*
4207 * Allocate and initialize a new registration structure.
4208 */
4209 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4210 if (pNewReg)
4211 {
4212 pNewReg->pNext = NULL;
4213 pNewReg->pFactory = pFactory;
4214 pNewReg->pSession = pSession;
4215 pNewReg->cchName = psz - &pFactory->szName[0];
4216
4217 /*
4218 * Add it to the tail of the list after checking for prior registration.
4219 */
4220 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4221 if (RT_SUCCESS(rc))
4222 {
4223 PSUPDRVFACTORYREG pPrev = NULL;
4224 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4225 while (pCur && pCur->pFactory != pFactory)
4226 {
4227 pPrev = pCur;
4228 pCur = pCur->pNext;
4229 }
4230 if (!pCur)
4231 {
4232 if (pPrev)
4233 pPrev->pNext = pNewReg;
4234 else
4235 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4236 rc = VINF_SUCCESS;
4237 }
4238 else
4239 rc = VERR_ALREADY_EXISTS;
4240
4241 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4242 }
4243
4244 if (RT_FAILURE(rc))
4245 RTMemFree(pNewReg);
4246 }
4247 else
4248 rc = VERR_NO_MEMORY;
4249 return rc;
4250}
4251
4252
4253/**
4254 * Deregister a component factory.
4255 *
4256 * @returns VBox status code.
4257 * @retval VINF_SUCCESS on success.
4258 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4259 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4260 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4261 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4262 *
4263 * @param pSession The SUPDRV session (must be a ring-0 session).
4264 * @param pFactory Pointer to the component factory registration structure
4265 * previously passed SUPR0ComponentRegisterFactory().
4266 *
4267 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4268 */
4269SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4270{
4271 int rc;
4272
4273 /*
4274 * Validate parameters.
4275 */
4276 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4277 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4278 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4279
4280 /*
4281 * Take the lock and look for the registration record.
4282 */
4283 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4284 if (RT_SUCCESS(rc))
4285 {
4286 PSUPDRVFACTORYREG pPrev = NULL;
4287 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4288 while (pCur && pCur->pFactory != pFactory)
4289 {
4290 pPrev = pCur;
4291 pCur = pCur->pNext;
4292 }
4293 if (pCur)
4294 {
4295 if (!pPrev)
4296 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4297 else
4298 pPrev->pNext = pCur->pNext;
4299
4300 pCur->pNext = NULL;
4301 pCur->pFactory = NULL;
4302 pCur->pSession = NULL;
4303 rc = VINF_SUCCESS;
4304 }
4305 else
4306 rc = VERR_NOT_FOUND;
4307
4308 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4309
4310 RTMemFree(pCur);
4311 }
4312 return rc;
4313}
4314
4315
4316/**
4317 * Queries a component factory.
4318 *
4319 * @returns VBox status code.
4320 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4321 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4322 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4323 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4324 *
4325 * @param pSession The SUPDRV session.
4326 * @param pszName The name of the component factory.
4327 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4328 * @param ppvFactoryIf Where to store the factory interface.
4329 */
4330SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4331{
4332 const char *pszEnd;
4333 size_t cchName;
4334 int rc;
4335
4336 /*
4337 * Validate parameters.
4338 */
4339 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4340
4341 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4342 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4343 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4344 cchName = pszEnd - pszName;
4345
4346 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4347 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4348 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4349
4350 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4351 *ppvFactoryIf = NULL;
4352
4353 /*
4354 * Take the lock and try all factories by this name.
4355 */
4356 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4357 if (RT_SUCCESS(rc))
4358 {
4359 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4360 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4361 while (pCur)
4362 {
4363 if ( pCur->cchName == cchName
4364 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4365 {
4366 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4367 if (pvFactory)
4368 {
4369 *ppvFactoryIf = pvFactory;
4370 rc = VINF_SUCCESS;
4371 break;
4372 }
4373 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4374 }
4375
4376 /* next */
4377 pCur = pCur->pNext;
4378 }
4379
4380 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4381 }
4382 return rc;
4383}
4384
4385
4386/**
4387 * Adds a memory object to the session.
4388 *
4389 * @returns IPRT status code.
4390 * @param pMem Memory tracking structure containing the
4391 * information to track.
4392 * @param pSession The session.
4393 */
/**
 * Adds a memory object to the session.
 *
 * Tries to record the allocation in an existing bundle first; when every
 * bundle is full, a new bundle is allocated (outside the spinlock) and
 * pushed onto the session's bundle list.
 *
 * @returns IPRT status code (VINF_SUCCESS or VERR_NO_MEMORY).
 * @param   pMem        Memory tracking structure containing the
 *                      information to track.
 * @param   pSession    The session.
 */
static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Find free entry and record the allocation.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
                {
                    pBundle->cUsed++;
                    pBundle->aMem[i] = *pMem;
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
            /* cUsed said there was room, but no free slot was found. */
            AssertFailed(); /* !!this can't be happening!!! */
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    /*
     * Need to allocate a new bundle.
     * Insert into the last entry in the bundle.
     * (Allocation happens outside the spinlock; only the list insert below
     * is done while holding it.)
     */
    pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
    if (!pBundle)
        return VERR_NO_MEMORY;

    /* take last entry. */
    pBundle->cUsed++;
    pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;

    /* insert into list. */
    RTSpinlockAcquire(pSession->Spinlock);
    pBundle->pNext = pSession->Bundle.pNext;
    pSession->Bundle.pNext = pBundle;
    RTSpinlockRelease(pSession->Spinlock);

    return VINF_SUCCESS;
}
4442
4443
4444/**
4445 * Releases a memory object referenced by pointer and type.
4446 *
4447 * @returns IPRT status code.
4448 * @param pSession Session data.
4449 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4450 * @param eType Memory type.
4451 */
/**
 * Releases a memory object referenced by pointer and type.
 *
 * The matching tracking entry is cleared while holding the session spinlock;
 * the actual RTR0MemObjFree calls happen on a local copy after the lock has
 * been released (freeing can sleep/contend).
 *
 * @returns IPRT status code (VINF_SUCCESS or VERR_INVALID_PARAMETER when the
 *          address isn't tracked with the given type).
 * @param   pSession    Session data.
 * @param   uPtr        Pointer to memory. This is matched against both the R0 and R3 addresses.
 * @param   eType       Memory type.
 */
static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Validate input.
     */
    if (!uPtr)
    {
        Log(("Illegal address %p\n", (void *)uPtr));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Search for the address.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    pBundle->aMem[i].eType == eType
                    &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                         || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                             && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
                   )
                {
                    /* Make a copy of it and release it outside the spinlock. */
                    SUPDRVMEMREF Mem = pBundle->aMem[i];
                    pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
                    pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                    RTSpinlockRelease(pSession->Spinlock);

                    /* Free the ring-3 mapping first, then the backing object
                       (with fFreeMappings so remaining mappings go too). */
                    if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MapObjR3, false);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    if (Mem.MemObj != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
    return VERR_INVALID_PARAMETER;
}
4509
4510
4511/**
4512 * Opens an image. If it's the first time it's opened the call must upload
4513 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4514 *
4515 * This is the 1st step of the loading.
4516 *
4517 * @returns IPRT status code.
4518 * @param pDevExt Device globals.
4519 * @param pSession Session data.
4520 * @param pReq The open request.
4521 */
4522static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4523{
4524 int rc;
4525 PSUPDRVLDRIMAGE pImage;
4526 void *pv;
4527 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4528 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4529
4530 /*
4531 * Check if we got an instance of the image already.
4532 */
4533 supdrvLdrLock(pDevExt);
4534 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4535 {
4536 if ( pImage->szName[cchName] == '\0'
4537 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4538 {
4539 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4540 {
4541 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4542 pImage->cUsage++;
4543 pReq->u.Out.pvImageBase = pImage->pvImage;
4544 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4545 pReq->u.Out.fNativeLoader = pImage->fNative;
4546 supdrvLdrAddUsage(pSession, pImage);
4547 supdrvLdrUnlock(pDevExt);
4548 return VINF_SUCCESS;
4549 }
4550 supdrvLdrUnlock(pDevExt);
4551 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4552 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4553 }
4554 }
4555 /* (not found - add it!) */
4556
4557 /*
4558 * Allocate memory.
4559 */
4560 Assert(cchName < sizeof(pImage->szName));
4561 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4562 if (!pv)
4563 {
4564 supdrvLdrUnlock(pDevExt);
4565 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4566 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4567 }
4568
4569 /*
4570 * Setup and link in the LDR stuff.
4571 */
4572 pImage = (PSUPDRVLDRIMAGE)pv;
4573 pImage->pvImage = NULL;
4574 pImage->pvImageAlloc = NULL;
4575 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4576 pImage->cbImageBits = pReq->u.In.cbImageBits;
4577 pImage->cSymbols = 0;
4578 pImage->paSymbols = NULL;
4579 pImage->pachStrTab = NULL;
4580 pImage->cbStrTab = 0;
4581 pImage->pfnModuleInit = NULL;
4582 pImage->pfnModuleTerm = NULL;
4583 pImage->pfnServiceReqHandler = NULL;
4584 pImage->uState = SUP_IOCTL_LDR_OPEN;
4585 pImage->cUsage = 1;
4586 pImage->pDevExt = pDevExt;
4587 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4588
4589 /*
4590 * Try load it using the native loader, if that isn't supported, fall back
4591 * on the older method.
4592 */
4593 pImage->fNative = true;
4594 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4595 if (rc == VERR_NOT_SUPPORTED)
4596 {
4597 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4598 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4599 pImage->fNative = false;
4600 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4601 }
4602 if (RT_FAILURE(rc))
4603 {
4604 supdrvLdrUnlock(pDevExt);
4605 RTMemFree(pImage);
4606 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4607 return rc;
4608 }
4609 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4610
4611 /*
4612 * Link it.
4613 */
4614 pImage->pNext = pDevExt->pLdrImages;
4615 pDevExt->pLdrImages = pImage;
4616
4617 supdrvLdrAddUsage(pSession, pImage);
4618
4619 pReq->u.Out.pvImageBase = pImage->pvImage;
4620 pReq->u.Out.fNeedsLoading = true;
4621 pReq->u.Out.fNativeLoader = pImage->fNative;
4622 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4623
4624 supdrvLdrUnlock(pDevExt);
4625 return VINF_SUCCESS;
4626}
4627
4628
4629/**
4630 * Worker that validates a pointer to an image entrypoint.
4631 *
4632 * @returns IPRT status code.
4633 * @param pDevExt The device globals.
4634 * @param pImage The loader image.
4635 * @param pv The pointer into the image.
4636 * @param fMayBeNull Whether it may be NULL.
4637 * @param pszWhat What is this entrypoint? (for logging)
4638 * @param pbImageBits The image bits prepared by ring-3.
4639 *
4640 * @remarks Will leave the lock on failure.
4641 */
4642static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4643 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4644{
4645 if (!fMayBeNull || pv)
4646 {
4647 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4648 {
4649 supdrvLdrUnlock(pDevExt);
4650 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4651 return VERR_INVALID_PARAMETER;
4652 }
4653
4654 if (pImage->fNative)
4655 {
4656 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4657 if (RT_FAILURE(rc))
4658 {
4659 supdrvLdrUnlock(pDevExt);
4660 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4661 return rc;
4662 }
4663 }
4664 }
4665 return VINF_SUCCESS;
4666}
4667
4668
4669/**
4670 * Loads the image bits.
4671 *
4672 * This is the 2nd step of the loading.
4673 *
4674 * @returns IPRT status code.
4675 * @param pDevExt Device globals.
4676 * @param pSession Session data.
4677 * @param pReq The request.
4678 */
4679static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4680{
4681 PSUPDRVLDRUSAGE pUsage;
4682 PSUPDRVLDRIMAGE pImage;
4683 int rc;
4684 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4685
4686 /*
4687 * Find the ldr image.
4688 */
4689 supdrvLdrLock(pDevExt);
4690 pUsage = pSession->pLdrUsage;
4691 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4692 pUsage = pUsage->pNext;
4693 if (!pUsage)
4694 {
4695 supdrvLdrUnlock(pDevExt);
4696 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4697 return VERR_INVALID_HANDLE;
4698 }
4699 pImage = pUsage->pImage;
4700
4701 /*
4702 * Validate input.
4703 */
4704 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4705 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4706 {
4707 supdrvLdrUnlock(pDevExt);
4708 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4709 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4710 return VERR_INVALID_HANDLE;
4711 }
4712
4713 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4714 {
4715 unsigned uState = pImage->uState;
4716 supdrvLdrUnlock(pDevExt);
4717 if (uState != SUP_IOCTL_LDR_LOAD)
4718 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4719 return VERR_ALREADY_LOADED;
4720 }
4721
4722 switch (pReq->u.In.eEPType)
4723 {
4724 case SUPLDRLOADEP_NOTHING:
4725 break;
4726
4727 case SUPLDRLOADEP_VMMR0:
4728 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4729 if (RT_SUCCESS(rc))
4730 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4731 if (RT_SUCCESS(rc))
4732 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4733 if (RT_SUCCESS(rc))
4734 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4735 if (RT_FAILURE(rc))
4736 return rc;
4737 break;
4738
4739 case SUPLDRLOADEP_SERVICE:
4740 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4741 if (RT_FAILURE(rc))
4742 return rc;
4743 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4744 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4745 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4746 {
4747 supdrvLdrUnlock(pDevExt);
4748 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4749 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4750 pReq->u.In.EP.Service.apvReserved[0],
4751 pReq->u.In.EP.Service.apvReserved[1],
4752 pReq->u.In.EP.Service.apvReserved[2]));
4753 return VERR_INVALID_PARAMETER;
4754 }
4755 break;
4756
4757 default:
4758 supdrvLdrUnlock(pDevExt);
4759 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4760 return VERR_INVALID_PARAMETER;
4761 }
4762
4763 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4764 if (RT_FAILURE(rc))
4765 return rc;
4766 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4767 if (RT_FAILURE(rc))
4768 return rc;
4769
4770 /*
4771 * Allocate and copy the tables.
4772 * (No need to do try/except as this is a buffered request.)
4773 */
4774 pImage->cbStrTab = pReq->u.In.cbStrTab;
4775 if (pImage->cbStrTab)
4776 {
4777 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4778 if (pImage->pachStrTab)
4779 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4780 else
4781 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4782 }
4783
4784 pImage->cSymbols = pReq->u.In.cSymbols;
4785 if (RT_SUCCESS(rc) && pImage->cSymbols)
4786 {
4787 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4788 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4789 if (pImage->paSymbols)
4790 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4791 else
4792 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4793 }
4794
4795 /*
4796 * Copy the bits / complete native loading.
4797 */
4798 if (RT_SUCCESS(rc))
4799 {
4800 pImage->uState = SUP_IOCTL_LDR_LOAD;
4801 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4802 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4803
4804 if (pImage->fNative)
4805 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4806 else
4807 {
4808 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4809 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4810 }
4811 }
4812
4813 /*
4814 * Update any entry points.
4815 */
4816 if (RT_SUCCESS(rc))
4817 {
4818 switch (pReq->u.In.eEPType)
4819 {
4820 default:
4821 case SUPLDRLOADEP_NOTHING:
4822 rc = VINF_SUCCESS;
4823 break;
4824 case SUPLDRLOADEP_VMMR0:
4825 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4826 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4827 break;
4828 case SUPLDRLOADEP_SERVICE:
4829 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4830 rc = VINF_SUCCESS;
4831 break;
4832 }
4833 }
4834
4835 /*
4836 * On success call the module initialization.
4837 */
4838 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4839 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4840 {
4841 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4842 pDevExt->pLdrInitImage = pImage;
4843 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4844 rc = pImage->pfnModuleInit(pImage);
4845 pDevExt->pLdrInitImage = NULL;
4846 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4847 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4848 supdrvLdrUnsetVMMR0EPs(pDevExt);
4849 }
4850 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4851
4852 if (RT_FAILURE(rc))
4853 {
4854 /* Inform the tracing component in case ModuleInit registered TPs. */
4855 supdrvTracerModuleUnloading(pDevExt, pImage);
4856
4857 pImage->uState = SUP_IOCTL_LDR_OPEN;
4858 pImage->pfnModuleInit = NULL;
4859 pImage->pfnModuleTerm = NULL;
4860 pImage->pfnServiceReqHandler= NULL;
4861 pImage->cbStrTab = 0;
4862 RTMemFree(pImage->pachStrTab);
4863 pImage->pachStrTab = NULL;
4864 RTMemFree(pImage->paSymbols);
4865 pImage->paSymbols = NULL;
4866 pImage->cSymbols = 0;
4867 }
4868
4869 supdrvLdrUnlock(pDevExt);
4870 return rc;
4871}
4872
4873
4874/**
4875 * Frees a previously loaded (prep'ed) image.
4876 *
4877 * @returns IPRT status code.
4878 * @param pDevExt Device globals.
4879 * @param pSession Session data.
4880 * @param pReq The request.
4881 */
4882static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4883{
4884 int rc;
4885 PSUPDRVLDRUSAGE pUsagePrev;
4886 PSUPDRVLDRUSAGE pUsage;
4887 PSUPDRVLDRIMAGE pImage;
4888 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4889
4890 /*
4891 * Find the ldr image.
4892 */
4893 supdrvLdrLock(pDevExt);
4894 pUsagePrev = NULL;
4895 pUsage = pSession->pLdrUsage;
4896 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4897 {
4898 pUsagePrev = pUsage;
4899 pUsage = pUsage->pNext;
4900 }
4901 if (!pUsage)
4902 {
4903 supdrvLdrUnlock(pDevExt);
4904 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4905 return VERR_INVALID_HANDLE;
4906 }
4907
4908 /*
4909 * Check if we can remove anything.
4910 */
4911 rc = VINF_SUCCESS;
4912 pImage = pUsage->pImage;
4913 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4914 {
4915 /*
4916 * Check if there are any objects with destructors in the image, if
4917 * so leave it for the session cleanup routine so we get a chance to
4918 * clean things up in the right order and not leave them all dangling.
4919 */
4920 RTSpinlockAcquire(pDevExt->Spinlock);
4921 if (pImage->cUsage <= 1)
4922 {
4923 PSUPDRVOBJ pObj;
4924 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4925 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4926 {
4927 rc = VERR_DANGLING_OBJECTS;
4928 break;
4929 }
4930 }
4931 else
4932 {
4933 PSUPDRVUSAGE pGenUsage;
4934 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4935 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4936 {
4937 rc = VERR_DANGLING_OBJECTS;
4938 break;
4939 }
4940 }
4941 RTSpinlockRelease(pDevExt->Spinlock);
4942 if (rc == VINF_SUCCESS)
4943 {
4944 /* unlink it */
4945 if (pUsagePrev)
4946 pUsagePrev->pNext = pUsage->pNext;
4947 else
4948 pSession->pLdrUsage = pUsage->pNext;
4949
4950 /* free it */
4951 pUsage->pImage = NULL;
4952 pUsage->pNext = NULL;
4953 RTMemFree(pUsage);
4954
4955 /*
4956 * Dereference the image.
4957 */
4958 if (pImage->cUsage <= 1)
4959 supdrvLdrFree(pDevExt, pImage);
4960 else
4961 pImage->cUsage--;
4962 }
4963 else
4964 {
4965 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4966 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4967 }
4968 }
4969 else
4970 {
4971 /*
4972 * Dereference both image and usage.
4973 */
4974 pImage->cUsage--;
4975 pUsage->cUsage--;
4976 }
4977
4978 supdrvLdrUnlock(pDevExt);
4979 return rc;
4980}
4981
4982
4983/**
4984 * Gets the address of a symbol in an open image.
4985 *
4986 * @returns IPRT status code.
4987 * @param pDevExt Device globals.
4988 * @param pSession Session data.
4989 * @param pReq The request buffer.
4990 */
4991static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4992{
4993 PSUPDRVLDRIMAGE pImage;
4994 PSUPDRVLDRUSAGE pUsage;
4995 uint32_t i;
4996 PSUPLDRSYM paSyms;
4997 const char *pchStrings;
4998 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
4999 void *pvSymbol = NULL;
5000 int rc = VERR_GENERAL_FAILURE;
5001 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5002
5003 /*
5004 * Find the ldr image.
5005 */
5006 supdrvLdrLock(pDevExt);
5007 pUsage = pSession->pLdrUsage;
5008 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5009 pUsage = pUsage->pNext;
5010 if (!pUsage)
5011 {
5012 supdrvLdrUnlock(pDevExt);
5013 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5014 return VERR_INVALID_HANDLE;
5015 }
5016 pImage = pUsage->pImage;
5017 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5018 {
5019 unsigned uState = pImage->uState;
5020 supdrvLdrUnlock(pDevExt);
5021 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5022 return VERR_ALREADY_LOADED;
5023 }
5024
5025 /*
5026 * Search the symbol strings.
5027 *
5028 * Note! The int32_t is for native loading on solaris where the data
5029 * and text segments are in very different places.
5030 */
5031 pchStrings = pImage->pachStrTab;
5032 paSyms = pImage->paSymbols;
5033 for (i = 0; i < pImage->cSymbols; i++)
5034 {
5035 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5036 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5037 {
5038 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5039 rc = VINF_SUCCESS;
5040 break;
5041 }
5042 }
5043 supdrvLdrUnlock(pDevExt);
5044 pReq->u.Out.pvSymbol = pvSymbol;
5045 return rc;
5046}
5047
5048
5049/**
5050 * Gets the address of a symbol in an open image or the support driver.
5051 *
5052 * @returns VINF_SUCCESS on success.
5053 * @returns
5054 * @param pDevExt Device globals.
5055 * @param pSession Session data.
5056 * @param pReq The request buffer.
5057 */
5058static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5059{
5060 int rc = VINF_SUCCESS;
5061 const char *pszSymbol = pReq->u.In.pszSymbol;
5062 const char *pszModule = pReq->u.In.pszModule;
5063 size_t cbSymbol;
5064 char const *pszEnd;
5065 uint32_t i;
5066
5067 /*
5068 * Input validation.
5069 */
5070 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5071 pszEnd = RTStrEnd(pszSymbol, 512);
5072 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5073 cbSymbol = pszEnd - pszSymbol + 1;
5074
5075 if (pszModule)
5076 {
5077 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5078 pszEnd = RTStrEnd(pszModule, 64);
5079 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5080 }
5081 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5082
5083
5084 if ( !pszModule
5085 || !strcmp(pszModule, "SupDrv"))
5086 {
5087 /*
5088 * Search the support driver export table.
5089 */
5090 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5091 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5092 {
5093 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5094 break;
5095 }
5096 }
5097 else
5098 {
5099 /*
5100 * Find the loader image.
5101 */
5102 PSUPDRVLDRIMAGE pImage;
5103
5104 supdrvLdrLock(pDevExt);
5105
5106 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5107 if (!strcmp(pImage->szName, pszModule))
5108 break;
5109 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5110 {
5111 /*
5112 * Search the symbol strings.
5113 */
5114 const char *pchStrings = pImage->pachStrTab;
5115 PCSUPLDRSYM paSyms = pImage->paSymbols;
5116 for (i = 0; i < pImage->cSymbols; i++)
5117 {
5118 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5119 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5120 {
5121 /*
5122 * Found it! Calc the symbol address and add a reference to the module.
5123 */
5124 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5125 rc = supdrvLdrAddUsage(pSession, pImage);
5126 break;
5127 }
5128 }
5129 }
5130 else
5131 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5132
5133 supdrvLdrUnlock(pDevExt);
5134 }
5135 return rc;
5136}
5137
5138
5139/**
5140 * Updates the VMMR0 entry point pointers.
5141 *
5142 * @returns IPRT status code.
5143 * @param pDevExt Device globals.
5144 * @param pSession Session data.
5145 * @param pVMMR0 VMMR0 image handle.
5146 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5147 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5148 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5149 * @remark Caller must own the loader mutex.
5150 */
5151static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5152{
5153 int rc = VINF_SUCCESS;
5154 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5155
5156
5157 /*
5158 * Check if not yet set.
5159 */
5160 if (!pDevExt->pvVMMR0)
5161 {
5162 pDevExt->pvVMMR0 = pvVMMR0;
5163 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5164 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5165 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5166 }
5167 else
5168 {
5169 /*
5170 * Return failure or success depending on whether the values match or not.
5171 */
5172 if ( pDevExt->pvVMMR0 != pvVMMR0
5173 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5174 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5175 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5176 {
5177 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5178 rc = VERR_INVALID_PARAMETER;
5179 }
5180 }
5181 return rc;
5182}
5183
5184
5185/**
5186 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5187 *
5188 * @param pDevExt Device globals.
5189 */
5190static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5191{
5192 pDevExt->pvVMMR0 = NULL;
5193 pDevExt->pfnVMMR0EntryInt = NULL;
5194 pDevExt->pfnVMMR0EntryFast = NULL;
5195 pDevExt->pfnVMMR0EntryEx = NULL;
5196}
5197
5198
5199/**
5200 * Adds a usage reference in the specified session of an image.
5201 *
5202 * Called while owning the loader semaphore.
5203 *
5204 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5205 * @param pSession Session in question.
5206 * @param pImage Image which the session is using.
5207 */
5208static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5209{
5210 PSUPDRVLDRUSAGE pUsage;
5211 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5212
5213 /*
5214 * Referenced it already?
5215 */
5216 pUsage = pSession->pLdrUsage;
5217 while (pUsage)
5218 {
5219 if (pUsage->pImage == pImage)
5220 {
5221 pUsage->cUsage++;
5222 return VINF_SUCCESS;
5223 }
5224 pUsage = pUsage->pNext;
5225 }
5226
5227 /*
5228 * Allocate new usage record.
5229 */
5230 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5231 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5232 pUsage->cUsage = 1;
5233 pUsage->pImage = pImage;
5234 pUsage->pNext = pSession->pLdrUsage;
5235 pSession->pLdrUsage = pUsage;
5236 return VINF_SUCCESS;
5237}
5238
5239
5240/**
5241 * Frees a load image.
5242 *
5243 * @param pDevExt Pointer to device extension.
5244 * @param pImage Pointer to the image we're gonna free.
5245 * This image must exit!
5246 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5247 */
5248static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5249{
5250 PSUPDRVLDRIMAGE pImagePrev;
5251 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5252
5253 /* find it - arg. should've used doubly linked list. */
5254 Assert(pDevExt->pLdrImages);
5255 pImagePrev = NULL;
5256 if (pDevExt->pLdrImages != pImage)
5257 {
5258 pImagePrev = pDevExt->pLdrImages;
5259 while (pImagePrev->pNext != pImage)
5260 pImagePrev = pImagePrev->pNext;
5261 Assert(pImagePrev->pNext == pImage);
5262 }
5263
5264 /* unlink */
5265 if (pImagePrev)
5266 pImagePrev->pNext = pImage->pNext;
5267 else
5268 pDevExt->pLdrImages = pImage->pNext;
5269
5270 /* check if this is VMMR0.r0 unset its entry point pointers. */
5271 if (pDevExt->pvVMMR0 == pImage->pvImage)
5272 supdrvLdrUnsetVMMR0EPs(pDevExt);
5273
5274 /* check for objects with destructors in this image. (Shouldn't happen.) */
5275 if (pDevExt->pObjs)
5276 {
5277 unsigned cObjs = 0;
5278 PSUPDRVOBJ pObj;
5279 RTSpinlockAcquire(pDevExt->Spinlock);
5280 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5281 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5282 {
5283 pObj->pfnDestructor = NULL;
5284 cObjs++;
5285 }
5286 RTSpinlockRelease(pDevExt->Spinlock);
5287 if (cObjs)
5288 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5289 }
5290
5291 /* call termination function if fully loaded. */
5292 if ( pImage->pfnModuleTerm
5293 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5294 {
5295 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5296 pImage->pfnModuleTerm(pImage);
5297 }
5298
5299 /* Inform the tracing component. */
5300 supdrvTracerModuleUnloading(pDevExt, pImage);
5301
5302 /* do native unload if appropriate. */
5303 if (pImage->fNative)
5304 supdrvOSLdrUnload(pDevExt, pImage);
5305
5306 /* free the image */
5307 pImage->cUsage = 0;
5308 pImage->pDevExt = NULL;
5309 pImage->pNext = NULL;
5310 pImage->uState = SUP_IOCTL_LDR_FREE;
5311 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5312 pImage->pvImageAlloc = NULL;
5313 RTMemFree(pImage->pachStrTab);
5314 pImage->pachStrTab = NULL;
5315 RTMemFree(pImage->paSymbols);
5316 pImage->paSymbols = NULL;
5317 RTMemFree(pImage);
5318}
5319
5320
5321/**
5322 * Acquires the loader lock.
5323 *
5324 * @returns IPRT status code.
5325 * @param pDevExt The device extension.
5326 */
5327DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5328{
5329#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5330 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5331#else
5332 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5333#endif
5334 AssertRC(rc);
5335 return rc;
5336}
5337
5338
5339/**
5340 * Releases the loader lock.
5341 *
5342 * @returns IPRT status code.
5343 * @param pDevExt The device extension.
5344 */
5345DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5346{
5347#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5348 return RTSemMutexRelease(pDevExt->mtxLdr);
5349#else
5350 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5351#endif
5352}
5353
5354
5355/**
5356 * Implements the service call request.
5357 *
5358 * @returns VBox status code.
5359 * @param pDevExt The device extension.
5360 * @param pSession The calling session.
5361 * @param pReq The request packet, valid.
5362 */
5363static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5364{
5365#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5366 int rc;
5367
5368 /*
5369 * Find the module first in the module referenced by the calling session.
5370 */
5371 rc = supdrvLdrLock(pDevExt);
5372 if (RT_SUCCESS(rc))
5373 {
5374 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5375 PSUPDRVLDRUSAGE pUsage;
5376
5377 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5378 if ( pUsage->pImage->pfnServiceReqHandler
5379 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5380 {
5381 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5382 break;
5383 }
5384 supdrvLdrUnlock(pDevExt);
5385
5386 if (pfnServiceReqHandler)
5387 {
5388 /*
5389 * Call it.
5390 */
5391 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5392 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5393 else
5394 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5395 }
5396 else
5397 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5398 }
5399
5400 /* log it */
5401 if ( RT_FAILURE(rc)
5402 && rc != VERR_INTERRUPTED
5403 && rc != VERR_TIMEOUT)
5404 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5405 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5406 else
5407 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5408 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5409 return rc;
5410#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5411 return VERR_NOT_IMPLEMENTED;
5412#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5413}
5414
5415
5416/**
5417 * Implements the logger settings request.
5418 *
5419 * @returns VBox status code.
5420 * @param pDevExt The device extension.
5421 * @param pSession The caller's session.
5422 * @param pReq The request.
5423 */
5424static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5425{
5426 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5427 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5428 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5429 PRTLOGGER pLogger = NULL;
5430 int rc;
5431
5432 /*
5433 * Some further validation.
5434 */
5435 switch (pReq->u.In.fWhat)
5436 {
5437 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5438 case SUPLOGGERSETTINGS_WHAT_CREATE:
5439 break;
5440
5441 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5442 if (*pszGroup || *pszFlags || *pszDest)
5443 return VERR_INVALID_PARAMETER;
5444 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5445 return VERR_ACCESS_DENIED;
5446 break;
5447
5448 default:
5449 return VERR_INTERNAL_ERROR;
5450 }
5451
5452 /*
5453 * Get the logger.
5454 */
5455 switch (pReq->u.In.fWhich)
5456 {
5457 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5458 pLogger = RTLogGetDefaultInstance();
5459 break;
5460
5461 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5462 pLogger = RTLogRelDefaultInstance();
5463 break;
5464
5465 default:
5466 return VERR_INTERNAL_ERROR;
5467 }
5468
5469 /*
5470 * Do the job.
5471 */
5472 switch (pReq->u.In.fWhat)
5473 {
5474 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5475 if (pLogger)
5476 {
5477 rc = RTLogFlags(pLogger, pszFlags);
5478 if (RT_SUCCESS(rc))
5479 rc = RTLogGroupSettings(pLogger, pszGroup);
5480 NOREF(pszDest);
5481 }
5482 else
5483 rc = VERR_NOT_FOUND;
5484 break;
5485
5486 case SUPLOGGERSETTINGS_WHAT_CREATE:
5487 {
5488 if (pLogger)
5489 rc = VERR_ALREADY_EXISTS;
5490 else
5491 {
5492 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5493
5494 rc = RTLogCreate(&pLogger,
5495 0 /* fFlags */,
5496 pszGroup,
5497 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5498 ? "VBOX_LOG"
5499 : "VBOX_RELEASE_LOG",
5500 RT_ELEMENTS(s_apszGroups),
5501 s_apszGroups,
5502 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5503 NULL);
5504 if (RT_SUCCESS(rc))
5505 {
5506 rc = RTLogFlags(pLogger, pszFlags);
5507 NOREF(pszDest);
5508 if (RT_SUCCESS(rc))
5509 {
5510 switch (pReq->u.In.fWhich)
5511 {
5512 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5513 pLogger = RTLogSetDefaultInstance(pLogger);
5514 break;
5515 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5516 pLogger = RTLogRelSetDefaultInstance(pLogger);
5517 break;
5518 }
5519 }
5520 RTLogDestroy(pLogger);
5521 }
5522 }
5523 break;
5524 }
5525
5526 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5527 switch (pReq->u.In.fWhich)
5528 {
5529 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5530 pLogger = RTLogSetDefaultInstance(NULL);
5531 break;
5532 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5533 pLogger = RTLogRelSetDefaultInstance(NULL);
5534 break;
5535 }
5536 rc = RTLogDestroy(pLogger);
5537 break;
5538
5539 default:
5540 {
5541 rc = VERR_INTERNAL_ERROR;
5542 break;
5543 }
5544 }
5545
5546 return rc;
5547}
5548
5549
5550/**
5551 * Implements the MSR prober operations.
5552 *
5553 * @returns VBox status code.
5554 * @param pDevExt The device extension.
5555 * @param pReq The request.
5556 */
5557static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5558{
5559#ifdef SUPDRV_WITH_MSR_PROBER
5560 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5561 int rc;
5562
5563 switch (pReq->u.In.enmOp)
5564 {
5565 case SUPMSRPROBEROP_READ:
5566 {
5567 uint64_t uValue;
5568 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5569 if (RT_SUCCESS(rc))
5570 {
5571 pReq->u.Out.uResults.Read.uValue = uValue;
5572 pReq->u.Out.uResults.Read.fGp = false;
5573 }
5574 else if (rc == VERR_ACCESS_DENIED)
5575 {
5576 pReq->u.Out.uResults.Read.uValue = 0;
5577 pReq->u.Out.uResults.Read.fGp = true;
5578 rc = VINF_SUCCESS;
5579 }
5580 break;
5581 }
5582
5583 case SUPMSRPROBEROP_WRITE:
5584 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5585 if (RT_SUCCESS(rc))
5586 pReq->u.Out.uResults.Write.fGp = false;
5587 else if (rc == VERR_ACCESS_DENIED)
5588 {
5589 pReq->u.Out.uResults.Write.fGp = true;
5590 rc = VINF_SUCCESS;
5591 }
5592 break;
5593
5594 case SUPMSRPROBEROP_MODIFY:
5595 case SUPMSRPROBEROP_MODIFY_FASTER:
5596 rc = supdrvOSMsrProberModify(idCpu, pReq);
5597 break;
5598
5599 default:
5600 return VERR_INVALID_FUNCTION;
5601 }
5602 return rc;
5603#else
5604 return VERR_NOT_IMPLEMENTED;
5605#endif
5606}
5607
5608
5609#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5610/**
5611 * Switches the TSC-delta measurement thread into the butchered state.
5612 *
5613 * @returns VBox status code.
5614 * @param pDevExt Pointer to the device instance data.
5615 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5616 * @param pszFailed An error message to log.
5617 * @param rcFailed The error code to exit the thread with.
5618 */
5619static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5620{
5621 if (!fSpinlockHeld)
5622 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5623
5624 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5625 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5626 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5627 return rcFailed;
5628}
5629
5630
/**
 * The TSC-delta measurement thread.
 *
 * Runs a small state machine (pDevExt->enmTscDeltaState, protected by
 * pDevExt->hTscDeltaSpinlock) servicing re-measurement requests, e.g. from
 * CPUs coming online.  Note the deliberate case fall-throughs below:
 * Creating falls into Listening, WaitAndMeasure falls into Measuring.
 * The spinlock is held on entry to each switch case and must be released
 * on every path out of the switch.
 *
 * @returns VBox status code.
 * @param   hThread     The thread handle.
 * @param   pvUser      Opaque pointer to the device instance data.
 */
static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    static uint32_t cTimesMeasured = 0;     /* NOTE(review): static, so shared across all thread instances;
                                               assumes only one TSC-delta thread ever runs -- confirm. */
    uint32_t cConsecutiveTimeouts = 0;
    int rc = VERR_INTERNAL_ERROR_2;
    for (;;)
    {
        /*
         * Switch on the current state.  The spinlock is held while the state
         * is sampled and, by convention, on entry to every case below.
         */
        SUPDRVTSCDELTASTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaState;
        switch (enmState)
        {
            case kSupDrvTscDeltaState_Creating:
            {
                /* Signal supdrvTscDeltaInit() that we're up and running. */
                pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                /* fall thru */
            }

            case kSupDrvTscDeltaState_Listening:
            {
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                /* Simple adaptive timeout: back off 1 -> 10 -> 100 -> 500 ms
                   after every 10 consecutive timeouts. */
                if (cConsecutiveTimeouts++ == 10)
                {
                    if (pDevExt->cMsTscDeltaTimeout == 1)           /* -> 10 ms */
                        pDevExt->cMsTscDeltaTimeout = 10;
                    else if (pDevExt->cMsTscDeltaTimeout == 10)     /* -> 100 ms */
                        pDevExt->cMsTscDeltaTimeout = 100;
                    else if (pDevExt->cMsTscDeltaTimeout == 100)    /* -> 500 ms (cap) */
                        pDevExt->cMsTscDeltaTimeout = 500;
                    cConsecutiveTimeouts = 0;
                }
                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
                if (   RT_FAILURE(rc)
                    && rc != VERR_TIMEOUT)
                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
                break;
            }

            case kSupDrvTscDeltaState_WaitAndMeasure:
            {
                /* Acknowledge the request, then give the requester a brief
                   head start before measuring (hence the 10 ms sleep). */
                pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->cMsTscDeltaTimeout = 1;
                RTThreadSleep(10);
                /* fall thru */
            }

            case kSupDrvTscDeltaState_Measuring:
            {
                /* NOTE: when entered directly (not via fall-thru above) the
                   spinlock is still held; it is re-acquired below either way,
                   see the matching Release after the state check. */
                cConsecutiveTimeouts = 0;
                if (!cTimesMeasured++)
                    rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
                else
                {
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    unsigned iCpu;

                    if (cTimesMeasured == UINT32_MAX)
                        cTimesMeasured = 1;

                    /* Measure TSC-deltas only for the CPUs that are in the set. */
                    rc = VINF_SUCCESS;
                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    {
                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
                        if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
                            && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
                        {
                            /* rc |= : accumulates failure codes across CPUs (any nonzero sticks). */
                            rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                            RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
                        }
                    }
                }
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
                    pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->rcTscDelta = rc;
                break;
            }

            case kSupDrvTscDeltaState_Terminating:
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                return VINF_SUCCESS;

            case kSupDrvTscDeltaState_Butchered:
            default:
                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
        }
    }

    /* Not reached: the loop only exits via the return statements above. */
    return rc;
}
5743
5744
/**
 * Waits for the TSC-delta measurement thread to respond to a state change.
 *
 * First waits up to one second for the transition; if the thread is still in
 * the old state, waits up to another 50 seconds before declaring a timeout.
 *
 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
 *          other error code on internal error.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   enmCurState     The current state.
 * @param   enmNewState     The new state we're waiting for it to enter.
 */
static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
{
    /*
     * Wait a short while for the expected state transition.
     * (Return code deliberately ignored: the state check below is authoritative.)
     */
    int rc;
    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    if (pDevExt->enmTscDeltaState == enmNewState)
    {
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = VINF_SUCCESS;
    }
    else if (pDevExt->enmTscDeltaState == enmCurState)
    {
        /*
         * Wait longer if the state has not yet transitioned to the one we want.
         */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
        if (   RT_SUCCESS(rc)
            || rc == VERR_TIMEOUT)
        {
            /*
             * Check the state whether we've succeeded.
             */
            SUPDRVTSCDELTASTATE enmState;
            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
            enmState = pDevExt->enmTscDeltaState;
            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
            if (enmState == enmNewState)
                rc = VINF_SUCCESS;
            else if (enmState == enmCurState)
            {
                /* Still stuck in the old state after ~51 seconds total. */
                rc = VERR_TIMEOUT;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
                            enmNewState));
            }
            else
            {
                /* The thread went somewhere unexpected (e.g. butchered). */
                rc = VERR_INTERNAL_ERROR;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
                            enmState, enmNewState));
            }
        }
        else
            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    }
    else
    {
        /* The thread is in neither the old nor the new state. */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
        rc = VERR_INTERNAL_ERROR;
    }

    return rc;
}
5812
5813
5814/**
5815 * Terminates the TSC-delta measurement thread.
5816 *
5817 * @param pDevExt Pointer to the device instance data.
5818 */
5819static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5820{
5821 int rc;
5822 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5823 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5824 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5825 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5826 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5827 if (RT_FAILURE(rc))
5828 {
5829 /* Signal a few more times before giving up. */
5830 int cTries = 5;
5831 while (--cTries > 0)
5832 {
5833 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5834 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5835 if (rc != VERR_TIMEOUT)
5836 break;
5837 }
5838 }
5839}
5840
5841
5842/**
5843 * Initializes and spawns the TSC-delta measurement thread.
5844 *
5845 * A thread is required for servicing re-measurement requests from events like
5846 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5847 * under all contexts on all OSs.
5848 *
5849 * @returns VBox status code.
5850 * @param pDevExt Pointer to the device instance data.
5851 *
5852 * @remarks Must only be called -after- initializing GIP and setting up MP
5853 * notifications!
5854 */
5855static int supdrvTscDeltaInit(PSUPDRVDEVEXT pDevExt)
5856{
5857 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5858 if (RT_SUCCESS(rc))
5859 {
5860 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5861 if (RT_SUCCESS(rc))
5862 {
5863 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5864 pDevExt->cMsTscDeltaTimeout = 1;
5865 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5866 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5867 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5868 if (RT_SUCCESS(rc))
5869 {
5870 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5871 if (RT_SUCCESS(rc))
5872 {
5873 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5874 return rc;
5875 }
5876
5877 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5878 supdrvTscDeltaThreadTerminate(pDevExt);
5879 }
5880 else
5881 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5882 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5883 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5884 }
5885 else
5886 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5887 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5888 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5889 }
5890 else
5891 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5892
5893 return rc;
5894}
5895
5896
5897/**
5898 * Terminates the TSC-delta measurement thread and cleanup.
5899 *
5900 * @param pDevExt Pointer to the device instance data.
5901 */
5902static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5903{
5904 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5905 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5906 {
5907 supdrvTscDeltaThreadTerminate(pDevExt);
5908 }
5909
5910 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5911 {
5912 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5913 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5914 }
5915
5916 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5917 {
5918 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5919 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5920 }
5921
5922 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5923}
5924#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5925
5926
/**
 * Measures the TSC frequency of the system.
 *
 * Uses a busy-wait method for the async. case as it is intended to help push
 * the CPU frequency up, while for the invariant cases using a sleeping method.
 *
 * The TSC frequency can vary on systems that are not reported as invariant.
 * However, on such systems the object of this function is to find out what the
 * nominal, maximum TSC frequency under normal CPU operation.
 *
 * @returns VBox status code.
 * @param   pGip        Pointer to the GIP.
 *
 * @remarks Must be called only after measuring the TSC deltas.
 */
static int supdrvGipMeasureTscFreq(PSUPGLOBALINFOPAGE pGip)
{
    int cTriesLeft = 4;

    /* Assert order. */
    AssertReturn(pGip, VERR_INVALID_PARAMETER);
    AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);

    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG uFlags;
        uint64_t u64NanoTsBefore;
        uint64_t u64NanoTsAfter;
        uint64_t u64TscBefore;
        uint64_t u64TscAfter;
        uint8_t idApicBefore;
        uint8_t idApicAfter;

        /*
         * Synchronize with the host OS clock tick before reading the TSC.
         * Especially important on Windows where the granularity is terrible.
         */
        u64NanoTsBefore = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == u64NanoTsBefore)
            ASMNopPause();

        /* Sample APIC id, TSC and nanosecond clock back-to-back with
           interrupts disabled to keep the readings coherent. */
        uFlags = ASMIntDisableFlags();
        idApicBefore = ASMGetApicId();
        u64TscBefore = ASMReadTSC();
        u64NanoTsBefore = RTTimeSystemNanoTS();
        ASMSetFlags(uFlags);

        /* Activate this when implemented invariant TSC GIP mode. Otherwise systems that are really invariant
           which get detected as async will break. */
#if 0
        if (supdrvIsInvariantTsc())
        {
            /*
             * Sleep wait since the TSC frequency is constant, eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             */
            RTThreadSleep(200);
            u64NanoTsAfter = RTTimeSystemNanoTS();
            while (RTTimeSystemNanoTS() == u64NanoTsAfter)
                ASMNopPause();
            u64NanoTsAfter = RTTimeSystemNanoTS();
        }
        else
#endif
        {
            /* Busy-wait keeping the frequency up and measure.
               Spins for ~100 ms of wall clock time. */
            for (;;)
            {
                u64NanoTsAfter = RTTimeSystemNanoTS();
                if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
                    ASMNopPause();
                else
                    break;
            }
        }

        uFlags = ASMIntDisableFlags();
        idApicAfter = ASMGetApicId();
        u64TscAfter = ASMReadTSC();
        ASMSetFlags(uFlags);

        /* Activate this when implemented invariant TSC GIP mode. Otherwise systems that are really invariant
           which get detected as async will break. */
#if 0
        if (supdrvIsInvariantTsc()) /** @todo replace with enum check. */
        {
            int rc;
            bool fAppliedBefore;
            bool fAppliedAfter;
            rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
            rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);

            if (   !fAppliedBefore
                || !fAppliedAfter)
            {
                SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
                            idApicBefore, idApicAfter, cTriesLeft);
                continue;
            }
        }
#endif

        /*
         * Update GIP.
         * Hz = TSC ticks elapsed * 1e9 / nanoseconds elapsed.
         */
        pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
        return VINF_SUCCESS;
    }

    /* Only reachable via the (currently disabled) retry 'continue' path above. */
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
6038
6039
6040/**
6041 * Creates the GIP.
6042 *
6043 * @returns VBox status code.
6044 * @param pDevExt Instance data. GIP stuff may be updated.
6045 */
6046static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6047{
6048 PSUPGLOBALINFOPAGE pGip;
6049 RTHCPHYS HCPhysGip;
6050 uint32_t u32SystemResolution;
6051 uint32_t u32Interval;
6052 uint32_t u32MinInterval;
6053 uint32_t uMod;
6054 unsigned cCpus;
6055 int rc;
6056
6057 LogFlow(("supdrvGipCreate:\n"));
6058
6059 /* Assert order. */
6060 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6061 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6062 Assert(!pDevExt->pGipTimer);
6063
6064 /*
6065 * Check the CPU count.
6066 */
6067 cCpus = RTMpGetArraySize();
6068 if ( cCpus > RTCPUSET_MAX_CPUS
6069 || cCpus > 256 /*ApicId is used for the mappings*/)
6070 {
6071 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6072 return VERR_TOO_MANY_CPUS;
6073 }
6074
6075 /*
6076 * Allocate a contiguous set of pages with a default kernel mapping.
6077 */
6078 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6079 if (RT_FAILURE(rc))
6080 {
6081 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6082 return rc;
6083 }
6084 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6085 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6086
6087 /*
6088 * Find a reasonable update interval and initialize the structure.
6089 */
6090 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6091 * See @bugref{6710}. */
6092 u32MinInterval = RT_NS_10MS;
6093 u32SystemResolution = RTTimerGetSystemGranularity();
6094 u32Interval = u32MinInterval;
6095 uMod = u32MinInterval % u32SystemResolution;
6096 if (uMod)
6097 u32Interval += u32SystemResolution - uMod;
6098
6099 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6100
6101#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6102 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6103 rc = supdrvTscDeltaInit(pDevExt);
6104#endif
6105 if (RT_SUCCESS(rc))
6106 {
6107 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6108 if (RT_SUCCESS(rc))
6109 {
6110 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6111 if (RT_SUCCESS(rc))
6112 {
6113#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6114 /*
6115 * Measure the TSC deltas now that we have MP notifications.
6116 */
6117 int cTries = 5;
6118 uint16_t iCpu;
6119 do
6120 {
6121 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6122 if (rc != VERR_TRY_AGAIN)
6123 break;
6124 } while (--cTries > 0);
6125 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6126 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6127#endif
6128
6129 rc = supdrvGipMeasureTscFreq(pGip);
6130 if (RT_SUCCESS(rc))
6131 {
6132 if (supdrvIsInvariantTsc())
6133 {
6134 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6135 pGip->aCPUs[iCpu].u64CpuHz = pGip->u64CpuHz;
6136 }
6137
6138 /*
6139 * Create the timer.
6140 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6141 */
6142 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6143 {
6144 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
6145 if (rc == VERR_NOT_SUPPORTED)
6146 {
6147 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6148 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6149 }
6150 }
6151 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6152 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6153 if (RT_SUCCESS(rc))
6154 {
6155 /*
6156 * We're good.
6157 */
6158 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6159 g_pSUPGlobalInfoPage = pGip;
6160 return VINF_SUCCESS;
6161 }
6162 else
6163 {
6164 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6165 Assert(!pDevExt->pGipTimer);
6166 }
6167 }
6168 }
6169 else
6170 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6171 }
6172 else
6173 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6174 }
6175 else
6176 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6177
6178 supdrvGipDestroy(pDevExt);
6179 return rc;
6180}
6181
6182
/**
 * Terminates the GIP.
 *
 * Tears everything down in dependency order: MP notifications first (so no
 * callbacks fire mid-teardown), then the TSC-delta thread, then the GIP data,
 * the timer, the memory object, and finally any system timer granularity
 * grant.  Safe to call on partially created state (used by supdrvGipCreate's
 * failure path).
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Invalidate the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've release the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}
6244
6245
/**
 * Timer callback function sync GIP mode.
 *
 * Samples the TSC and system clock with interrupts disabled, applies the
 * CPU's TSC delta when known, and feeds the result to supdrvGipUpdate().
 * With an invariant TSC it additionally refines the measured TSC frequency
 * over an ~3 second anchored interval.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      The device extension.
 * @param   iTick       The timer tick count (1-based, per RTTimer convention --
 *                      TODO confirm against RTTimerCreateEx docs).
 */
static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t u64TSC = ASMReadTSC();
    uint64_t NanoTS = RTTimeSystemNanoTS();
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;

    if (supdrvIsInvariantTsc())
    {
        PSUPGIPCPU pGipCpu;
        unsigned iCpu;
        PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
        uint8_t idApic = ASMGetApicId();

        /* Map the current APIC id to the GIP per-CPU entry. */
        iCpu = pGip->aiCpuFromApicId[idApic];
        Assert(iCpu < pGip->cCpus);
        pGipCpu = &pGip->aCPUs[iCpu];
        Assert(pGipCpu->idCpu == RTMpCpuId());

        /*
         * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
         * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
         * affected a bit until we get proper TSC deltas than implementing options like
         * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
         *
         * The likely hood of this happening is really low. On Windows, Linux timers
         * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
         */
        if (pGipCpu->i64TSCDelta != INT64_MAX)  /* INT64_MAX == delta not yet measured. */
            u64TSC -= pGipCpu->i64TSCDelta;
    }

    supdrvGipUpdate(pDevExt, NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fOldFlags);

    if (supdrvIsInvariantTsc())
    {
        /*
         * Refine the TSC frequency measurement over a longer interval. Ideally, we want to keep the
         * interval as small as possible while gaining the most consistent and accurate frequency
         * (compared to what the host OS might have measured).
         *
         * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
         * same TSC frequency whenever possible so we need to keep the interval short.
         *
         * NOTE(review): g_u64TSCAnchor/g_u64NanoTSAnchor are file-level globals declared elsewhere
         * in this file; g_u64TSCAnchor == 0 doubles as the "no measurement in flight" sentinel
         * (hence the ++ after anchoring to avoid an accidental zero) -- confirm.
         */
        uint8_t idApic;
        uint64_t u64NanoTS;
        PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
        const int cSeconds = 3;
        if (RT_UNLIKELY(iTick == 3)) /* Helps with more consistent values across multiple runs (esp. Windows). */
        {
            /* Anchor: sync with a clock tick edge, then sample TSC + nano TS atomically. */
            u64NanoTS = RTTimeSystemNanoTS();
            while (RTTimeSystemNanoTS() == u64NanoTS)
                ASMNopPause();
            fOldFlags = ASMIntDisableFlags();
            idApic = ASMGetApicId();
            g_u64TSCAnchor = ASMReadTSC();
            g_u64NanoTSAnchor = RTTimeSystemNanoTS();
            ASMSetFlags(fOldFlags);
            SUPTscDeltaApply(pGip, &g_u64TSCAnchor, idApic, NULL /* pfDeltaApplied */);
            ++g_u64TSCAnchor;
        }
        else if (g_u64TSCAnchor)
        {
            /* Measurement in flight: sample again and finish once >= cSeconds elapsed. */
            uint64_t u64DeltaNanoTS;
            u64NanoTS = RTTimeSystemNanoTS();
            while (RTTimeSystemNanoTS() == u64NanoTS)
                ASMNopPause();
            fOldFlags = ASMIntDisableFlags();
            idApic = ASMGetApicId();
            u64TSC = ASMReadTSC();
            u64NanoTS = RTTimeSystemNanoTS();
            ASMSetFlags(fOldFlags);
            SUPTscDeltaApply(pGip, &u64TSC, idApic, NULL /* pfDeltaApplied */);
            u64DeltaNanoTS = u64NanoTS - g_u64NanoTSAnchor;
            if (u64DeltaNanoTS >= cSeconds * RT_NS_1SEC_64)
            {
                uint16_t iCpu;
                /* Use the 32-bit divide helper when the interval fits, for precision. */
                if (u64DeltaNanoTS < UINT32_MAX)
                    pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64TSC - g_u64TSCAnchor, RT_NS_1SEC, u64DeltaNanoTS);
                else
                    pGip->u64CpuHz = (u64TSC - g_u64TSCAnchor) / (u64DeltaNanoTS / RT_NS_1SEC);

                /* Invariant TSC: propagate the refined frequency to every CPU entry. */
                for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    pGip->aCPUs[iCpu].u64CpuHz = pGip->u64CpuHz;
                g_u64TSCAnchor = 0;
            }
        }
    }
}
6342
6343
6344/**
6345 * Timer callback function for async GIP mode.
6346 * @param pTimer The timer.
6347 * @param pvUser The device extension.
6348 */
6349static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6350{
6351 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6352 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6353 RTCPUID idCpu = RTMpCpuId();
6354 uint64_t u64TSC = ASMReadTSC();
6355 uint64_t NanoTS = RTTimeSystemNanoTS();
6356
6357 /** @todo reset the transaction number and whatnot when iTick == 1. */
6358 if (pDevExt->idGipMaster == idCpu)
6359 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6360 else
6361 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6362
6363 ASMSetFlags(fOldFlags);
6364}
6365
6366
6367/**
6368 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6369 *
6370 * @returns Index of the CPU in the cache set.
6371 * @param pGip The GIP.
6372 * @param idCpu The CPU ID.
6373 */
6374static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6375{
6376 uint32_t i, cTries;
6377
6378 /*
6379 * ASSUMES that CPU IDs are constant.
6380 */
6381 for (i = 0; i < pGip->cCpus; i++)
6382 if (pGip->aCPUs[i].idCpu == idCpu)
6383 return i;
6384
6385 cTries = 0;
6386 do
6387 {
6388 for (i = 0; i < pGip->cCpus; i++)
6389 {
6390 bool fRc;
6391 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6392 if (fRc)
6393 return i;
6394 }
6395 } while (cTries++ < 32);
6396 AssertReleaseFailed();
6397 return i - 1;
6398}
6399
6400
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
 * Must run on the CPU being onlined (asserted below), since it samples the
 * local APIC id for the mapping tables.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.
     * Back-date the timestamp by one update interval so the first tick
     * doesn't see a zero-length interval.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&g_cMpOnOffEvents);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Add this CPU to the set of CPUs that require their TSC delta to be measured.
     *
     * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
     * update the state and it'll get serviced when the thread's listening interval times out.
     */
    if (supdrvIsInvariantTsc())
    {
        RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        if (   pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
            || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
        {
            pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
        }
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    }
#endif

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
6485
6486
/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.  Unlike the online case, this may run on
 * any CPU.  The CPU's TSC delta is invalidated so it gets re-measured if the
 * CPU comes back online.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet;
    unsigned i;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&g_cMpOnOffEvents);

    /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs!
       NOTE(review): g_idTscDeltaInitiator/g_pTscDeltaSync are globals belonging to the TSC-delta
       measurement code elsewhere in this file -- the write below releases its spin barrier. */
    if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
    {
        ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
        ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
    }

    /* Reset the TSC delta, we will recalculate it lazily. */
    ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
6533
6534
/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU. It also updates the associated CPU data.
 *
 * When the GIP master itself goes offline, a new master is elected from the
 * remaining online CPUs and all per-CPU TSC deltas are re-based so the new
 * master's delta becomes zero.
 *
 * @param   enmEvent    The event.
 * @param   idCpu       The cpu it applies to.
 * @param   pvUser      Pointer to the device extension.
 *
 * @remarks This function -must- fire on the newly online'd CPU for the
 *          RTMPEVENT_ONLINE case and can fire on any CPU for the
 *          RTMPEVENT_OFFLINE case.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    /*
     * Update the GIP CPU data.
     */
    if (pGip)
    {
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
                AssertRelease(idCpu == RTMpCpuId());
                supdrvGipMpEventOnline(pDevExt, idCpu);
                break;
            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * Find a new GIP master: the first online CPU that isn't the
             * departing master.
             */
            bool fIgnored;
            unsigned i;
            int64_t iTSCDelta;
            uint32_t idxNewGipMaster;
            RTCPUID idNewGipMaster = NIL_RTCPUID;
            RTCPUSET OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
            {
                RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                if (   RTCpuSetIsMember(&OnlineCpus, idCurCpu)
                    && idCurCpu != idGipMaster)
                {
                    idNewGipMaster = idCurCpu;
                    break;
                }
            }

            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            /* CmpXchg guards against a concurrent master change having happened in between. */
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);

            /*
             * Adjust all the TSC deltas against the new GIP master.
             * Subtracting the new master's old delta from every measured delta
             * makes the new master's own delta zero.
             */
            if (pGip)
            {
                idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
                iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
                Assert(iTSCDelta != INT64_MAX);
                for (i = 0; i < pGip->cCpus; i++)
                {
                    PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
                    int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
                    if (iWorkerDelta != INT64_MAX)  /* INT64_MAX == not measured; leave as-is. */
                        iWorkerDelta -= iTSCDelta;
                    ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
                }
                Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
            }
        }
    }
}
6628
6629
6630/**
6631 * Returns whether the host CPU sports an invariant TSC or not.
6632 *
6633 * @returns true if invariant TSC is supported, false otherwise.
6634 */
6635static bool supdrvIsInvariantTsc(void)
6636{
6637 static bool s_fQueried = false;
6638 static bool s_fIsInvariantTsc = false;
6639 if (!s_fQueried)
6640 {
6641 uint32_t uEax, uEbx, uEcx, uEdx;
6642 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
6643 if (uEax >= 0x80000007)
6644 {
6645 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
6646 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
6647 s_fIsInvariantTsc = true;
6648 }
6649 s_fQueried = true;
6650 }
6651
6652 return s_fIsInvariantTsc;
6653}
6654
6655
6656/**
6657 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6658 * compute the delta between them.
6659 *
6660 * @param idCpu The CPU we are current scheduled on.
6661 * @param pvUser1 Opaque pointer to the GIP.
6662 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6663 *
6664 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6665 * read the TSC at exactly the same time on both the master and the worker
6666 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6667 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6668 * try to minimize the measurement error by computing the minimum read time
6669 * of the compare statement in the worker by taking TSC measurements across
6670 * it.
6671 *
6672 * We ignore the first few runs of the loop in order to prime the cache.
6673 * Also, be careful about using 'pause' instruction in critical busy-wait
6674 * loops in this code - it can cause undesired behaviour with
6675 * hyperthreading.
6676 *
6677 * It must be noted that the computed minimum read time is mostly to
6678 * eliminate huge deltas when the worker is too early and doesn't by itself
6679 * help produce more accurate deltas. We allow two times the computed
6680 * minimum as an arbibtrary acceptable threshold. Therefore, it is still
6681 * possible to get negative deltas where there are none when the worker is
6682 * earlier. As long as these occasional negative deltas are lower than the
6683 * time it takes to exit guest-context and the OS to reschedule EMT on a
6684 * different CPU we won't expose a TSC that jumped backwards. It is because
6685 * of the existence of the negative deltas we don't recompute the delta with
6686 * the master and worker interchanged to eliminate the remaining measurement
6687 * error.
6688 */
6689static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6690{
6691 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
6692 uint32_t *pidWorker = (uint32_t *)pvUser2;
6693 RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
6694 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6695 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6696 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6697 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6698 int cTriesLeft = 12;
6699
6700 if ( idCpu != idMaster
6701 && idCpu != *pidWorker)
6702 return;
6703
6704 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6705 with a timeout to avoid deadlocking the entire system. */
6706 if (!RTMpOnAllIsConcurrentSafe())
6707 {
6708 uint64_t uTscNow;
6709 uint64_t uTscStart;
6710 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6711
6712 ASMSerializeInstruction();
6713 uTscStart = ASMReadTSC();
6714 if (idCpu == idMaster)
6715 {
6716 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6717 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6718 {
6719 ASMSerializeInstruction();
6720 uTscNow = ASMReadTSC();
6721 if (uTscNow - uTscStart > cWaitTicks)
6722 {
6723 /* Set the worker delta to indicate failure, not the master. */
6724 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6725 return;
6726 }
6727
6728 ASMNopPause();
6729 }
6730 }
6731 else
6732 {
6733 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6734 {
6735 ASMSerializeInstruction();
6736 uTscNow = ASMReadTSC();
6737 if (uTscNow - uTscStart > cWaitTicks)
6738 {
6739 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6740 return;
6741 }
6742
6743 ASMNopPause();
6744 }
6745 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6746 }
6747 }
6748
6749 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6750 while (cTriesLeft-- > 0)
6751 {
6752 unsigned i;
6753 uint64_t uMinCmpReadTime = UINT64_MAX;
6754 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6755 {
6756 if (idCpu == idMaster)
6757 {
6758 /*
6759 * The master.
6760 */
6761 RTCCUINTREG uFlags;
6762 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6763 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6764
6765 /* Disable interrupts only in the master for as short a period
6766 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6767 uFlags = ASMIntDisableFlags();
6768
6769 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6770 ;
6771
6772 do
6773 {
6774 ASMSerializeInstruction();
6775 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6776 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6777
6778 ASMSetFlags(uFlags);
6779
6780 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6781 ;
6782
6783 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6784 {
6785 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6786 {
6787 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6788 if (iDelta < pGipCpuWorker->i64TSCDelta)
6789 pGipCpuWorker->i64TSCDelta = iDelta;
6790 }
6791 }
6792
6793 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6794 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6795 }
6796 else
6797 {
6798 /*
6799 * The worker.
6800 */
6801 uint64_t uTscWorker;
6802 uint64_t uTscWorkerFlushed;
6803 uint64_t uCmpReadTime;
6804
6805 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
6806 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
6807 ;
6808 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6809 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
6810
6811 /*
6812 * Keep reading the TSC until we notice that the master has read his. Reading
6813 * the TSC -after- the master has updated the memory is way too late. We thus
6814 * compensate by trying to measure how long it took for the worker to notice
6815 * the memory flushed from the master.
6816 */
6817 do
6818 {
6819 ASMSerializeInstruction();
6820 uTscWorker = ASMReadTSC();
6821 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6822 ASMSerializeInstruction();
6823 uTscWorkerFlushed = ASMReadTSC();
6824
6825 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
6826 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6827 {
6828 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
6829 if (uCmpReadTime < (uMinCmpReadTime << 1))
6830 {
6831 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
6832 if (uCmpReadTime < uMinCmpReadTime)
6833 uMinCmpReadTime = uCmpReadTime;
6834 }
6835 else
6836 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
6837 }
6838 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
6839 {
6840 if (uCmpReadTime < uMinCmpReadTime)
6841 uMinCmpReadTime = uCmpReadTime;
6842 }
6843
6844 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
6845 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
6846 ASMNopPause();
6847 }
6848 }
6849
6850 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
6851 break;
6852 }
6853}
6854
6855
6856/**
6857 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
6858 * synchronization variable. Optionally also clears the deltas on the per-CPU
6859 * GIP struct. as well.
6860 *
6861 * @param pGip Pointer to the GIP.
6862 * @param fClearDeltas Whether the deltas are also to be cleared.
6863 */
6864DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
6865{
6866 unsigned iCpu;
6867 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6868 {
6869 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6870 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
6871 if (fClearDeltas)
6872 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
6873 }
6874 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6875}
6876
6877
6878/**
6879 * Measures the TSC delta between the master GIP CPU and one specified worker
6880 * CPU.
6881 *
6882 * @returns VBox status code.
6883 * @param pDevExt Pointer to the device instance data.
6884 * @param idxWorker The index of the worker CPU from the GIP's array of
6885 * CPUs.
6886 *
6887 * @remarks This can be called with preemption disabled!
6888 */
6889static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
6890{
6891 int rc;
6892 PSUPGLOBALINFOPAGE pGip;
6893 PSUPGIPCPU pGipCpuWorker;
6894 RTCPUID idMaster;
6895
6896 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
6897 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
6898
6899 pGip = pDevExt->pGip;
6900 idMaster = pDevExt->idGipMaster;
6901 pGipCpuWorker = &pGip->aCPUs[idxWorker];
6902
6903 if (pGipCpuWorker->idCpu == idMaster)
6904 {
6905 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
6906 return VINF_SUCCESS;
6907 }
6908
6909 /* Set the master TSC as the initiator. */
6910 while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
6911 {
6912 /*
6913 * Sleep here rather than spin as there is a parallel measurement
6914 * being executed and that can take a good while to be done.
6915 */
6916 RTThreadSleep(1);
6917 }
6918
6919 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6920 {
6921 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
6922 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6923 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6924 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
6925 if (RT_SUCCESS(rc))
6926 {
6927 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
6928 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
6929 }
6930 }
6931 else
6932 rc = VERR_CPU_OFFLINE;
6933
6934 ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
6935 return rc;
6936}
6937
6938
6939/**
6940 * Measures the TSC deltas between CPUs.
6941 *
6942 * @param pDevExt Pointer to the device instance data.
6943 * @param pidxMaster Where to store the index of the chosen master TSC if we
6944 * managed to determine the TSC deltas successfully.
6945 * Optional, can be NULL.
6946 *
6947 * @returns VBox status code.
6948 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
6949 * idCpu, GIP's online CPU set which are populated in
6950 * supdrvGipInitOnCpu().
6951 */
6952static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
6953{
6954 PSUPGIPCPU pGipCpuMaster;
6955 unsigned iCpu;
6956 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6957 uint32_t idxMaster = UINT32_MAX;
6958 int rc = VINF_SUCCESS;
6959 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
6960 uint32_t cOnlineCpus = pGip->cOnlineCpus;
6961
6962 /*
6963 * If we determined the TSC is async., don't bother with measuring deltas.
6964 */
6965 if (RT_UNLIKELY(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC))
6966 return VINF_SUCCESS;
6967
6968 /*
6969 * Pick the first CPU online as the master TSC and make it the new GIP master based
6970 * on the APIC ID.
6971 *
6972 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
6973 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
6974 * master as this point since the sync/async timer isn't created yet.
6975 */
6976 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
6977 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
6978 {
6979 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
6980 if (idxCpu != UINT16_MAX)
6981 {
6982 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
6983 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
6984 {
6985 idxMaster = idxCpu;
6986 pGipCpu->i64TSCDelta = 0;
6987 break;
6988 }
6989 }
6990 }
6991 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
6992 pGipCpuMaster = &pGip->aCPUs[idxMaster];
6993 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6994
6995 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
6996 if (pGip->cOnlineCpus <= 1)
6997 {
6998 if (pidxMaster)
6999 *pidxMaster = idxMaster;
7000 return VINF_SUCCESS;
7001 }
7002
7003 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7004 {
7005 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7006 if ( iCpu != idxMaster
7007 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7008 {
7009 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7010 if (RT_FAILURE(rc))
7011 {
7012 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7013 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7014 break;
7015 }
7016
7017 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
7018 {
7019 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7020 rc = VERR_TRY_AGAIN;
7021 break;
7022 }
7023 }
7024 }
7025
7026 if ( RT_SUCCESS(rc)
7027 && !pGipCpuMaster->i64TSCDelta
7028 && pidxMaster)
7029 {
7030 *pidxMaster = idxMaster;
7031 }
7032 return rc;
7033}
7034
7035
7036/**
7037 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7038 *
7039 * @param idCpu Ignored.
7040 * @param pvUser1 Where to put the TSC.
7041 * @param pvUser2 Ignored.
7042 */
7043static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7044{
7045 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7046}
7047
7048
7049/**
7050 * Determine if Async GIP mode is required because of TSC drift.
7051 *
7052 * When using the default/normal timer code it is essential that the time stamp counter
7053 * (TSC) runs never backwards, that is, a read operation to the counter should return
7054 * a bigger value than any previous read operation. This is guaranteed by the latest
7055 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7056 * case we have to choose the asynchronous timer mode.
7057 *
7058 * @param poffMin Pointer to the determined difference between different cores.
7059 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7060 */
7061static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7062{
7063 /*
7064 * Just iterate all the cpus 8 times and make sure that the TSC is
7065 * ever increasing. We don't bother taking TSC rollover into account.
7066 */
7067 int iEndCpu = RTMpGetArraySize();
7068 int iCpu;
7069 int cLoops = 8;
7070 bool fAsync = false;
7071 int rc = VINF_SUCCESS;
7072 uint64_t offMax = 0;
7073 uint64_t offMin = ~(uint64_t)0;
7074 uint64_t PrevTsc = ASMReadTSC();
7075
7076 while (cLoops-- > 0)
7077 {
7078 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7079 {
7080 uint64_t CurTsc;
7081 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7082 if (RT_SUCCESS(rc))
7083 {
7084 if (CurTsc <= PrevTsc)
7085 {
7086 fAsync = true;
7087 offMin = offMax = PrevTsc - CurTsc;
7088 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7089 iCpu, cLoops, CurTsc, PrevTsc));
7090 break;
7091 }
7092
7093 /* Gather statistics (except the first time). */
7094 if (iCpu != 0 || cLoops != 7)
7095 {
7096 uint64_t off = CurTsc - PrevTsc;
7097 if (off < offMin)
7098 offMin = off;
7099 if (off > offMax)
7100 offMax = off;
7101 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7102 }
7103
7104 /* Next */
7105 PrevTsc = CurTsc;
7106 }
7107 else if (rc == VERR_NOT_SUPPORTED)
7108 break;
7109 else
7110 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7111 }
7112
7113 /* broke out of the loop. */
7114 if (iCpu < iEndCpu)
7115 break;
7116 }
7117
7118 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7119 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7120 fAsync, iEndCpu, rc, offMin, offMax));
7121#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7122 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7123#endif
7124 return fAsync;
7125}
7126
7127
7128/**
7129 * Determine the GIP TSC mode.
7130 *
7131 * @returns The most suitable TSC mode.
7132 * @param pDevExt Pointer to the device instance data.
7133 */
7134static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7135{
7136#if 0
7137 if (supdrvIsInvariantTsc())
7138 return SUPGIPMODE_SYNC_TSC; /** @todo Switch to SUPGIPMODE_INVARIANT_TSC later. */
7139#endif
7140
7141 /*
7142 * On SMP we're faced with two problems:
7143 * (1) There might be a skew between the CPU, so that cpu0
7144 * returns a TSC that is slightly different from cpu1.
7145 * (2) Power management (and other things) may cause the TSC
7146 * to run at a non-constant speed, and cause the speed
7147 * to be different on the cpus. This will result in (1).
7148 *
7149 * So, on SMP systems we'll have to select the ASYNC update method
7150 * if there are symptoms of these problems.
7151 */
7152 if (RTMpGetCount() > 1)
7153 {
7154 uint32_t uEAX, uEBX, uECX, uEDX;
7155 uint64_t u64DiffCoresIgnored;
7156
7157 /* Permit the user and/or the OS specific bits to force async mode. */
7158 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7159 return SUPGIPMODE_ASYNC_TSC;
7160
7161 /* Try check for current differences between the cpus. */
7162 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7163 return SUPGIPMODE_ASYNC_TSC;
7164
7165 /*
7166 * If the CPU supports power management and is an AMD one we
7167 * won't trust it unless it has the TscInvariant bit is set.
7168 */
7169 /* Check for "AuthenticAMD" */
7170 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7171 if ( uEAX >= 1
7172 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7173 {
7174 /* Check for APM support and that TscInvariant is cleared. */
7175 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7176 if (uEAX >= 0x80000007)
7177 {
7178 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7179 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
7180 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7181 return SUPGIPMODE_ASYNC_TSC;
7182 }
7183 }
7184 }
7185 return SUPGIPMODE_SYNC_TSC;
7186}
7187
7188
7189/**
7190 * Initializes per-CPU GIP information.
7191 *
7192 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7193 * @param pCpu Pointer to which GIP CPU to initalize.
7194 * @param u64NanoTS The current nanosecond timestamp.
7195 */
7196static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7197{
7198 pCpu->u32TransactionId = 2;
7199 pCpu->u64NanoTS = u64NanoTS;
7200 pCpu->u64TSC = ASMReadTSC();
7201 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7202 pCpu->i64TSCDelta = INT64_MAX;
7203
7204 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7205 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7206 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7207 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7208
7209 /*
7210 * We don't know the following values until we've executed updates.
7211 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7212 * the 2nd timer callout.
7213 */
7214 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7215 pCpu->u32UpdateIntervalTSC
7216 = pCpu->au32TSCHistory[0]
7217 = pCpu->au32TSCHistory[1]
7218 = pCpu->au32TSCHistory[2]
7219 = pCpu->au32TSCHistory[3]
7220 = pCpu->au32TSCHistory[4]
7221 = pCpu->au32TSCHistory[5]
7222 = pCpu->au32TSCHistory[6]
7223 = pCpu->au32TSCHistory[7]
7224 = (uint32_t)(_4G / pGip->u32UpdateHz);
7225}
7226
7227
7228/**
7229 * Initializes the GIP data.
7230 *
7231 * @param pDevExt Pointer to the device instance data.
7232 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7233 * @param HCPhys The physical address of the GIP.
7234 * @param u64NanoTS The current nanosecond timestamp.
7235 * @param uUpdateHz The update frequency.
7236 * @param uUpdateIntervalNS The update interval in nanoseconds.
7237 * @param cCpus The CPU count.
7238 */
7239static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7240 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7241{
7242 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7243 unsigned i;
7244#ifdef DEBUG_DARWIN_GIP
7245 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7246#else
7247 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7248#endif
7249
7250 /*
7251 * Initialize the structure.
7252 */
7253 memset(pGip, 0, cbGip);
7254 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7255 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7256 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7257 pGip->cCpus = (uint16_t)cCpus;
7258 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7259 pGip->u32UpdateHz = uUpdateHz;
7260 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7261 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7262 RTCpuSetEmpty(&pGip->PresentCpuSet);
7263 RTMpGetSet(&pGip->PossibleCpuSet);
7264 pGip->cOnlineCpus = RTMpGetOnlineCount();
7265 pGip->cPresentCpus = RTMpGetPresentCount();
7266 pGip->cPossibleCpus = RTMpGetCount();
7267 pGip->idCpuMax = RTMpGetMaxCpuId();
7268 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7269 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7270 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7271 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7272
7273 for (i = 0; i < cCpus; i++)
7274 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
7275
7276 /*
7277 * Link it to the device extension.
7278 */
7279 pDevExt->pGip = pGip;
7280 pDevExt->HCPhysGip = HCPhys;
7281 pDevExt->cGipUsers = 0;
7282
7283 /*
7284 * Allocate the TSC delta sync. struct. on a separate cache line.
7285 */
7286 g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
7287 g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
7288 Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
7289}
7290
7291
7292/**
7293 * On CPU initialization callback for RTMpOnAll.
7294 *
7295 * @param idCpu The CPU ID.
7296 * @param pvUser1 The device extension.
7297 * @param pvUser2 The GIP.
7298 */
7299static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7300{
7301 /* This is good enough, even though it will update some of the globals a
7302 bit to much. */
7303 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7304}
7305
7306
7307/**
7308 * Invalidates the GIP data upon termination.
7309 *
7310 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7311 */
7312static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7313{
7314 unsigned i;
7315 pGip->u32Magic = 0;
7316 for (i = 0; i < pGip->cCpus; i++)
7317 {
7318 pGip->aCPUs[i].u64NanoTS = 0;
7319 pGip->aCPUs[i].u64TSC = 0;
7320 pGip->aCPUs[i].iTSCHistoryHead = 0;
7321 pGip->aCPUs[i].u64TSCSample = 0;
7322 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7323 }
7324
7325 if (g_pvTscDeltaSync)
7326 {
7327 RTMemFree(g_pvTscDeltaSync);
7328 g_pTscDeltaSync = NULL;
7329 g_pvTscDeltaSync = NULL;
7330 }
7331}
7332
7333
7334/**
7335 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7336 * updates all the per cpu data except the transaction id.
7337 *
7338 * @param pDevExt The device extension.
7339 * @param pGipCpu Pointer to the per cpu data.
7340 * @param u64NanoTS The current time stamp.
7341 * @param u64TSC The current TSC.
7342 * @param iTick The current timer tick.
7343 *
7344 * @remarks Can be called with interrupts disabled!
7345 */
7346static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7347{
7348 uint64_t u64TSCDelta;
7349 uint32_t u32UpdateIntervalTSC;
7350 uint32_t u32UpdateIntervalTSCSlack;
7351 unsigned iTSCHistoryHead;
7352 uint64_t u64CpuHz;
7353 uint32_t u32TransactionId;
7354
7355 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7356 AssertPtrReturnVoid(pGip);
7357
7358 /* Delta between this and the previous update. */
7359 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7360
7361 /*
7362 * Update the NanoTS.
7363 */
7364 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7365
7366 /*
7367 * Calc TSC delta.
7368 */
7369 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
7370 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7371 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7372
7373 if (u64TSCDelta >> 32)
7374 {
7375 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7376 pGipCpu->cErrors++;
7377 }
7378
7379 /*
7380 * On the 2nd and 3rd callout, reset the history with the current TSC
7381 * interval since the values entered by supdrvGipInit are totally off.
7382 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7383 * better, while the 3rd should be most reliable.
7384 */
7385 u32TransactionId = pGipCpu->u32TransactionId;
7386 if (RT_UNLIKELY( ( u32TransactionId == 5
7387 || u32TransactionId == 7)
7388 && ( iTick == 2
7389 || iTick == 3) ))
7390 {
7391 unsigned i;
7392 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7393 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7394 }
7395
7396 /*
7397 * TSC History.
7398 */
7399 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7400 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7401 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7402 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7403
7404 /*
7405 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7406 *
7407 * On Windows, we have an occasional (but recurring) sour value that messed up
7408 * the history but taking only 1 interval reduces the precision overall.
7409 * However, this problem existed before the invariant mode was introduced.
7410 */
7411 if ( supdrvIsInvariantTsc()
7412 || pGip->u32UpdateHz >= 1000)
7413 {
7414 uint32_t u32;
7415 u32 = pGipCpu->au32TSCHistory[0];
7416 u32 += pGipCpu->au32TSCHistory[1];
7417 u32 += pGipCpu->au32TSCHistory[2];
7418 u32 += pGipCpu->au32TSCHistory[3];
7419 u32 >>= 2;
7420 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7421 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7422 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7423 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7424 u32UpdateIntervalTSC >>= 2;
7425 u32UpdateIntervalTSC += u32;
7426 u32UpdateIntervalTSC >>= 1;
7427
7428 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7429 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7430 }
7431 else if (pGip->u32UpdateHz >= 90)
7432 {
7433 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7434 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7435 u32UpdateIntervalTSC >>= 1;
7436
7437 /* value chosen on a 2GHz thinkpad running windows */
7438 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7439 }
7440 else
7441 {
7442 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7443
7444 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7445 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7446 }
7447 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7448
7449 if (supdrvIsInvariantTsc())
7450 return;
7451
7452 /*
7453 * CpuHz.
7454 */
7455 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7456 u64CpuHz /= pGip->u32UpdateIntervalNS;
7457 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7458}
7459
7460
7461/**
7462 * Updates the GIP.
7463 *
7464 * @param pDevExt The device extension.
7465 * @param u64NanoTS The current nanosecond timesamp.
7466 * @param u64TSC The current TSC timesamp.
7467 * @param idCpu The CPU ID.
7468 * @param iTick The current timer tick.
7469 *
7470 * @remarks Can be called with interrupts disabled!
7471 */
7472static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7473{
7474 /*
7475 * Determine the relevant CPU data.
7476 */
7477 PSUPGIPCPU pGipCpu;
7478 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7479 AssertPtrReturnVoid(pGip);
7480
7481 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7482 pGipCpu = &pGip->aCPUs[0];
7483 else
7484 {
7485 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7486 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7487 return;
7488 pGipCpu = &pGip->aCPUs[iCpu];
7489 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7490 return;
7491 }
7492
7493 /*
7494 * Start update transaction.
7495 */
7496 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7497 {
7498 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7499 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7500 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7501 pGipCpu->cErrors++;
7502 return;
7503 }
7504
7505 /*
7506 * Recalc the update frequency every 0x800th time.
7507 */
7508 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7509 {
7510 if (pGip->u64NanoTSLastUpdateHz)
7511 {
7512#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7513 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7514 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7515 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7516 {
7517 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7518 * calculation on non-invariant hosts if it changes the history decision
7519 * taken in supdrvGipDoUpdateCpu(). */
7520 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7521 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7522 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7523 }
7524#endif
7525 }
7526 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS + 1);
7527 }
7528
7529 /*
7530 * Update the data.
7531 */
7532 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7533
7534 /*
7535 * Complete transaction.
7536 */
7537 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7538}
7539
7540
7541/**
7542 * Updates the per cpu GIP data for the calling cpu.
7543 *
7544 * @param pDevExt The device extension.
7545 * @param u64NanoTS The current nanosecond timesamp.
7546 * @param u64TSC The current TSC timesamp.
7547 * @param idCpu The CPU ID.
7548 * @param idApic The APIC id for the CPU index.
7549 * @param iTick The current timer tick.
7550 *
7551 * @remarks Can be called with interrupts disabled!
7552 */
7553static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7554 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7555{
7556 uint32_t iCpu;
7557 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7558
7559 /*
7560 * Avoid a potential race when a CPU online notification doesn't fire on
7561 * the onlined CPU but the tick creeps in before the event notification is
7562 * run.
7563 */
7564 if (RT_UNLIKELY(iTick == 1))
7565 {
7566 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7567 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7568 supdrvGipMpEventOnline(pDevExt, idCpu);
7569 }
7570
7571 iCpu = pGip->aiCpuFromApicId[idApic];
7572 if (RT_LIKELY(iCpu < pGip->cCpus))
7573 {
7574 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7575 if (pGipCpu->idCpu == idCpu)
7576 {
7577 /*
7578 * Start update transaction.
7579 */
7580 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7581 {
7582 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7583 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7584 pGipCpu->cErrors++;
7585 return;
7586 }
7587
7588 /*
7589 * Update the data.
7590 */
7591 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7592
7593 /*
7594 * Complete transaction.
7595 */
7596 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7597 }
7598 }
7599}
7600
7601
7602/**
7603 * Resume built-in keyboard on MacBook Air and Pro hosts.
7604 * If there is no built-in keyboard device, return success anyway.
7605 *
7606 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7607 */
7608static int supdrvIOCtl_ResumeSuspendedKbds(void)
7609{
7610#if defined(RT_OS_DARWIN)
7611 return supdrvDarwinResumeSuspendedKbds();
7612#else
7613 return VERR_NOT_IMPLEMENTED;
7614#endif
7615}
7616
7617
7618/**
7619 * Service a TSC-delta measurement request.
7620 *
7621 * @returns VBox status code.
7622 * @param pDevExt Pointer to the device instance data.
7623 * @param pReq Pointer to the TSC-delta measurement request.
7624 */
7625static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7626{
7627 PSUPGLOBALINFOPAGE pGip;
7628 RTCPUID idCpuWorker;
7629 int rc = VERR_CPU_NOT_FOUND;
7630 int16_t cTries;
7631 RTMSINTERVAL cMsWaitRetry;
7632 uint16_t iCpu;
7633
7634 /*
7635 * Validate.
7636 */
7637 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7638 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7639 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7640 idCpuWorker = pReq->u.In.idCpu;
7641 if (idCpuWorker == NIL_RTCPUID)
7642 return VERR_INVALID_CPU_ID;
7643
7644 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7645 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7646 pGip = pDevExt->pGip;
7647 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7648 {
7649 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7650 if (pGipCpuWorker->idCpu == idCpuWorker)
7651 {
7652 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7653 && !pReq->u.In.fForce)
7654 return VINF_SUCCESS;
7655
7656#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7657 if (pReq->u.In.fAsync)
7658 {
7659 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7660 * to pass those options to the thread somehow and implement it in the
7661 * thread. Check if anyone uses/needs fAsync before implementing this. */
7662 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
7663 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7664 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7665 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7666 {
7667 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7668 }
7669 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7670 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7671 return VINF_SUCCESS;
7672 }
7673#endif
7674
7675 while (cTries--)
7676 {
7677 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7678 if (RT_SUCCESS(rc))
7679 {
7680 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7681 break;
7682 }
7683
7684 if (cMsWaitRetry)
7685 RTThreadSleep(cMsWaitRetry);
7686 }
7687
7688 break;
7689 }
7690 }
7691 return rc;
7692}
7693
7694
7695/**
7696 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7697 *
7698 * @returns VBox status code.
7699 * @param pDevExt Pointer to the device instance data.
7700 * @param pReq Pointer to the TSC-read request.
7701 */
7702static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7703{
7704 uint64_t uTsc;
7705 uint16_t idApic;
7706 int16_t cTries;
7707 PSUPGLOBALINFOPAGE pGip;
7708 int rc;
7709
7710 /*
7711 * Validate.
7712 */
7713 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7714 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7715 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7716 pGip = pDevExt->pGip;
7717
7718 cTries = 4;
7719 while (cTries-- > 0)
7720 {
7721 rc = SUPReadTsc(&uTsc, &idApic);
7722 if (RT_SUCCESS(rc))
7723 {
7724 pReq->u.Out.u64AdjustedTsc = uTsc;
7725 pReq->u.Out.idApic = idApic;
7726 return VINF_SUCCESS;
7727 }
7728 else
7729 {
7730 int rc2;
7731 uint16_t iCpu;
7732
7733 /* If we failed to have a delta, measurement the delta and retry. */
7734 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7735 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7736 iCpu = pGip->aiCpuFromApicId[idApic];
7737 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7738
7739 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7740 if (RT_SUCCESS(rc2))
7741 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7742 }
7743 }
7744
7745 return rc;
7746}
7747
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette