VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53209

Last change on this file since 53209 was 53209, checked in by vboxsync, 10 years ago

HostDrivers/Support: Fix recalc. frequency code, eliminate u32UpdateHz from the equation (too little accuracy) and fix initialization.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 282.0 KB
Line 
1/* $Id: SUPDrv.c 53209 2014-11-04 16:53:37Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
92/** The frequency by which we recalculate the u32UpdateHz and
93 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
94 *
95 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
96 */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/** A reserved TSC value used for synchronization as well as measurement of
100 * TSC deltas. */
101#define GIP_TSC_DELTA_RSVD UINT64_MAX
102/** The number of TSC delta measurement loops in total (includes primer and
103 * read-time loops). */
104#define GIP_TSC_DELTA_LOOPS 96
105/** The number of cache primer loops. */
106#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The number of loops during which we keep computing the minimum read time. */
108#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
109/** Stop measurement of TSC delta. */
110#define GIP_TSC_DELTA_SYNC_STOP 0
111/** Start measurement of TSC delta. */
112#define GIP_TSC_DELTA_SYNC_START 1
113/** Worker thread is ready for reading the TSC. */
114#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
115/** Worker thread is done updating TSC delta info. */
116#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
/** When IPRT isn't concurrency safe: Master is ready and will wait for worker
 * with a timeout. */
119#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
/** When IPRT isn't concurrency safe: Worker is ready after waiting for
 * master with a timeout. */
122#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
123
124AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
125AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
126
127/** @def VBOX_SVN_REV
128 * The makefile should define this if it can. */
129#ifndef VBOX_SVN_REV
130# define VBOX_SVN_REV 0
131#endif
132
133#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
134# define DO_NOT_START_GIP
135#endif
136
137
138/*******************************************************************************
139* Internal Functions *
140*******************************************************************************/
141static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
142static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
143static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
144static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
145static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
146static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
147static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
148static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
149static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
150static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
151static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
152static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
153static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
154DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
155DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
156static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
157static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
158static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
159static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
160static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
161static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
162static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
163static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
164static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
165static bool supdrvIsInvariantTsc(void);
166static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
167 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
168static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
169static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
170static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
171static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
172 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
173static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
174static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
175static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
176static int supdrvIOCtl_ResumeSuspendedKbds(void);
177
178
179/*******************************************************************************
180* Global Variables *
181*******************************************************************************/
182DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
183
184/**
185 * The TSC delta synchronization struct. rounded to cache line size.
186 */
187typedef union SUPTSCDELTASYNC
188{
189 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
190 volatile uint32_t u;
191 /** Padding to cache line size. */
192 uint8_t u8Padding[64];
193} SUPTSCDELTASYNC;
194AssertCompileSize(SUPTSCDELTASYNC, 64);
195typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;
196
197/** Pointer to the TSC delta sync. struct. */
198static void *g_pvTscDeltaSync;
199/** Aligned pointer to the TSC delta sync. struct. */
200static PSUPTSCDELTASYNC g_pTscDeltaSync;
201/** The TSC delta measurement initiator Cpu Id. */
202static volatile RTCPUID g_idTscDeltaInitiator = NIL_RTCPUID;
203/** Number of online/offline events, incremented each time a CPU goes online
204 * or offline. */
205static volatile uint32_t g_cMpOnOffEvents;
206
207/**
208 * Array of the R0 SUP API.
209 */
210static SUPFUNC g_aFunctions[] =
211{
212/* SED: START */
213 /* name function */
214 /* Entries with absolute addresses determined at runtime, fixup
215 code makes ugly ASSUMPTIONS about the order here: */
216 { "SUPR0AbsIs64bit", (void *)0 },
217 { "SUPR0Abs64bitKernelCS", (void *)0 },
218 { "SUPR0Abs64bitKernelSS", (void *)0 },
219 { "SUPR0Abs64bitKernelDS", (void *)0 },
220 { "SUPR0AbsKernelCS", (void *)0 },
221 { "SUPR0AbsKernelSS", (void *)0 },
222 { "SUPR0AbsKernelDS", (void *)0 },
223 { "SUPR0AbsKernelES", (void *)0 },
224 { "SUPR0AbsKernelFS", (void *)0 },
225 { "SUPR0AbsKernelGS", (void *)0 },
226 /* Normal function pointers: */
227 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
228 { "SUPGetGIP", (void *)SUPGetGIP },
229 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
230 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
231 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
232 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
233 { "SUPR0ContFree", (void *)SUPR0ContFree },
234 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
235 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
236 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
237 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
238 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
239 { "SUPR0LockMem", (void *)SUPR0LockMem },
240 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
241 { "SUPR0LowFree", (void *)SUPR0LowFree },
242 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
243 { "SUPR0MemFree", (void *)SUPR0MemFree },
244 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
245 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
246 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
247 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
248 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
249 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
250 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
251 { "SUPR0PageFree", (void *)SUPR0PageFree },
252 { "SUPR0Printf", (void *)SUPR0Printf },
253 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
254 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
255 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
256 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
257 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
258 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
259 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
260 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
261 { "SUPSemEventClose", (void *)SUPSemEventClose },
262 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
263 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
264 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
265 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
266 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
267 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
268 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
269 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
270 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
271 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
272 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
273 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
274 { "SUPSemEventWait", (void *)SUPSemEventWait },
275 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
276 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
277 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
278
279 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
280 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
281 { "RTAssertMsg1", (void *)RTAssertMsg1 },
282 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
283 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
284 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
285 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
286 { "RTCrc32", (void *)RTCrc32 },
287 { "RTCrc32Finish", (void *)RTCrc32Finish },
288 { "RTCrc32Process", (void *)RTCrc32Process },
289 { "RTCrc32Start", (void *)RTCrc32Start },
290 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
291 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
292 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
293 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
294 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
295 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
296 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
297 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
298 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
299 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
300 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
301 { "RTLogPrintfV", (void *)RTLogPrintfV },
302 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
303 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
304 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
305 { "RTMemAllocTag", (void *)RTMemAllocTag },
306 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
307 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
308 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
309 { "RTMemDupExTag", (void *)RTMemDupExTag },
310 { "RTMemDupTag", (void *)RTMemDupTag },
311 { "RTMemFree", (void *)RTMemFree },
312 { "RTMemFreeEx", (void *)RTMemFreeEx },
313 { "RTMemReallocTag", (void *)RTMemReallocTag },
314 { "RTMpCpuId", (void *)RTMpCpuId },
315 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
316 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
317 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
318 { "RTMpGetCount", (void *)RTMpGetCount },
319 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
320 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
321 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
322 { "RTMpGetSet", (void *)RTMpGetSet },
323 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
324 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
325 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
326 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
327 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
328 { "RTMpOnAll", (void *)RTMpOnAll },
329 { "RTMpOnOthers", (void *)RTMpOnOthers },
330 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
331 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
332 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
333 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
334 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
335 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
336 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
337 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
338 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
339 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
340 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
341 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
342 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
343 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
344 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
345 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
346 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
347 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
348 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
349 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
350 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
351 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
352 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
353 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
354 { "RTProcSelf", (void *)RTProcSelf },
355 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
356 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
357 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
358 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
359 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
360 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
361 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
362 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
363 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
364 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
365 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
366 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
367 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
368 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
369 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
370 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
371 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
372 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
373 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
374 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
375 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
376 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
377 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
378 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
379 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
380 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
381 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
382 { "RTSemEventCreate", (void *)RTSemEventCreate },
383 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
384 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
385 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
386 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
387 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
388 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
389 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
390 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
391 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
392 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
393 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
394 { "RTSemEventSignal", (void *)RTSemEventSignal },
395 { "RTSemEventWait", (void *)RTSemEventWait },
396 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
397 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
398 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
399 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
400 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
401 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
402 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
403 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
404 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
405 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
406 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
407 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
408 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
409 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
410 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
411 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
412 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
413 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
414 { "RTStrCopy", (void *)RTStrCopy },
415 { "RTStrDupTag", (void *)RTStrDupTag },
416 { "RTStrFormat", (void *)RTStrFormat },
417 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
418 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
419 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
420 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
421 { "RTStrFormatV", (void *)RTStrFormatV },
422 { "RTStrFree", (void *)RTStrFree },
423 { "RTStrNCmp", (void *)RTStrNCmp },
424 { "RTStrPrintf", (void *)RTStrPrintf },
425 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
426 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
427 { "RTStrPrintfV", (void *)RTStrPrintfV },
428 { "RTThreadCreate", (void *)RTThreadCreate },
429 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
430 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
431 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
432 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
433 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
434 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
435 { "RTThreadGetName", (void *)RTThreadGetName },
436 { "RTThreadGetNative", (void *)RTThreadGetNative },
437 { "RTThreadGetType", (void *)RTThreadGetType },
438 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
439 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
440 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
441 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
442 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
443 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
444 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
445 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
446 { "RTThreadSelf", (void *)RTThreadSelf },
447 { "RTThreadSelfName", (void *)RTThreadSelfName },
448 { "RTThreadSleep", (void *)RTThreadSleep },
449 { "RTThreadUserReset", (void *)RTThreadUserReset },
450 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
451 { "RTThreadUserWait", (void *)RTThreadUserWait },
452 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
453 { "RTThreadWait", (void *)RTThreadWait },
454 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
455 { "RTThreadYield", (void *)RTThreadYield },
456 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
457 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
458 { "RTTimeNow", (void *)RTTimeNow },
459 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
460 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
461 { "RTTimerCreate", (void *)RTTimerCreate },
462 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
463 { "RTTimerDestroy", (void *)RTTimerDestroy },
464 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
465 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
466 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
467 { "RTTimerStart", (void *)RTTimerStart },
468 { "RTTimerStop", (void *)RTTimerStop },
469 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
470 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
471 { "RTUuidCompare", (void *)RTUuidCompare },
472 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
473 { "RTUuidFromStr", (void *)RTUuidFromStr },
474/* SED: END */
475};
476
477#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
478/**
479 * Drag in the rest of IRPT since we share it with the
480 * rest of the kernel modules on darwin.
481 */
482PFNRT g_apfnVBoxDrvIPRTDeps[] =
483{
484 /* VBoxNetAdp */
485 (PFNRT)RTRandBytes,
486 /* VBoxUSB */
487 (PFNRT)RTPathStripFilename,
488 NULL
489};
490#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_SOLARIS */
491
492
493/**
494 * Initializes the device extentsion structure.
495 *
496 * @returns IPRT status code.
497 * @param pDevExt The device extension to initialize.
498 * @param cbSession The size of the session structure. The size of
499 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
500 * defined because we're skipping the OS specific members
501 * then.
502 */
503int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
504{
505 int rc;
506
507#ifdef SUPDRV_WITH_RELEASE_LOGGER
508 /*
509 * Create the release log.
510 */
511 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
512 PRTLOGGER pRelLogger;
513 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
514 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
515 if (RT_SUCCESS(rc))
516 RTLogRelSetDefaultInstance(pRelLogger);
517 /** @todo Add native hook for getting logger config parameters and setting
518 * them. On linux we should use the module parameter stuff... */
519#endif
520
521 /*
522 * Initialize it.
523 */
524 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
525 pDevExt->Spinlock = NIL_RTSPINLOCK;
526 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
527 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
528 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
529 if (RT_SUCCESS(rc))
530 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
531 if (RT_SUCCESS(rc))
532 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
533
534 if (RT_SUCCESS(rc))
535#ifdef SUPDRV_USE_MUTEX_FOR_LDR
536 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
537#else
538 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
539#endif
540 if (RT_SUCCESS(rc))
541 {
542 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
543 if (RT_SUCCESS(rc))
544 {
545#ifdef SUPDRV_USE_MUTEX_FOR_LDR
546 rc = RTSemMutexCreate(&pDevExt->mtxGip);
547#else
548 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
549#endif
550 if (RT_SUCCESS(rc))
551 {
552 rc = supdrvGipCreate(pDevExt);
553 if (RT_SUCCESS(rc))
554 {
555 rc = supdrvTracerInit(pDevExt);
556 if (RT_SUCCESS(rc))
557 {
558 pDevExt->pLdrInitImage = NULL;
559 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
560 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
561 pDevExt->cbSession = (uint32_t)cbSession;
562
563 /*
564 * Fixup the absolute symbols.
565 *
566 * Because of the table indexing assumptions we'll have a little #ifdef orgy
567 * here rather than distributing this to OS specific files. At least for now.
568 */
569#ifdef RT_OS_DARWIN
570# if ARCH_BITS == 32
571 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
572 {
573 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
574 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
575 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
576 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
577 }
578 else
579 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
580 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
581 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
582 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
583 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
584 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
585 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
586# else /* 64-bit darwin: */
587 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
588 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
589 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
590 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
591 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
592 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
593 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
594 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
595 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
596 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
597
598# endif
599#else /* !RT_OS_DARWIN */
600# if ARCH_BITS == 64
601 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
602 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
603 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
604 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
605# else
606 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
607# endif
608 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
609 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
610 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
611 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
612 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
613 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
614#endif /* !RT_OS_DARWIN */
615 return VINF_SUCCESS;
616 }
617
618 supdrvGipDestroy(pDevExt);
619 }
620
621#ifdef SUPDRV_USE_MUTEX_FOR_GIP
622 RTSemMutexDestroy(pDevExt->mtxGip);
623 pDevExt->mtxGip = NIL_RTSEMMUTEX;
624#else
625 RTSemFastMutexDestroy(pDevExt->mtxGip);
626 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
627#endif
628 }
629 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
630 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
631 }
632#ifdef SUPDRV_USE_MUTEX_FOR_LDR
633 RTSemMutexDestroy(pDevExt->mtxLdr);
634 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
635#else
636 RTSemFastMutexDestroy(pDevExt->mtxLdr);
637 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
638#endif
639 }
640
641 RTSpinlockDestroy(pDevExt->Spinlock);
642 pDevExt->Spinlock = NIL_RTSPINLOCK;
643 RTSpinlockDestroy(pDevExt->hGipSpinlock);
644 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
645 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
646 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
647
648#ifdef SUPDRV_WITH_RELEASE_LOGGER
649 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
650 RTLogDestroy(RTLogSetDefaultInstance(NULL));
651#endif
652
653 return rc;
654}
655
656
/**
 * Delete the device extension (e.g. cleanup members).
 *
 * Counterpart of the device extension initialization code: tears down the
 * GIP/loader mutexes, the spinlocks, the object and usage free lists, the
 * GIP itself, the tracer and finally (optionally) the release loggers.
 * Must only be called once no sessions can reference the extension anymore.
 *
 * @param   pDevExt         The device extension to delete.
 */
void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
{
    PSUPDRVOBJ          pObj;
    PSUPDRVUSAGE        pUsage;

    /*
     * Kill mutexes and spinlocks.
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexDestroy(pDevExt->mtxGip);
    pDevExt->mtxGip = NIL_RTSEMMUTEX;
#else
    RTSemFastMutexDestroy(pDevExt->mtxGip);
    pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
#endif
#ifdef SUPDRV_USE_MUTEX_FOR_LDR
    RTSemMutexDestroy(pDevExt->mtxLdr);
    pDevExt->mtxLdr = NIL_RTSEMMUTEX;
#else
    RTSemFastMutexDestroy(pDevExt->mtxLdr);
    pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
#endif
    RTSpinlockDestroy(pDevExt->Spinlock);
    pDevExt->Spinlock = NIL_RTSPINLOCK;
    RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
    pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
    RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
    pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;

    /*
     * Free lists.
     */
    /* Objects: the list should already be empty; the loop below is a
       best-effort sweep for leftovers (can trigger on forced unloads). */
    pObj = pDevExt->pObjs;
    Assert(!pObj);                      /* (can trigger on forced unloads) */
    pDevExt->pObjs = NULL;
    while (pObj)
    {
        void *pvFree = pObj;
        pObj = pObj->pNext;
        RTMemFree(pvFree);
    }

    /* Usage records: drain the free-record cache. */
    pUsage = pDevExt->pUsageFree;
    pDevExt->pUsageFree = NULL;
    while (pUsage)
    {
        void *pvFree = pUsage;
        pUsage = pUsage->pNext;
        RTMemFree(pvFree);
    }

    /* Kill the GIP before its protecting spinlock is destroyed. */
    supdrvGipDestroy(pDevExt);
    RTSpinlockDestroy(pDevExt->hGipSpinlock);
    pDevExt->hGipSpinlock = NIL_RTSPINLOCK;

    supdrvTracerTerm(pDevExt);

#ifdef SUPDRV_WITH_RELEASE_LOGGER
    /* Destroy the loggers last so everything above may still log. */
    RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
    RTLogDestroy(RTLogSetDefaultInstance(NULL));
#endif
}
728
729
730/**
731 * Create session.
732 *
733 * @returns IPRT status code.
734 * @param pDevExt Device extension.
735 * @param fUser Flag indicating whether this is a user or kernel
736 * session.
737 * @param fUnrestricted Unrestricted access (system) or restricted access
738 * (user)?
739 * @param ppSession Where to store the pointer to the session data.
740 */
741int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
742{
743 int rc;
744 PSUPDRVSESSION pSession;
745
746 if (!SUP_IS_DEVEXT_VALID(pDevExt))
747 return VERR_INVALID_PARAMETER;
748
749 /*
750 * Allocate memory for the session data.
751 */
752 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
753 if (pSession)
754 {
755 /* Initialize session data. */
756 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
757 if (!rc)
758 {
759 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
760 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
761 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
762 if (RT_SUCCESS(rc))
763 {
764 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
765 pSession->pDevExt = pDevExt;
766 pSession->u32Cookie = BIRD_INV;
767 pSession->fUnrestricted = fUnrestricted;
768 /*pSession->fInHashTable = false; */
769 pSession->cRefs = 1;
770 /*pSession->pCommonNextHash = NULL;
771 pSession->ppOsSessionPtr = NULL; */
772 if (fUser)
773 {
774 pSession->Process = RTProcSelf();
775 pSession->R0Process = RTR0ProcHandleSelf();
776 }
777 else
778 {
779 pSession->Process = NIL_RTPROCESS;
780 pSession->R0Process = NIL_RTR0PROCESS;
781 }
782 /*pSession->pLdrUsage = NULL;
783 pSession->pVM = NULL;
784 pSession->pUsage = NULL;
785 pSession->pGip = NULL;
786 pSession->fGipReferenced = false;
787 pSession->Bundle.cUsed = 0; */
788 pSession->Uid = NIL_RTUID;
789 pSession->Gid = NIL_RTGID;
790 /*pSession->uTracerData = 0;*/
791 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
792 RTListInit(&pSession->TpProviders);
793 /*pSession->cTpProviders = 0;*/
794 /*pSession->cTpProbesFiring = 0;*/
795 RTListInit(&pSession->TpUmods);
796 /*RT_ZERO(pSession->apTpLookupTable);*/
797
798 VBOXDRV_SESSION_CREATE(pSession, fUser);
799 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
800 return VINF_SUCCESS;
801 }
802
803 RTSpinlockDestroy(pSession->Spinlock);
804 }
805 RTMemFree(pSession);
806 *ppSession = NULL;
807 Log(("Failed to create spinlock, rc=%d!\n", rc));
808 }
809 else
810 rc = VERR_NO_MEMORY;
811
812 return rc;
813}
814
815
816/**
817 * Cleans up the session in the context of the process to which it belongs, the
818 * caller will free the session and the session spinlock.
819 *
820 * This should normally occur when the session is closed or as the process
821 * exits. Careful reference counting in the OS specfic code makes sure that
822 * there cannot be any races between process/handle cleanup callbacks and
823 * threads doing I/O control calls.
824 *
825 * @param pDevExt The device extension.
826 * @param pSession Session data.
827 */
828static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
829{
830 int rc;
831 PSUPDRVBUNDLE pBundle;
832 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
833
834 Assert(!pSession->fInHashTable);
835 Assert(!pSession->ppOsSessionPtr);
836 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
837 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
838
839 /*
840 * Remove logger instances related to this session.
841 */
842 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
843
844 /*
845 * Destroy the handle table.
846 */
847 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
848 AssertRC(rc);
849 pSession->hHandleTable = NIL_RTHANDLETABLE;
850
851 /*
852 * Release object references made in this session.
853 * In theory there should be noone racing us in this session.
854 */
855 Log2(("release objects - start\n"));
856 if (pSession->pUsage)
857 {
858 PSUPDRVUSAGE pUsage;
859 RTSpinlockAcquire(pDevExt->Spinlock);
860
861 while ((pUsage = pSession->pUsage) != NULL)
862 {
863 PSUPDRVOBJ pObj = pUsage->pObj;
864 pSession->pUsage = pUsage->pNext;
865
866 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
867 if (pUsage->cUsage < pObj->cUsage)
868 {
869 pObj->cUsage -= pUsage->cUsage;
870 RTSpinlockRelease(pDevExt->Spinlock);
871 }
872 else
873 {
874 /* Destroy the object and free the record. */
875 if (pDevExt->pObjs == pObj)
876 pDevExt->pObjs = pObj->pNext;
877 else
878 {
879 PSUPDRVOBJ pObjPrev;
880 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
881 if (pObjPrev->pNext == pObj)
882 {
883 pObjPrev->pNext = pObj->pNext;
884 break;
885 }
886 Assert(pObjPrev);
887 }
888 RTSpinlockRelease(pDevExt->Spinlock);
889
890 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
891 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
892 if (pObj->pfnDestructor)
893 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
894 RTMemFree(pObj);
895 }
896
897 /* free it and continue. */
898 RTMemFree(pUsage);
899
900 RTSpinlockAcquire(pDevExt->Spinlock);
901 }
902
903 RTSpinlockRelease(pDevExt->Spinlock);
904 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
905 }
906 Log2(("release objects - done\n"));
907
908 /*
909 * Do tracer cleanups related to this session.
910 */
911 Log2(("release tracer stuff - start\n"));
912 supdrvTracerCleanupSession(pDevExt, pSession);
913 Log2(("release tracer stuff - end\n"));
914
915 /*
916 * Release memory allocated in the session.
917 *
918 * We do not serialize this as we assume that the application will
919 * not allocated memory while closing the file handle object.
920 */
921 Log2(("freeing memory:\n"));
922 pBundle = &pSession->Bundle;
923 while (pBundle)
924 {
925 PSUPDRVBUNDLE pToFree;
926 unsigned i;
927
928 /*
929 * Check and unlock all entries in the bundle.
930 */
931 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
932 {
933 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
934 {
935 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
936 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
937 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
938 {
939 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
940 AssertRC(rc); /** @todo figure out how to handle this. */
941 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
942 }
943 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
944 AssertRC(rc); /** @todo figure out how to handle this. */
945 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
946 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
947 }
948 }
949
950 /*
951 * Advance and free previous bundle.
952 */
953 pToFree = pBundle;
954 pBundle = pBundle->pNext;
955
956 pToFree->pNext = NULL;
957 pToFree->cUsed = 0;
958 if (pToFree != &pSession->Bundle)
959 RTMemFree(pToFree);
960 }
961 Log2(("freeing memory - done\n"));
962
963 /*
964 * Deregister component factories.
965 */
966 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
967 Log2(("deregistering component factories:\n"));
968 if (pDevExt->pComponentFactoryHead)
969 {
970 PSUPDRVFACTORYREG pPrev = NULL;
971 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
972 while (pCur)
973 {
974 if (pCur->pSession == pSession)
975 {
976 /* unlink it */
977 PSUPDRVFACTORYREG pNext = pCur->pNext;
978 if (pPrev)
979 pPrev->pNext = pNext;
980 else
981 pDevExt->pComponentFactoryHead = pNext;
982
983 /* free it */
984 pCur->pNext = NULL;
985 pCur->pSession = NULL;
986 pCur->pFactory = NULL;
987 RTMemFree(pCur);
988
989 /* next */
990 pCur = pNext;
991 }
992 else
993 {
994 /* next */
995 pPrev = pCur;
996 pCur = pCur->pNext;
997 }
998 }
999 }
1000 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
1001 Log2(("deregistering component factories - done\n"));
1002
1003 /*
1004 * Loaded images needs to be dereferenced and possibly freed up.
1005 */
1006 supdrvLdrLock(pDevExt);
1007 Log2(("freeing images:\n"));
1008 if (pSession->pLdrUsage)
1009 {
1010 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1011 pSession->pLdrUsage = NULL;
1012 while (pUsage)
1013 {
1014 void *pvFree = pUsage;
1015 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1016 if (pImage->cUsage > pUsage->cUsage)
1017 pImage->cUsage -= pUsage->cUsage;
1018 else
1019 supdrvLdrFree(pDevExt, pImage);
1020 pUsage->pImage = NULL;
1021 pUsage = pUsage->pNext;
1022 RTMemFree(pvFree);
1023 }
1024 }
1025 supdrvLdrUnlock(pDevExt);
1026 Log2(("freeing images - done\n"));
1027
1028 /*
1029 * Unmap the GIP.
1030 */
1031 Log2(("umapping GIP:\n"));
1032 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1033 {
1034 SUPR0GipUnmap(pSession);
1035 pSession->fGipReferenced = 0;
1036 }
1037 Log2(("umapping GIP - done\n"));
1038}
1039
1040
1041/**
1042 * Common code for freeing a session when the reference count reaches zero.
1043 *
1044 * @param pDevExt Device extension.
1045 * @param pSession Session data.
1046 * This data will be freed by this routine.
1047 */
1048static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1049{
1050 VBOXDRV_SESSION_CLOSE(pSession);
1051
1052 /*
1053 * Cleanup the session first.
1054 */
1055 supdrvCleanupSession(pDevExt, pSession);
1056 supdrvOSCleanupSession(pDevExt, pSession);
1057
1058 /*
1059 * Free the rest of the session stuff.
1060 */
1061 RTSpinlockDestroy(pSession->Spinlock);
1062 pSession->Spinlock = NIL_RTSPINLOCK;
1063 pSession->pDevExt = NULL;
1064 RTMemFree(pSession);
1065 LogFlow(("supdrvDestroySession: returns\n"));
1066}
1067
1068
/**
 * Inserts the session into the global hash table.
 *
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_WRONG_ORDER if the session was already inserted (asserted).
 * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
 *          session (asserted).
 * @retval  VERR_RESOURCE_IN_USE if this process (same R0 process handle)
 *          already has an open session.
 * @retval  VERR_DUPLICATE if there is already a session for that pid.
 *
 * @param   pDevExt         The device extension.
 * @param   pSession        The session.
 * @param   ppOsSessionPtr  Pointer to the OS session pointer, if any is
 *                          available and used.  This will set to point to the
 *                          session while under the protection of the session
 *                          hash table spinlock.  It will also be kept in
 *                          PSUPDRVSESSION::ppOsSessionPtr for lookup and
 *                          cleanup use.
 * @param   pvUser          Argument for supdrvOSSessionHashTabInserted.
 */
int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
                                        void *pvUser)
{
    PSUPDRVSESSION  pCur;
    unsigned        iHash;

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);

    /*
     * Calculate the hash table index and acquire the spinlock.
     */
    iHash = SUPDRV_SESSION_HASH(pSession->Process);

    RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);

    /*
     * If there are collisions, we need to carefully check if we got a
     * duplicate.  There can only be one open session per process.
     */
    pCur = pDevExt->apSessionHashTab[iHash];
    if (pCur)
    {
        /* Walk the chain looking for an entry with the same process ID. */
        while (pCur && pCur->Process != pSession->Process)
            pCur = pCur->pCommonNextHash;

        if (pCur)
        {
            RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
            if (pCur == pSession)
            {
                /* Double insertion - caller bug. */
                Assert(pSession->fInHashTable);
                AssertFailed();
                return VERR_WRONG_ORDER;
            }
            Assert(!pSession->fInHashTable);
            if (pCur->R0Process == pSession->R0Process)
                return VERR_RESOURCE_IN_USE;
            return VERR_DUPLICATE;
        }
    }
    Assert(!pSession->fInHashTable);
    Assert(!pSession->ppOsSessionPtr);

    /*
     * Insert it, doing a callout to the OS specific code in case it has
     * anything it wishes to do while we're holding the spinlock.
     */
    pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
    pDevExt->apSessionHashTab[iHash] = pSession;
    pSession->fInHashTable    = true;
    ASMAtomicIncS32(&pDevExt->cSessions);

    pSession->ppOsSessionPtr = ppOsSessionPtr;
    if (ppOsSessionPtr)
        ASMAtomicWritePtr(ppOsSessionPtr, pSession);

    supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);

    /*
     * Retain a reference for the pointer in the session table.
     */
    ASMAtomicIncU32(&pSession->cRefs);

    RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    return VINF_SUCCESS;
}
1158
1159
/**
 * Removes the session from the global hash table.
 *
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_NOT_FOUND if the session was already removed (asserted).
 * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
 *          session (asserted).
 *
 * @param   pDevExt     The device extension.
 * @param   pSession    The session.  The caller is expected to have a reference
 *                      to this so it won't croak on us when we release the hash
 *                      table reference.
 * @param   pvUser      OS specific context value for the
 *                      supdrvOSSessionHashTabRemoved callback.
 */
int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
{
    PSUPDRVSESSION  pCur;
    unsigned        iHash;
    int32_t         cRefs;

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);

    /*
     * Calculate the hash table index and acquire the spinlock.
     */
    iHash = SUPDRV_SESSION_HASH(pSession->Process);

    RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);

    /*
     * Unlink it.
     */
    pCur = pDevExt->apSessionHashTab[iHash];
    if (pCur == pSession)
        pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
    else
    {
        /* Not the chain head - walk the chain to find the predecessor. */
        PSUPDRVSESSION pPrev = pCur;
        while (pCur && pCur != pSession)
        {
            pPrev = pCur;
            pCur  = pCur->pCommonNextHash;
        }
        if (pCur)
            pPrev->pCommonNextHash = pCur->pCommonNextHash;
        else
        {
            /* Already removed - nothing to do (asserted). */
            Assert(!pSession->fInHashTable);
            RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
            return VERR_NOT_FOUND;
        }
    }

    pSession->pCommonNextHash = NULL;
    pSession->fInHashTable    = false;

    ASMAtomicDecS32(&pDevExt->cSessions);

    /*
     * Clear OS specific session pointer if available and do the OS callback.
     */
    if (pSession->ppOsSessionPtr)
    {
        ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
        pSession->ppOsSessionPtr = NULL;
    }

    supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);

    RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);

    /*
     * Drop the reference the hash table had to the session.  This shouldn't
     * be the last reference (the caller holds one)!  The destroy path below
     * is defensive handling only.
     */
    cRefs = ASMAtomicDecU32(&pSession->cRefs);
    Assert(cRefs > 0 && cRefs < _1M);
    if (cRefs == 0)
        supdrvDestroySession(pDevExt, pSession);

    return VINF_SUCCESS;
}
1247
1248
1249/**
1250 * Looks up the session for the current process in the global hash table or in
1251 * OS specific pointer.
1252 *
1253 * @returns Pointer to the session with a reference that the caller must
1254 * release. If no valid session was found, NULL is returned.
1255 *
1256 * @param pDevExt The device extension.
1257 * @param Process The process ID.
1258 * @param R0Process The ring-0 process handle.
1259 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1260 * this is used instead of the hash table. For
1261 * additional safety it must then be equal to the
1262 * SUPDRVSESSION::ppOsSessionPtr member.
1263 * This can be NULL even if the OS has a session
1264 * pointer.
1265 */
1266PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1267 PSUPDRVSESSION *ppOsSessionPtr)
1268{
1269 PSUPDRVSESSION pCur;
1270 unsigned iHash;
1271
1272 /*
1273 * Validate input.
1274 */
1275 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1276
1277 /*
1278 * Calculate the hash table index and acquire the spinlock.
1279 */
1280 iHash = SUPDRV_SESSION_HASH(Process);
1281
1282 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1283
1284 /*
1285 * If an OS session pointer is provided, always use it.
1286 */
1287 if (ppOsSessionPtr)
1288 {
1289 pCur = *ppOsSessionPtr;
1290 if ( pCur
1291 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1292 || pCur->Process != Process
1293 || pCur->R0Process != R0Process) )
1294 pCur = NULL;
1295 }
1296 else
1297 {
1298 /*
1299 * Otherwise, do the hash table lookup.
1300 */
1301 pCur = pDevExt->apSessionHashTab[iHash];
1302 while ( pCur
1303 && ( pCur->Process != Process
1304 || pCur->R0Process != R0Process) )
1305 pCur = pCur->pCommonNextHash;
1306 }
1307
1308 /*
1309 * Retain the session.
1310 */
1311 if (pCur)
1312 {
1313 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1314 NOREF(cRefs);
1315 Assert(cRefs > 1 && cRefs < _1M);
1316 }
1317
1318 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1319
1320 return pCur;
1321}
1322
1323
1324/**
1325 * Retain a session to make sure it doesn't go away while it is in use.
1326 *
1327 * @returns New reference count on success, UINT32_MAX on failure.
1328 * @param pSession Session data.
1329 */
1330uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1331{
1332 uint32_t cRefs;
1333 AssertPtrReturn(pSession, UINT32_MAX);
1334 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1335
1336 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1337 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1338 return cRefs;
1339}
1340
1341
1342/**
1343 * Releases a given session.
1344 *
1345 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1346 * @param pSession Session data.
1347 */
1348uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1349{
1350 uint32_t cRefs;
1351 AssertPtrReturn(pSession, UINT32_MAX);
1352 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1353
1354 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1355 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1356 if (cRefs == 0)
1357 supdrvDestroySession(pSession->pDevExt, pSession);
1358 return cRefs;
1359}
1360
1361
1362/**
1363 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1364 *
1365 * @returns IPRT status code, see SUPR0ObjAddRef.
1366 * @param hHandleTable The handle table handle. Ignored.
1367 * @param pvObj The object pointer.
1368 * @param pvCtx Context, the handle type. Ignored.
1369 * @param pvUser Session pointer.
1370 */
1371static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1372{
1373 NOREF(pvCtx);
1374 NOREF(hHandleTable);
1375 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1376}
1377
1378
1379/**
1380 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1381 *
1382 * @param hHandleTable The handle table handle. Ignored.
1383 * @param h The handle value. Ignored.
1384 * @param pvObj The object pointer.
1385 * @param pvCtx Context, the handle type. Ignored.
1386 * @param pvUser Session pointer.
1387 */
1388static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1389{
1390 NOREF(pvCtx);
1391 NOREF(h);
1392 NOREF(hHandleTable);
1393 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1394}
1395
1396
1397/**
1398 * Fast path I/O Control worker.
1399 *
1400 * @returns VBox status code that should be passed down to ring-3 unchanged.
1401 * @param uIOCtl Function number.
1402 * @param idCpu VMCPU id.
1403 * @param pDevExt Device extention.
1404 * @param pSession Session data.
1405 */
1406int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1407{
1408 /*
1409 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1410 */
1411 if (RT_LIKELY( RT_VALID_PTR(pSession)
1412 && pSession->pVM
1413 && pDevExt->pfnVMMR0EntryFast))
1414 {
1415 switch (uIOCtl)
1416 {
1417 case SUP_IOCTL_FAST_DO_RAW_RUN:
1418 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1419 break;
1420 case SUP_IOCTL_FAST_DO_HM_RUN:
1421 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1422 break;
1423 case SUP_IOCTL_FAST_DO_NOP:
1424 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1425 break;
1426 default:
1427 return VERR_INTERNAL_ERROR;
1428 }
1429 return VINF_SUCCESS;
1430 }
1431 return VERR_INTERNAL_ERROR;
1432}
1433
1434
/**
 * Helper for supdrvIOCtl.  Checks whether pszStr contains any character from
 * the set pszChars.  We would use strpbrk here if this function would be
 * contained in the RedHat kABI white list, see
 * http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *psz;
    for (psz = pszStr; *psz != '\0'; psz++)
    {
        /* Scan the set for the current string character. */
        const char *pszSet;
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *psz)
                return 1;
    }
    return 0;
}
1458
1459
1460
1461/**
1462 * I/O Control inner worker (tracing reasons).
1463 *
1464 * @returns IPRT status code.
1465 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1466 *
1467 * @param uIOCtl Function number.
1468 * @param pDevExt Device extention.
1469 * @param pSession Session data.
1470 * @param pReqHdr The request header.
1471 */
1472static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1473{
1474 /*
1475 * Validation macros
1476 */
1477#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1478 do { \
1479 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1480 { \
1481 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1482 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1483 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1484 } \
1485 } while (0)
1486
1487#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1488
1489#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1490 do { \
1491 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1492 { \
1493 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1494 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1495 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1496 } \
1497 } while (0)
1498
1499#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1500 do { \
1501 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1502 { \
1503 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1504 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1505 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1506 } \
1507 } while (0)
1508
1509#define REQ_CHECK_EXPR(Name, expr) \
1510 do { \
1511 if (RT_UNLIKELY(!(expr))) \
1512 { \
1513 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1514 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1515 } \
1516 } while (0)
1517
1518#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1519 do { \
1520 if (RT_UNLIKELY(!(expr))) \
1521 { \
1522 OSDBGPRINT( fmt ); \
1523 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1524 } \
1525 } while (0)
1526
1527 /*
1528 * The switch.
1529 */
1530 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1531 {
1532 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1533 {
1534 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1535 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1536 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1537 {
1538 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1539 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1540 return 0;
1541 }
1542
1543#if 0
1544 /*
1545 * Call out to the OS specific code and let it do permission checks on the
1546 * client process.
1547 */
1548 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1549 {
1550 pReq->u.Out.u32Cookie = 0xffffffff;
1551 pReq->u.Out.u32SessionCookie = 0xffffffff;
1552 pReq->u.Out.u32SessionVersion = 0xffffffff;
1553 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1554 pReq->u.Out.pSession = NULL;
1555 pReq->u.Out.cFunctions = 0;
1556 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1557 return 0;
1558 }
1559#endif
1560
1561 /*
1562 * Match the version.
1563 * The current logic is very simple, match the major interface version.
1564 */
1565 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1566 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1567 {
1568 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1569 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1570 pReq->u.Out.u32Cookie = 0xffffffff;
1571 pReq->u.Out.u32SessionCookie = 0xffffffff;
1572 pReq->u.Out.u32SessionVersion = 0xffffffff;
1573 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1574 pReq->u.Out.pSession = NULL;
1575 pReq->u.Out.cFunctions = 0;
1576 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1577 return 0;
1578 }
1579
1580 /*
1581 * Fill in return data and be gone.
1582 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1583 * u32SessionVersion <= u32ReqVersion!
1584 */
1585 /** @todo Somehow validate the client and negotiate a secure cookie... */
1586 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1587 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1588 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1589 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1590 pReq->u.Out.pSession = pSession;
1591 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1592 pReq->Hdr.rc = VINF_SUCCESS;
1593 return 0;
1594 }
1595
1596 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1597 {
1598 /* validate */
1599 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1600 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1601
1602 /* execute */
1603 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1604 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1605 pReq->Hdr.rc = VINF_SUCCESS;
1606 return 0;
1607 }
1608
1609 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1610 {
1611 /* validate */
1612 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1613 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1614 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1615 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1616 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1617
1618 /* execute */
1619 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1620 if (RT_FAILURE(pReq->Hdr.rc))
1621 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1622 return 0;
1623 }
1624
1625 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1626 {
1627 /* validate */
1628 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1629 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1630
1631 /* execute */
1632 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1633 return 0;
1634 }
1635
1636 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1637 {
1638 /* validate */
1639 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1640 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1641
1642 /* execute */
1643 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1644 if (RT_FAILURE(pReq->Hdr.rc))
1645 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1646 return 0;
1647 }
1648
1649 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1650 {
1651 /* validate */
1652 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1653 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1654
1655 /* execute */
1656 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1657 return 0;
1658 }
1659
1660 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1661 {
1662 /* validate */
1663 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1664 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1665 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1666 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1668 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1669 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1670 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1671 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1672 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1673 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1674
1675 /* execute */
1676 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1677 return 0;
1678 }
1679
1680 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1681 {
1682 /* validate */
1683 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1684 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1685 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1686 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1687 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1688 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1689 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1690 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1691 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1692 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1693 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1694 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1695 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1696 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1697 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1698
1699 if (pReq->u.In.cSymbols)
1700 {
1701 uint32_t i;
1702 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1703 for (i = 0; i < pReq->u.In.cSymbols; i++)
1704 {
1705 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1706 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1707 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1708 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1709 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1710 pReq->u.In.cbStrTab - paSyms[i].offName),
1711 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1712 }
1713 }
1714
1715 /* execute */
1716 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1717 return 0;
1718 }
1719
1720 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1721 {
1722 /* validate */
1723 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1724 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1725
1726 /* execute */
1727 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1728 return 0;
1729 }
1730
1731 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1732 {
1733 /* validate */
1734 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1735 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1736 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1737
1738 /* execute */
1739 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1740 return 0;
1741 }
1742
1743 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1744 {
1745 /* validate */
1746 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1747 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1748 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1749
1750 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1751 {
1752 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1753
1754 /* execute */
1755 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1756 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1757 else
1758 pReq->Hdr.rc = VERR_WRONG_ORDER;
1759 }
1760 else
1761 {
1762 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1763 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1764 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1765 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1766 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1767
1768 /* execute */
1769 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1770 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1771 else
1772 pReq->Hdr.rc = VERR_WRONG_ORDER;
1773 }
1774
1775 if ( RT_FAILURE(pReq->Hdr.rc)
1776 && pReq->Hdr.rc != VERR_INTERRUPTED
1777 && pReq->Hdr.rc != VERR_TIMEOUT)
1778 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1779 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1780 else
1781 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1782 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1783 return 0;
1784 }
1785
1786 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1787 {
1788 /* validate */
1789 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1790 PSUPVMMR0REQHDR pVMMReq;
1791 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1792 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1793
1794 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1795 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1796 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1797 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1798 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1799
1800 /* execute */
1801 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1802 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1803 else
1804 pReq->Hdr.rc = VERR_WRONG_ORDER;
1805
1806 if ( RT_FAILURE(pReq->Hdr.rc)
1807 && pReq->Hdr.rc != VERR_INTERRUPTED
1808 && pReq->Hdr.rc != VERR_TIMEOUT)
1809 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1810 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1811 else
1812 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1813 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1814 return 0;
1815 }
1816
1817 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1818 {
1819 /* validate */
1820 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1821 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1822
1823 /* execute */
1824 pReq->Hdr.rc = VINF_SUCCESS;
1825 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1826 return 0;
1827 }
1828
1829 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1830 {
1831 /* validate */
1832 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1833 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1834 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1835
1836 /* execute */
1837 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1838 if (RT_FAILURE(pReq->Hdr.rc))
1839 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1840 return 0;
1841 }
1842
1843 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1844 {
1845 /* validate */
1846 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1847 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1848
1849 /* execute */
1850 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1851 return 0;
1852 }
1853
1854 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1855 {
1856 /* validate */
1857 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1858 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1859
1860 /* execute */
1861 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1862 if (RT_SUCCESS(pReq->Hdr.rc))
1863 pReq->u.Out.pGipR0 = pDevExt->pGip;
1864 return 0;
1865 }
1866
1867 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1868 {
1869 /* validate */
1870 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1871 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1872
1873 /* execute */
1874 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1875 return 0;
1876 }
1877
1878 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1879 {
1880 /* validate */
1881 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1882 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1883 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1884 || ( VALID_PTR(pReq->u.In.pVMR0)
1885 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1886 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1887 /* execute */
1888 pSession->pVM = pReq->u.In.pVMR0;
1889 pReq->Hdr.rc = VINF_SUCCESS;
1890 return 0;
1891 }
1892
1893 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1894 {
1895 /* validate */
1896 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1897 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1898 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1899 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1900 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1901 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1902 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1903 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1904 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1905
1906 /* execute */
1907 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1908 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1909 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1910 &pReq->u.Out.aPages[0]);
1911 if (RT_FAILURE(pReq->Hdr.rc))
1912 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1913 return 0;
1914 }
1915
1916 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1917 {
1918 /* validate */
1919 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1920 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1921 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1922 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1923 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1924 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1925
1926 /* execute */
1927 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1928 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1929 if (RT_FAILURE(pReq->Hdr.rc))
1930 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1931 return 0;
1932 }
1933
1934 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1935 {
1936 /* validate */
1937 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1938 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1939 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1940 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1941 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1942 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1943 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1944
1945 /* execute */
1946 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1947 return 0;
1948 }
1949
1950 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1951 {
1952 /* validate */
1953 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1954 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1955
1956 /* execute */
1957 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1958 return 0;
1959 }
1960
1961 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1962 {
1963 /* validate */
1964 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1965 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1966 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1967
1968 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1969 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1970 else
1971 {
1972 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1973 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1974 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1975 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1976 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1977 }
1978 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1979
1980 /* execute */
1981 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1982 return 0;
1983 }
1984
1985 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1986 {
1987 /* validate */
1988 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1989 size_t cbStrTab;
1990 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1991 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1992 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1993 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1994 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1995 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1996 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1997 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1998 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1999 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2000 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2001
2002 /* execute */
2003 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2004 return 0;
2005 }
2006
2007 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2008 {
2009 /* validate */
2010 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2011 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2012 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2013
2014 /* execute */
2015 switch (pReq->u.In.uType)
2016 {
2017 case SUP_SEM_TYPE_EVENT:
2018 {
2019 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2020 switch (pReq->u.In.uOp)
2021 {
2022 case SUPSEMOP2_WAIT_MS_REL:
2023 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2024 break;
2025 case SUPSEMOP2_WAIT_NS_ABS:
2026 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2027 break;
2028 case SUPSEMOP2_WAIT_NS_REL:
2029 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2030 break;
2031 case SUPSEMOP2_SIGNAL:
2032 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2033 break;
2034 case SUPSEMOP2_CLOSE:
2035 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2036 break;
2037 case SUPSEMOP2_RESET:
2038 default:
2039 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2040 break;
2041 }
2042 break;
2043 }
2044
2045 case SUP_SEM_TYPE_EVENT_MULTI:
2046 {
2047 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2048 switch (pReq->u.In.uOp)
2049 {
2050 case SUPSEMOP2_WAIT_MS_REL:
2051 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2052 break;
2053 case SUPSEMOP2_WAIT_NS_ABS:
2054 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2055 break;
2056 case SUPSEMOP2_WAIT_NS_REL:
2057 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2058 break;
2059 case SUPSEMOP2_SIGNAL:
2060 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2061 break;
2062 case SUPSEMOP2_CLOSE:
2063 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2064 break;
2065 case SUPSEMOP2_RESET:
2066 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2067 break;
2068 default:
2069 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2070 break;
2071 }
2072 break;
2073 }
2074
2075 default:
2076 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2077 break;
2078 }
2079 return 0;
2080 }
2081
2082 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2083 {
2084 /* validate */
2085 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2086 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2087 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2088
2089 /* execute */
2090 switch (pReq->u.In.uType)
2091 {
2092 case SUP_SEM_TYPE_EVENT:
2093 {
2094 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2095 switch (pReq->u.In.uOp)
2096 {
2097 case SUPSEMOP3_CREATE:
2098 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2099 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2100 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2101 break;
2102 case SUPSEMOP3_GET_RESOLUTION:
2103 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2104 pReq->Hdr.rc = VINF_SUCCESS;
2105 pReq->Hdr.cbOut = sizeof(*pReq);
2106 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2107 break;
2108 default:
2109 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2110 break;
2111 }
2112 break;
2113 }
2114
2115 case SUP_SEM_TYPE_EVENT_MULTI:
2116 {
2117 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2118 switch (pReq->u.In.uOp)
2119 {
2120 case SUPSEMOP3_CREATE:
2121 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2122 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2123 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2124 break;
2125 case SUPSEMOP3_GET_RESOLUTION:
2126 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2127 pReq->Hdr.rc = VINF_SUCCESS;
2128 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2129 break;
2130 default:
2131 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2132 break;
2133 }
2134 break;
2135 }
2136
2137 default:
2138 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2139 break;
2140 }
2141 return 0;
2142 }
2143
2144 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2145 {
2146 /* validate */
2147 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2148 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2149
2150 /* execute */
2151 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2152 if (RT_FAILURE(pReq->Hdr.rc))
2153 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2154 return 0;
2155 }
2156
2157 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2158 {
2159 /* validate */
2160 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2161 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2162
2163 /* execute */
2164 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2165 return 0;
2166 }
2167
2168 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2169 {
2170 /* validate */
2171 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2172
2173 /* execute */
2174 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2175 return 0;
2176 }
2177
2178 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2179 {
2180 /* validate */
2181 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2182 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2183
2184 /* execute */
2185 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2186 return 0;
2187 }
2188
2189 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2190 {
2191 /* validate */
2192 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2193 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2194 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2195 return VERR_INVALID_PARAMETER;
2196
2197 /* execute */
2198 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2199 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2200 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2201 pReq->u.In.szName, pReq->u.In.fFlags);
2202 return 0;
2203 }
2204
2205 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2206 {
2207 /* validate */
2208 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2209 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2210
2211 /* execute */
2212 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2213 return 0;
2214 }
2215
2216 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2217 {
2218 /* validate */
2219 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2220 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2221
2222 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2223 pReqHdr->rc = VINF_SUCCESS;
2224 return 0;
2225 }
2226
2227 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2228 {
2229 /* validate */
2230 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2231 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2232 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2233 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2234
2235 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2236 return 0;
2237 }
2238
2239 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2240 {
2241 /* validate */
2242 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2243
2244 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2245 return 0;
2246 }
2247
2248 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2249 {
2250 /* validate */
2251 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2252 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2253
2254 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2255 return 0;
2256 }
2257
2258 default:
2259 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2260 break;
2261 }
2262 return VERR_GENERAL_FAILURE;
2263}
2264
2265
2266/**
2267 * I/O Control inner worker for the restricted operations.
2268 *
2269 * @returns IPRT status code.
2270 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2271 *
2272 * @param uIOCtl Function number.
 2273 * @param uIOCtl Function number.
2274 * @param pSession Session data.
2275 * @param pReqHdr The request header.
2276 */
static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    /*
     * The switch.  Restricted sessions only get the cookie negotiation and
     * the VT capability query; everything else falls out the bottom with
     * VERR_GENERAL_FAILURE.
     */
    switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    {
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
        {
            PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
            /* The magic string guards against stray ioctls reaching our device. */
            if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
                pReq->Hdr.rc = VERR_INVALID_MAGIC;
                return 0;
            }

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
                ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
                            pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
                /* Poison the output cookies so a sloppy client can't proceed by accident. */
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return 0;
            }

            /*
             * Fill in return data and be gone.
             * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
             *      u32SessionVersion <= u32ReqVersion!
             */
            /** @todo Somehow validate the client and negotiate a secure cookie... */
            pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
            pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
            pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
            pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
            pReq->u.Out.pSession          = pSession;
            pReq->u.Out.cFunctions        = 0; /* no function table for restricted sessions */
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
        {
            /* validate */
            PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);

            /* execute */
            pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* don't copy back undefined output on failure */
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
    }
    return VERR_GENERAL_FAILURE;
}
2349
2350
2351/**
2352 * I/O Control worker.
2353 *
2354 * @returns IPRT status code.
2355 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2356 *
2357 * @param uIOCtl Function number.
 2358 * @param pDevExt Device extension.
 2359 * @param pSession Session data.
 2360 * @param pReqHdr The request header.
 * @param cbReq The size of the request buffer (used for validating cbIn/cbOut).
2361 */
int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
{
    int rc;
    VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr); /* entry tracepoint */

    /*
     * Validate the request.
     */
    /* The caller supplied buffer must at least hold a full request header. */
    if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    /* Header sanity: the magic flag bits must be present, and the declared
       input/output sizes must each cover the header and fit the buffer. */
    if (RT_UNLIKELY(    (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
                    ||  pReqHdr->cbIn < sizeof(*pReqHdr)
                    ||  pReqHdr->cbIn > cbReq
                    ||  pReqHdr->cbOut < sizeof(*pReqHdr)
                    ||  pReqHdr->cbOut > cbReq))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
                    (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
    {
        OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    /* Cookie check: the cookie negotiation request carries the well-known
       initial cookie; every other request must present the device and session
       cookies handed out by a prior SUP_IOCTL_COOKIE. */
    if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    {
        if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
        {
            OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
            VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
            return VERR_INVALID_PARAMETER;
        }
    }
    else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
                         ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    {
        OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
     */
    if (pSession->fUnrestricted)
        rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
    else
        rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);

    VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
    return rc;
}
2421
2422
2423/**
2424 * Inter-Driver Communication (IDC) worker.
2425 *
2426 * @returns VBox status code.
2427 * @retval VINF_SUCCESS on success.
2428 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2429 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2430 *
2431 * @param uReq The request (function) code.
 2432 * @param pDevExt Device extension.
2433 * @param pSession Session data.
2434 * @param pReqHdr The request header.
2435 */
2436int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2437{
2438 /*
2439 * The OS specific code has already validated the pSession
2440 * pointer, and the request size being greater or equal to
2441 * size of the header.
2442 *
2443 * So, just check that pSession is a kernel context session.
2444 */
2445 if (RT_UNLIKELY( pSession
2446 && pSession->R0Process != NIL_RTR0PROCESS))
2447 return VERR_INVALID_PARAMETER;
2448
2449/*
2450 * Validation macro.
2451 */
2452#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2453 do { \
2454 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2455 { \
2456 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2457 (long)pReqHdr->cb, (long)(cbExpect))); \
2458 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2459 } \
2460 } while (0)
2461
2462 switch (uReq)
2463 {
2464 case SUPDRV_IDC_REQ_CONNECT:
2465 {
2466 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2467 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2468
2469 /*
2470 * Validate the cookie and other input.
2471 */
2472 if (pReq->Hdr.pSession != NULL)
2473 {
2474 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2475 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2476 }
2477 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2478 {
2479 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2480 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2481 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2482 }
2483 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2484 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2485 {
2486 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2487 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2488 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2489 }
2490 if (pSession != NULL)
2491 {
2492 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2493 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2494 }
2495
2496 /*
2497 * Match the version.
2498 * The current logic is very simple, match the major interface version.
2499 */
2500 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2501 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2502 {
2503 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2504 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2505 pReq->u.Out.pSession = NULL;
2506 pReq->u.Out.uSessionVersion = 0xffffffff;
2507 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2508 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2509 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2510 return VINF_SUCCESS;
2511 }
2512
2513 pReq->u.Out.pSession = NULL;
2514 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2515 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2516 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2517
2518 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2519 if (RT_FAILURE(pReq->Hdr.rc))
2520 {
2521 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2522 return VINF_SUCCESS;
2523 }
2524
2525 pReq->u.Out.pSession = pSession;
2526 pReq->Hdr.pSession = pSession;
2527
2528 return VINF_SUCCESS;
2529 }
2530
2531 case SUPDRV_IDC_REQ_DISCONNECT:
2532 {
2533 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2534
2535 supdrvSessionRelease(pSession);
2536 return pReqHdr->rc = VINF_SUCCESS;
2537 }
2538
2539 case SUPDRV_IDC_REQ_GET_SYMBOL:
2540 {
2541 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2542 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2543
2544 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2545 return VINF_SUCCESS;
2546 }
2547
2548 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2549 {
2550 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2551 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2552
2553 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2554 return VINF_SUCCESS;
2555 }
2556
2557 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2558 {
2559 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2560 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2561
2562 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2563 return VINF_SUCCESS;
2564 }
2565
2566 default:
2567 Log(("Unknown IDC %#lx\n", (long)uReq));
2568 break;
2569 }
2570
2571#undef REQ_CHECK_IDC_SIZE
2572 return VERR_NOT_SUPPORTED;
2573}
2574
2575
/**
 * Registers an object for reference counting.
 * The object is registered with one reference in the specified session.
 *
 * @returns Unique identifier on success (pointer).
 *          All future references must use this identifier.
 * @returns NULL on failure.
 * @param   pSession        The session to which the initial reference is charged.
 * @param   enmType         The object type.
 * @param   pfnDestructor   The destructor function which will be called when the reference count reaches 0.
 * @param   pvUser1         The first user argument.
 * @param   pvUser2         The second user argument.
 */
SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    PSUPDRVOBJ      pObj;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    AssertPtrReturn(pfnDestructor, NULL);

    /*
     * Allocate and initialize the object.
     * The initial usage count of 1 is the caller's reference.
     */
    pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    if (!pObj)
        return NULL;
    pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    pObj->enmType       = enmType;
    pObj->pNext         = NULL;
    pObj->cUsage        = 1;
    pObj->pfnDestructor = pfnDestructor;
    pObj->pvUser1       = pvUser1;
    pObj->pvUser2       = pvUser2;
    pObj->CreatorUid    = pSession->Uid;
    pObj->CreatorGid    = pSession->Gid;
    pObj->CreatorProcess= pSession->Process;
    supdrvOSObjInitCreator(pObj, pSession);

    /*
     * Allocate the usage record.
     * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
     */
    RTSpinlockAcquire(pDevExt->Spinlock);

    pUsage = pDevExt->pUsageFree;
    if (pUsage)
        pDevExt->pUsageFree = pUsage->pNext;
    else
    {
        /* Free list is empty: drop the spinlock while doing a potentially
           blocking allocation, then re-enter the critical section. */
        RTSpinlockRelease(pDevExt->Spinlock);
        pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
        if (!pUsage)
        {
            RTMemFree(pObj);
            return NULL;
        }
        RTSpinlockAcquire(pDevExt->Spinlock);
    }

    /*
     * Insert the object and create the session usage record.
     */
    /* The object. */
    pObj->pNext         = pDevExt->pObjs;
    pDevExt->pObjs      = pObj;

    /* The session record. */
    pUsage->cUsage      = 1;
    pUsage->pObj        = pObj;
    pUsage->pNext       = pSession->pUsage;
    /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
    pSession->pUsage    = pUsage;

    RTSpinlockRelease(pDevExt->Spinlock);

    Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
    return pObj;
}
2658
2659
2660/**
2661 * Increment the reference counter for the object associating the reference
2662 * with the specified session.
2663 *
2664 * @returns IPRT status code.
2665 * @param pvObj The identifier returned by SUPR0ObjRegister().
2666 * @param pSession The session which is referencing the object.
2667 *
2668 * @remarks The caller should not own any spinlocks and must carefully protect
2669 * itself against potential race with the destructor so freed memory
2670 * isn't accessed here.
2671 */
2672SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2673{
2674 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2675}
2676
2677
2678/**
2679 * Increment the reference counter for the object associating the reference
2680 * with the specified session.
2681 *
2682 * @returns IPRT status code.
2683 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2684 * couldn't be allocated. (If you see this you're not doing the right
2685 * thing and it won't ever work reliably.)
2686 *
2687 * @param pvObj The identifier returned by SUPR0ObjRegister().
2688 * @param pSession The session which is referencing the object.
2689 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2690 * first reference to an object in a session with this
2691 * argument set.
2692 *
2693 * @remarks The caller should not own any spinlocks and must carefully protect
2694 * itself against potential race with the destructor so freed memory
2695 * isn't accessed here.
2696 */
SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
{
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    int             rc          = VINF_SUCCESS;
    PSUPDRVUSAGE    pUsagePre;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     * Be ready for the destruction race (someone might be stuck in the
     * destructor waiting a lock we own).  A dead magic is therefore tolerated
     * here and rejected under the spinlock below.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pObj, VERR_INVALID_POINTER);
    AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
                    ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
                    VERR_INVALID_PARAMETER);

    RTSpinlockAcquire(pDevExt->Spinlock);

    /* Re-check the magic while holding the lock; the object may have been
       marked dead between the check above and acquiring the spinlock. */
    if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
    {
        RTSpinlockRelease(pDevExt->Spinlock);

        AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
        return VERR_WRONG_ORDER;
    }

    /*
     * Preallocate the usage record if we can.
     */
    pUsagePre = pDevExt->pUsageFree;
    if (pUsagePre)
        pDevExt->pUsageFree = pUsagePre->pNext;
    else if (!fNoBlocking)
    {
        /* The free list is empty: drop the spinlock for the (potentially
           blocking) allocation and revalidate the object afterwards. */
        RTSpinlockRelease(pDevExt->Spinlock);
        pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
        if (!pUsagePre)
            return VERR_NO_MEMORY;

        RTSpinlockAcquire(pDevExt->Spinlock);
        if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
        {
            RTSpinlockRelease(pDevExt->Spinlock);

            AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
            return VERR_WRONG_ORDER;
        }
    }

    /*
     * Reference the object.
     */
    pObj->cUsage++;

    /*
     * Look for the session record.
     */
    for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
    {
        /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
        if (pUsage->pObj == pObj)
            break;
    }
    if (pUsage)
        pUsage->cUsage++;
    else if (pUsagePre)
    {
        /* create a new session record. */
        pUsagePre->cUsage   = 1;
        pUsagePre->pObj     = pObj;
        pUsagePre->pNext    = pSession->pUsage;
        pSession->pUsage    = pUsagePre;
        /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/

        pUsagePre = NULL;
    }
    else
    {
        /* fNoBlocking was set and no record could be preallocated: roll back
           the object reference and tell the caller to retry (see @retval). */
        pObj->cUsage--;
        rc = VERR_TRY_AGAIN;
    }

    /*
     * Put any unused usage record into the free list..
     */
    if (pUsagePre)
    {
        pUsagePre->pNext    = pDevExt->pUsageFree;
        pDevExt->pUsageFree = pUsagePre;
    }

    RTSpinlockRelease(pDevExt->Spinlock);

    return rc;
}
2795
2796
2797/**
2798 * Decrement / destroy a reference counter record for an object.
2799 *
2800 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2801 *
2802 * @returns IPRT status code.
2803 * @retval VINF_SUCCESS if not destroyed.
2804 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2805 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2806 * string builds.
2807 *
2808 * @param pvObj The identifier returned by SUPR0ObjRegister().
2809 * @param pSession The session which is referencing the object.
2810 */
SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
{
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    int             rc          = VERR_INVALID_PARAMETER;
    PSUPDRVUSAGE    pUsage;
    PSUPDRVUSAGE    pUsagePrev;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Acquire the spinlock and look for the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock);

    for (pUsagePrev = NULL, pUsage = pSession->pUsage;
         pUsage;
         pUsagePrev = pUsage, pUsage = pUsage->pNext)
    {
        /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
        if (pUsage->pObj == pObj)
        {
            rc = VINF_SUCCESS;
            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage > 1)
            {
                /* The session still holds further references: just decrement
                   both the per-session and the global counters. */
                pObj->cUsage--;
                pUsage->cUsage--;
            }
            else
            {
                /*
                 * Free the session record.
                 * (Recycled via the device extension free list, see SUPR0ObjRegister.)
                 */
                if (pUsagePrev)
                    pUsagePrev->pNext = pUsage->pNext;
                else
                    pSession->pUsage = pUsage->pNext;
                pUsage->pNext = pDevExt->pUsageFree;
                pDevExt->pUsageFree = pUsage;

                /* What about the object? */
                if (pObj->cUsage > 1)
                    pObj->cUsage--;
                else
                {
                    /*
                     * Object is to be destroyed, unlink it.
                     * The magic is set to DEAD so racing SUPR0ObjAddRefEx calls
                     * fail; the actual destruction happens after the spinlock
                     * is released (destructors may block).
                     */
                    pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
                    rc = VINF_OBJECT_DESTROYED;
                    if (pDevExt->pObjs == pObj)
                        pDevExt->pObjs = pObj->pNext;
                    else
                    {
                        PSUPDRVOBJ pObjPrev;
                        for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                            if (pObjPrev->pNext == pObj)
                            {
                                pObjPrev->pNext = pObj->pNext;
                                break;
                            }
                        Assert(pObjPrev);
                    }
                }
            }
            break;
        }
    }

    RTSpinlockRelease(pDevExt->Spinlock);

    /*
     * Call the destructor and free the object if required.
     * This is done outside the spinlock since the destructor may block.
     */
    if (rc == VINF_OBJECT_DESTROYED)
    {
        Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
             pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
        if (pObj->pfnDestructor)
            pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
        RTMemFree(pObj);
    }

    /* Releasing an object the session never referenced is a caller bug. */
    AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
    return rc;
}
2904
2905
2906/**
2907 * Verifies that the current process can access the specified object.
2908 *
2909 * @returns The following IPRT status code:
2910 * @retval VINF_SUCCESS if access was granted.
2911 * @retval VERR_PERMISSION_DENIED if denied access.
2912 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2913 *
2914 * @param pvObj The identifier returned by SUPR0ObjRegister().
2915 * @param pSession The session which wishes to access the object.
2916 * @param pszObjName Object string name. This is optional and depends on the object type.
2917 *
2918 * @remark The caller is responsible for making sure the object isn't removed while
2919 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2920 */
2921SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2922{
2923 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2924 int rc;
2925
2926 /*
2927 * Validate the input.
2928 */
2929 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2930 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2931 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2932 VERR_INVALID_PARAMETER);
2933
2934 /*
2935 * Check access. (returns true if a decision has been made.)
2936 */
2937 rc = VERR_INTERNAL_ERROR;
2938 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2939 return rc;
2940
2941 /*
2942 * Default policy is to allow the user to access his own
2943 * stuff but nothing else.
2944 */
2945 if (pObj->CreatorUid == pSession->Uid)
2946 return VINF_SUCCESS;
2947 return VERR_PERMISSION_DENIED;
2948}
2949
2950
2951/**
2952 * Lock pages.
2953 *
2954 * @returns IPRT status code.
2955 * @param pSession Session to which the locked memory should be associated.
2956 * @param pvR3 Start of the memory range to lock.
2957 * This must be page aligned.
2958 * @param cPages Number of pages to lock.
2959 * @param paPages Where to put the physical addresses of locked memory.
2960 */
2961SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2962{
2963 int rc;
2964 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2965 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2966 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2967
2968 /*
2969 * Verify input.
2970 */
2971 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2972 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2973 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2974 || !pvR3)
2975 {
2976 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2977 return VERR_INVALID_PARAMETER;
2978 }
2979
2980 /*
2981 * Let IPRT do the job.
2982 */
2983 Mem.eType = MEMREF_TYPE_LOCKED;
2984 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2985 if (RT_SUCCESS(rc))
2986 {
2987 uint32_t iPage = cPages;
2988 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2989 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2990
2991 while (iPage-- > 0)
2992 {
2993 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2994 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2995 {
2996 AssertMsgFailed(("iPage=%d\n", iPage));
2997 rc = VERR_INTERNAL_ERROR;
2998 break;
2999 }
3000 }
3001 if (RT_SUCCESS(rc))
3002 rc = supdrvMemAdd(&Mem, pSession);
3003 if (RT_FAILURE(rc))
3004 {
3005 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3006 AssertRC(rc2);
3007 }
3008 }
3009
3010 return rc;
3011}
3012
3013
3014/**
3015 * Unlocks the memory pointed to by pv.
3016 *
3017 * @returns IPRT status code.
3018 * @param pSession Session to which the memory was locked.
3019 * @param pvR3 Memory to unlock.
3020 */
3021SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3022{
3023 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3024 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3025 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3026}
3027
3028
3029/**
3030 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3031 * backing.
3032 *
3033 * @returns IPRT status code.
3034 * @param pSession Session data.
3035 * @param cPages Number of pages to allocate.
3036 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3037 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3038 * @param pHCPhys Where to put the physical address of allocated memory.
3039 */
3040SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3041{
3042 int rc;
3043 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3044 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3045
3046 /*
3047 * Validate input.
3048 */
3049 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3050 if (!ppvR3 || !ppvR0 || !pHCPhys)
3051 {
3052 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3053 pSession, ppvR0, ppvR3, pHCPhys));
3054 return VERR_INVALID_PARAMETER;
3055
3056 }
3057 if (cPages < 1 || cPages >= 256)
3058 {
3059 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3060 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3061 }
3062
3063 /*
3064 * Let IPRT do the job.
3065 */
3066 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3067 if (RT_SUCCESS(rc))
3068 {
3069 int rc2;
3070 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3071 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3072 if (RT_SUCCESS(rc))
3073 {
3074 Mem.eType = MEMREF_TYPE_CONT;
3075 rc = supdrvMemAdd(&Mem, pSession);
3076 if (!rc)
3077 {
3078 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3079 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3080 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3081 return 0;
3082 }
3083
3084 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3085 AssertRC(rc2);
3086 }
3087 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3088 AssertRC(rc2);
3089 }
3090
3091 return rc;
3092}
3093
3094
3095/**
3096 * Frees memory allocated using SUPR0ContAlloc().
3097 *
3098 * @returns IPRT status code.
3099 * @param pSession The session to which the memory was allocated.
3100 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3101 */
3102SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3103{
3104 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3105 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3106 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3107}
3108
3109
3110/**
3111 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3112 *
3113 * The memory isn't zeroed.
3114 *
3115 * @returns IPRT status code.
3116 * @param pSession Session data.
3117 * @param cPages Number of pages to allocate.
3118 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3119 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3120 * @param paPages Where to put the physical addresses of allocated memory.
3121 */
3122SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3123{
3124 unsigned iPage;
3125 int rc;
3126 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3127 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3128
3129 /*
3130 * Validate input.
3131 */
3132 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3133 if (!ppvR3 || !ppvR0 || !paPages)
3134 {
3135 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3136 pSession, ppvR3, ppvR0, paPages));
3137 return VERR_INVALID_PARAMETER;
3138
3139 }
3140 if (cPages < 1 || cPages >= 256)
3141 {
3142 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3143 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3144 }
3145
3146 /*
3147 * Let IPRT do the work.
3148 */
3149 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3150 if (RT_SUCCESS(rc))
3151 {
3152 int rc2;
3153 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3154 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3155 if (RT_SUCCESS(rc))
3156 {
3157 Mem.eType = MEMREF_TYPE_LOW;
3158 rc = supdrvMemAdd(&Mem, pSession);
3159 if (!rc)
3160 {
3161 for (iPage = 0; iPage < cPages; iPage++)
3162 {
3163 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3164 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3165 }
3166 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3167 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3168 return 0;
3169 }
3170
3171 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3172 AssertRC(rc2);
3173 }
3174
3175 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3176 AssertRC(rc2);
3177 }
3178
3179 return rc;
3180}
3181
3182
3183/**
3184 * Frees memory allocated using SUPR0LowAlloc().
3185 *
3186 * @returns IPRT status code.
3187 * @param pSession The session to which the memory was allocated.
3188 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3189 */
3190SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3191{
3192 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3193 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3194 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3195}
3196
3197
3198
3199/**
3200 * Allocates a chunk of memory with both R0 and R3 mappings.
3201 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3202 *
3203 * @returns IPRT status code.
3204 * @param pSession The session to associated the allocation with.
3205 * @param cb Number of bytes to allocate.
3206 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3207 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3208 */
3209SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3210{
3211 int rc;
3212 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3213 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3214
3215 /*
3216 * Validate input.
3217 */
3218 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3219 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3220 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3221 if (cb < 1 || cb >= _4M)
3222 {
3223 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3224 return VERR_INVALID_PARAMETER;
3225 }
3226
3227 /*
3228 * Let IPRT do the work.
3229 */
3230 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3231 if (RT_SUCCESS(rc))
3232 {
3233 int rc2;
3234 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3235 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3236 if (RT_SUCCESS(rc))
3237 {
3238 Mem.eType = MEMREF_TYPE_MEM;
3239 rc = supdrvMemAdd(&Mem, pSession);
3240 if (!rc)
3241 {
3242 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3243 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3244 return VINF_SUCCESS;
3245 }
3246
3247 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3248 AssertRC(rc2);
3249 }
3250
3251 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3252 AssertRC(rc2);
3253 }
3254
3255 return rc;
3256}
3257
3258
3259/**
3260 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3261 *
3262 * @returns IPRT status code.
3263 * @param pSession The session to which the memory was allocated.
3264 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3265 * @param paPages Where to store the physical addresses.
3266 */
SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
{
    PSUPDRVBUNDLE pBundle;
    LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_POINTER);
    AssertReturn(uPtr, VERR_INVALID_PARAMETER);

    /*
     * Search for the address.
     * Walks all memory-reference bundles of the session under the session
     * spinlock; uPtr may be either the ring-0 or the ring-3 address of a
     * MEMREF_TYPE_MEM allocation (see SUPR0MemAlloc).
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (   pBundle->aMem[i].eType == MEMREF_TYPE_MEM
                    && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    && (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                        || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                            && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
                       )
                   )
                {
                    /* Found it: copy out the physical address of every page. */
                    const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
                    size_t iPage;
                    for (iPage = 0; iPage < cPages; iPage++)
                    {
                        paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
                        paPages[iPage].uReserved = 0;
                    }
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!!\n", (void *)uPtr));
    return VERR_INVALID_PARAMETER;
}
3315
3316
3317/**
3318 * Free memory allocated by SUPR0MemAlloc().
3319 *
3320 * @returns IPRT status code.
3321 * @param pSession The session owning the allocation.
3322 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3323 */
3324SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3325{
3326 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3327 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3328 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3329}
3330
3331
3332/**
3333 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3334 *
3335 * The memory is fixed and it's possible to query the physical addresses using
3336 * SUPR0MemGetPhys().
3337 *
3338 * @returns IPRT status code.
3339 * @param pSession The session to associated the allocation with.
3340 * @param cPages The number of pages to allocate.
3341 * @param fFlags Flags, reserved for the future. Must be zero.
3342 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3343 * NULL if no ring-3 mapping.
 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3345 * NULL if no ring-0 mapping.
3346 * @param paPages Where to store the addresses of the pages. Optional.
3347 */
3348SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3349{
3350 int rc;
3351 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3352 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3353
3354 /*
3355 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3356 */
3357 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3358 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3359 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3360 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3361 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3362 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3363 {
3364 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3365 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3366 }
3367
3368 /*
3369 * Let IPRT do the work.
3370 */
3371 if (ppvR0)
3372 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3373 else
3374 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3375 if (RT_SUCCESS(rc))
3376 {
3377 int rc2;
3378 if (ppvR3)
3379 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3380 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3381 else
3382 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3383 if (RT_SUCCESS(rc))
3384 {
3385 Mem.eType = MEMREF_TYPE_PAGE;
3386 rc = supdrvMemAdd(&Mem, pSession);
3387 if (!rc)
3388 {
3389 if (ppvR3)
3390 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3391 if (ppvR0)
3392 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3393 if (paPages)
3394 {
3395 uint32_t iPage = cPages;
3396 while (iPage-- > 0)
3397 {
3398 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3399 Assert(paPages[iPage] != NIL_RTHCPHYS);
3400 }
3401 }
3402 return VINF_SUCCESS;
3403 }
3404
3405 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3406 AssertRC(rc2);
3407 }
3408
3409 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3410 AssertRC(rc2);
3411 }
3412 return rc;
3413}
3414
3415
3416/**
3417 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3418 * space.
3419 *
3420 * @returns IPRT status code.
3421 * @param pSession The session to associated the allocation with.
3422 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3423 * @param offSub Where to start mapping. Must be page aligned.
3424 * @param cbSub How much to map. Must be page aligned.
3425 * @param fFlags Flags, MBZ.
3426 * @param ppvR0 Where to return the address of the ring-0 mapping on
3427 * success.
3428 */
SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
                                  uint32_t fFlags, PRTR0PTR ppvR0)
{
    int             rc;
    PSUPDRVBUNDLE   pBundle;
    RTR0MEMOBJ      hMemObj = NIL_RTR0MEMOBJ;
    LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));

    /*
     * Validate input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
    AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(cbSub, VERR_INVALID_PARAMETER);

    /*
     * Find the memory object.
     * pvR3 may identify either a SUPR0PageAllocEx allocation (matched via its
     * ring-3 mapping object) or a SUPR0LockMem range (matched via the locked
     * object's ring-3 address).  Searched under the session spinlock.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    (   pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
                    ||  (   pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
                {
                    hMemObj = pBundle->aMem[i].MemObj;
                    break;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    rc = VERR_INVALID_PARAMETER;
    if (hMemObj != NIL_RTR0MEMOBJ)
    {
        /*
         * Do some further input validations before calling IPRT.
         * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
         * NOTE(review): offSub + cbSub is 32-bit arithmetic; a wrap would need a
         * memory object larger than 2GiB to slip past the first two checks — confirm
         * the object size is bounded elsewhere.
         */
        size_t cbMemObj = RTR0MemObjSize(hMemObj);
        if (    offSub < cbMemObj
            &&  cbSub <= cbMemObj
            &&  offSub + cbSub <= cbMemObj)
        {
            RTR0MEMOBJ hMapObj;
            rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
                                       RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
            if (RT_SUCCESS(rc))
                *ppvR0 = RTR0MemObjAddress(hMapObj);
        }
        else
            SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);

    }
    return rc;
}
3499
3500
3501/**
3502 * Changes the page level protection of one or more pages previously allocated
3503 * by SUPR0PageAllocEx.
3504 *
3505 * @returns IPRT status code.
3506 * @param pSession The session to associated the allocation with.
3507 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3508 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3509 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3510 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3511 * @param offSub Where to start changing. Must be page aligned.
3512 * @param cbSub How much to change. Must be page aligned.
3513 * @param fProt The new page level protection, see RTMEM_PROT_*.
3514 */
3515SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3516{
3517 int rc;
3518 PSUPDRVBUNDLE pBundle;
3519 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3520 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3521 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3522
3523 /*
3524 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3525 */
3526 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3527 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3528 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3529 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3530 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3531
3532 /*
3533 * Find the memory object.
3534 */
3535 RTSpinlockAcquire(pSession->Spinlock);
3536 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3537 {
3538 if (pBundle->cUsed > 0)
3539 {
3540 unsigned i;
3541 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3542 {
3543 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3544 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3545 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3546 || pvR3 == NIL_RTR3PTR)
3547 && ( pvR0 == NIL_RTR0PTR
3548 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3549 && ( pvR3 == NIL_RTR3PTR
3550 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3551 {
3552 if (pvR0 != NIL_RTR0PTR)
3553 hMemObjR0 = pBundle->aMem[i].MemObj;
3554 if (pvR3 != NIL_RTR3PTR)
3555 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3556 break;
3557 }
3558 }
3559 }
3560 }
3561 RTSpinlockRelease(pSession->Spinlock);
3562
3563 rc = VERR_INVALID_PARAMETER;
3564 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3565 || hMemObjR3 != NIL_RTR0MEMOBJ)
3566 {
3567 /*
3568 * Do some further input validations before calling IPRT.
3569 */
3570 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3571 if ( offSub < cbMemObj
3572 && cbSub <= cbMemObj
3573 && offSub + cbSub <= cbMemObj)
3574 {
3575 rc = VINF_SUCCESS;
3576 if (hMemObjR3 != NIL_RTR0PTR)
3577 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3578 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3579 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3580 }
3581 else
3582 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3583
3584 }
3585 return rc;
3586
3587}
3588
3589
3590/**
3591 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3592 *
3593 * @returns IPRT status code.
3594 * @param pSession The session owning the allocation.
3595 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3596 * SUPR0PageAllocEx().
3597 */
3598SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3599{
3600 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3601 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3602 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3603}
3604
3605
3606/**
3607 * Gets the paging mode of the current CPU.
3608 *
3609 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3610 */
3611SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3612{
3613 SUPPAGINGMODE enmMode;
3614
3615 RTR0UINTREG cr0 = ASMGetCR0();
3616 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3617 enmMode = SUPPAGINGMODE_INVALID;
3618 else
3619 {
3620 RTR0UINTREG cr4 = ASMGetCR4();
3621 uint32_t fNXEPlusLMA = 0;
3622 if (cr4 & X86_CR4_PAE)
3623 {
3624 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3625 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3626 {
3627 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3628 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3629 fNXEPlusLMA |= RT_BIT(0);
3630 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3631 fNXEPlusLMA |= RT_BIT(1);
3632 }
3633 }
3634
3635 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3636 {
3637 case 0:
3638 enmMode = SUPPAGINGMODE_32_BIT;
3639 break;
3640
3641 case X86_CR4_PGE:
3642 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3643 break;
3644
3645 case X86_CR4_PAE:
3646 enmMode = SUPPAGINGMODE_PAE;
3647 break;
3648
3649 case X86_CR4_PAE | RT_BIT(0):
3650 enmMode = SUPPAGINGMODE_PAE_NX;
3651 break;
3652
3653 case X86_CR4_PAE | X86_CR4_PGE:
3654 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3655 break;
3656
3657 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3658 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3659 break;
3660
3661 case RT_BIT(1) | X86_CR4_PAE:
3662 enmMode = SUPPAGINGMODE_AMD64;
3663 break;
3664
3665 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3666 enmMode = SUPPAGINGMODE_AMD64_NX;
3667 break;
3668
3669 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3670 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3671 break;
3672
3673 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3674 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3675 break;
3676
3677 default:
3678 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3679 enmMode = SUPPAGINGMODE_INVALID;
3680 break;
3681 }
3682 }
3683 return enmMode;
3684}
3685
3686
3687/**
3688 * Enables or disabled hardware virtualization extensions using native OS APIs.
3689 *
3690 * @returns VBox status code.
3691 * @retval VINF_SUCCESS on success.
3692 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3693 *
3694 * @param fEnable Whether to enable or disable.
3695 */
3696SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3697{
3698#ifdef RT_OS_DARWIN
3699 return supdrvOSEnableVTx(fEnable);
3700#else
3701 return VERR_NOT_SUPPORTED;
3702#endif
3703}
3704
3705
3706/**
3707 * Suspends hardware virtualization extensions using the native OS API.
3708 *
3709 * This is called prior to entering raw-mode context.
3710 *
3711 * @returns @c true if suspended, @c false if not.
3712 */
3713SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3714{
3715#ifdef RT_OS_DARWIN
3716 return supdrvOSSuspendVTxOnCpu();
3717#else
3718 return false;
3719#endif
3720}
3721
3722
3723/**
3724 * Resumes hardware virtualization extensions using the native OS API.
3725 *
3726 * This is called after to entering raw-mode context.
3727 *
3728 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3729 */
3730SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3731{
3732#ifdef RT_OS_DARWIN
3733 supdrvOSResumeVTxOnCpu(fSuspended);
3734#else
3735 Assert(!fSuspended);
3736#endif
3737}
3738
3739
/**
 * Queries the AMD-V and VT-x capabilities of the calling CPU.
 *
 * Runs with preemption disabled since it may write and re-read
 * MSR_IA32_FEATURE_CONTROL and must stay on one CPU while doing so.
 *
 * @returns VBox status code.
 * @retval  VERR_VMX_NO_VMX
 * @retval  VERR_VMX_MSR_ALL_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_LOCKING_FAILED
 * @retval  VERR_SVM_NO_SVM
 * @retval  VERR_SVM_DISABLED
 * @retval  VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
 *          (centaur) CPU.
 *
 * @param   pSession    The session handle.
 * @param   pfCaps      Where to store the capabilities (SUPVTCAPS_XXX).
 */
SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
{
    int rc = VERR_UNSUPPORTED_CPU;
    bool fIsSmxModeAmbiguous = false;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    /*
     * Input validation.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);

    *pfCaps = 0;
    /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    RTThreadPreemptDisable(&PreemptState);
    if (ASMHasCpuId())
    {
        uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
        uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;

        ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
        ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);

        /* Intel and VIA/Centaur CPUs: probe VT-x via CPUID and the feature-control MSR. */
        if (   ASMIsValidStdRange(uMaxId)
            && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
                || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
           )
        {
            /* VT-x also requires the MSR and FXSR feature flags here. */
            if (   (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /** @todo Unify code with hmR0InitIntelCpu(). */
                uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
                bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);

                /* Check if the LOCK bit is set but excludes the required VMXON bit. */
                if (fMsrLocked)
                {
                    if (fVmxAllowed && fSmxVmxAllowed)
                        rc = VINF_SUCCESS;
                    else if (!fVmxAllowed && !fSmxVmxAllowed)
                        rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
                    else if (!fMaybeSmxMode)
                    {
                        /* CR4.SMXE clear: only the non-SMX VMXON bit matters. */
                        if (fVmxAllowed)
                            rc = VINF_SUCCESS;
                        else
                            rc = VERR_VMX_MSR_VMXON_DISABLED;
                    }
                    else
                    {
                        /*
                         * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
                         * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
                         * See @bugref{6873}.
                         */
                        Assert(fMaybeSmxMode == true);
                        fIsSmxModeAmbiguous = true;
                        rc = VINF_SUCCESS;
                    }
                }
                else
                {
                    /*
                     * MSR is not yet locked; we can change it ourselves here.
                     * Once the lock bit is set, this MSR can no longer be modified.
                     *
                     * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
                     * accurately. See @bugref{6873}.
                     */
                    u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
                                | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                                | MSR_IA32_FEATURE_CONTROL_VMXON;
                    ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);

                    /* Verify.  Re-reads the MSR to confirm the write actually stuck. */
                    u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                    fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                    fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                    fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
                    if (fSmxVmxAllowed && fVmxAllowed)
                        rc = VINF_SUCCESS;
                    else
                        rc = VERR_VMX_MSR_LOCKING_FAILED;
                }

                if (rc == VINF_SUCCESS)
                {
                    VMXCAPABILITY vtCaps;

                    *pfCaps |= SUPVTCAPS_VT_X;

                    /* EPT support is advertised via the secondary processor-based controls. */
                    vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
                    if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
                    {
                        vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
                        if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
                            *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                    }
                }
            }
            else
                rc = VERR_VMX_NO_VMX;
        }
        /* AMD CPUs: probe AMD-V (SVM). */
        else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
                 && ASMIsValidStdRange(uMaxId))
        {
            uint32_t fExtFeaturesEcx, uExtMaxId;
            ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
            ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
            /* Leaf 0x8000000a (SVM features) must be available in addition to the SVM bit. */
            if (   ASMIsValidExtRange(uExtMaxId)
                && uExtMaxId >= 0x8000000a
                && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /* Check if SVM is disabled */
                uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
                if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
                {
                    uint32_t fSvmFeatures;
                    *pfCaps |= SUPVTCAPS_AMD_V;

                    /* Query AMD-V features. */
                    ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                        *pfCaps |= SUPVTCAPS_NESTED_PAGING;

                    rc = VINF_SUCCESS;
                }
                else
                    rc = VERR_SVM_DISABLED;
            }
            else
                rc = VERR_SVM_NO_SVM;
        }
    }

    RTThreadPreemptRestore(&PreemptState);
    /* NOTE(review): the double parentheses below are harmless (the inner pair
       is just a parenthesized string expression) but look like a leftover
       from a variadic-macro style call - worth confirming/cleaning upstream. */
    if (fIsSmxModeAmbiguous)
        SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    return rc;
}
3905
3906
3907/**
3908 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3909 * updating.
3910 *
3911 * @param pGipCpu The per CPU structure for this CPU.
3912 * @param u64NanoTS The current time.
3913 */
3914static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3915{
3916 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3917 pGipCpu->u64NanoTS = u64NanoTS;
3918}
3919
3920
3921/**
3922 * Set the current TSC and NanoTS value for the CPU.
3923 *
3924 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3925 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3926 * @param pvUser2 Pointer to the variable holding the current time.
3927 */
3928static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3929{
3930 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3931 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3932
3933 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3934 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3935
3936 NOREF(pvUser2);
3937 NOREF(idCpu);
3938}
3939
3940
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this
 *          function counts globally as one reference. One call to
 *          SUPR0GipUnmap() will unmap the GIP and remove the session as a GIP
 *          user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int rc;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
    RTR3PTR pGipR3 = NIL_RTR3PTR;
    RTHCPHYS HCPhys = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?  The ring-3 mapping is created at most once per session and
         * reused on subsequent calls.
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  The first user overall (re)starts the GIP
         * update timer below.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;
                uint32_t u32SystemResolution;
                unsigned i;

                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                /*
                 * Try bump up the system timer resolution.
                 * The more interrupts the better...
                 */
                if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
                    || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
                   )
                {
                    Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
                    pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
                }

                /* On resume (not the very first start): round the per-CPU
                   transaction ids up to the next recalc window boundary and
                   zero u64NanoTSLastUpdateHz - presumably so the update
                   interval is recalculated from scratch instead of spanning
                   the paused period; confirm against the GIP update code. */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /* Back-date the timestamp by one update interval, matching the
                   TSC back-dating performed by supdrvGipReInitCpu(). */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

#ifndef DO_NOT_START_GIP
                rc = RTTimerStart(pDevExt->pGipTimer, 0); AssertRC(rc);
#endif
                rc = VINF_SUCCESS;
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
4073
4074
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * Counterpart to SUPR0GipMap(): when the last user drops away the update
 * timer is stopped and any timer-resolution grant is released.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int rc = VINF_SUCCESS;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  Only proceeds if the unmap above (if any)
     * succeeded; the last user stops the update timer.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif

            /* Give back the system timer resolution we requested in SUPR0GipMap(). */
            if (pDevExt->u32SystemTimerGranularityGrant)
            {
                int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
                AssertRC(rc2);
                pDevExt->u32SystemTimerGranularityGrant = 0;
            }
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
4144
4145
4146/**
4147 * Gets the GIP pointer.
4148 *
4149 * @returns Pointer to the GIP or NULL.
4150 */
4151SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4152{
4153 return g_pSUPGlobalInfoPage;
4154}
4155
4156
4157/**
4158 * Register a component factory with the support driver.
4159 *
4160 * This is currently restricted to kernel sessions only.
4161 *
4162 * @returns VBox status code.
4163 * @retval VINF_SUCCESS on success.
4164 * @retval VERR_NO_MEMORY if we're out of memory.
4165 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4166 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4167 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4168 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4169 *
4170 * @param pSession The SUPDRV session (must be a ring-0 session).
4171 * @param pFactory Pointer to the component factory registration structure.
4172 *
4173 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4174 */
4175SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4176{
4177 PSUPDRVFACTORYREG pNewReg;
4178 const char *psz;
4179 int rc;
4180
4181 /*
4182 * Validate parameters.
4183 */
4184 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4185 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4186 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4187 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4188 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4189 AssertReturn(psz, VERR_INVALID_PARAMETER);
4190
4191 /*
4192 * Allocate and initialize a new registration structure.
4193 */
4194 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4195 if (pNewReg)
4196 {
4197 pNewReg->pNext = NULL;
4198 pNewReg->pFactory = pFactory;
4199 pNewReg->pSession = pSession;
4200 pNewReg->cchName = psz - &pFactory->szName[0];
4201
4202 /*
4203 * Add it to the tail of the list after checking for prior registration.
4204 */
4205 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4206 if (RT_SUCCESS(rc))
4207 {
4208 PSUPDRVFACTORYREG pPrev = NULL;
4209 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4210 while (pCur && pCur->pFactory != pFactory)
4211 {
4212 pPrev = pCur;
4213 pCur = pCur->pNext;
4214 }
4215 if (!pCur)
4216 {
4217 if (pPrev)
4218 pPrev->pNext = pNewReg;
4219 else
4220 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4221 rc = VINF_SUCCESS;
4222 }
4223 else
4224 rc = VERR_ALREADY_EXISTS;
4225
4226 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4227 }
4228
4229 if (RT_FAILURE(rc))
4230 RTMemFree(pNewReg);
4231 }
4232 else
4233 rc = VERR_NO_MEMORY;
4234 return rc;
4235}
4236
4237
4238/**
4239 * Deregister a component factory.
4240 *
4241 * @returns VBox status code.
4242 * @retval VINF_SUCCESS on success.
4243 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4244 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4245 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4246 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4247 *
4248 * @param pSession The SUPDRV session (must be a ring-0 session).
4249 * @param pFactory Pointer to the component factory registration structure
4250 * previously passed SUPR0ComponentRegisterFactory().
4251 *
4252 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4253 */
4254SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4255{
4256 int rc;
4257
4258 /*
4259 * Validate parameters.
4260 */
4261 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4262 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4263 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4264
4265 /*
4266 * Take the lock and look for the registration record.
4267 */
4268 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4269 if (RT_SUCCESS(rc))
4270 {
4271 PSUPDRVFACTORYREG pPrev = NULL;
4272 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4273 while (pCur && pCur->pFactory != pFactory)
4274 {
4275 pPrev = pCur;
4276 pCur = pCur->pNext;
4277 }
4278 if (pCur)
4279 {
4280 if (!pPrev)
4281 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4282 else
4283 pPrev->pNext = pCur->pNext;
4284
4285 pCur->pNext = NULL;
4286 pCur->pFactory = NULL;
4287 pCur->pSession = NULL;
4288 rc = VINF_SUCCESS;
4289 }
4290 else
4291 rc = VERR_NOT_FOUND;
4292
4293 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4294
4295 RTMemFree(pCur);
4296 }
4297 return rc;
4298}
4299
4300
4301/**
4302 * Queries a component factory.
4303 *
4304 * @returns VBox status code.
4305 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4306 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4307 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4308 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4309 *
4310 * @param pSession The SUPDRV session.
4311 * @param pszName The name of the component factory.
4312 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4313 * @param ppvFactoryIf Where to store the factory interface.
4314 */
4315SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4316{
4317 const char *pszEnd;
4318 size_t cchName;
4319 int rc;
4320
4321 /*
4322 * Validate parameters.
4323 */
4324 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4325
4326 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4327 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4328 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4329 cchName = pszEnd - pszName;
4330
4331 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4332 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4333 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4334
4335 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4336 *ppvFactoryIf = NULL;
4337
4338 /*
4339 * Take the lock and try all factories by this name.
4340 */
4341 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4342 if (RT_SUCCESS(rc))
4343 {
4344 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4345 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4346 while (pCur)
4347 {
4348 if ( pCur->cchName == cchName
4349 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4350 {
4351 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4352 if (pvFactory)
4353 {
4354 *ppvFactoryIf = pvFactory;
4355 rc = VINF_SUCCESS;
4356 break;
4357 }
4358 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4359 }
4360
4361 /* next */
4362 pCur = pCur->pNext;
4363 }
4364
4365 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4366 }
4367 return rc;
4368}
4369
4370
/**
 * Adds a memory object to the session.
 *
 * Records *pMem in the first free slot of the session's bundle list,
 * allocating and linking in a fresh bundle when all existing ones are full.
 *
 * @returns IPRT status code.
 * @param   pMem        Memory tracking structure containing the
 *                      information to track.
 * @param   pSession    The session.
 */
static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Find free entry and record the allocation.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                /* A NIL MemObj marks a free slot. */
                if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
                {
                    pBundle->cUsed++;
                    pBundle->aMem[i] = *pMem;
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
            /* cUsed said there was room but no free slot was found. */
            AssertFailed(); /* !!this can't be happening!!! */
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    /*
     * Need to allocate a new bundle.
     * Insert into the last entry in the bundle.
     *
     * Note: the allocation is done outside the spinlock and the lock is
     * re-acquired only for linking the bundle into the list.
     */
    pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
    if (!pBundle)
        return VERR_NO_MEMORY;

    /* take last entry. */
    pBundle->cUsed++;
    pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;

    /* insert into list (right after the session's anchor bundle). */
    RTSpinlockAcquire(pSession->Spinlock);
    pBundle->pNext = pSession->Bundle.pNext;
    pSession->Bundle.pNext = pBundle;
    RTSpinlockRelease(pSession->Spinlock);

    return VINF_SUCCESS;
}
4427
4428
/**
 * Releases a memory object referenced by pointer and type.
 *
 * Looks the address up among the session's tracked allocations (matching
 * either the ring-0 object address or the ring-3 mapping address), removes
 * the entry and frees the underlying memory objects.
 *
 * @returns IPRT status code.
 * @param   pSession    Session data.
 * @param   uPtr        Pointer to memory. This is matched against both the R0 and R3 addresses.
 * @param   eType       Memory type.
 */
static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Validate input.
     */
    if (!uPtr)
    {
        Log(("Illegal address %p\n", (void *)uPtr));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Search for the address.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    pBundle->aMem[i].eType == eType
                    &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                         || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                             && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
                   )
                {
                    /* Make a copy of it and release it outside the spinlock. */
                    SUPDRVMEMREF Mem = pBundle->aMem[i];
                    pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
                    pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                    RTSpinlockRelease(pSession->Spinlock);

                    /* Free the ring-3 mapping first, then the object itself
                       (the latter with fFreeMappings so leftovers go too). */
                    if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MapObjR3, false);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    if (Mem.MemObj != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
    return VERR_INVALID_PARAMETER;
}
4494
4495
4496/**
4497 * Opens an image. If it's the first time it's opened the call must upload
4498 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4499 *
4500 * This is the 1st step of the loading.
4501 *
4502 * @returns IPRT status code.
4503 * @param pDevExt Device globals.
4504 * @param pSession Session data.
4505 * @param pReq The open request.
4506 */
static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
{
    int             rc;
    PSUPDRVLDRIMAGE pImage;
    void           *pv;
    size_t          cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
    LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));

    /*
     * Check if we got an instance of the image already.
     */
    supdrvLdrLock(pDevExt);
    for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
    {
        /* Cheap length check via the terminator position before the memcmp. */
        if (    pImage->szName[cchName] == '\0'
            &&  !memcmp(pImage->szName, pReq->u.In.szName, cchName))
        {
            if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
            {
                /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
                pImage->cUsage++;
                pReq->u.Out.pvImageBase   = pImage->pvImage;
                /* Only the first opener needs to upload the bits. */
                pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
                pReq->u.Out.fNativeLoader = pImage->fNative;
                supdrvLdrAddUsage(pSession, pImage);
                supdrvLdrUnlock(pDevExt);
                return VINF_SUCCESS;
            }
            supdrvLdrUnlock(pDevExt);
            Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
            return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
        }
    }
    /* (not found - add it!) */

    /*
     * Allocate memory.
     */
    Assert(cchName < sizeof(pImage->szName));
    pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
    if (!pv)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
        return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
    }

    /*
     * Setup and link in the LDR stuff.
     */
    pImage = (PSUPDRVLDRIMAGE)pv;
    pImage->pvImage         = NULL;
    pImage->pvImageAlloc    = NULL;
    pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
    pImage->cbImageBits     = pReq->u.In.cbImageBits;
    pImage->cSymbols        = 0;
    pImage->paSymbols       = NULL;
    pImage->pachStrTab      = NULL;
    pImage->cbStrTab        = 0;
    pImage->pfnModuleInit   = NULL;
    pImage->pfnModuleTerm   = NULL;
    pImage->pfnServiceReqHandler = NULL;
    pImage->uState          = SUP_IOCTL_LDR_OPEN;
    pImage->cUsage          = 1;
    pImage->pDevExt         = pDevExt;
    memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);

    /*
     * Try load it using the native loader, if that isn't supported, fall back
     * on the older method.
     */
    pImage->fNative         = true;
    rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
    if (rc == VERR_NOT_SUPPORTED)
    {
        /* Legacy path: over-allocate by 31 bytes so the image can be 32-byte aligned. */
        pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
        pImage->pvImage      = RT_ALIGN_P(pImage->pvImageAlloc, 32);
        pImage->fNative      = false;
        rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
    }
    if (RT_FAILURE(rc))
    {
        supdrvLdrUnlock(pDevExt);
        RTMemFree(pImage);
        Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
        return rc;
    }
    Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));

    /*
     * Link it.
     */
    pImage->pNext           = pDevExt->pLdrImages;
    pDevExt->pLdrImages     = pImage;

    supdrvLdrAddUsage(pSession, pImage);

    pReq->u.Out.pvImageBase   = pImage->pvImage;
    pReq->u.Out.fNeedsLoading = true;
    pReq->u.Out.fNativeLoader = pImage->fNative;
    supdrvOSLdrNotifyOpened(pDevExt, pImage);

    supdrvLdrUnlock(pDevExt);
    return VINF_SUCCESS;
}
4612
4613
4614/**
4615 * Worker that validates a pointer to an image entrypoint.
4616 *
4617 * @returns IPRT status code.
4618 * @param pDevExt The device globals.
4619 * @param pImage The loader image.
4620 * @param pv The pointer into the image.
4621 * @param fMayBeNull Whether it may be NULL.
4622 * @param pszWhat What is this entrypoint? (for logging)
4623 * @param pbImageBits The image bits prepared by ring-3.
4624 *
4625 * @remarks Will leave the lock on failure.
4626 */
4627static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4628 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4629{
4630 if (!fMayBeNull || pv)
4631 {
4632 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4633 {
4634 supdrvLdrUnlock(pDevExt);
4635 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4636 return VERR_INVALID_PARAMETER;
4637 }
4638
4639 if (pImage->fNative)
4640 {
4641 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4642 if (RT_FAILURE(rc))
4643 {
4644 supdrvLdrUnlock(pDevExt);
4645 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4646 return rc;
4647 }
4648 }
4649 }
4650 return VINF_SUCCESS;
4651}
4652
4653
4654/**
4655 * Loads the image bits.
4656 *
4657 * This is the 2nd step of the loading.
4658 *
4659 * @returns IPRT status code.
4660 * @param pDevExt Device globals.
4661 * @param pSession Session data.
4662 * @param pReq The request.
4663 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int             rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));

    /*
     * Find the ldr image.
     */
    supdrvLdrLock(pDevExt);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;

    /*
     * Validate input.
     */
    /* Sizes must agree with what was specified at open time; a mismatch
       means ring-3 prepared a different image. */
    if (    pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
        ||  pImage->cbImageBits != pReq->u.In.cbImageBits)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
             pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
        return VERR_INVALID_HANDLE;
    }

    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        supdrvLdrUnlock(pDevExt);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return VERR_ALREADY_LOADED;
    }

    /* Validate the requested entry points.  Note that supdrvLdrValidatePointer
       drops the loader lock on failure, so the plain returns below are fine. */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;

        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
            if (RT_FAILURE(rc))
                return rc;
            break;

        case SUPLDRLOADEP_SERVICE:
            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
            if (RT_FAILURE(rc))
                return rc;
            /* The reserved pointers must be NIL for forward compatibility. */
            if (    pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
            {
                supdrvLdrUnlock(pDevExt);
                Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
                     pImage->pvImage, pReq->u.In.cbImageWithTabs,
                     pReq->u.In.EP.Service.apvReserved[0],
                     pReq->u.In.EP.Service.apvReserved[1],
                     pReq->u.In.EP.Service.apvReserved[2]));
                return VERR_INVALID_PARAMETER;
            }
            break;

        default:
            supdrvLdrUnlock(pDevExt);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }

    /* rc is guaranteed to be assigned here (the init entry point may be NULL). */
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
    if (RT_FAILURE(rc))
        return rc;
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
    if (RT_FAILURE(rc))
        return rc;

    /*
     * Allocate and copy the tables.
     * (No need to do try/except as this is a buffered request.)
     */
    pImage->cbStrTab = pReq->u.In.cbStrTab;
    if (pImage->cbStrTab)
    {
        pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
        if (pImage->pachStrTab)
            memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
    }

    pImage->cSymbols = pReq->u.In.cSymbols;
    if (RT_SUCCESS(rc) && pImage->cSymbols)
    {
        size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
        pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
        if (pImage->paSymbols)
            memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
    }

    /*
     * Copy the bits / complete native loading.
     */
    if (RT_SUCCESS(rc))
    {
        pImage->uState = SUP_IOCTL_LDR_LOAD;
        pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
        pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;

        if (pImage->fNative)
            rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
        else
        {
            memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
            Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
        }
    }

    /*
     * Update any entry points.
     */
    if (RT_SUCCESS(rc))
    {
        switch (pReq->u.In.eEPType)
        {
            default:
            case SUPLDRLOADEP_NOTHING:
                rc = VINF_SUCCESS;
                break;
            case SUPLDRLOADEP_VMMR0:
                rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                          pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
                break;
            case SUPLDRLOADEP_SERVICE:
                pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
                rc = VINF_SUCCESS;
                break;
        }
    }

    /*
     * On success call the module initialization.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        /* Record which image/thread is initializing so re-entrant symbol
           lookups from the init code can be recognized. */
        pDevExt->pLdrInitImage  = pImage;
        pDevExt->hLdrInitThread = RTThreadNativeSelf();
        rc = pImage->pfnModuleInit(pImage);
        pDevExt->pLdrInitImage  = NULL;
        pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
        if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetVMMR0EPs(pDevExt);
    }
    /* Note: printed on both success and failure paths. */
    SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);

    if (RT_FAILURE(rc))
    {
        /* Inform the tracing component in case ModuleInit registered TPs. */
        supdrvTracerModuleUnloading(pDevExt, pImage);

        /* Roll the image back to the open state and drop the tables. */
        pImage->uState              = SUP_IOCTL_LDR_OPEN;
        pImage->pfnModuleInit       = NULL;
        pImage->pfnModuleTerm       = NULL;
        pImage->pfnServiceReqHandler= NULL;
        pImage->cbStrTab            = 0;
        RTMemFree(pImage->pachStrTab);
        pImage->pachStrTab          = NULL;
        RTMemFree(pImage->paSymbols);
        pImage->paSymbols           = NULL;
        pImage->cSymbols            = 0;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
4857
4858
4859/**
4860 * Frees a previously loaded (prep'ed) image.
4861 *
4862 * @returns IPRT status code.
4863 * @param pDevExt Device globals.
4864 * @param pSession Session data.
4865 * @param pReq The request.
4866 */
static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
{
    int             rc;
    PSUPDRVLDRUSAGE pUsagePrev;
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));

    /*
     * Find the ldr image.
     */
    supdrvLdrLock(pDevExt);
    pUsagePrev = NULL;
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    {
        pUsagePrev = pUsage;
        pUsage = pUsage->pNext;
    }
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Check if we can remove anything.
     */
    rc = VINF_SUCCESS;
    pImage = pUsage->pImage;
    /* Only act when this would be the last global or per-session reference. */
    if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
    {
        /*
         * Check if there are any objects with destructors in the image, if
         * so leave it for the session cleanup routine so we get a chance to
         * clean things up in the right order and not leave them all dangling.
         */
        RTSpinlockAcquire(pDevExt->Spinlock);
        if (pImage->cUsage <= 1)
        {
            /* Last global reference: scan all objects in the driver. */
            PSUPDRVOBJ pObj;
            for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
                if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
                {
                    rc = VERR_DANGLING_OBJECTS;
                    break;
                }
        }
        else
        {
            /* Last session reference only: scan this session's objects. */
            PSUPDRVUSAGE pGenUsage;
            for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
                if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
                {
                    rc = VERR_DANGLING_OBJECTS;
                    break;
                }
        }
        RTSpinlockRelease(pDevExt->Spinlock);
        if (rc == VINF_SUCCESS)
        {
            /* unlink it */
            if (pUsagePrev)
                pUsagePrev->pNext = pUsage->pNext;
            else
                pSession->pLdrUsage = pUsage->pNext;

            /* free it */
            pUsage->pImage = NULL;
            pUsage->pNext = NULL;
            RTMemFree(pUsage);

            /*
             * Dereference the image.
             */
            if (pImage->cUsage <= 1)
                supdrvLdrFree(pDevExt, pImage);
            else
                pImage->cUsage--;
        }
        else
        {
            /* Dangling objects found: defer the free to session cleanup and
               report success to the caller. */
            Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
            rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
        }
    }
    else
    {
        /*
         * Dereference both image and usage.
         */
        pImage->cUsage--;
        pUsage->cUsage--;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
4966
4967
4968/**
4969 * Gets the address of a symbol in an open image.
4970 *
4971 * @returns IPRT status code.
4972 * @param pDevExt Device globals.
4973 * @param pSession Session data.
4974 * @param pReq The request buffer.
4975 */
4976static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4977{
4978 PSUPDRVLDRIMAGE pImage;
4979 PSUPDRVLDRUSAGE pUsage;
4980 uint32_t i;
4981 PSUPLDRSYM paSyms;
4982 const char *pchStrings;
4983 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
4984 void *pvSymbol = NULL;
4985 int rc = VERR_GENERAL_FAILURE;
4986 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
4987
4988 /*
4989 * Find the ldr image.
4990 */
4991 supdrvLdrLock(pDevExt);
4992 pUsage = pSession->pLdrUsage;
4993 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4994 pUsage = pUsage->pNext;
4995 if (!pUsage)
4996 {
4997 supdrvLdrUnlock(pDevExt);
4998 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
4999 return VERR_INVALID_HANDLE;
5000 }
5001 pImage = pUsage->pImage;
5002 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5003 {
5004 unsigned uState = pImage->uState;
5005 supdrvLdrUnlock(pDevExt);
5006 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5007 return VERR_ALREADY_LOADED;
5008 }
5009
5010 /*
5011 * Search the symbol strings.
5012 *
5013 * Note! The int32_t is for native loading on solaris where the data
5014 * and text segments are in very different places.
5015 */
5016 pchStrings = pImage->pachStrTab;
5017 paSyms = pImage->paSymbols;
5018 for (i = 0; i < pImage->cSymbols; i++)
5019 {
5020 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5021 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5022 {
5023 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5024 rc = VINF_SUCCESS;
5025 break;
5026 }
5027 }
5028 supdrvLdrUnlock(pDevExt);
5029 pReq->u.Out.pvSymbol = pvSymbol;
5030 return rc;
5031}
5032
5033
5034/**
5035 * Gets the address of a symbol in an open image or the support driver.
5036 *
5037 * @returns VINF_SUCCESS on success.
5038 * @returns
5039 * @param pDevExt Device globals.
5040 * @param pSession Session data.
5041 * @param pReq The request buffer.
5042 */
static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
{
    int             rc = VINF_SUCCESS;
    const char     *pszSymbol = pReq->u.In.pszSymbol;
    const char     *pszModule = pReq->u.In.pszModule;
    size_t          cbSymbol;
    char const     *pszEnd;
    uint32_t        i;

    /*
     * Input validation.
     */
    AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
    pszEnd = RTStrEnd(pszSymbol, 512);
    AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    cbSymbol = pszEnd - pszSymbol + 1;

    if (pszModule)
    {
        AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
        pszEnd = RTStrEnd(pszModule, 64);
        AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    }
    Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));


    if (    !pszModule
        ||  !strcmp(pszModule, "SupDrv"))
    {
        /*
         * Search the support driver export table.
         */
        /* NOTE(review): if the symbol is not found here, rc stays VINF_SUCCESS
           and pReq->u.Out.pfnSymbol is left unmodified - confirm whether
           callers rely on this or a not-found status was intended. */
        for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
            if (!strcmp(g_aFunctions[i].szName, pszSymbol))
            {
                pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
                break;
            }
    }
    else
    {
        /*
         * Find the loader image.
         */
        PSUPDRVLDRIMAGE pImage;

        supdrvLdrLock(pDevExt);

        for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
            if (!strcmp(pImage->szName, pszModule))
                break;
        if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
        {
            /*
             * Search the symbol strings.
             */
            const char *pchStrings = pImage->pachStrTab;
            PCSUPLDRSYM paSyms     = pImage->paSymbols;
            for (i = 0; i < pImage->cSymbols; i++)
            {
                if (    paSyms[i].offName + cbSymbol <= pImage->cbStrTab
                    &&  !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
                {
                    /*
                     * Found it! Calc the symbol address and add a reference to the module.
                     */
                    pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
                    rc = supdrvLdrAddUsage(pSession, pImage);
                    break;
                }
            }
        }
        else
            rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;

        supdrvLdrUnlock(pDevExt);
    }
    return rc;
}
5122
5123
5124/**
5125 * Updates the VMMR0 entry point pointers.
5126 *
5127 * @returns IPRT status code.
5128 * @param pDevExt Device globals.
5129 * @param pSession Session data.
5130 * @param pVMMR0 VMMR0 image handle.
5131 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5132 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5133 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5134 * @remark Caller must own the loader mutex.
5135 */
5136static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5137{
5138 int rc = VINF_SUCCESS;
5139 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5140
5141
5142 /*
5143 * Check if not yet set.
5144 */
5145 if (!pDevExt->pvVMMR0)
5146 {
5147 pDevExt->pvVMMR0 = pvVMMR0;
5148 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5149 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5150 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5151 }
5152 else
5153 {
5154 /*
5155 * Return failure or success depending on whether the values match or not.
5156 */
5157 if ( pDevExt->pvVMMR0 != pvVMMR0
5158 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5159 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5160 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5161 {
5162 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5163 rc = VERR_INVALID_PARAMETER;
5164 }
5165 }
5166 return rc;
5167}
5168
5169
5170/**
5171 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5172 *
5173 * @param pDevExt Device globals.
5174 */
static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
{
    /* Clear the image pointer and all three entry points together. */
    pDevExt->pvVMMR0            = NULL;
    pDevExt->pfnVMMR0EntryInt   = NULL;
    pDevExt->pfnVMMR0EntryFast  = NULL;
    pDevExt->pfnVMMR0EntryEx    = NULL;
}
5182
5183
5184/**
5185 * Adds a usage reference in the specified session of an image.
5186 *
5187 * Called while owning the loader semaphore.
5188 *
5189 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5190 * @param pSession Session in question.
5191 * @param pImage Image which the session is using.
5192 */
5193static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5194{
5195 PSUPDRVLDRUSAGE pUsage;
5196 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5197
5198 /*
5199 * Referenced it already?
5200 */
5201 pUsage = pSession->pLdrUsage;
5202 while (pUsage)
5203 {
5204 if (pUsage->pImage == pImage)
5205 {
5206 pUsage->cUsage++;
5207 return VINF_SUCCESS;
5208 }
5209 pUsage = pUsage->pNext;
5210 }
5211
5212 /*
5213 * Allocate new usage record.
5214 */
5215 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5216 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5217 pUsage->cUsage = 1;
5218 pUsage->pImage = pImage;
5219 pUsage->pNext = pSession->pLdrUsage;
5220 pSession->pLdrUsage = pUsage;
5221 return VINF_SUCCESS;
5222}
5223
5224
5225/**
5226 * Frees a load image.
5227 *
5228 * @param pDevExt Pointer to device extension.
5229 * @param pImage Pointer to the image we're gonna free.
5230 * This image must exit!
5231 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5232 */
static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
{
    PSUPDRVLDRIMAGE pImagePrev;
    LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));

    /* find it - arg. should've used doubly linked list. */
    Assert(pDevExt->pLdrImages);
    pImagePrev = NULL;
    if (pDevExt->pLdrImages != pImage)
    {
        pImagePrev = pDevExt->pLdrImages;
        while (pImagePrev->pNext != pImage)
            pImagePrev = pImagePrev->pNext;
        Assert(pImagePrev->pNext == pImage);
    }

    /* unlink */
    if (pImagePrev)
        pImagePrev->pNext = pImage->pNext;
    else
        pDevExt->pLdrImages = pImage->pNext;

    /* check if this is VMMR0.r0 unset its entry point pointers. */
    if (pDevExt->pvVMMR0 == pImage->pvImage)
        supdrvLdrUnsetVMMR0EPs(pDevExt);

    /* check for objects with destructors in this image. (Shouldn't happen.) */
    if (pDevExt->pObjs)
    {
        unsigned        cObjs = 0;
        PSUPDRVOBJ      pObj;
        RTSpinlockAcquire(pDevExt->Spinlock);
        for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
            if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
            {
                /* Neutralize the destructor so nobody calls into freed code. */
                pObj->pfnDestructor = NULL;
                cObjs++;
            }
        RTSpinlockRelease(pDevExt->Spinlock);
        if (cObjs)
            OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
    }

    /* call termination function if fully loaded. */
    if (    pImage->pfnModuleTerm
        &&  pImage->uState == SUP_IOCTL_LDR_LOAD)
    {
        LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
        pImage->pfnModuleTerm(pImage);
    }

    /* Inform the tracing component. */
    supdrvTracerModuleUnloading(pDevExt, pImage);

    /* do native unload if appropriate. */
    if (pImage->fNative)
        supdrvOSLdrUnload(pDevExt, pImage);

    /* free the image */
    /* (RTMemExecFree tolerates the NULL pvImageAlloc of native images;
       the +31 matches the over-allocation done at open time.) */
    pImage->cUsage = 0;
    pImage->pDevExt = NULL;
    pImage->pNext = NULL;
    pImage->uState = SUP_IOCTL_LDR_FREE;
    RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
    pImage->pvImageAlloc = NULL;
    RTMemFree(pImage->pachStrTab);
    pImage->pachStrTab = NULL;
    RTMemFree(pImage->paSymbols);
    pImage->paSymbols = NULL;
    RTMemFree(pImage);
}
5304
5305
5306/**
5307 * Acquires the loader lock.
5308 *
5309 * @returns IPRT status code.
5310 * @param pDevExt The device extension.
5311 */
DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
{
    /* Mutex vs fast-mutex choice is a compile-time build option. */
#ifdef SUPDRV_USE_MUTEX_FOR_LDR
    int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
#else
    int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
#endif
    AssertRC(rc);
    return rc;
}
5322
5323
5324/**
5325 * Releases the loader lock.
5326 *
5327 * @returns IPRT status code.
5328 * @param pDevExt The device extension.
5329 */
DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
{
    /* Must mirror the semaphore type chosen in supdrvLdrLock. */
#ifdef SUPDRV_USE_MUTEX_FOR_LDR
    return RTSemMutexRelease(pDevExt->mtxLdr);
#else
    return RTSemFastMutexRelease(pDevExt->mtxLdr);
#endif
}
5338
5339
5340/**
5341 * Implements the service call request.
5342 *
5343 * @returns VBox status code.
5344 * @param pDevExt The device extension.
5345 * @param pSession The calling session.
5346 * @param pReq The request packet, valid.
5347 */
static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
{
/* Not available on 32-bit Windows release builds. */
#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
    int rc;

    /*
     * Find the module first in the module referenced by the calling session.
     */
    rc = supdrvLdrLock(pDevExt);
    if (RT_SUCCESS(rc))
    {
        PFNSUPR0SERVICEREQHANDLER   pfnServiceReqHandler = NULL;
        PSUPDRVLDRUSAGE             pUsage;

        for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
            if (    pUsage->pImage->pfnServiceReqHandler
                &&  !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
            {
                pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
                break;
            }
        /* The handler is invoked after dropping the loader lock. */
        supdrvLdrUnlock(pDevExt);

        if (pfnServiceReqHandler)
        {
            /*
             * Call it.
             */
            /* A header-only request means "no request packet". */
            if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
                rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
            else
                rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
        }
        else
            rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
    }

    /* log it */
    if (    RT_FAILURE(rc)
        &&  rc != VERR_INTERRUPTED
        &&  rc != VERR_TIMEOUT)
        Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
             rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    else
        Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
              rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    return rc;
#else  /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
    return VERR_NOT_IMPLEMENTED;
#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
}
5399
5400
5401/**
5402 * Implements the logger settings request.
5403 *
5404 * @returns VBox status code.
5405 * @param pDevExt The device extension.
5406 * @param pSession The caller's session.
5407 * @param pReq The request.
5408 */
static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
{
    /* The three setting strings live packed inside szStrings at the given offsets. */
    const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
    const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
    const char *pszDest  = &pReq->u.In.szStrings[pReq->u.In.offDestination];
    PRTLOGGER   pLogger  = NULL;
    int         rc;

    /*
     * Some further validation.
     */
    switch (pReq->u.In.fWhat)
    {
        case SUPLOGGERSETTINGS_WHAT_SETTINGS:
        case SUPLOGGERSETTINGS_WHAT_CREATE:
            break;

        case SUPLOGGERSETTINGS_WHAT_DESTROY:
            /* Destroy takes no setting strings and the release logger may not be destroyed. */
            if (*pszGroup || *pszFlags || *pszDest)
                return VERR_INVALID_PARAMETER;
            if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
                return VERR_ACCESS_DENIED;
            break;

        default:
            return VERR_INTERNAL_ERROR;
    }

    /*
     * Get the logger.
     */
    switch (pReq->u.In.fWhich)
    {
        case SUPLOGGERSETTINGS_WHICH_DEBUG:
            pLogger = RTLogGetDefaultInstance();
            break;

        case SUPLOGGERSETTINGS_WHICH_RELEASE:
            pLogger = RTLogRelDefaultInstance();
            break;

        default:
            return VERR_INTERNAL_ERROR;
    }

    /*
     * Do the job.
     */
    switch (pReq->u.In.fWhat)
    {
        case SUPLOGGERSETTINGS_WHAT_SETTINGS:
            if (pLogger)
            {
                rc = RTLogFlags(pLogger, pszFlags);
                if (RT_SUCCESS(rc))
                    rc = RTLogGroupSettings(pLogger, pszGroup);
                NOREF(pszDest);
            }
            else
                rc = VERR_NOT_FOUND;
            break;

        case SUPLOGGERSETTINGS_WHAT_CREATE:
        {
            if (pLogger)
                rc = VERR_ALREADY_EXISTS;
            else
            {
                static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;

                rc = RTLogCreate(&pLogger,
                                 0 /* fFlags */,
                                 pszGroup,
                                 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
                                 ? "VBOX_LOG"
                                 : "VBOX_RELEASE_LOG",
                                 RT_ELEMENTS(s_apszGroups),
                                 s_apszGroups,
                                 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
                                 NULL);
                if (RT_SUCCESS(rc))
                {
                    rc = RTLogFlags(pLogger, pszFlags);
                    NOREF(pszDest);
                    if (RT_SUCCESS(rc))
                    {
                        /* Install the new logger; the Set functions return the
                           previous default (NULL here, checked above), so the
                           RTLogDestroy below destroys nothing on this path. */
                        switch (pReq->u.In.fWhich)
                        {
                            case SUPLOGGERSETTINGS_WHICH_DEBUG:
                                pLogger = RTLogSetDefaultInstance(pLogger);
                                break;
                            case SUPLOGGERSETTINGS_WHICH_RELEASE:
                                pLogger = RTLogRelSetDefaultInstance(pLogger);
                                break;
                        }
                    }
                    /* On RTLogFlags failure this destroys the freshly created logger. */
                    RTLogDestroy(pLogger);
                }
            }
            break;
        }

        case SUPLOGGERSETTINGS_WHAT_DESTROY:
            /* Detach the default instance first, then destroy it. */
            switch (pReq->u.In.fWhich)
            {
                case SUPLOGGERSETTINGS_WHICH_DEBUG:
                    pLogger = RTLogSetDefaultInstance(NULL);
                    break;
                case SUPLOGGERSETTINGS_WHICH_RELEASE:
                    pLogger = RTLogRelSetDefaultInstance(NULL);
                    break;
            }
            rc = RTLogDestroy(pLogger);
            break;

        default:
        {
            rc = VERR_INTERNAL_ERROR;
            break;
        }
    }

    return rc;
}
5533
5534
5535/**
5536 * Implements the MSR prober operations.
5537 *
5538 * @returns VBox status code.
5539 * @param pDevExt The device extension.
5540 * @param pReq The request.
5541 */
5542static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5543{
5544#ifdef SUPDRV_WITH_MSR_PROBER
5545 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5546 int rc;
5547
5548 switch (pReq->u.In.enmOp)
5549 {
5550 case SUPMSRPROBEROP_READ:
5551 {
5552 uint64_t uValue;
5553 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5554 if (RT_SUCCESS(rc))
5555 {
5556 pReq->u.Out.uResults.Read.uValue = uValue;
5557 pReq->u.Out.uResults.Read.fGp = false;
5558 }
5559 else if (rc == VERR_ACCESS_DENIED)
5560 {
5561 pReq->u.Out.uResults.Read.uValue = 0;
5562 pReq->u.Out.uResults.Read.fGp = true;
5563 rc = VINF_SUCCESS;
5564 }
5565 break;
5566 }
5567
5568 case SUPMSRPROBEROP_WRITE:
5569 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5570 if (RT_SUCCESS(rc))
5571 pReq->u.Out.uResults.Write.fGp = false;
5572 else if (rc == VERR_ACCESS_DENIED)
5573 {
5574 pReq->u.Out.uResults.Write.fGp = true;
5575 rc = VINF_SUCCESS;
5576 }
5577 break;
5578
5579 case SUPMSRPROBEROP_MODIFY:
5580 case SUPMSRPROBEROP_MODIFY_FASTER:
5581 rc = supdrvOSMsrProberModify(idCpu, pReq);
5582 break;
5583
5584 default:
5585 return VERR_INVALID_FUNCTION;
5586 }
5587 return rc;
5588#else
5589 return VERR_NOT_IMPLEMENTED;
5590#endif
5591}
5592
5593
5594#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5595/**
5596 * Switches the TSC-delta measurement thread into the butchered state.
5597 *
5598 * @returns VBox status code.
5599 * @param pDevExt Pointer to the device instance data.
5600 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5601 * @param pszFailed An error message to log.
5602 * @param rcFailed The error code to exit the thread with.
5603 */
5604static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5605{
5606 if (!fSpinlockHeld)
5607 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5608
5609 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5610 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5611 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5612 return rcFailed;
5613}
5614
5615
/**
 * The TSC-delta measurement thread.
 *
 * A state machine driven by pDevExt->enmTscDeltaState (protected by
 * pDevExt->hTscDeltaSpinlock).  It services re-measurement requests, e.g. from
 * CPUs coming online, that cannot be performed synchronously in all contexts.
 *
 * @returns VBox status code.
 * @param   hThread     The thread handle.
 * @param   pvUser      Opaque pointer to the device instance data.
 */
static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
{
    PSUPDRVDEVEXT     pDevExt = (PSUPDRVDEVEXT)pvUser;
    /* NOTE(review): static, so the measurement count survives thread
       re-creation - presumably only one driver instance ever runs this. */
    static uint32_t   cTimesMeasured = 0;
    uint32_t          cConsecutiveTimeouts = 0;
    int               rc = VERR_INTERNAL_ERROR_2;
    for (;;)
    {
        /*
         * Switch on the current state.
         */
        SUPDRVTSCDELTASTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaState;
        switch (enmState)
        {
            case kSupDrvTscDeltaState_Creating:
            {
                pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
                /* Wake the creator blocked in supdrvTscDeltaThreadWait(). */
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                /* fall thru */
            }

            case kSupDrvTscDeltaState_Listening:
            {
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                /* Simple adaptive timeout: after 10 consecutive timeouts, widen
                   the wait (1 -> 10 -> 100 -> 500 ms) to reduce idle wakeups. */
                if (cConsecutiveTimeouts++ == 10)
                {
                    if (pDevExt->cMsTscDeltaTimeout == 1)           /* 1 -> 10 ms */
                        pDevExt->cMsTscDeltaTimeout = 10;
                    else if (pDevExt->cMsTscDeltaTimeout == 10)     /* 10 -> 100 ms */
                        pDevExt->cMsTscDeltaTimeout = 100;
                    else if (pDevExt->cMsTscDeltaTimeout == 100)    /* 100 -> 500 ms */
                        pDevExt->cMsTscDeltaTimeout = 500;
                    cConsecutiveTimeouts = 0;
                }
                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
                if (   RT_FAILURE(rc)
                    && rc != VERR_TIMEOUT)
                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
                break;
            }

            case kSupDrvTscDeltaState_WaitAndMeasure:
            {
                pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
                /* Wake the requester waiting for the WaitAndMeasure -> Measuring transition. */
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->cMsTscDeltaTimeout = 1; /* reset the adaptive listening timeout */
                RTThreadSleep(10);
                /* fall thru */
            }

            case kSupDrvTscDeltaState_Measuring:
            {
                cConsecutiveTimeouts = 0;
                if (!cTimesMeasured++)
                    /* Very first time: measure deltas for all CPUs. */
                    rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
                else
                {
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    unsigned iCpu;

                    if (cTimesMeasured == UINT32_MAX)
                        cTimesMeasured = 1; /* don't wrap back to 0 and redo the full measurement */

                    /* Measure TSC-deltas only for the CPUs that are in the set. */
                    rc = VINF_SUCCESS;
                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    {
                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
                        if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
                            && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
                        {
                            /* NOTE(review): OR-ing VBox status codes together is only
                               meaningful as a zero/non-zero failure indicator - confirm intent. */
                            rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                            RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
                        }
                    }
                }
                /* Only return to Listening if nobody changed the state while we measured. */
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
                    pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->rcTscDelta = rc;
                break;
            }

            case kSupDrvTscDeltaState_Terminating:
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                return VINF_SUCCESS;

            case kSupDrvTscDeltaState_Butchered:
            default:
                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
        }
    }

    return rc;
}
5728
5729
/**
 * Waits for the TSC-delta measurement thread to respond to a state change.
 *
 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
 *          other error code on internal error.
 *
 * @param   pDevExt     Pointer to the device instance data.
 * @param   enmCurState The current state.
 * @param   enmNewState The new state we're waiting for it to enter.
 */
static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
{
    /*
     * Wait a short while for the expected state transition.
     */
    int rc;
    /* The wait result is deliberately ignored; the state check below decides. */
    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    if (pDevExt->enmTscDeltaState == enmNewState)
    {
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = VINF_SUCCESS;
    }
    else if (pDevExt->enmTscDeltaState == enmCurState)
    {
        /*
         * Wait longer if the state has not yet transitioned to the one we want.
         */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
        if (   RT_SUCCESS(rc)
            || rc == VERR_TIMEOUT)
        {
            /*
             * Check the state whether we've succeeded.
             */
            SUPDRVTSCDELTASTATE enmState;
            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
            enmState = pDevExt->enmTscDeltaState;
            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
            if (enmState == enmNewState)
                rc = VINF_SUCCESS;
            else if (enmState == enmCurState)
            {
                /* Still stuck in the old state: report a timeout. */
                rc = VERR_TIMEOUT;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
                            enmNewState));
            }
            else
            {
                /* Ended up in a third, unexpected state. */
                rc = VERR_INTERNAL_ERROR;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
                            enmState, enmNewState));
            }
        }
        else
            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    }
    else
    {
        /* Neither the old nor the new state: someone else changed it under us. */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
        rc = VERR_INTERNAL_ERROR;
    }

    return rc;
}
5797
5798
5799/**
5800 * Terminates the TSC-delta measurement thread.
5801 *
5802 * @param pDevExt Pointer to the device instance data.
5803 */
5804static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5805{
5806 int rc;
5807 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5808 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5809 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5810 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5811 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5812 if (RT_FAILURE(rc))
5813 {
5814 /* Signal a few more times before giving up. */
5815 int cTries = 5;
5816 while (--cTries > 0)
5817 {
5818 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5819 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5820 if (rc != VERR_TIMEOUT)
5821 break;
5822 }
5823 }
5824}
5825
5826
5827/**
5828 * Initializes and spawns the TSC-delta measurement thread.
5829 *
5830 * A thread is required for servicing re-measurement requests from events like
5831 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5832 * under all contexts on all OSs.
5833 *
5834 * @returns VBox status code.
5835 * @param pDevExt Pointer to the device instance data.
5836 *
5837 * @remarks Must only be called -after- initializing GIP and setting up MP
5838 * notifications!
5839 */
5840static int supdrvTscDeltaInit(PSUPDRVDEVEXT pDevExt)
5841{
5842 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5843 if (RT_SUCCESS(rc))
5844 {
5845 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5846 if (RT_SUCCESS(rc))
5847 {
5848 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5849 pDevExt->cMsTscDeltaTimeout = 1;
5850 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5851 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5852 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5853 if (RT_SUCCESS(rc))
5854 {
5855 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5856 if (RT_SUCCESS(rc))
5857 {
5858 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5859 return rc;
5860 }
5861
5862 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5863 supdrvTscDeltaThreadTerminate(pDevExt);
5864 }
5865 else
5866 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5867 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5868 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5869 }
5870 else
5871 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5872 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5873 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5874 }
5875 else
5876 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5877
5878 return rc;
5879}
5880
5881
5882/**
5883 * Terminates the TSC-delta measurement thread and cleanup.
5884 *
5885 * @param pDevExt Pointer to the device instance data.
5886 */
5887static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5888{
5889 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5890 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5891 {
5892 supdrvTscDeltaThreadTerminate(pDevExt);
5893 }
5894
5895 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5896 {
5897 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5898 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5899 }
5900
5901 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5902 {
5903 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5904 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5905 }
5906
5907 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5908}
5909#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5910
5911
/**
 * Measures the nominal TSC frequency.
 *
 * Uses a busy-wait method for the async. case as it is intended to help push
 * the CPU frequency up, while for the invariant cases using a sleeping method.
 *
 * @returns VBox status code.
 * @param   pGip        Pointer to the GIP.
 *
 * @remarks Must be called only after measuring the TSC deltas.
 */
static int supdrvGipMeasureNominalTscFreq(PSUPGLOBALINFOPAGE pGip)
{
    int cTriesLeft = 4;

    /* Assert order. */
    AssertReturn(pGip, VERR_INVALID_PARAMETER);
    AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);

    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG uFlags;
        uint64_t    u64NanoTs;
        uint64_t    u64NanoTsAfter;
        uint64_t    u64TscBefore;
        uint64_t    u64TscAfter;
        uint8_t     idApicBefore;
        uint8_t     idApicAfter;

        /*
         * Synchronize with the host OS clock tick before reading the TSC.
         * Especially important on Windows where the granularity is terrible.
         */
        u64NanoTs = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == u64NanoTs)
            ASMNopPause();

        /* Sample APIC ID, TSC and clock with interrupts disabled so all three
           readings come from the same CPU without a scheduling gap. */
        uFlags       = ASMIntDisableFlags();
        idApicBefore = ASMGetApicId();
        u64TscBefore = ASMReadTSC();
        u64NanoTs    = RTTimeSystemNanoTS();
        ASMSetFlags(uFlags);

        if (supdrvIsInvariantTsc())
        {
            /*
             * Sleep wait since the TSC frequency is constant, eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             */
            RTThreadSleep(200); /* Sleeping shorter produces a tad more variance in the frequency than I'd like. */
            /* Re-align the end reading with a clock tick edge, same as above. */
            u64NanoTsAfter = RTTimeSystemNanoTS();
            while (RTTimeSystemNanoTS() == u64NanoTsAfter)
                ASMNopPause();
            u64NanoTsAfter = RTTimeSystemNanoTS();
        }
        else
        {
            /* Busy wait, ramps up the CPU frequency on async systems. */
            for (;;)
            {
                u64NanoTsAfter = RTTimeSystemNanoTS();
                if (u64NanoTsAfter < RT_NS_100MS + u64NanoTs)
                    ASMNopPause();
                else
                    break;
            }
        }

        uFlags      = ASMIntDisableFlags();
        u64TscAfter = ASMReadTSC();
        idApicAfter = ASMGetApicId();
        ASMSetFlags(uFlags);

        /** @todo replace with enum check. */
        if (supdrvIsInvariantTsc())
        {
            PSUPGIPCPU pGipCpuBefore;
            PSUPGIPCPU pGipCpuAfter;

            /* Map the APIC IDs to GIP CPU entries so the per-CPU TSC deltas can
               be applied; we may have been rescheduled between the readings. */
            uint16_t iCpuBefore = pGip->aiCpuFromApicId[idApicBefore];
            uint16_t iCpuAfter  = pGip->aiCpuFromApicId[idApicAfter];
            AssertMsgReturn(iCpuBefore < pGip->cCpus, ("iCpuBefore=%u cCpus=%u\n", iCpuBefore, pGip->cCpus), VERR_WRONG_ORDER);
            AssertMsgReturn(iCpuAfter < pGip->cCpus, ("iCpuAfter=%u cCpus=%u\n", iCpuAfter, pGip->cCpus), VERR_WRONG_ORDER);
            pGipCpuBefore = &pGip->aCPUs[iCpuBefore];
            pGipCpuAfter = &pGip->aCPUs[iCpuAfter];

            if (   pGipCpuBefore->i64TSCDelta != INT64_MAX
                && pGipCpuAfter->i64TSCDelta != INT64_MAX)
            {
                /* Normalize both readings to the master's TSC before computing the rate. */
                u64TscBefore -= pGipCpuBefore->i64TSCDelta;
                u64TscAfter -= pGipCpuAfter->i64TSCDelta;

                /* NOTE(review): %lu with a 64-bit value is only correct on LP64
                   hosts - confirm SUPR0Printf format handling on 32-bit targets. */
                SUPR0Printf("vboxdrv: TSC frequency is %lu Hz - invariant, kernel timer granularity is %lu Ns\n",
                            ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTs),
                            RTTimerGetSystemGranularity());
                return VINF_SUCCESS;
            }
            else
            {
                /* Delta not yet available for one of the CPUs involved; retry. */
                SUPR0Printf("vboxdrv: supdrvGipMeasureNominalTscFreq: iCpuBefore=%u iCpuAfter=%u cTriesLeft=%u\n", iCpuBefore,
                            iCpuAfter, cTriesLeft);
            }
        }
        else
        {
            SUPR0Printf("vboxdrv: TSC frequency is %lu Hz - maybe variant, kernel timer granularity is %lu Ns\n",
                        ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTs),
                        RTTimerGetSystemGranularity());
            return VINF_SUCCESS;
        }
    }

    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
6026
6027
/**
 * Creates the GIP.
 *
 * Allocates the GIP page(s), initializes them, registers MP notifications,
 * measures the TSC deltas (or spawns a thread for that) and finally creates
 * the GIP update timer.  On failure everything done so far is undone via
 * supdrvGipDestroy.
 *
 * @returns VBox status code.
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
{
    PSUPGLOBALINFOPAGE pGip;
    RTHCPHYS HCPhysGip;
    uint32_t u32SystemResolution;
    uint32_t u32Interval;
    uint32_t u32MinInterval;
    uint32_t uMod;
    unsigned cCpus;
    int rc;

    LogFlow(("supdrvGipCreate:\n"));

    /* Assert order. */
    Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
    Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
    Assert(!pDevExt->pGipTimer);

    /*
     * Check the CPU count.
     */
    cCpus = RTMpGetArraySize();
    if (   cCpus > RTCPUSET_MAX_CPUS
        || cCpus > 256 /*ApicId is used for the mappings*/)
    {
        SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
        return VERR_TOO_MANY_CPUS;
    }

    /*
     * Allocate a contiguous set of pages with a default kernel mapping.
     */
    rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
    if (RT_FAILURE(rc))
    {
        OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
        return rc;
    }
    pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
    HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);

    /*
     * Find a reasonable update interval and initialize the structure.
     */
    /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
     *        See @bugref{6710}. */
    u32MinInterval      = RT_NS_10MS;
    u32SystemResolution = RTTimerGetSystemGranularity();
    u32Interval         = u32MinInterval;
    /* Round the interval up to a whole multiple of the system timer granularity. */
    uMod                = u32MinInterval % u32SystemResolution;
    if (uMod)
        u32Interval += u32SystemResolution - uMod;

    supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
    rc = supdrvTscDeltaInit(pDevExt);
#endif
    /* Without the thread, rc still holds the (successful) allocation status here. */
    if (RT_SUCCESS(rc))
    {
        rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
        if (RT_SUCCESS(rc))
        {
            /* Run the per-CPU GIP initialization on every online CPU. */
            rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
            if (RT_SUCCESS(rc))
            {
#ifndef SUPDRV_USE_TSC_DELTA_THREAD
                /*
                 * Measure the TSC deltas now that we have MP notifications.
                 */
                int cTries = 5;
                uint16_t iCpu;
                do
                {
                    rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
                    if (rc != VERR_TRY_AGAIN)
                        break;
                } while (--cTries > 0);
                for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
#endif

#if 0
                /** @todo refactor later and use the nominal TSC rate for invariant case as
                 *        the real and constant TSC rate. */
                supdrvGipMeasureNominalTscFreq(pGip);
#endif

                /*
                 * Create the timer.
                 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
                 */
                if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
                {
                    rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
                    if (rc == VERR_NOT_SUPPORTED)
                    {
                        OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
                        pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
                    }
                }
                if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
                    rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
                if (RT_SUCCESS(rc))
                {
                    /*
                     * We're good.
                     */
                    Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
                    g_pSUPGlobalInfoPage = pGip;
                    return VINF_SUCCESS;
                }
                else
                {
                    OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
                    Assert(!pDevExt->pGipTimer);
                }
            }
            else
                OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
        }
        else
            OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
    }
    else
        OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));

    /* Failure: undo everything done so far. */
    supdrvGipDestroy(pDevExt);
    return rc;
}
6165
6166
/**
 * Terminates the GIP.
 *
 * Tears down in roughly the reverse order of supdrvGipCreate: MP
 * notifications, the TSC-delta thread, the GIP data, the timer, the memory
 * object, and finally the timer granularity grant.  Safe to call on a
 * partially created GIP.
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Invalidate the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}
6228
6229
/**
 * Timer callback function sync GIP mode.
 *
 * Samples the TSC and system clock and feeds them to supdrvGipUpdate,
 * normalizing the TSC to the GIP master on invariant-TSC hosts.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      The device extension.
 * @param   iTick       The timer tick (passed on to supdrvGipUpdate).
 */
static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t u64TSC = ASMReadTSC();
    uint64_t NanoTS = RTTimeSystemNanoTS();
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;

    if (supdrvIsInvariantTsc())
    {
        PSUPGIPCPU pGipCpu;
        PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        AssertReturnVoid(iCpu < pGip->cCpus);
        pGipCpu = &pGip->aCPUs[iCpu];
        AssertReturnVoid(pGipCpu->idCpu == RTMpCpuId());

        /*
         * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
         * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
         * affected a bit until we get proper TSC deltas than implementing options like
         * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
         *
         * The likely hood of this happening is really low. On Windows, Linux timers
         * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
         */
        if (pGipCpu->i64TSCDelta != INT64_MAX)
            u64TSC -= pGipCpu->i64TSCDelta; /* normalize to the GIP master's TSC */
    }

    supdrvGipUpdate(pDevExt, NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fOldFlags);
}
6268
6269
6270/**
6271 * Timer callback function for async GIP mode.
6272 * @param pTimer The timer.
6273 * @param pvUser The device extension.
6274 */
6275static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6276{
6277 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6278 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6279 RTCPUID idCpu = RTMpCpuId();
6280 uint64_t u64TSC = ASMReadTSC();
6281 uint64_t NanoTS = RTTimeSystemNanoTS();
6282
6283 /** @todo reset the transaction number and whatnot when iTick == 1. */
6284 if (pDevExt->idGipMaster == idCpu)
6285 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6286 else
6287 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6288
6289 ASMSetFlags(fOldFlags);
6290}
6291
6292
6293/**
6294 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6295 *
6296 * @returns Index of the CPU in the cache set.
6297 * @param pGip The GIP.
6298 * @param idCpu The CPU ID.
6299 */
6300static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6301{
6302 uint32_t i, cTries;
6303
6304 /*
6305 * ASSUMES that CPU IDs are constant.
6306 */
6307 for (i = 0; i < pGip->cCpus; i++)
6308 if (pGip->aCPUs[i].idCpu == idCpu)
6309 return i;
6310
6311 cTries = 0;
6312 do
6313 {
6314 for (i = 0; i < pGip->cCpus; i++)
6315 {
6316 bool fRc;
6317 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6318 if (fRc)
6319 return i;
6320 }
6321 } while (cTries++ < 32);
6322 AssertReleaseFailed();
6323 return i - 1;
6324}
6325
6326
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    AssertRelease(idCpu == RTMpCpuId()); /* must execute on the CPU being onlined */
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.
     */
    /* Back-date the timestamp by one update interval so the first tick looks normal. */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&g_cMpOnOffEvents);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Add this CPU to the set of CPUs that require their TSC delta to be measured.
     *
     * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
     * update the state and it'll get serviced when the thread's listening interval times out.
     */
    if (supdrvIsInvariantTsc())
    {
        RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        if (   pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
            || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
        {
            pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
        }
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    }
#endif

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
6411
6412
/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet;
    unsigned i;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    /* Map the CPU set index back to our GIP CPU array entry. */
    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&g_cMpOnOffEvents);

    /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
    if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
    {
        ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
        ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
    }

    /* Reset the TSC delta, we will recalculate it lazily. */
    ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
6459
6460
6461/**
6462 * Multiprocessor event notification callback.
6463 *
6464 * This is used to make sure that the GIP master gets passed on to
6465 * another CPU. It also updates the associated CPU data.
6466 *
6467 * @param enmEvent The event.
6468 * @param idCpu The cpu it applies to.
6469 * @param pvUser Pointer to the device extension.
6470 *
6471 * @remarks This function -must- fire on the newly online'd CPU for the
6472 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6473 * RTMPEVENT_OFFLINE case.
6474 */
6475static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6476{
6477 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6478 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6479
6480 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6481
6482 /*
6483 * Update the GIP CPU data.
6484 */
6485 if (pGip)
6486 {
6487 switch (enmEvent)
6488 {
6489 case RTMPEVENT_ONLINE:
6490 AssertRelease(idCpu == RTMpCpuId());
6491 supdrvGipMpEventOnline(pDevExt, idCpu);
6492 break;
6493 case RTMPEVENT_OFFLINE:
6494 supdrvGipMpEventOffline(pDevExt, idCpu);
6495 break;
6496 }
6497 }
6498
6499 /*
6500 * Make sure there is a master GIP.
6501 */
6502 if (enmEvent == RTMPEVENT_OFFLINE)
6503 {
6504 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6505 if (idGipMaster == idCpu)
6506 {
6507 /*
6508 * Find a new GIP master.
6509 */
6510 bool fIgnored;
6511 unsigned i;
6512 int64_t iTSCDelta;
6513 uint32_t idxNewGipMaster;
6514 RTCPUID idNewGipMaster = NIL_RTCPUID;
6515 RTCPUSET OnlineCpus;
6516 RTMpGetOnlineSet(&OnlineCpus);
6517
6518 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6519 {
6520 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6521 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6522 && idCurCpu != idGipMaster)
6523 {
6524 idNewGipMaster = idCurCpu;
6525 break;
6526 }
6527 }
6528
6529 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6530 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6531 NOREF(fIgnored);
6532
6533 /*
6534 * Adjust all the TSC deltas against the new GIP master.
6535 */
6536 if (pGip)
6537 {
6538 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6539 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6540 Assert(iTSCDelta != UINT64_MAX);
6541 for (i = 0; i < pGip->cCpus; i++)
6542 {
6543 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6544 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6545 if (iWorkerDelta != INT64_MAX)
6546 iWorkerDelta -= iTSCDelta;
6547 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6548 }
6549 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6550 }
6551 }
6552 }
6553}
6554
6555
6556/**
6557 * Returns whether the host CPU sports an invariant TSC or not.
6558 *
6559 * @returns true if invariant TSC is supported, false otherwise.
6560 */
6561static bool supdrvIsInvariantTsc(void)
6562{
6563 static bool s_fQueried = false;
6564 static bool s_fIsInvariantTsc = false;
6565 if (!s_fQueried)
6566 {
6567 uint32_t uEax, uEbx, uEcx, uEdx;
6568 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
6569 if (uEax >= 0x80000007)
6570 {
6571 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
6572 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
6573 s_fIsInvariantTsc = true;
6574 }
6575 s_fQueried = true;
6576 }
6577
6578 return s_fIsInvariantTsc;
6579}
6580
6581
/**
 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
 * compute the delta between them.
 *
 * @param   idCpu       The CPU we are current scheduled on.
 * @param   pvUser1     Opaque pointer to the GIP.
 * @param   pvUser2     Opaque pointer to the worker Cpu Id.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *     read the TSC at exactly the same time on both the master and the worker
 *     CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
 *     pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
 *     try to minimize the measurement error by computing the minimum read time
 *     of the compare statement in the worker by taking TSC measurements across
 *     it.
 *
 *     We ignore the first few runs of the loop in order to prime the cache.
 *     Also, be careful about using 'pause' instruction in critical busy-wait
 *     loops in this code - it can cause undesired behaviour with
 *     hyperthreading.
 *
 *     It must be noted that the computed minimum read time is mostly to
 *     eliminate huge deltas when the worker is too early and doesn't by itself
 *     help produce more accurate deltas. We allow two times the computed
 *     minimum as an arbitrary acceptable threshold. Therefore, it is still
 *     possible to get negative deltas where there are none when the worker is
 *     earlier. As long as these occasional negative deltas are lower than the
 *     time it takes to exit guest-context and the OS to reschedule EMT on a
 *     different CPU we won't expose a TSC that jumped backwards. It is because
 *     of the existence of the negative deltas we don't recompute the delta with
 *     the master and worker interchanged to eliminate the remaining measurement
 *     error.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    uint32_t *pidWorker = (uint32_t *)pvUser2;
    RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
    unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
    unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
    PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
    PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
    int cTriesLeft = 12;

    /* This callback fires on all online CPUs; only the master and the chosen
       worker take part, everybody else returns immediately. */
    if (   idCpu != idMaster
        && idCpu != *pidWorker)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        uint64_t uTscNow;
        uint64_t uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master: announce itself, then wait (bounded by cWaitTicks) for
               the worker to acknowledge. */
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker: wait (bounded) for the master's announcement, then ack. */
            while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Timed out waiting for the master; flag failure on the worker. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    /* Retry the whole measurement loop up to 12 times until a delta is recorded. */
    while (cTriesLeft-- > 0)
    {
        unsigned i;
        uint64_t uMinCmpReadTime = UINT64_MAX;
        for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
        {
            RTCCUINTREG uFlags = ASMIntDisableFlags(); /* Disable interrupts per-iteration, see @bugref{6710} comment #38. */
            if (idCpu == idMaster)
            {
                /*
                 * The master.
                 */
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
                /* Wait for the worker to see the start signal. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
                    ;

                /* Publish the master's TSC reading; loop guards against the
                   (theoretical) reserved value showing up as a real TSC. */
                do
                {
                    ASMSerializeInstruction();
                    ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

                /* Wait for the worker to finish its measurement. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ;

                /* Only take deltas after the priming + read-time calibration loops. */
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                    {
                        /* Keep the smallest (signed) delta seen so far. */
                        int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
                        if (iDelta < pGipCpuWorker->i64TSCDelta)
                            pGipCpuWorker->i64TSCDelta = iDelta;
                    }
                }

                /* Reset the sample and signal the end of this iteration. */
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
            }
            else
            {
                /*
                 * The worker.
                 */
                uint64_t uTscWorker;
                uint64_t uTscWorkerFlushed;
                uint64_t uCmpReadTime;

                ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
                    ;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);

                /*
                 * Keep reading the TSC until we notice that the master has read his. Reading
                 * the TSC -after- the master has updated the memory is way too late. We thus
                 * compensate by trying to measure how long it took for the worker to notice
                 * the memory flushed from the master.
                 */
                do
                {
                    ASMSerializeInstruction();
                    uTscWorker = ASMReadTSC();
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMSerializeInstruction();
                uTscWorkerFlushed = ASMReadTSC();

                uCmpReadTime = uTscWorkerFlushed - uTscWorker;
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                    if (uCmpReadTime < (uMinCmpReadTime << 1))
                    {
                        /* Acceptable read time: publish the sample for the master. */
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                        if (uCmpReadTime < uMinCmpReadTime)
                            uMinCmpReadTime = uCmpReadTime;
                    }
                    else
                        /* Too slow (contention, SMI, ...): discard this sample. */
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
                }
                else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
                {
                    /* Calibration phase: just track the minimum compare-read time. */
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }

                /* Tell the master we're done and wait for it to reset the state. */
                ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
                while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ASMNopPause();
            }

            ASMSetFlags(uFlags);
        }

        /* A recorded delta (!= INT64_MAX) ends the retries. */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
            break;
    }
}
6775
6776
6777/**
6778 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
6779 * synchronization variable. Optionally also clears the deltas on the per-CPU
6780 * GIP struct. as well.
6781 *
6782 * @param pGip Pointer to the GIP.
6783 * @param fClearDeltas Whether the deltas are also to be cleared.
6784 */
6785DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
6786{
6787 unsigned iCpu;
6788 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6789 {
6790 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6791 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
6792 if (fClearDeltas)
6793 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
6794 }
6795 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6796}
6797
6798
/**
 * Measures the TSC delta between the master GIP CPU and one specified worker
 * CPU.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   idxWorker       The index of the worker CPU from the GIP's array of
 *                          CPUs.
 *
 * @remarks This can be called with preemption disabled!
 */
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
{
    int rc;
    PSUPGLOBALINFOPAGE pGip;
    PSUPGIPCPU pGipCpuWorker;
    RTCPUID idMaster;

    AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
    AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);

    pGip = pDevExt->pGip;
    idMaster = pDevExt->idGipMaster;
    pGipCpuWorker = &pGip->aCPUs[idxWorker];

    /* The master's delta against itself is zero by definition. */
    if (pGipCpuWorker->idCpu == idMaster)
    {
        ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
        return VINF_SUCCESS;
    }

    /* Set the master TSC as the initiator.  This claim on the global
       serializes concurrent measurements to one master/worker pair at a time. */
    while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
    {
        /*
         * Sleep here rather than spin as there is a parallel measurement
         * being executed and that can take a good while to be done.
         */
        RTThreadSleep(1);
    }

    if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
    {
        /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
        ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
        ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
        rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
        if (RT_SUCCESS(rc))
        {
            /* INT64_MAX still in place means the callback never recorded a delta. */
            if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
                rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
        }
    }
    else
        rc = VERR_CPU_OFFLINE;

    /* Release the initiator claim so the next measurement can proceed. */
    ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
    return rc;
}
6858
6859
6860/**
6861 * Measures the TSC deltas between CPUs.
6862 *
6863 * @param pDevExt Pointer to the device instance data.
6864 * @param pidxMaster Where to store the index of the chosen master TSC if we
6865 * managed to determine the TSC deltas successfully.
6866 * Optional, can be NULL.
6867 *
6868 * @returns VBox status code.
6869 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
6870 * idCpu, GIP's online CPU set which are populated in
6871 * supdrvGipInitOnCpu().
6872 */
6873static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
6874{
6875 PSUPGIPCPU pGipCpuMaster;
6876 unsigned iCpu;
6877 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6878 uint32_t idxMaster = UINT32_MAX;
6879 int rc = VINF_SUCCESS;
6880 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
6881 uint32_t cOnlineCpus = pGip->cOnlineCpus;
6882
6883 /*
6884 * If we determined the TSC is async., don't bother with measuring deltas.
6885 */
6886 if (RT_UNLIKELY(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC))
6887 return VINF_SUCCESS;
6888
6889 /*
6890 * Pick the first CPU online as the master TSC and make it the new GIP master based
6891 * on the APIC ID.
6892 *
6893 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
6894 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
6895 * master as this point since the sync/async timer isn't created yet.
6896 */
6897 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
6898 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
6899 {
6900 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
6901 if (idxCpu != UINT16_MAX)
6902 {
6903 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
6904 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
6905 {
6906 idxMaster = idxCpu;
6907 pGipCpu->i64TSCDelta = 0;
6908 break;
6909 }
6910 }
6911 }
6912 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
6913 pGipCpuMaster = &pGip->aCPUs[idxMaster];
6914 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6915
6916 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
6917 if (pGip->cOnlineCpus <= 1)
6918 {
6919 if (pidxMaster)
6920 *pidxMaster = idxMaster;
6921 return VINF_SUCCESS;
6922 }
6923
6924 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6925 {
6926 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6927 if ( iCpu != idxMaster
6928 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6929 {
6930 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6931 if (RT_FAILURE(rc))
6932 {
6933 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
6934 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
6935 break;
6936 }
6937
6938 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
6939 {
6940 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
6941 rc = VERR_TRY_AGAIN;
6942 break;
6943 }
6944 }
6945 }
6946
6947 if ( RT_SUCCESS(rc)
6948 && !pGipCpuMaster->i64TSCDelta
6949 && pidxMaster)
6950 {
6951 *pidxMaster = idxMaster;
6952 }
6953 return rc;
6954}
6955
6956
6957/**
6958 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
6959 *
6960 * @param idCpu Ignored.
6961 * @param pvUser1 Where to put the TSC.
6962 * @param pvUser2 Ignored.
6963 */
6964static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6965{
6966 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
6967}
6968
6969
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) runs never backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation. This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Pointer to the determined difference between different cores.
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing. We don't bother taking TSC rollover into account.
     */
    int iEndCpu = RTMpGetArraySize();
    int iCpu;
    int cLoops = 8;
    bool fAsync = false;
    int rc = VINF_SUCCESS;
    uint64_t offMax = 0;
    uint64_t offMin = ~(uint64_t)0;
    uint64_t PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
        {
            uint64_t CurTsc;
            /* Read the TSC on each CPU in turn and compare with the previous read. */
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
            if (RT_SUCCESS(rc))
            {
                if (CurTsc <= PrevTsc)
                {
                    /* TSC went backwards (or stood still) across CPUs -> async mode. */
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                         iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                /* RTMpOnSpecific unsupported on this platform - give up entirely. */
                break;
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the loop. */
        if (iCpu < iEndCpu)
            break;
    }

    *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
         fAsync, iEndCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
7047
7048
/**
 * Determine the GIP TSC mode.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
{
#if 0
    if (supdrvIsInvariantTsc())
        return SUPGIPMODE_SYNC_TSC;     /** @todo Switch to SUPGIPMODE_INVARIANT_TSC later. */
#endif

    /*
     * On SMP we're faced with two problems:
     *      (1) There might be a skew between the CPU, so that cpu0
     *          returns a TSC that is slightly different from cpu1.
     *      (2) Power management (and other things) may cause the TSC
     *          to run at a non-constant speed, and cause the speed
     *          to be different on the cpus. This will result in (1).
     *
     * So, on SMP systems we'll have to select the ASYNC update method
     * if there are symptoms of these problems.
     */
    if (RTMpGetCount() > 1)
    {
        uint32_t uEAX, uEBX, uECX, uEDX;
        uint64_t u64DiffCoresIgnored;

        /* Permit the user and/or the OS specific bits to force async mode. */
        if (supdrvOSGetForcedAsyncTscMode(pDevExt))
            return SUPGIPMODE_ASYNC_TSC;

        /* Try check for current differences between the cpus. */
        if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
            return SUPGIPMODE_ASYNC_TSC;

        /*
         * If the CPU supports power management and is an AMD one we
         * won't trust it unless it has the TscInvariant bit is set.
         */
        /* Check for "AuthenticAMD" */
        ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
        if (   uEAX >= 1
            && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
        {
            /* Check for APM support and that TscInvariant is cleared. */
            ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
            if (uEAX >= 0x80000007)
            {
                ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
                if (   !(uEDX & RT_BIT(8))/* TscInvariant */
                    && (uEDX & 0x3e))  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                    return SUPGIPMODE_ASYNC_TSC;
            }
        }
    }
    return SUPGIPMODE_SYNC_TSC;
}
7108
7109
7110/**
7111 * Initializes per-CPU GIP information.
7112 *
7113 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7114 * @param pCpu Pointer to which GIP CPU to initalize.
7115 * @param u64NanoTS The current nanosecond timestamp.
7116 */
7117static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7118{
7119 pCpu->u32TransactionId = 2;
7120 pCpu->u64NanoTS = u64NanoTS;
7121 pCpu->u64TSC = ASMReadTSC();
7122 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7123 pCpu->i64TSCDelta = INT64_MAX;
7124
7125 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7126 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7127 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7128 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7129
7130 /*
7131 * We don't know the following values until we've executed updates.
7132 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7133 * the 2nd timer callout.
7134 */
7135 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7136 pCpu->u32UpdateIntervalTSC
7137 = pCpu->au32TSCHistory[0]
7138 = pCpu->au32TSCHistory[1]
7139 = pCpu->au32TSCHistory[2]
7140 = pCpu->au32TSCHistory[3]
7141 = pCpu->au32TSCHistory[4]
7142 = pCpu->au32TSCHistory[5]
7143 = pCpu->au32TSCHistory[6]
7144 = pCpu->au32TSCHistory[7]
7145 = (uint32_t)(_4G / pGip->u32UpdateHz);
7146}
7147
7148
/**
 * Initializes the GIP data.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);
    pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
    pGip->cCpus = (uint16_t)cCpus;
    pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz = uUpdateHz;
    pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus = RTMpGetOnlineCount();
    pGip->cPresentCpus = RTMpGetPresentCount();
    pGip->cPossibleCpus = RTMpGetCount();
    pGip->idCpuMax = RTMpGetMaxCpuId();
    /* Invalidate the APIC-id and CPU-set-index lookup tables; they are filled
       in later when CPUs come online. */
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i] = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;

    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;

    /*
     * Allocate the TSC delta sync. struct. on a separate cache line.
     */
    /* Over-allocate by 63 bytes so the 64-byte aligned pointer fits inside. */
    g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
    g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
    Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
}
7211
7212
7213/**
7214 * On CPU initialization callback for RTMpOnAll.
7215 *
7216 * @param idCpu The CPU ID.
7217 * @param pvUser1 The device extension.
7218 * @param pvUser2 The GIP.
7219 */
7220static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7221{
7222 /* This is good enough, even though it will update some of the globals a
7223 bit to much. */
7224 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7225}
7226
7227
7228/**
7229 * Invalidates the GIP data upon termination.
7230 *
7231 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7232 */
7233static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7234{
7235 unsigned i;
7236 pGip->u32Magic = 0;
7237 for (i = 0; i < pGip->cCpus; i++)
7238 {
7239 pGip->aCPUs[i].u64NanoTS = 0;
7240 pGip->aCPUs[i].u64TSC = 0;
7241 pGip->aCPUs[i].iTSCHistoryHead = 0;
7242 pGip->aCPUs[i].u64TSCSample = 0;
7243 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7244 }
7245
7246 if (g_pvTscDeltaSync)
7247 {
7248 RTMemFree(g_pvTscDeltaSync);
7249 g_pTscDeltaSync = NULL;
7250 g_pvTscDeltaSync = NULL;
7251 }
7252}
7253
7254
/**
 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
 * updates all the per cpu data except the transaction id.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per cpu data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t u64TSCDelta;
    uint32_t u32UpdateIntervalTSC;
    uint32_t u32UpdateIntervalTSCSlack;
    unsigned iTSCHistoryHead;
    uint64_t u64CpuHz;
    uint32_t u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* A delta that doesn't fit in 32 bits is bogus (missed ticks, TSC jump);
       substitute the last known good interval and count the error. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * On the 2nd and 3rd callout, reset the history with the current TSC
     * interval since the values entered by supdrvGipInit are totally off.
     * The interval on the 1st callout completely unreliable, the 2nd is a bit
     * better, while the 3rd should be most reliable.
     */
    u32TransactionId = pGipCpu->u32TransactionId;
    if (RT_UNLIKELY(   (   u32TransactionId == 5
                        || u32TransactionId == 7)
                    && (   iTick == 2
                        || iTick == 3) ))
    {
        unsigned i;
        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    }

    /*
     * TSC History.
     */
    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7; /* 8-entry ring buffer. */
    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     *
     * On Windows, we have an occasional (but recurring) sour value that messed up
     * the history but taking only 1 interval reduces the precision overall.
     * However, this problem existed before the invariant mode was introduced.
     */
    if (   supdrvIsInvariantTsc()
        || pGip->u32UpdateHz >= 1000)
    {
        /* Average over all 8 history entries (two 4-entry partial averages). */
        uint32_t u32;
        u32  = pGipCpu->au32TSCHistory[0];
        u32 += pGipCpu->au32TSCHistory[1];
        u32 += pGipCpu->au32TSCHistory[2];
        u32 += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11, . */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        /* Average of the current and the previous interval only. */
        u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* value chosen on a 2GHz thinkpad running windows */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        /* Slow update rate: just use the current interval as-is. */
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC_64);
    u64CpuHz /= pGip->u32UpdateIntervalNS;
    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
7375
7376
/**
 * Updates the GIP.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timesamp.
 * @param   u64TSC          The current TSC timesamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0]; /* Sync mode: a single shared entry. */
    else
    {
        /* Async mode: locate this CPU's entry via its APIC id; bail out if
           the lookup fails or the entry belongs to a different CPU. */
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    /* The transaction id must be odd while an update is in progress. */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            /* Derive the effective update frequency from the time it took to
               do the last GIP_UPDATEHZ_RECALC_FREQ updates. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, u64Delta / GIP_UPDATEHZ_RECALC_FREQ);
            }
#endif
        }
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS + 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
7452
7453
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timesamp.
 * @param   u64TSC          The current TSC timesamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnline(pDevExt, idCpu);
    }

    /* Look up this CPU's GIP entry by APIC id and verify it really is ours. */
    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            /* The transaction id must be odd while an update is in progress. */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
7511
7512
7513/**
7514 * Resume built-in keyboard on MacBook Air and Pro hosts.
7515 * If there is no built-in keyboard device, return success anyway.
7516 *
7517 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7518 */
7519static int supdrvIOCtl_ResumeSuspendedKbds(void)
7520{
7521#if defined(RT_OS_DARWIN)
7522 return supdrvDarwinResumeSuspendedKbds();
7523#else
7524 return VERR_NOT_IMPLEMENTED;
7525#endif
7526}
7527
7528
7529/**
7530 * Service a TSC-delta measurement request.
7531 *
7532 * @returns VBox status code.
7533 * @param pDevExt Pointer to the device instance data.
7534 * @param pReq Pointer to the TSC-delta measurement request.
7535 */
7536static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7537{
7538 PSUPGLOBALINFOPAGE pGip;
7539 RTCPUID idCpuWorker;
7540 int rc = VERR_CPU_NOT_FOUND;
7541 int16_t cTries;
7542 RTMSINTERVAL cMsWaitRetry;
7543 uint16_t iCpu;
7544
7545 /*
7546 * Validate.
7547 */
7548 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7549 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7550 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7551 idCpuWorker = pReq->u.In.idCpu;
7552 if (idCpuWorker == NIL_RTCPUID)
7553 return VERR_INVALID_CPU_ID;
7554
7555 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7556 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7557 pGip = pDevExt->pGip;
7558 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7559 {
7560 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7561 if (pGipCpuWorker->idCpu == idCpuWorker)
7562 {
7563 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7564 && !pReq->u.In.fForce)
7565 return VINF_SUCCESS;
7566
7567#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7568 if (pReq->u.In.fAsync)
7569 {
7570 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7571 * to pass those options to the thread somehow and implement it in the
7572 * thread. Check if anyone uses/needs fAsync before implementing this. */
7573 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
7574 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7575 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7576 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7577 {
7578 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7579 }
7580 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7581 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7582 return VINF_SUCCESS;
7583 }
7584#endif
7585
7586 while (cTries--)
7587 {
7588 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7589 if (RT_SUCCESS(rc))
7590 {
7591 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7592 break;
7593 }
7594
7595 if (cMsWaitRetry)
7596 RTThreadSleep(cMsWaitRetry);
7597 }
7598
7599 break;
7600 }
7601 }
7602 return rc;
7603}
7604
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette