VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53396

Last change on this file since 53396 was 53396, checked in by vboxsync, 10 years ago

HostDrivers/Support: Don't try measuring TSC deltas on OSes that normalizes TSC-deltas themselves, currently only Windows.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 288.9 KB
Line 
1/* $Id: SUPDrv.c 53396 2014-11-25 15:01:59Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
92/** The frequency by which we recalculate the u32UpdateHz and
93 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
94 *
95 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
96 */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/** A reserved TSC value used for synchronization as well as measurement of
100 * TSC deltas. */
101#define GIP_TSC_DELTA_RSVD UINT64_MAX
102/** The number of TSC delta measurement loops in total (includes primer and
103 * read-time loops). */
104#define GIP_TSC_DELTA_LOOPS 96
105/** The number of cache primer loops. */
106#define GIP_TSC_DELTA_PRIMER_LOOPS 4
107/** The number of loops until we keep computing the minimum read time. */
108#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
109/** Stop measurement of TSC delta. */
110#define GIP_TSC_DELTA_SYNC_STOP 0
111/** Start measurement of TSC delta. */
112#define GIP_TSC_DELTA_SYNC_START 1
113/** Worker thread is ready for reading the TSC. */
114#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
115/** Worker thread is done updating TSC delta info. */
116#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
117/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
118 * with a timeout. */
119#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
120/** When IPRT isn't concurrent safe: Worker is ready after waiting for
121 * master with a timeout. */
122#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
123
124AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
125AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
126
127/** @def VBOX_SVN_REV
128 * The makefile should define this if it can. */
129#ifndef VBOX_SVN_REV
130# define VBOX_SVN_REV 0
131#endif
132
133#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
134# define DO_NOT_START_GIP
135#endif
136
137
138/*******************************************************************************
139* Internal Functions *
140*******************************************************************************/
141static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
142static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
143static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
144static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
145static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
146static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
147static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
148static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
149static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
150static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
151static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
152static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
153static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
154DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
155DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
156static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
157static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
158static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
159static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
160static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
161static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
162static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
163static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
164static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
165static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
166static bool supdrvIsInvariantTsc(void);
167static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
168 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
169static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
170static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
171static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
172static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
173 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
174static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
175static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
176static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
177static int supdrvIOCtl_ResumeSuspendedKbds(void);
178
179
180/*******************************************************************************
181* Global Variables *
182*******************************************************************************/
183DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
184
185/**
186 * The TSC delta synchronization struct, rounded to cache line size.
187 */
188typedef union SUPTSCDELTASYNC
189{
190 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
191 volatile uint32_t u;
192 /** Padding to cache line size. */
193 uint8_t u8Padding[64];
194} SUPTSCDELTASYNC;
195AssertCompileSize(SUPTSCDELTASYNC, 64);
196typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;
197
198/** Pointer to the TSC delta sync. struct. */
199static void *g_pvTscDeltaSync;
200/** Aligned pointer to the TSC delta sync. struct. */
201static PSUPTSCDELTASYNC g_pTscDeltaSync;
202/** The TSC delta measurement initiator Cpu Id. */
203static volatile RTCPUID g_idTscDeltaInitiator = NIL_RTCPUID;
204/** Number of online/offline events, incremented each time a CPU goes online
205 * or offline. */
206static volatile uint32_t g_cMpOnOffEvents;
207/** TSC reading during start of TSC frequency refinement phase. */
208static uint64_t g_u64TSCAnchor;
209/** Timestamp (in nanosec) during start of TSC frequency refinement phase. */
210static uint64_t g_u64NanoTSAnchor;
211/** Whether the host OS has already normalized the hardware TSC deltas across
212 * CPUs. */
213static bool g_fOsTscDeltasInSync;
214
215/**
216 * Array of the R0 SUP API.
217 */
218static SUPFUNC g_aFunctions[] =
219{
220/* SED: START */
221 /* name function */
222 /* Entries with absolute addresses determined at runtime, fixup
223 code makes ugly ASSUMPTIONS about the order here: */
224 { "SUPR0AbsIs64bit", (void *)0 },
225 { "SUPR0Abs64bitKernelCS", (void *)0 },
226 { "SUPR0Abs64bitKernelSS", (void *)0 },
227 { "SUPR0Abs64bitKernelDS", (void *)0 },
228 { "SUPR0AbsKernelCS", (void *)0 },
229 { "SUPR0AbsKernelSS", (void *)0 },
230 { "SUPR0AbsKernelDS", (void *)0 },
231 { "SUPR0AbsKernelES", (void *)0 },
232 { "SUPR0AbsKernelFS", (void *)0 },
233 { "SUPR0AbsKernelGS", (void *)0 },
234 /* Normal function pointers: */
235 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
236 { "SUPGetGIP", (void *)SUPGetGIP },
237 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
238 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
239 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
240 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
241 { "SUPR0ContFree", (void *)SUPR0ContFree },
242 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
243 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
244 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
245 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
246 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
247 { "SUPR0LockMem", (void *)SUPR0LockMem },
248 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
249 { "SUPR0LowFree", (void *)SUPR0LowFree },
250 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
251 { "SUPR0MemFree", (void *)SUPR0MemFree },
252 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
253 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
254 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
255 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
256 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
257 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
258 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
259 { "SUPR0PageFree", (void *)SUPR0PageFree },
260 { "SUPR0Printf", (void *)SUPR0Printf },
261 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
262 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
263 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
264 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
265 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
266 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
267 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
268 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
269 { "SUPSemEventClose", (void *)SUPSemEventClose },
270 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
271 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
272 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
273 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
274 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
275 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
276 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
277 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
278 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
279 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
280 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
281 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
282 { "SUPSemEventWait", (void *)SUPSemEventWait },
283 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
284 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
285 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
286
287 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
288 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
289 { "RTAssertMsg1", (void *)RTAssertMsg1 },
290 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
291 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
292 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
293 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
294 { "RTCrc32", (void *)RTCrc32 },
295 { "RTCrc32Finish", (void *)RTCrc32Finish },
296 { "RTCrc32Process", (void *)RTCrc32Process },
297 { "RTCrc32Start", (void *)RTCrc32Start },
298 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
299 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
300 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
301 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
302 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
303 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
304 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
305 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
306 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
307 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
308 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
309 { "RTLogPrintfV", (void *)RTLogPrintfV },
310 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
311 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
312 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
313 { "RTMemAllocTag", (void *)RTMemAllocTag },
314 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
315 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
316 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
317 { "RTMemDupExTag", (void *)RTMemDupExTag },
318 { "RTMemDupTag", (void *)RTMemDupTag },
319 { "RTMemFree", (void *)RTMemFree },
320 { "RTMemFreeEx", (void *)RTMemFreeEx },
321 { "RTMemReallocTag", (void *)RTMemReallocTag },
322 { "RTMpCpuId", (void *)RTMpCpuId },
323 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
324 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
325 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
326 { "RTMpGetCount", (void *)RTMpGetCount },
327 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
328 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
329 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
330 { "RTMpGetSet", (void *)RTMpGetSet },
331 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
332 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
333 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
334 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
335 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
336 { "RTMpOnAll", (void *)RTMpOnAll },
337 { "RTMpOnOthers", (void *)RTMpOnOthers },
338 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
339 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
340 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
341 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
342 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
343 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
344 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
345 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
346 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
347 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
348 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
349 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
350 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
351 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
352 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
353 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
354 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
355 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
356 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
357 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
358 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
359 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
360 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
361 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
362 { "RTProcSelf", (void *)RTProcSelf },
363 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
364 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
365 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
366 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
367 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
368 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
369 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
370 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
371 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
372 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
373 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
374 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
375 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
376 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
377 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
378 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
379 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
380 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
381 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
382 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
383 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
384 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
385 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
386 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
387 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
388 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
389 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
390 { "RTSemEventCreate", (void *)RTSemEventCreate },
391 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
392 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
393 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
394 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
395 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
396 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
397 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
398 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
399 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
400 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
401 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
402 { "RTSemEventSignal", (void *)RTSemEventSignal },
403 { "RTSemEventWait", (void *)RTSemEventWait },
404 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
405 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
406 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
407 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
408 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
409 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
410 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
411 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
412 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
413 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
414 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
415 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
416 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
417 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
418 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
419 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
420 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
421 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
422 { "RTStrCopy", (void *)RTStrCopy },
423 { "RTStrDupTag", (void *)RTStrDupTag },
424 { "RTStrFormat", (void *)RTStrFormat },
425 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
426 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
427 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
428 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
429 { "RTStrFormatV", (void *)RTStrFormatV },
430 { "RTStrFree", (void *)RTStrFree },
431 { "RTStrNCmp", (void *)RTStrNCmp },
432 { "RTStrPrintf", (void *)RTStrPrintf },
433 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
434 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
435 { "RTStrPrintfV", (void *)RTStrPrintfV },
436 { "RTThreadCreate", (void *)RTThreadCreate },
437 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
438 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
439 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
440 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
441 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
442 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
443 { "RTThreadGetName", (void *)RTThreadGetName },
444 { "RTThreadGetNative", (void *)RTThreadGetNative },
445 { "RTThreadGetType", (void *)RTThreadGetType },
446 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
447 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
448 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
449 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
450 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
451 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
452 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
453 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
454 { "RTThreadSelf", (void *)RTThreadSelf },
455 { "RTThreadSelfName", (void *)RTThreadSelfName },
456 { "RTThreadSleep", (void *)RTThreadSleep },
457 { "RTThreadUserReset", (void *)RTThreadUserReset },
458 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
459 { "RTThreadUserWait", (void *)RTThreadUserWait },
460 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
461 { "RTThreadWait", (void *)RTThreadWait },
462 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
463 { "RTThreadYield", (void *)RTThreadYield },
464 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
465 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
466 { "RTTimeNow", (void *)RTTimeNow },
467 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
468 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
469 { "RTTimerCreate", (void *)RTTimerCreate },
470 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
471 { "RTTimerDestroy", (void *)RTTimerDestroy },
472 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
473 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
474 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
475 { "RTTimerStart", (void *)RTTimerStart },
476 { "RTTimerStop", (void *)RTTimerStop },
477 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
478 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
479 { "RTUuidCompare", (void *)RTUuidCompare },
480 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
481 { "RTUuidFromStr", (void *)RTUuidFromStr },
482/* SED: END */
483};
484
485#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
486/**
487 * Drag in the rest of IPRT since we share it with the
488 * rest of the kernel modules on darwin.
489 */
490PFNRT g_apfnVBoxDrvIPRTDeps[] =
491{
492 /* VBoxNetAdp */
493 (PFNRT)RTRandBytes,
494 /* VBoxUSB */
495 (PFNRT)RTPathStripFilename,
496 NULL
497};
498#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
499
500
501/**
502 * Initializes the device extentsion structure.
503 *
504 * @returns IPRT status code.
505 * @param pDevExt The device extension to initialize.
506 * @param cbSession The size of the session structure. The size of
507 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
508 * defined because we're skipping the OS specific members
509 * then.
510 */
511int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
512{
513 int rc;
514
515#ifdef SUPDRV_WITH_RELEASE_LOGGER
516 /*
517 * Create the release log.
518 */
519 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
520 PRTLOGGER pRelLogger;
521 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
522 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
523 if (RT_SUCCESS(rc))
524 RTLogRelSetDefaultInstance(pRelLogger);
525 /** @todo Add native hook for getting logger config parameters and setting
526 * them. On linux we should use the module parameter stuff... */
527#endif
528
529 /*
530 * Initialize it.
531 */
532 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
533 pDevExt->Spinlock = NIL_RTSPINLOCK;
534 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
535 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
536 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
537 if (RT_SUCCESS(rc))
538 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
539 if (RT_SUCCESS(rc))
540 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
541
542 if (RT_SUCCESS(rc))
543#ifdef SUPDRV_USE_MUTEX_FOR_LDR
544 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
545#else
546 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
547#endif
548 if (RT_SUCCESS(rc))
549 {
550 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
551 if (RT_SUCCESS(rc))
552 {
553#ifdef SUPDRV_USE_MUTEX_FOR_LDR
554 rc = RTSemMutexCreate(&pDevExt->mtxGip);
555#else
556 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
557#endif
558 if (RT_SUCCESS(rc))
559 {
560 rc = supdrvGipCreate(pDevExt);
561 if (RT_SUCCESS(rc))
562 {
563 rc = supdrvTracerInit(pDevExt);
564 if (RT_SUCCESS(rc))
565 {
566 pDevExt->pLdrInitImage = NULL;
567 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
568 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
569 pDevExt->cbSession = (uint32_t)cbSession;
570
571 /*
572 * Fixup the absolute symbols.
573 *
574 * Because of the table indexing assumptions we'll have a little #ifdef orgy
575 * here rather than distributing this to OS specific files. At least for now.
576 */
577#ifdef RT_OS_DARWIN
578# if ARCH_BITS == 32
579 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
580 {
581 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
582 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
583 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
584 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
585 }
586 else
587 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
588 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
589 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
590 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
591 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
592 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
593 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
594# else /* 64-bit darwin: */
595 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
596 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
597 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
598 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
599 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
600 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
601 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
602 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
603 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
604 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
605
606# endif
607#else /* !RT_OS_DARWIN */
608# if ARCH_BITS == 64
609 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
610 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
611 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
612 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
613# else
614 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
615# endif
616 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
617 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
618 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
619 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
620 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
621 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
622#endif /* !RT_OS_DARWIN */
623 return VINF_SUCCESS;
624 }
625
626 supdrvGipDestroy(pDevExt);
627 }
628
629#ifdef SUPDRV_USE_MUTEX_FOR_GIP
630 RTSemMutexDestroy(pDevExt->mtxGip);
631 pDevExt->mtxGip = NIL_RTSEMMUTEX;
632#else
633 RTSemFastMutexDestroy(pDevExt->mtxGip);
634 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
635#endif
636 }
637 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
638 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
639 }
640#ifdef SUPDRV_USE_MUTEX_FOR_LDR
641 RTSemMutexDestroy(pDevExt->mtxLdr);
642 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
643#else
644 RTSemFastMutexDestroy(pDevExt->mtxLdr);
645 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
646#endif
647 }
648
649 RTSpinlockDestroy(pDevExt->Spinlock);
650 pDevExt->Spinlock = NIL_RTSPINLOCK;
651 RTSpinlockDestroy(pDevExt->hGipSpinlock);
652 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
653 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
654 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
655
656#ifdef SUPDRV_WITH_RELEASE_LOGGER
657 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
658 RTLogDestroy(RTLogSetDefaultInstance(NULL));
659#endif
660
661 return rc;
662}
663
664
665/**
666 * Delete the device extension (e.g. cleanup members).
667 *
668 * @param pDevExt The device extension to delete.
669 */
670void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
671{
672 PSUPDRVOBJ pObj;
673 PSUPDRVUSAGE pUsage;
674
675 /*
676 * Kill mutexes and spinlocks.
677 */
678#ifdef SUPDRV_USE_MUTEX_FOR_GIP
679 RTSemMutexDestroy(pDevExt->mtxGip);
680 pDevExt->mtxGip = NIL_RTSEMMUTEX;
681#else
682 RTSemFastMutexDestroy(pDevExt->mtxGip);
683 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
684#endif
685#ifdef SUPDRV_USE_MUTEX_FOR_LDR
686 RTSemMutexDestroy(pDevExt->mtxLdr);
687 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
688#else
689 RTSemFastMutexDestroy(pDevExt->mtxLdr);
690 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
691#endif
692 RTSpinlockDestroy(pDevExt->Spinlock);
693 pDevExt->Spinlock = NIL_RTSPINLOCK;
694 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
695 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
696 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
697 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
698
699 /*
700 * Free lists.
701 */
702 /* objects. */
703 pObj = pDevExt->pObjs;
704 Assert(!pObj); /* (can trigger on forced unloads) */
705 pDevExt->pObjs = NULL;
706 while (pObj)
707 {
708 void *pvFree = pObj;
709 pObj = pObj->pNext;
710 RTMemFree(pvFree);
711 }
712
713 /* usage records. */
714 pUsage = pDevExt->pUsageFree;
715 pDevExt->pUsageFree = NULL;
716 while (pUsage)
717 {
718 void *pvFree = pUsage;
719 pUsage = pUsage->pNext;
720 RTMemFree(pvFree);
721 }
722
723 /* kill the GIP. */
724 supdrvGipDestroy(pDevExt);
725 RTSpinlockDestroy(pDevExt->hGipSpinlock);
726 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
727
728 supdrvTracerTerm(pDevExt);
729
730#ifdef SUPDRV_WITH_RELEASE_LOGGER
731 /* destroy the loggers. */
732 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
733 RTLogDestroy(RTLogSetDefaultInstance(NULL));
734#endif
735}
736
737
738/**
739 * Create session.
740 *
741 * @returns IPRT status code.
742 * @param pDevExt Device extension.
743 * @param fUser Flag indicating whether this is a user or kernel
744 * session.
745 * @param fUnrestricted Unrestricted access (system) or restricted access
746 * (user)?
747 * @param ppSession Where to store the pointer to the session data.
748 */
749int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
750{
751 int rc;
752 PSUPDRVSESSION pSession;
753
754 if (!SUP_IS_DEVEXT_VALID(pDevExt))
755 return VERR_INVALID_PARAMETER;
756
757 /*
758 * Allocate memory for the session data.
759 */
760 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
761 if (pSession)
762 {
763 /* Initialize session data. */
764 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
765 if (!rc)
766 {
767 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
768 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
769 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
770 if (RT_SUCCESS(rc))
771 {
772 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
773 pSession->pDevExt = pDevExt;
774 pSession->u32Cookie = BIRD_INV;
775 pSession->fUnrestricted = fUnrestricted;
776 /*pSession->fInHashTable = false; */
777 pSession->cRefs = 1;
778 /*pSession->pCommonNextHash = NULL;
779 pSession->ppOsSessionPtr = NULL; */
780 if (fUser)
781 {
782 pSession->Process = RTProcSelf();
783 pSession->R0Process = RTR0ProcHandleSelf();
784 }
785 else
786 {
787 pSession->Process = NIL_RTPROCESS;
788 pSession->R0Process = NIL_RTR0PROCESS;
789 }
790 /*pSession->pLdrUsage = NULL;
791 pSession->pVM = NULL;
792 pSession->pUsage = NULL;
793 pSession->pGip = NULL;
794 pSession->fGipReferenced = false;
795 pSession->Bundle.cUsed = 0; */
796 pSession->Uid = NIL_RTUID;
797 pSession->Gid = NIL_RTGID;
798 /*pSession->uTracerData = 0;*/
799 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
800 RTListInit(&pSession->TpProviders);
801 /*pSession->cTpProviders = 0;*/
802 /*pSession->cTpProbesFiring = 0;*/
803 RTListInit(&pSession->TpUmods);
804 /*RT_ZERO(pSession->apTpLookupTable);*/
805
806 VBOXDRV_SESSION_CREATE(pSession, fUser);
807 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
808 return VINF_SUCCESS;
809 }
810
811 RTSpinlockDestroy(pSession->Spinlock);
812 }
813 RTMemFree(pSession);
814 *ppSession = NULL;
815 Log(("Failed to create spinlock, rc=%d!\n", rc));
816 }
817 else
818 rc = VERR_NO_MEMORY;
819
820 return rc;
821}
822
823
824/**
825 * Cleans up the session in the context of the process to which it belongs, the
826 * caller will free the session and the session spinlock.
827 *
828 * This should normally occur when the session is closed or as the process
829 * exits. Careful reference counting in the OS specfic code makes sure that
830 * there cannot be any races between process/handle cleanup callbacks and
831 * threads doing I/O control calls.
832 *
833 * @param pDevExt The device extension.
834 * @param pSession Session data.
835 */
836static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
837{
838 int rc;
839 PSUPDRVBUNDLE pBundle;
840 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
841
842 Assert(!pSession->fInHashTable);
843 Assert(!pSession->ppOsSessionPtr);
844 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
845 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
846
847 /*
848 * Remove logger instances related to this session.
849 */
850 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
851
852 /*
853 * Destroy the handle table.
854 */
855 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
856 AssertRC(rc);
857 pSession->hHandleTable = NIL_RTHANDLETABLE;
858
859 /*
860 * Release object references made in this session.
861 * In theory there should be noone racing us in this session.
862 */
863 Log2(("release objects - start\n"));
864 if (pSession->pUsage)
865 {
866 PSUPDRVUSAGE pUsage;
867 RTSpinlockAcquire(pDevExt->Spinlock);
868
869 while ((pUsage = pSession->pUsage) != NULL)
870 {
871 PSUPDRVOBJ pObj = pUsage->pObj;
872 pSession->pUsage = pUsage->pNext;
873
874 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
875 if (pUsage->cUsage < pObj->cUsage)
876 {
877 pObj->cUsage -= pUsage->cUsage;
878 RTSpinlockRelease(pDevExt->Spinlock);
879 }
880 else
881 {
882 /* Destroy the object and free the record. */
883 if (pDevExt->pObjs == pObj)
884 pDevExt->pObjs = pObj->pNext;
885 else
886 {
887 PSUPDRVOBJ pObjPrev;
888 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
889 if (pObjPrev->pNext == pObj)
890 {
891 pObjPrev->pNext = pObj->pNext;
892 break;
893 }
894 Assert(pObjPrev);
895 }
896 RTSpinlockRelease(pDevExt->Spinlock);
897
898 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
899 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
900 if (pObj->pfnDestructor)
901 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
902 RTMemFree(pObj);
903 }
904
905 /* free it and continue. */
906 RTMemFree(pUsage);
907
908 RTSpinlockAcquire(pDevExt->Spinlock);
909 }
910
911 RTSpinlockRelease(pDevExt->Spinlock);
912 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
913 }
914 Log2(("release objects - done\n"));
915
916 /*
917 * Do tracer cleanups related to this session.
918 */
919 Log2(("release tracer stuff - start\n"));
920 supdrvTracerCleanupSession(pDevExt, pSession);
921 Log2(("release tracer stuff - end\n"));
922
923 /*
924 * Release memory allocated in the session.
925 *
926 * We do not serialize this as we assume that the application will
927 * not allocated memory while closing the file handle object.
928 */
929 Log2(("freeing memory:\n"));
930 pBundle = &pSession->Bundle;
931 while (pBundle)
932 {
933 PSUPDRVBUNDLE pToFree;
934 unsigned i;
935
936 /*
937 * Check and unlock all entries in the bundle.
938 */
939 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
940 {
941 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
942 {
943 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
944 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
945 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
946 {
947 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
948 AssertRC(rc); /** @todo figure out how to handle this. */
949 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
950 }
951 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
952 AssertRC(rc); /** @todo figure out how to handle this. */
953 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
954 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
955 }
956 }
957
958 /*
959 * Advance and free previous bundle.
960 */
961 pToFree = pBundle;
962 pBundle = pBundle->pNext;
963
964 pToFree->pNext = NULL;
965 pToFree->cUsed = 0;
966 if (pToFree != &pSession->Bundle)
967 RTMemFree(pToFree);
968 }
969 Log2(("freeing memory - done\n"));
970
971 /*
972 * Deregister component factories.
973 */
974 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
975 Log2(("deregistering component factories:\n"));
976 if (pDevExt->pComponentFactoryHead)
977 {
978 PSUPDRVFACTORYREG pPrev = NULL;
979 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
980 while (pCur)
981 {
982 if (pCur->pSession == pSession)
983 {
984 /* unlink it */
985 PSUPDRVFACTORYREG pNext = pCur->pNext;
986 if (pPrev)
987 pPrev->pNext = pNext;
988 else
989 pDevExt->pComponentFactoryHead = pNext;
990
991 /* free it */
992 pCur->pNext = NULL;
993 pCur->pSession = NULL;
994 pCur->pFactory = NULL;
995 RTMemFree(pCur);
996
997 /* next */
998 pCur = pNext;
999 }
1000 else
1001 {
1002 /* next */
1003 pPrev = pCur;
1004 pCur = pCur->pNext;
1005 }
1006 }
1007 }
1008 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
1009 Log2(("deregistering component factories - done\n"));
1010
1011 /*
1012 * Loaded images needs to be dereferenced and possibly freed up.
1013 */
1014 supdrvLdrLock(pDevExt);
1015 Log2(("freeing images:\n"));
1016 if (pSession->pLdrUsage)
1017 {
1018 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1019 pSession->pLdrUsage = NULL;
1020 while (pUsage)
1021 {
1022 void *pvFree = pUsage;
1023 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1024 if (pImage->cUsage > pUsage->cUsage)
1025 pImage->cUsage -= pUsage->cUsage;
1026 else
1027 supdrvLdrFree(pDevExt, pImage);
1028 pUsage->pImage = NULL;
1029 pUsage = pUsage->pNext;
1030 RTMemFree(pvFree);
1031 }
1032 }
1033 supdrvLdrUnlock(pDevExt);
1034 Log2(("freeing images - done\n"));
1035
1036 /*
1037 * Unmap the GIP.
1038 */
1039 Log2(("umapping GIP:\n"));
1040 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1041 {
1042 SUPR0GipUnmap(pSession);
1043 pSession->fGipReferenced = 0;
1044 }
1045 Log2(("umapping GIP - done\n"));
1046}
1047
1048
1049/**
1050 * Common code for freeing a session when the reference count reaches zero.
1051 *
1052 * @param pDevExt Device extension.
1053 * @param pSession Session data.
1054 * This data will be freed by this routine.
1055 */
1056static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1057{
1058 VBOXDRV_SESSION_CLOSE(pSession);
1059
1060 /*
1061 * Cleanup the session first.
1062 */
1063 supdrvCleanupSession(pDevExt, pSession);
1064 supdrvOSCleanupSession(pDevExt, pSession);
1065
1066 /*
1067 * Free the rest of the session stuff.
1068 */
1069 RTSpinlockDestroy(pSession->Spinlock);
1070 pSession->Spinlock = NIL_RTSPINLOCK;
1071 pSession->pDevExt = NULL;
1072 RTMemFree(pSession);
1073 LogFlow(("supdrvDestroySession: returns\n"));
1074}
1075
1076
1077/**
1078 * Inserts the session into the global hash table.
1079 *
1080 * @retval VINF_SUCCESS on success.
1081 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1082 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1083 * session (asserted).
1084 * @retval VERR_DUPLICATE if there is already a session for that pid.
1085 *
1086 * @param pDevExt The device extension.
1087 * @param pSession The session.
1088 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1089 * available and used. This will set to point to the
1090 * session while under the protection of the session
1091 * hash table spinlock. It will also be kept in
1092 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1093 * cleanup use.
1094 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1095 */
1096int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1097 void *pvUser)
1098{
1099 PSUPDRVSESSION pCur;
1100 unsigned iHash;
1101
1102 /*
1103 * Validate input.
1104 */
1105 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1106 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1107
1108 /*
1109 * Calculate the hash table index and acquire the spinlock.
1110 */
1111 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1112
1113 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1114
1115 /*
1116 * If there are a collisions, we need to carefully check if we got a
1117 * duplicate. There can only be one open session per process.
1118 */
1119 pCur = pDevExt->apSessionHashTab[iHash];
1120 if (pCur)
1121 {
1122 while (pCur && pCur->Process != pSession->Process)
1123 pCur = pCur->pCommonNextHash;
1124
1125 if (pCur)
1126 {
1127 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1128 if (pCur == pSession)
1129 {
1130 Assert(pSession->fInHashTable);
1131 AssertFailed();
1132 return VERR_WRONG_ORDER;
1133 }
1134 Assert(!pSession->fInHashTable);
1135 if (pCur->R0Process == pSession->R0Process)
1136 return VERR_RESOURCE_IN_USE;
1137 return VERR_DUPLICATE;
1138 }
1139 }
1140 Assert(!pSession->fInHashTable);
1141 Assert(!pSession->ppOsSessionPtr);
1142
1143 /*
1144 * Insert it, doing a callout to the OS specific code in case it has
1145 * anything it wishes to do while we're holding the spinlock.
1146 */
1147 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1148 pDevExt->apSessionHashTab[iHash] = pSession;
1149 pSession->fInHashTable = true;
1150 ASMAtomicIncS32(&pDevExt->cSessions);
1151
1152 pSession->ppOsSessionPtr = ppOsSessionPtr;
1153 if (ppOsSessionPtr)
1154 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1155
1156 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1157
1158 /*
1159 * Retain a reference for the pointer in the session table.
1160 */
1161 ASMAtomicIncU32(&pSession->cRefs);
1162
1163 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1164 return VINF_SUCCESS;
1165}
1166
1167
1168/**
1169 * Removes the session from the global hash table.
1170 *
1171 * @retval VINF_SUCCESS on success.
1172 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1173 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1174 * session (asserted).
1175 *
1176 * @param pDevExt The device extension.
1177 * @param pSession The session. The caller is expected to have a reference
1178 * to this so it won't croak on us when we release the hash
1179 * table reference.
1180 * @param pvUser OS specific context value for the
1181 * supdrvOSSessionHashTabInserted callback.
1182 */
1183int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1184{
1185 PSUPDRVSESSION pCur;
1186 unsigned iHash;
1187 int32_t cRefs;
1188
1189 /*
1190 * Validate input.
1191 */
1192 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1193 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1194
1195 /*
1196 * Calculate the hash table index and acquire the spinlock.
1197 */
1198 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1199
1200 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1201
1202 /*
1203 * Unlink it.
1204 */
1205 pCur = pDevExt->apSessionHashTab[iHash];
1206 if (pCur == pSession)
1207 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1208 else
1209 {
1210 PSUPDRVSESSION pPrev = pCur;
1211 while (pCur && pCur != pSession)
1212 {
1213 pPrev = pCur;
1214 pCur = pCur->pCommonNextHash;
1215 }
1216 if (pCur)
1217 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1218 else
1219 {
1220 Assert(!pSession->fInHashTable);
1221 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1222 return VERR_NOT_FOUND;
1223 }
1224 }
1225
1226 pSession->pCommonNextHash = NULL;
1227 pSession->fInHashTable = false;
1228
1229 ASMAtomicDecS32(&pDevExt->cSessions);
1230
1231 /*
1232 * Clear OS specific session pointer if available and do the OS callback.
1233 */
1234 if (pSession->ppOsSessionPtr)
1235 {
1236 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1237 pSession->ppOsSessionPtr = NULL;
1238 }
1239
1240 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1241
1242 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1243
1244 /*
1245 * Drop the reference the hash table had to the session. This shouldn't
1246 * be the last reference!
1247 */
1248 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1249 Assert(cRefs > 0 && cRefs < _1M);
1250 if (cRefs == 0)
1251 supdrvDestroySession(pDevExt, pSession);
1252
1253 return VINF_SUCCESS;
1254}
1255
1256
1257/**
1258 * Looks up the session for the current process in the global hash table or in
1259 * OS specific pointer.
1260 *
1261 * @returns Pointer to the session with a reference that the caller must
1262 * release. If no valid session was found, NULL is returned.
1263 *
1264 * @param pDevExt The device extension.
1265 * @param Process The process ID.
1266 * @param R0Process The ring-0 process handle.
1267 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1268 * this is used instead of the hash table. For
1269 * additional safety it must then be equal to the
1270 * SUPDRVSESSION::ppOsSessionPtr member.
1271 * This can be NULL even if the OS has a session
1272 * pointer.
1273 */
1274PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1275 PSUPDRVSESSION *ppOsSessionPtr)
1276{
1277 PSUPDRVSESSION pCur;
1278 unsigned iHash;
1279
1280 /*
1281 * Validate input.
1282 */
1283 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1284
1285 /*
1286 * Calculate the hash table index and acquire the spinlock.
1287 */
1288 iHash = SUPDRV_SESSION_HASH(Process);
1289
1290 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1291
1292 /*
1293 * If an OS session pointer is provided, always use it.
1294 */
1295 if (ppOsSessionPtr)
1296 {
1297 pCur = *ppOsSessionPtr;
1298 if ( pCur
1299 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1300 || pCur->Process != Process
1301 || pCur->R0Process != R0Process) )
1302 pCur = NULL;
1303 }
1304 else
1305 {
1306 /*
1307 * Otherwise, do the hash table lookup.
1308 */
1309 pCur = pDevExt->apSessionHashTab[iHash];
1310 while ( pCur
1311 && ( pCur->Process != Process
1312 || pCur->R0Process != R0Process) )
1313 pCur = pCur->pCommonNextHash;
1314 }
1315
1316 /*
1317 * Retain the session.
1318 */
1319 if (pCur)
1320 {
1321 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1322 NOREF(cRefs);
1323 Assert(cRefs > 1 && cRefs < _1M);
1324 }
1325
1326 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1327
1328 return pCur;
1329}
1330
1331
1332/**
1333 * Retain a session to make sure it doesn't go away while it is in use.
1334 *
1335 * @returns New reference count on success, UINT32_MAX on failure.
1336 * @param pSession Session data.
1337 */
1338uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1339{
1340 uint32_t cRefs;
1341 AssertPtrReturn(pSession, UINT32_MAX);
1342 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1343
1344 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1345 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1346 return cRefs;
1347}
1348
1349
1350/**
1351 * Releases a given session.
1352 *
1353 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1354 * @param pSession Session data.
1355 */
1356uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1357{
1358 uint32_t cRefs;
1359 AssertPtrReturn(pSession, UINT32_MAX);
1360 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1361
1362 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1363 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1364 if (cRefs == 0)
1365 supdrvDestroySession(pSession->pDevExt, pSession);
1366 return cRefs;
1367}
1368
1369
1370/**
1371 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1372 *
1373 * @returns IPRT status code, see SUPR0ObjAddRef.
1374 * @param hHandleTable The handle table handle. Ignored.
1375 * @param pvObj The object pointer.
1376 * @param pvCtx Context, the handle type. Ignored.
1377 * @param pvUser Session pointer.
1378 */
1379static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1380{
1381 NOREF(pvCtx);
1382 NOREF(hHandleTable);
1383 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1384}
1385
1386
1387/**
1388 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1389 *
1390 * @param hHandleTable The handle table handle. Ignored.
1391 * @param h The handle value. Ignored.
1392 * @param pvObj The object pointer.
1393 * @param pvCtx Context, the handle type. Ignored.
1394 * @param pvUser Session pointer.
1395 */
1396static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1397{
1398 NOREF(pvCtx);
1399 NOREF(h);
1400 NOREF(hHandleTable);
1401 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1402}
1403
1404
1405/**
1406 * Fast path I/O Control worker.
1407 *
1408 * @returns VBox status code that should be passed down to ring-3 unchanged.
1409 * @param uIOCtl Function number.
1410 * @param idCpu VMCPU id.
1411 * @param pDevExt Device extention.
1412 * @param pSession Session data.
1413 */
1414int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1415{
1416 /*
1417 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1418 */
1419 if (RT_LIKELY( RT_VALID_PTR(pSession)
1420 && pSession->pVM
1421 && pDevExt->pfnVMMR0EntryFast))
1422 {
1423 switch (uIOCtl)
1424 {
1425 case SUP_IOCTL_FAST_DO_RAW_RUN:
1426 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1427 break;
1428 case SUP_IOCTL_FAST_DO_HM_RUN:
1429 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1430 break;
1431 case SUP_IOCTL_FAST_DO_NOP:
1432 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1433 break;
1434 default:
1435 return VERR_INTERNAL_ERROR;
1436 }
1437 return VINF_SUCCESS;
1438 }
1439 return VERR_INTERNAL_ERROR;
1440}
1441
1442
/**
 * Helper for supdrvIOCtl.  Tells whether a string contains at least one
 * character from a given character set.
 *
 * This is a hand rolled stand-in for strpbrk, which would be used here if it
 * were on the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr contains any of the characters in pszChars, 0
 *          otherwise (including when either string is empty).
 * @param   pszStr      The zero terminated string to check.
 * @param   pszChars    The zero terminated set of characters to look for.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        /* Compare the current character against each member of the set. */
        const char *pszSet;
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *pszCur)
                return 1;
    }
    return 0;
}
1466
1467
1468
1469/**
1470 * I/O Control inner worker (tracing reasons).
1471 *
1472 * @returns IPRT status code.
1473 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1474 *
1475 * @param uIOCtl Function number.
1476 * @param pDevExt Device extension.
1477 * @param pSession Session data.
1478 * @param pReqHdr The request header.
1479 */
1480static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1481{
1482 /*
1483 * Validation macros
1484 */
1485#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1486 do { \
1487 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1488 { \
1489 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1490 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1491 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1492 } \
1493 } while (0)
1494
1495#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1496
1497#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1498 do { \
1499 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1500 { \
1501 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1502 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1503 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1504 } \
1505 } while (0)
1506
1507#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1508 do { \
1509 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1510 { \
1511 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1512 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1513 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1514 } \
1515 } while (0)
1516
1517#define REQ_CHECK_EXPR(Name, expr) \
1518 do { \
1519 if (RT_UNLIKELY(!(expr))) \
1520 { \
1521 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1522 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1523 } \
1524 } while (0)
1525
1526#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1527 do { \
1528 if (RT_UNLIKELY(!(expr))) \
1529 { \
1530 OSDBGPRINT( fmt ); \
1531 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1532 } \
1533 } while (0)
1534
1535 /*
1536 * The switch.
1537 */
1538 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1539 {
1540 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1541 {
1542 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1543 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1544 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1545 {
1546 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1547 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1548 return 0;
1549 }
1550
1551#if 0
1552 /*
1553 * Call out to the OS specific code and let it do permission checks on the
1554 * client process.
1555 */
1556 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1557 {
1558 pReq->u.Out.u32Cookie = 0xffffffff;
1559 pReq->u.Out.u32SessionCookie = 0xffffffff;
1560 pReq->u.Out.u32SessionVersion = 0xffffffff;
1561 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1562 pReq->u.Out.pSession = NULL;
1563 pReq->u.Out.cFunctions = 0;
1564 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1565 return 0;
1566 }
1567#endif
1568
1569 /*
1570 * Match the version.
1571 * The current logic is very simple, match the major interface version.
1572 */
1573 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1574 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1575 {
1576 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1577 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1578 pReq->u.Out.u32Cookie = 0xffffffff;
1579 pReq->u.Out.u32SessionCookie = 0xffffffff;
1580 pReq->u.Out.u32SessionVersion = 0xffffffff;
1581 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1582 pReq->u.Out.pSession = NULL;
1583 pReq->u.Out.cFunctions = 0;
1584 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1585 return 0;
1586 }
1587
1588 /*
1589 * Fill in return data and be gone.
1590 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1591 * u32SessionVersion <= u32ReqVersion!
1592 */
1593 /** @todo Somehow validate the client and negotiate a secure cookie... */
1594 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1595 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1596 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1597 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1598 pReq->u.Out.pSession = pSession;
1599 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1600 pReq->Hdr.rc = VINF_SUCCESS;
1601 return 0;
1602 }
1603
1604 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1605 {
1606 /* validate */
1607 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1608 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1609
1610 /* execute */
1611 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1612 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1613 pReq->Hdr.rc = VINF_SUCCESS;
1614 return 0;
1615 }
1616
1617 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1618 {
1619 /* validate */
1620 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1621 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1622 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1623 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1624 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1625
1626 /* execute */
1627 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1628 if (RT_FAILURE(pReq->Hdr.rc))
1629 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1630 return 0;
1631 }
1632
1633 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1634 {
1635 /* validate */
1636 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1637 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1638
1639 /* execute */
1640 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1641 return 0;
1642 }
1643
1644 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1645 {
1646 /* validate */
1647 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1648 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1649
1650 /* execute */
1651 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1652 if (RT_FAILURE(pReq->Hdr.rc))
1653 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1654 return 0;
1655 }
1656
1657 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1658 {
1659 /* validate */
1660 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1661 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1662
1663 /* execute */
1664 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1665 return 0;
1666 }
1667
1668 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1669 {
1670 /* validate */
1671 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1672 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1673 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1674 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1675 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1676 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1677 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1678 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1679 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1680 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1681 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1682
1683 /* execute */
1684 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1685 return 0;
1686 }
1687
1688 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1689 {
1690 /* validate */
1691 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1692 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1693 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1694 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1695 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1696 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1697 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1698 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1699 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1700 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1701 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1702 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1703 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1704 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1705 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1706
1707 if (pReq->u.In.cSymbols)
1708 {
1709 uint32_t i;
1710 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1711 for (i = 0; i < pReq->u.In.cSymbols; i++)
1712 {
1713 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1714 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1715 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1716 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1717 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1718 pReq->u.In.cbStrTab - paSyms[i].offName),
1719 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1720 }
1721 }
1722
1723 /* execute */
1724 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1725 return 0;
1726 }
1727
1728 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1729 {
1730 /* validate */
1731 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1732 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1733
1734 /* execute */
1735 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1736 return 0;
1737 }
1738
1739 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1740 {
1741 /* validate */
1742 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1743 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1744 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1745
1746 /* execute */
1747 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1748 return 0;
1749 }
1750
1751 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1752 {
1753 /* validate */
1754 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1755 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1756 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1757
1758 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1759 {
1760 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1761
1762 /* execute */
1763 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1764 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1765 else
1766 pReq->Hdr.rc = VERR_WRONG_ORDER;
1767 }
1768 else
1769 {
1770 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1771 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1772 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1773 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1774 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1775
1776 /* execute */
1777 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1778 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1779 else
1780 pReq->Hdr.rc = VERR_WRONG_ORDER;
1781 }
1782
1783 if ( RT_FAILURE(pReq->Hdr.rc)
1784 && pReq->Hdr.rc != VERR_INTERRUPTED
1785 && pReq->Hdr.rc != VERR_TIMEOUT)
1786 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1787 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1788 else
1789 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1790 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1791 return 0;
1792 }
1793
1794 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1795 {
1796 /* validate */
1797 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1798 PSUPVMMR0REQHDR pVMMReq;
1799 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1800 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1801
1802 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1803 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1804 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1805 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1806 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1807
1808 /* execute */
1809 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1810 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1811 else
1812 pReq->Hdr.rc = VERR_WRONG_ORDER;
1813
1814 if ( RT_FAILURE(pReq->Hdr.rc)
1815 && pReq->Hdr.rc != VERR_INTERRUPTED
1816 && pReq->Hdr.rc != VERR_TIMEOUT)
1817 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1818 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1819 else
1820 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1821 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1822 return 0;
1823 }
1824
1825 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1826 {
1827 /* validate */
1828 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1829 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1830
1831 /* execute */
1832 pReq->Hdr.rc = VINF_SUCCESS;
1833 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1834 return 0;
1835 }
1836
1837 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1838 {
1839 /* validate */
1840 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1841 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1842 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1843
1844 /* execute */
1845 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1846 if (RT_FAILURE(pReq->Hdr.rc))
1847 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1848 return 0;
1849 }
1850
1851 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1852 {
1853 /* validate */
1854 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1855 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1856
1857 /* execute */
1858 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1859 return 0;
1860 }
1861
1862 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1863 {
1864 /* validate */
1865 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1866 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1867
1868 /* execute */
1869 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1870 if (RT_SUCCESS(pReq->Hdr.rc))
1871 pReq->u.Out.pGipR0 = pDevExt->pGip;
1872 return 0;
1873 }
1874
1875 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1876 {
1877 /* validate */
1878 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1879 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1880
1881 /* execute */
1882 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1883 return 0;
1884 }
1885
1886 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1887 {
1888 /* validate */
1889 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1890 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1891 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1892 || ( VALID_PTR(pReq->u.In.pVMR0)
1893 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1894 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1895 /* execute */
1896 pSession->pVM = pReq->u.In.pVMR0;
1897 pReq->Hdr.rc = VINF_SUCCESS;
1898 return 0;
1899 }
1900
1901 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1902 {
1903 /* validate */
1904 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1905 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1906 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1907 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1908 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1909 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1910 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1911 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1912 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1913
1914 /* execute */
1915 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1916 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1917 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1918 &pReq->u.Out.aPages[0]);
1919 if (RT_FAILURE(pReq->Hdr.rc))
1920 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1921 return 0;
1922 }
1923
1924 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1925 {
1926 /* validate */
1927 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1928 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1929 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1930 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1931 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1932 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1933
1934 /* execute */
1935 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1936 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1937 if (RT_FAILURE(pReq->Hdr.rc))
1938 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1939 return 0;
1940 }
1941
1942 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1943 {
1944 /* validate */
1945 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1946 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1947 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1948 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1949 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1950 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1951 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1952
1953 /* execute */
1954 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1955 return 0;
1956 }
1957
1958 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1959 {
1960 /* validate */
1961 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1962 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1963
1964 /* execute */
1965 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1966 return 0;
1967 }
1968
1969 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1970 {
1971 /* validate */
1972 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1973 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1974 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1975
1976 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1977 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1978 else
1979 {
1980 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1981 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1982 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1983 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1984 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1985 }
1986 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1987
1988 /* execute */
1989 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1990 return 0;
1991 }
1992
1993 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1994 {
1995 /* validate */
1996 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1997 size_t cbStrTab;
1998 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1999 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
2000 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
2001 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
2002 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
2003 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
2004 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
2005 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
2006 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2007 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2008 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2009
2010 /* execute */
2011 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2012 return 0;
2013 }
2014
2015 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2016 {
2017 /* validate */
2018 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2019 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2020 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2021
2022 /* execute */
2023 switch (pReq->u.In.uType)
2024 {
2025 case SUP_SEM_TYPE_EVENT:
2026 {
2027 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2028 switch (pReq->u.In.uOp)
2029 {
2030 case SUPSEMOP2_WAIT_MS_REL:
2031 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2032 break;
2033 case SUPSEMOP2_WAIT_NS_ABS:
2034 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2035 break;
2036 case SUPSEMOP2_WAIT_NS_REL:
2037 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2038 break;
2039 case SUPSEMOP2_SIGNAL:
2040 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2041 break;
2042 case SUPSEMOP2_CLOSE:
2043 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2044 break;
2045 case SUPSEMOP2_RESET:
2046 default:
2047 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2048 break;
2049 }
2050 break;
2051 }
2052
2053 case SUP_SEM_TYPE_EVENT_MULTI:
2054 {
2055 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2056 switch (pReq->u.In.uOp)
2057 {
2058 case SUPSEMOP2_WAIT_MS_REL:
2059 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2060 break;
2061 case SUPSEMOP2_WAIT_NS_ABS:
2062 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2063 break;
2064 case SUPSEMOP2_WAIT_NS_REL:
2065 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2066 break;
2067 case SUPSEMOP2_SIGNAL:
2068 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2069 break;
2070 case SUPSEMOP2_CLOSE:
2071 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2072 break;
2073 case SUPSEMOP2_RESET:
2074 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2075 break;
2076 default:
2077 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2078 break;
2079 }
2080 break;
2081 }
2082
2083 default:
2084 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2085 break;
2086 }
2087 return 0;
2088 }
2089
2090 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2091 {
2092 /* validate */
2093 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2094 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2095 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2096
2097 /* execute */
2098 switch (pReq->u.In.uType)
2099 {
2100 case SUP_SEM_TYPE_EVENT:
2101 {
2102 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2103 switch (pReq->u.In.uOp)
2104 {
2105 case SUPSEMOP3_CREATE:
2106 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2107 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2108 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2109 break;
2110 case SUPSEMOP3_GET_RESOLUTION:
2111 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2112 pReq->Hdr.rc = VINF_SUCCESS;
2113 pReq->Hdr.cbOut = sizeof(*pReq);
2114 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2115 break;
2116 default:
2117 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2118 break;
2119 }
2120 break;
2121 }
2122
2123 case SUP_SEM_TYPE_EVENT_MULTI:
2124 {
2125 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2126 switch (pReq->u.In.uOp)
2127 {
2128 case SUPSEMOP3_CREATE:
2129 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2130 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2131 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2132 break;
2133 case SUPSEMOP3_GET_RESOLUTION:
2134 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2135 pReq->Hdr.rc = VINF_SUCCESS;
2136 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2137 break;
2138 default:
2139 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2140 break;
2141 }
2142 break;
2143 }
2144
2145 default:
2146 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2147 break;
2148 }
2149 return 0;
2150 }
2151
2152 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2153 {
2154 /* validate */
2155 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2156 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2157
2158 /* execute */
2159 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2160 if (RT_FAILURE(pReq->Hdr.rc))
2161 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2162 return 0;
2163 }
2164
2165 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2166 {
2167 /* validate */
2168 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2169 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2170
2171 /* execute */
2172 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2173 return 0;
2174 }
2175
2176 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2177 {
2178 /* validate */
2179 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2180
2181 /* execute */
2182 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2183 return 0;
2184 }
2185
2186 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2187 {
2188 /* validate */
2189 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2190 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2191
2192 /* execute */
2193 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2194 return 0;
2195 }
2196
2197 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2198 {
2199 /* validate */
2200 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2201 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2202 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2203 return VERR_INVALID_PARAMETER;
2204
2205 /* execute */
2206 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2207 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2208 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2209 pReq->u.In.szName, pReq->u.In.fFlags);
2210 return 0;
2211 }
2212
2213 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2214 {
2215 /* validate */
2216 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2217 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2218
2219 /* execute */
2220 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2221 return 0;
2222 }
2223
2224 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2225 {
2226 /* validate */
2227 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2228 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2229
2230 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2231 pReqHdr->rc = VINF_SUCCESS;
2232 return 0;
2233 }
2234
2235 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2236 {
2237 /* validate */
2238 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2239 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2240 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2241 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2242
2243 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2244 return 0;
2245 }
2246
2247 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2248 {
2249 /* validate */
2250 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2251
2252 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2253 return 0;
2254 }
2255
2256 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2257 {
2258 /* validate */
2259 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2260 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2261
2262 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2263 return 0;
2264 }
2265
2266 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2267 {
2268 /* validate */
2269 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2270 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2271
2272 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2273 return 0;
2274 }
2275
2276 default:
2277 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2278 break;
2279 }
2280 return VERR_GENERAL_FAILURE;
2281}
2282
2283
2284/**
2285 * I/O Control inner worker for the restricted operations.
2286 *
2287 * @returns IPRT status code.
2288 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2289 *
2290 * @param uIOCtl Function number.
2291 * @param pDevExt Device extention.
2292 * @param pSession Session data.
2293 * @param pReqHdr The request header.
2294 */
2295static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2296{
2297 /*
2298 * The switch.
2299 */
2300 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2301 {
2302 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2303 {
2304 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2305 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2306 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2307 {
2308 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2309 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2310 return 0;
2311 }
2312
2313 /*
2314 * Match the version.
2315 * The current logic is very simple, match the major interface version.
2316 */
2317 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2318 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2319 {
2320 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2321 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2322 pReq->u.Out.u32Cookie = 0xffffffff;
2323 pReq->u.Out.u32SessionCookie = 0xffffffff;
2324 pReq->u.Out.u32SessionVersion = 0xffffffff;
2325 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2326 pReq->u.Out.pSession = NULL;
2327 pReq->u.Out.cFunctions = 0;
2328 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2329 return 0;
2330 }
2331
2332 /*
2333 * Fill in return data and be gone.
2334 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2335 * u32SessionVersion <= u32ReqVersion!
2336 */
2337 /** @todo Somehow validate the client and negotiate a secure cookie... */
2338 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2339 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2340 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2341 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2342 pReq->u.Out.pSession = pSession;
2343 pReq->u.Out.cFunctions = 0;
2344 pReq->Hdr.rc = VINF_SUCCESS;
2345 return 0;
2346 }
2347
2348 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2349 {
2350 /* validate */
2351 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2352 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2353
2354 /* execute */
2355 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2356 if (RT_FAILURE(pReq->Hdr.rc))
2357 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2358 return 0;
2359 }
2360
2361 default:
2362 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2363 break;
2364 }
2365 return VERR_GENERAL_FAILURE;
2366}
2367
2368
2369/**
2370 * I/O Control worker.
2371 *
2372 * @returns IPRT status code.
2373 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2374 *
2375 * @param uIOCtl Function number.
2376 * @param pDevExt Device extention.
2377 * @param pSession Session data.
2378 * @param pReqHdr The request header.
2379 */
2380int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2381{
2382 int rc;
2383 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2384
2385 /*
2386 * Validate the request.
2387 */
2388 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2389 {
2390 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2391 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2392 return VERR_INVALID_PARAMETER;
2393 }
2394 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2395 || pReqHdr->cbIn < sizeof(*pReqHdr)
2396 || pReqHdr->cbIn > cbReq
2397 || pReqHdr->cbOut < sizeof(*pReqHdr)
2398 || pReqHdr->cbOut > cbReq))
2399 {
2400 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2401 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2402 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2403 return VERR_INVALID_PARAMETER;
2404 }
2405 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2406 {
2407 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2408 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2409 return VERR_INVALID_PARAMETER;
2410 }
2411 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2412 {
2413 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2414 {
2415 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2416 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2417 return VERR_INVALID_PARAMETER;
2418 }
2419 }
2420 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2421 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2422 {
2423 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2424 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2425 return VERR_INVALID_PARAMETER;
2426 }
2427
2428 /*
2429 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2430 */
2431 if (pSession->fUnrestricted)
2432 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2433 else
2434 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2435
2436 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2437 return rc;
2438}
2439
2440
2441/**
2442 * Inter-Driver Communication (IDC) worker.
2443 *
2444 * @returns VBox status code.
2445 * @retval VINF_SUCCESS on success.
2446 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2447 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2448 *
2449 * @param uReq The request (function) code.
2450 * @param pDevExt Device extension.
2451 * @param pSession Session data.
2452 * @param pReqHdr The request header.
2453 */
2454int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2455{
2456 /*
2457 * The OS specific code has already validated the pSession
2458 * pointer, and the request size being greater or equal to
2459 * size of the header.
2460 *
2461 * So, just check that pSession is a kernel context session.
2462 */
2463 if (RT_UNLIKELY( pSession
2464 && pSession->R0Process != NIL_RTR0PROCESS))
2465 return VERR_INVALID_PARAMETER;
2466
2467/*
2468 * Validation macro.
2469 */
2470#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2471 do { \
2472 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2473 { \
2474 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2475 (long)pReqHdr->cb, (long)(cbExpect))); \
2476 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2477 } \
2478 } while (0)
2479
2480 switch (uReq)
2481 {
2482 case SUPDRV_IDC_REQ_CONNECT:
2483 {
2484 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2485 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2486
2487 /*
2488 * Validate the cookie and other input.
2489 */
2490 if (pReq->Hdr.pSession != NULL)
2491 {
2492 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2493 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2494 }
2495 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2496 {
2497 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2498 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2499 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2500 }
2501 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2502 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2503 {
2504 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2505 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2506 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2507 }
2508 if (pSession != NULL)
2509 {
2510 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2511 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2512 }
2513
2514 /*
2515 * Match the version.
2516 * The current logic is very simple, match the major interface version.
2517 */
2518 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2519 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2520 {
2521 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2522 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2523 pReq->u.Out.pSession = NULL;
2524 pReq->u.Out.uSessionVersion = 0xffffffff;
2525 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2526 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2527 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2528 return VINF_SUCCESS;
2529 }
2530
2531 pReq->u.Out.pSession = NULL;
2532 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2533 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2534 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2535
2536 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2537 if (RT_FAILURE(pReq->Hdr.rc))
2538 {
2539 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2540 return VINF_SUCCESS;
2541 }
2542
2543 pReq->u.Out.pSession = pSession;
2544 pReq->Hdr.pSession = pSession;
2545
2546 return VINF_SUCCESS;
2547 }
2548
2549 case SUPDRV_IDC_REQ_DISCONNECT:
2550 {
2551 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2552
2553 supdrvSessionRelease(pSession);
2554 return pReqHdr->rc = VINF_SUCCESS;
2555 }
2556
2557 case SUPDRV_IDC_REQ_GET_SYMBOL:
2558 {
2559 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2560 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2561
2562 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2563 return VINF_SUCCESS;
2564 }
2565
2566 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2567 {
2568 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2569 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2570
2571 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2572 return VINF_SUCCESS;
2573 }
2574
2575 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2576 {
2577 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2578 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2579
2580 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2581 return VINF_SUCCESS;
2582 }
2583
2584 default:
2585 Log(("Unknown IDC %#lx\n", (long)uReq));
2586 break;
2587 }
2588
2589#undef REQ_CHECK_IDC_SIZE
2590 return VERR_NOT_SUPPORTED;
2591}
2592
2593
2594/**
2595 * Register a object for reference counting.
2596 * The object is registered with one reference in the specified session.
2597 *
2598 * @returns Unique identifier on success (pointer).
2599 * All future reference must use this identifier.
2600 * @returns NULL on failure.
2601 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2602 * @param pvUser1 The first user argument.
2603 * @param pvUser2 The second user argument.
2604 */
2605SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2606{
2607 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2608 PSUPDRVOBJ pObj;
2609 PSUPDRVUSAGE pUsage;
2610
2611 /*
2612 * Validate the input.
2613 */
2614 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2615 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2616 AssertPtrReturn(pfnDestructor, NULL);
2617
2618 /*
2619 * Allocate and initialize the object.
2620 */
2621 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2622 if (!pObj)
2623 return NULL;
2624 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2625 pObj->enmType = enmType;
2626 pObj->pNext = NULL;
2627 pObj->cUsage = 1;
2628 pObj->pfnDestructor = pfnDestructor;
2629 pObj->pvUser1 = pvUser1;
2630 pObj->pvUser2 = pvUser2;
2631 pObj->CreatorUid = pSession->Uid;
2632 pObj->CreatorGid = pSession->Gid;
2633 pObj->CreatorProcess= pSession->Process;
2634 supdrvOSObjInitCreator(pObj, pSession);
2635
2636 /*
2637 * Allocate the usage record.
2638 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2639 */
2640 RTSpinlockAcquire(pDevExt->Spinlock);
2641
2642 pUsage = pDevExt->pUsageFree;
2643 if (pUsage)
2644 pDevExt->pUsageFree = pUsage->pNext;
2645 else
2646 {
2647 RTSpinlockRelease(pDevExt->Spinlock);
2648 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2649 if (!pUsage)
2650 {
2651 RTMemFree(pObj);
2652 return NULL;
2653 }
2654 RTSpinlockAcquire(pDevExt->Spinlock);
2655 }
2656
2657 /*
2658 * Insert the object and create the session usage record.
2659 */
2660 /* The object. */
2661 pObj->pNext = pDevExt->pObjs;
2662 pDevExt->pObjs = pObj;
2663
2664 /* The session record. */
2665 pUsage->cUsage = 1;
2666 pUsage->pObj = pObj;
2667 pUsage->pNext = pSession->pUsage;
2668 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2669 pSession->pUsage = pUsage;
2670
2671 RTSpinlockRelease(pDevExt->Spinlock);
2672
2673 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2674 return pObj;
2675}
2676
2677
2678/**
2679 * Increment the reference counter for the object associating the reference
2680 * with the specified session.
2681 *
2682 * @returns IPRT status code.
2683 * @param pvObj The identifier returned by SUPR0ObjRegister().
2684 * @param pSession The session which is referencing the object.
2685 *
2686 * @remarks The caller should not own any spinlocks and must carefully protect
2687 * itself against potential race with the destructor so freed memory
2688 * isn't accessed here.
2689 */
2690SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2691{
2692 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2693}
2694
2695
2696/**
2697 * Increment the reference counter for the object associating the reference
2698 * with the specified session.
2699 *
2700 * @returns IPRT status code.
2701 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2702 * couldn't be allocated. (If you see this you're not doing the right
2703 * thing and it won't ever work reliably.)
2704 *
2705 * @param pvObj The identifier returned by SUPR0ObjRegister().
2706 * @param pSession The session which is referencing the object.
2707 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2708 * first reference to an object in a session with this
2709 * argument set.
2710 *
2711 * @remarks The caller should not own any spinlocks and must carefully protect
2712 * itself against potential race with the destructor so freed memory
2713 * isn't accessed here.
2714 */
2715SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2716{
2717 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2718 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2719 int rc = VINF_SUCCESS;
2720 PSUPDRVUSAGE pUsagePre;
2721 PSUPDRVUSAGE pUsage;
2722
2723 /*
2724 * Validate the input.
2725 * Be ready for the destruction race (someone might be stuck in the
2726 * destructor waiting a lock we own).
2727 */
2728 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2729 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2730 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2731 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2732 VERR_INVALID_PARAMETER);
2733
2734 RTSpinlockAcquire(pDevExt->Spinlock);
2735
2736 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2737 {
2738 RTSpinlockRelease(pDevExt->Spinlock);
2739
2740 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2741 return VERR_WRONG_ORDER;
2742 }
2743
2744 /*
2745 * Preallocate the usage record if we can.
2746 */
2747 pUsagePre = pDevExt->pUsageFree;
2748 if (pUsagePre)
2749 pDevExt->pUsageFree = pUsagePre->pNext;
2750 else if (!fNoBlocking)
2751 {
2752 RTSpinlockRelease(pDevExt->Spinlock);
2753 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2754 if (!pUsagePre)
2755 return VERR_NO_MEMORY;
2756
2757 RTSpinlockAcquire(pDevExt->Spinlock);
2758 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2759 {
2760 RTSpinlockRelease(pDevExt->Spinlock);
2761
2762 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2763 return VERR_WRONG_ORDER;
2764 }
2765 }
2766
2767 /*
2768 * Reference the object.
2769 */
2770 pObj->cUsage++;
2771
2772 /*
2773 * Look for the session record.
2774 */
2775 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2776 {
2777 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2778 if (pUsage->pObj == pObj)
2779 break;
2780 }
2781 if (pUsage)
2782 pUsage->cUsage++;
2783 else if (pUsagePre)
2784 {
2785 /* create a new session record. */
2786 pUsagePre->cUsage = 1;
2787 pUsagePre->pObj = pObj;
2788 pUsagePre->pNext = pSession->pUsage;
2789 pSession->pUsage = pUsagePre;
2790 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2791
2792 pUsagePre = NULL;
2793 }
2794 else
2795 {
2796 pObj->cUsage--;
2797 rc = VERR_TRY_AGAIN;
2798 }
2799
2800 /*
2801 * Put any unused usage record into the free list..
2802 */
2803 if (pUsagePre)
2804 {
2805 pUsagePre->pNext = pDevExt->pUsageFree;
2806 pDevExt->pUsageFree = pUsagePre;
2807 }
2808
2809 RTSpinlockRelease(pDevExt->Spinlock);
2810
2811 return rc;
2812}
2813
2814
2815/**
2816 * Decrement / destroy a reference counter record for an object.
2817 *
2818 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2819 *
2820 * @returns IPRT status code.
2821 * @retval VINF_SUCCESS if not destroyed.
2822 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2823 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2824 * string builds.
2825 *
2826 * @param pvObj The identifier returned by SUPR0ObjRegister().
2827 * @param pSession The session which is referencing the object.
2828 */
2829SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2830{
2831 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2832 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2833 int rc = VERR_INVALID_PARAMETER;
2834 PSUPDRVUSAGE pUsage;
2835 PSUPDRVUSAGE pUsagePrev;
2836
2837 /*
2838 * Validate the input.
2839 */
2840 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2841 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2842 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2843 VERR_INVALID_PARAMETER);
2844
2845 /*
2846 * Acquire the spinlock and look for the usage record.
2847 */
2848 RTSpinlockAcquire(pDevExt->Spinlock);
2849
2850 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2851 pUsage;
2852 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2853 {
2854 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2855 if (pUsage->pObj == pObj)
2856 {
2857 rc = VINF_SUCCESS;
2858 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2859 if (pUsage->cUsage > 1)
2860 {
2861 pObj->cUsage--;
2862 pUsage->cUsage--;
2863 }
2864 else
2865 {
2866 /*
2867 * Free the session record.
2868 */
2869 if (pUsagePrev)
2870 pUsagePrev->pNext = pUsage->pNext;
2871 else
2872 pSession->pUsage = pUsage->pNext;
2873 pUsage->pNext = pDevExt->pUsageFree;
2874 pDevExt->pUsageFree = pUsage;
2875
2876 /* What about the object? */
2877 if (pObj->cUsage > 1)
2878 pObj->cUsage--;
2879 else
2880 {
2881 /*
2882 * Object is to be destroyed, unlink it.
2883 */
2884 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2885 rc = VINF_OBJECT_DESTROYED;
2886 if (pDevExt->pObjs == pObj)
2887 pDevExt->pObjs = pObj->pNext;
2888 else
2889 {
2890 PSUPDRVOBJ pObjPrev;
2891 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2892 if (pObjPrev->pNext == pObj)
2893 {
2894 pObjPrev->pNext = pObj->pNext;
2895 break;
2896 }
2897 Assert(pObjPrev);
2898 }
2899 }
2900 }
2901 break;
2902 }
2903 }
2904
2905 RTSpinlockRelease(pDevExt->Spinlock);
2906
2907 /*
2908 * Call the destructor and free the object if required.
2909 */
2910 if (rc == VINF_OBJECT_DESTROYED)
2911 {
2912 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2913 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2914 if (pObj->pfnDestructor)
2915 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2916 RTMemFree(pObj);
2917 }
2918
2919 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2920 return rc;
2921}
2922
2923
2924/**
2925 * Verifies that the current process can access the specified object.
2926 *
2927 * @returns The following IPRT status code:
2928 * @retval VINF_SUCCESS if access was granted.
2929 * @retval VERR_PERMISSION_DENIED if denied access.
2930 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2931 *
2932 * @param pvObj The identifier returned by SUPR0ObjRegister().
2933 * @param pSession The session which wishes to access the object.
2934 * @param pszObjName Object string name. This is optional and depends on the object type.
2935 *
2936 * @remark The caller is responsible for making sure the object isn't removed while
2937 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2938 */
2939SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2940{
2941 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2942 int rc;
2943
2944 /*
2945 * Validate the input.
2946 */
2947 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2948 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2949 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2950 VERR_INVALID_PARAMETER);
2951
2952 /*
2953 * Check access. (returns true if a decision has been made.)
2954 */
2955 rc = VERR_INTERNAL_ERROR;
2956 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2957 return rc;
2958
2959 /*
2960 * Default policy is to allow the user to access his own
2961 * stuff but nothing else.
2962 */
2963 if (pObj->CreatorUid == pSession->Uid)
2964 return VINF_SUCCESS;
2965 return VERR_PERMISSION_DENIED;
2966}
2967
2968
2969/**
2970 * Lock pages.
2971 *
2972 * @returns IPRT status code.
2973 * @param pSession Session to which the locked memory should be associated.
2974 * @param pvR3 Start of the memory range to lock.
2975 * This must be page aligned.
2976 * @param cPages Number of pages to lock.
2977 * @param paPages Where to put the physical addresses of locked memory.
2978 */
2979SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2980{
2981 int rc;
2982 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2983 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2984 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2985
2986 /*
2987 * Verify input.
2988 */
2989 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2990 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2991 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2992 || !pvR3)
2993 {
2994 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2995 return VERR_INVALID_PARAMETER;
2996 }
2997
2998 /*
2999 * Let IPRT do the job.
3000 */
3001 Mem.eType = MEMREF_TYPE_LOCKED;
3002 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
3003 if (RT_SUCCESS(rc))
3004 {
3005 uint32_t iPage = cPages;
3006 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3007 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3008
3009 while (iPage-- > 0)
3010 {
3011 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3012 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3013 {
3014 AssertMsgFailed(("iPage=%d\n", iPage));
3015 rc = VERR_INTERNAL_ERROR;
3016 break;
3017 }
3018 }
3019 if (RT_SUCCESS(rc))
3020 rc = supdrvMemAdd(&Mem, pSession);
3021 if (RT_FAILURE(rc))
3022 {
3023 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3024 AssertRC(rc2);
3025 }
3026 }
3027
3028 return rc;
3029}
3030
3031
3032/**
3033 * Unlocks the memory pointed to by pv.
3034 *
3035 * @returns IPRT status code.
3036 * @param pSession Session to which the memory was locked.
3037 * @param pvR3 Memory to unlock.
3038 */
3039SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3040{
3041 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3042 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3043 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3044}
3045
3046
3047/**
3048 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3049 * backing.
3050 *
3051 * @returns IPRT status code.
3052 * @param pSession Session data.
3053 * @param cPages Number of pages to allocate.
3054 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3055 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3056 * @param pHCPhys Where to put the physical address of allocated memory.
3057 */
3058SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3059{
3060 int rc;
3061 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3062 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3063
3064 /*
3065 * Validate input.
3066 */
3067 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3068 if (!ppvR3 || !ppvR0 || !pHCPhys)
3069 {
3070 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3071 pSession, ppvR0, ppvR3, pHCPhys));
3072 return VERR_INVALID_PARAMETER;
3073
3074 }
3075 if (cPages < 1 || cPages >= 256)
3076 {
3077 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3078 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3079 }
3080
3081 /*
3082 * Let IPRT do the job.
3083 */
3084 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3085 if (RT_SUCCESS(rc))
3086 {
3087 int rc2;
3088 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3089 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3090 if (RT_SUCCESS(rc))
3091 {
3092 Mem.eType = MEMREF_TYPE_CONT;
3093 rc = supdrvMemAdd(&Mem, pSession);
3094 if (!rc)
3095 {
3096 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3097 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3098 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3099 return 0;
3100 }
3101
3102 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3103 AssertRC(rc2);
3104 }
3105 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3106 AssertRC(rc2);
3107 }
3108
3109 return rc;
3110}
3111
3112
3113/**
3114 * Frees memory allocated using SUPR0ContAlloc().
3115 *
3116 * @returns IPRT status code.
3117 * @param pSession The session to which the memory was allocated.
3118 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3119 */
3120SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3121{
3122 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3123 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3124 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3125}
3126
3127
3128/**
3129 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3130 *
3131 * The memory isn't zeroed.
3132 *
3133 * @returns IPRT status code.
3134 * @param pSession Session data.
3135 * @param cPages Number of pages to allocate.
3136 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3137 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3138 * @param paPages Where to put the physical addresses of allocated memory.
3139 */
3140SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3141{
3142 unsigned iPage;
3143 int rc;
3144 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3145 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3146
3147 /*
3148 * Validate input.
3149 */
3150 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3151 if (!ppvR3 || !ppvR0 || !paPages)
3152 {
3153 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3154 pSession, ppvR3, ppvR0, paPages));
3155 return VERR_INVALID_PARAMETER;
3156
3157 }
3158 if (cPages < 1 || cPages >= 256)
3159 {
3160 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3161 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3162 }
3163
3164 /*
3165 * Let IPRT do the work.
3166 */
3167 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3168 if (RT_SUCCESS(rc))
3169 {
3170 int rc2;
3171 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3172 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3173 if (RT_SUCCESS(rc))
3174 {
3175 Mem.eType = MEMREF_TYPE_LOW;
3176 rc = supdrvMemAdd(&Mem, pSession);
3177 if (!rc)
3178 {
3179 for (iPage = 0; iPage < cPages; iPage++)
3180 {
3181 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3182 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3183 }
3184 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3185 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3186 return 0;
3187 }
3188
3189 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3190 AssertRC(rc2);
3191 }
3192
3193 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3194 AssertRC(rc2);
3195 }
3196
3197 return rc;
3198}
3199
3200
3201/**
3202 * Frees memory allocated using SUPR0LowAlloc().
3203 *
3204 * @returns IPRT status code.
3205 * @param pSession The session to which the memory was allocated.
3206 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3207 */
3208SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3209{
3210 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3211 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3212 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3213}
3214
3215
3216
3217/**
3218 * Allocates a chunk of memory with both R0 and R3 mappings.
3219 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3220 *
3221 * @returns IPRT status code.
3222 * @param pSession The session to associated the allocation with.
3223 * @param cb Number of bytes to allocate.
3224 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3225 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3226 */
3227SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3228{
3229 int rc;
3230 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3231 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3232
3233 /*
3234 * Validate input.
3235 */
3236 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3237 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3238 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3239 if (cb < 1 || cb >= _4M)
3240 {
3241 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3242 return VERR_INVALID_PARAMETER;
3243 }
3244
3245 /*
3246 * Let IPRT do the work.
3247 */
3248 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3249 if (RT_SUCCESS(rc))
3250 {
3251 int rc2;
3252 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3253 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3254 if (RT_SUCCESS(rc))
3255 {
3256 Mem.eType = MEMREF_TYPE_MEM;
3257 rc = supdrvMemAdd(&Mem, pSession);
3258 if (!rc)
3259 {
3260 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3261 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3262 return VINF_SUCCESS;
3263 }
3264
3265 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3266 AssertRC(rc2);
3267 }
3268
3269 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3270 AssertRC(rc2);
3271 }
3272
3273 return rc;
3274}
3275
3276
3277/**
3278 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3279 *
3280 * @returns IPRT status code.
3281 * @param pSession The session to which the memory was allocated.
3282 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3283 * @param paPages Where to store the physical addresses.
3284 */
3285SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3286{
3287 PSUPDRVBUNDLE pBundle;
3288 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3289
3290 /*
3291 * Validate input.
3292 */
3293 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3294 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3295 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3296
3297 /*
3298 * Search for the address.
3299 */
3300 RTSpinlockAcquire(pSession->Spinlock);
3301 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3302 {
3303 if (pBundle->cUsed > 0)
3304 {
3305 unsigned i;
3306 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3307 {
3308 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3309 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3310 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3311 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3312 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3313 )
3314 )
3315 {
3316 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3317 size_t iPage;
3318 for (iPage = 0; iPage < cPages; iPage++)
3319 {
3320 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3321 paPages[iPage].uReserved = 0;
3322 }
3323 RTSpinlockRelease(pSession->Spinlock);
3324 return VINF_SUCCESS;
3325 }
3326 }
3327 }
3328 }
3329 RTSpinlockRelease(pSession->Spinlock);
3330 Log(("Failed to find %p!!!\n", (void *)uPtr));
3331 return VERR_INVALID_PARAMETER;
3332}
3333
3334
3335/**
3336 * Free memory allocated by SUPR0MemAlloc().
3337 *
3338 * @returns IPRT status code.
3339 * @param pSession The session owning the allocation.
3340 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3341 */
3342SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3343{
3344 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3345 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3346 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3347}
3348
3349
3350/**
3351 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3352 *
3353 * The memory is fixed and it's possible to query the physical addresses using
3354 * SUPR0MemGetPhys().
3355 *
3356 * @returns IPRT status code.
3357 * @param pSession The session to associated the allocation with.
3358 * @param cPages The number of pages to allocate.
3359 * @param fFlags Flags, reserved for the future. Must be zero.
3360 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3361 * NULL if no ring-3 mapping.
3362 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3363 * NULL if no ring-0 mapping.
3364 * @param paPages Where to store the addresses of the pages. Optional.
3365 */
3366SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3367{
3368 int rc;
3369 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3370 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3371
3372 /*
3373 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3374 */
3375 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3376 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3377 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3378 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3379 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3380 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3381 {
3382 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3383 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3384 }
3385
3386 /*
3387 * Let IPRT do the work.
3388 */
3389 if (ppvR0)
3390 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3391 else
3392 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3393 if (RT_SUCCESS(rc))
3394 {
3395 int rc2;
3396 if (ppvR3)
3397 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3398 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3399 else
3400 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3401 if (RT_SUCCESS(rc))
3402 {
3403 Mem.eType = MEMREF_TYPE_PAGE;
3404 rc = supdrvMemAdd(&Mem, pSession);
3405 if (!rc)
3406 {
3407 if (ppvR3)
3408 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3409 if (ppvR0)
3410 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3411 if (paPages)
3412 {
3413 uint32_t iPage = cPages;
3414 while (iPage-- > 0)
3415 {
3416 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3417 Assert(paPages[iPage] != NIL_RTHCPHYS);
3418 }
3419 }
3420 return VINF_SUCCESS;
3421 }
3422
3423 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3424 AssertRC(rc2);
3425 }
3426
3427 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3428 AssertRC(rc2);
3429 }
3430 return rc;
3431}
3432
3433
3434/**
3435 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3436 * space.
3437 *
3438 * @returns IPRT status code.
3439 * @param pSession The session to associated the allocation with.
3440 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3441 * @param offSub Where to start mapping. Must be page aligned.
3442 * @param cbSub How much to map. Must be page aligned.
3443 * @param fFlags Flags, MBZ.
3444 * @param ppvR0 Where to return the address of the ring-0 mapping on
3445 * success.
3446 */
3447SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3448 uint32_t fFlags, PRTR0PTR ppvR0)
3449{
3450 int rc;
3451 PSUPDRVBUNDLE pBundle;
3452 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3453 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3454
3455 /*
3456 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3457 */
3458 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3459 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3460 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3461 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3462 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3463 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3464
3465 /*
3466 * Find the memory object.
3467 */
3468 RTSpinlockAcquire(pSession->Spinlock);
3469 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3470 {
3471 if (pBundle->cUsed > 0)
3472 {
3473 unsigned i;
3474 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3475 {
3476 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3477 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3478 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3479 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3480 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3481 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3482 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3483 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3484 {
3485 hMemObj = pBundle->aMem[i].MemObj;
3486 break;
3487 }
3488 }
3489 }
3490 }
3491 RTSpinlockRelease(pSession->Spinlock);
3492
3493 rc = VERR_INVALID_PARAMETER;
3494 if (hMemObj != NIL_RTR0MEMOBJ)
3495 {
3496 /*
3497 * Do some further input validations before calling IPRT.
3498 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3499 */
3500 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3501 if ( offSub < cbMemObj
3502 && cbSub <= cbMemObj
3503 && offSub + cbSub <= cbMemObj)
3504 {
3505 RTR0MEMOBJ hMapObj;
3506 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3507 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3508 if (RT_SUCCESS(rc))
3509 *ppvR0 = RTR0MemObjAddress(hMapObj);
3510 }
3511 else
3512 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3513
3514 }
3515 return rc;
3516}
3517
3518
3519/**
3520 * Changes the page level protection of one or more pages previously allocated
3521 * by SUPR0PageAllocEx.
3522 *
3523 * @returns IPRT status code.
3524 * @param pSession The session to associated the allocation with.
3525 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3526 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3527 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3528 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3529 * @param offSub Where to start changing. Must be page aligned.
3530 * @param cbSub How much to change. Must be page aligned.
3531 * @param fProt The new page level protection, see RTMEM_PROT_*.
3532 */
3533SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3534{
3535 int rc;
3536 PSUPDRVBUNDLE pBundle;
3537 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3538 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3539 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3540
3541 /*
3542 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3543 */
3544 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3545 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3546 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3547 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3548 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3549
3550 /*
3551 * Find the memory object.
3552 */
3553 RTSpinlockAcquire(pSession->Spinlock);
3554 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3555 {
3556 if (pBundle->cUsed > 0)
3557 {
3558 unsigned i;
3559 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3560 {
3561 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3562 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3563 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3564 || pvR3 == NIL_RTR3PTR)
3565 && ( pvR0 == NIL_RTR0PTR
3566 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3567 && ( pvR3 == NIL_RTR3PTR
3568 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3569 {
3570 if (pvR0 != NIL_RTR0PTR)
3571 hMemObjR0 = pBundle->aMem[i].MemObj;
3572 if (pvR3 != NIL_RTR3PTR)
3573 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3574 break;
3575 }
3576 }
3577 }
3578 }
3579 RTSpinlockRelease(pSession->Spinlock);
3580
3581 rc = VERR_INVALID_PARAMETER;
3582 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3583 || hMemObjR3 != NIL_RTR0MEMOBJ)
3584 {
3585 /*
3586 * Do some further input validations before calling IPRT.
3587 */
3588 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3589 if ( offSub < cbMemObj
3590 && cbSub <= cbMemObj
3591 && offSub + cbSub <= cbMemObj)
3592 {
3593 rc = VINF_SUCCESS;
3594 if (hMemObjR3 != NIL_RTR0PTR)
3595 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3596 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3597 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3598 }
3599 else
3600 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3601
3602 }
3603 return rc;
3604
3605}
3606
3607
3608/**
3609 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3610 *
3611 * @returns IPRT status code.
3612 * @param pSession The session owning the allocation.
3613 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3614 * SUPR0PageAllocEx().
3615 */
3616SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3617{
3618 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3619 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3620 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3621}
3622
3623
3624/**
3625 * Gets the paging mode of the current CPU.
3626 *
3627 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3628 */
3629SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3630{
3631 SUPPAGINGMODE enmMode;
3632
3633 RTR0UINTREG cr0 = ASMGetCR0();
3634 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3635 enmMode = SUPPAGINGMODE_INVALID;
3636 else
3637 {
3638 RTR0UINTREG cr4 = ASMGetCR4();
3639 uint32_t fNXEPlusLMA = 0;
3640 if (cr4 & X86_CR4_PAE)
3641 {
3642 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3643 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3644 {
3645 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3646 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3647 fNXEPlusLMA |= RT_BIT(0);
3648 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3649 fNXEPlusLMA |= RT_BIT(1);
3650 }
3651 }
3652
3653 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3654 {
3655 case 0:
3656 enmMode = SUPPAGINGMODE_32_BIT;
3657 break;
3658
3659 case X86_CR4_PGE:
3660 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3661 break;
3662
3663 case X86_CR4_PAE:
3664 enmMode = SUPPAGINGMODE_PAE;
3665 break;
3666
3667 case X86_CR4_PAE | RT_BIT(0):
3668 enmMode = SUPPAGINGMODE_PAE_NX;
3669 break;
3670
3671 case X86_CR4_PAE | X86_CR4_PGE:
3672 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3673 break;
3674
3675 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3676 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3677 break;
3678
3679 case RT_BIT(1) | X86_CR4_PAE:
3680 enmMode = SUPPAGINGMODE_AMD64;
3681 break;
3682
3683 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3684 enmMode = SUPPAGINGMODE_AMD64_NX;
3685 break;
3686
3687 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3688 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3689 break;
3690
3691 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3692 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3693 break;
3694
3695 default:
3696 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3697 enmMode = SUPPAGINGMODE_INVALID;
3698 break;
3699 }
3700 }
3701 return enmMode;
3702}
3703
3704
3705/**
3706 * Enables or disabled hardware virtualization extensions using native OS APIs.
3707 *
3708 * @returns VBox status code.
3709 * @retval VINF_SUCCESS on success.
3710 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3711 *
3712 * @param fEnable Whether to enable or disable.
3713 */
3714SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3715{
3716#ifdef RT_OS_DARWIN
3717 return supdrvOSEnableVTx(fEnable);
3718#else
3719 return VERR_NOT_SUPPORTED;
3720#endif
3721}
3722
3723
3724/**
3725 * Suspends hardware virtualization extensions using the native OS API.
3726 *
3727 * This is called prior to entering raw-mode context.
3728 *
3729 * @returns @c true if suspended, @c false if not.
3730 */
3731SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3732{
3733#ifdef RT_OS_DARWIN
3734 return supdrvOSSuspendVTxOnCpu();
3735#else
3736 return false;
3737#endif
3738}
3739
3740
3741/**
3742 * Resumes hardware virtualization extensions using the native OS API.
3743 *
3744 * This is called after to entering raw-mode context.
3745 *
3746 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3747 */
3748SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3749{
3750#ifdef RT_OS_DARWIN
3751 supdrvOSResumeVTxOnCpu(fSuspended);
3752#else
3753 Assert(!fSuspended);
3754#endif
3755}
3756
3757
/**
 * Queries the AMD-V and VT-x capabilities of the calling CPU.
 *
 * The whole CPUID / MSR inspection runs with preemption disabled.  For Intel
 * and VIA (Centaur) CPUs the IA32_FEATURE_CONTROL MSR is examined and, if it
 * is not locked yet, written with the LOCK, VMXON and SMX_VMXON bits set and
 * then re-read for verification.  For AMD CPUs the VM_CR MSR is examined.
 * On success the SUPVTCAPS_VT_X or SUPVTCAPS_AMD_V bit is set in @a *pfCaps,
 * plus SUPVTCAPS_NESTED_PAGING when the CPU reports EPT / nested paging.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if VT-x or AMD-V is usable, @a *pfCaps is set.
 * @retval  VERR_INVALID_PARAMETER if the session is not valid.
 * @retval  VERR_INVALID_POINTER if @a pfCaps is not a valid pointer.
 * @retval  VERR_VMX_NO_VMX
 * @retval  VERR_VMX_MSR_ALL_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_LOCKING_FAILED
 * @retval  VERR_SVM_NO_SVM
 * @retval  VERR_SVM_DISABLED
 * @retval  VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
 *          (centaur) CPU.  This is also the result when the CPU has no CPUID
 *          instruction.
 *
 * @param   pSession        The session handle.
 * @param   pfCaps          Where to store the capabilities.  Set to zero as
 *                          soon as the input has been validated.
 */
SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
{
    int  rc                  = VERR_UNSUPPORTED_CPU;
    bool fIsSmxModeAmbiguous = false;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    /*
     * Input validation.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);

    *pfCaps = 0;
    /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    RTThreadPreemptDisable(&PreemptState);
    if (ASMHasCpuId())
    {
        uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
        uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;

        /* Leaf 0 gives the max standard leaf and the vendor, leaf 1 the feature bits. */
        ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
        ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);

        /*
         * Intel and VIA (Centaur): VT-x.
         */
        if (   ASMIsValidStdRange(uMaxId)
            && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
                || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
           )
        {
            /* VMX needs to be advertised together with MSR and FXSR support. */
            if (    (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /** @todo Unify code with hmR0InitIntelCpu(). */
                uint64_t   u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                bool const fMaybeSmxMode  = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
                bool       fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                bool       fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                bool       fVmxAllowed    = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);

                /* Check if the LOCK bit is set but excludes the required VMXON bit. */
                if (fMsrLocked)
                {
                    if (fVmxAllowed && fSmxVmxAllowed)
                        rc = VINF_SUCCESS;
                    else if (!fVmxAllowed && !fSmxVmxAllowed)
                        rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
                    else if (!fMaybeSmxMode)
                    {
                        /* Exactly one of the two bits is set and CR4.SMXE is clear:
                           only the plain VMXON bit counts. */
                        if (fVmxAllowed)
                            rc = VINF_SUCCESS;
                        else
                            rc = VERR_VMX_MSR_VMXON_DISABLED;
                    }
                    else
                    {
                        /*
                         * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
                         * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
                         * See @bugref{6873}.
                         */
                        Assert(fMaybeSmxMode == true);
                        fIsSmxModeAmbiguous = true;
                        rc = VINF_SUCCESS;
                    }
                }
                else
                {
                    /*
                     * MSR is not yet locked; we can change it ourselves here.
                     * Once the lock bit is set, this MSR can no longer be modified.
                     *
                     * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
                     * accurately. See @bugref{6873}.
                     */
                    u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
                                | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                                | MSR_IA32_FEATURE_CONTROL_VMXON;
                    ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);

                    /* Verify. */
                    u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
                    fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
                    fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
                    fVmxAllowed    = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
                    if (fSmxVmxAllowed && fVmxAllowed)
                        rc = VINF_SUCCESS;
                    else
                        rc = VERR_VMX_MSR_LOCKING_FAILED;
                }

                if (rc == VINF_SUCCESS)
                {
                    VMXCAPABILITY vtCaps;

                    *pfCaps |= SUPVTCAPS_VT_X;

                    /* Nested paging is reported when the secondary processor based
                       controls are available and allow the EPT control to be set. */
                    vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
                    if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
                    {
                        vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
                        if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
                            *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                    }
                }
            }
            else
                rc = VERR_VMX_NO_VMX;
        }
        /*
         * AMD: AMD-V (SVM).
         */
        else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
                 && ASMIsValidStdRange(uMaxId))
        {
            uint32_t fExtFeaturesEcx, uExtMaxId;
            ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
            ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
            /* Extended leaf 0x8000000a (queried below) must exist, and SVM must be
               advertised together with MSR and FXSR support. */
            if (   ASMIsValidExtRange(uExtMaxId)
                && uExtMaxId >= 0x8000000a
                && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /* Check if SVM is disabled */
                uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
                if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
                {
                    uint32_t fSvmFeatures;
                    *pfCaps |= SUPVTCAPS_AMD_V;

                    /* Query AMD-V features. */
                    ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                        *pfCaps |= SUPVTCAPS_NESTED_PAGING;

                    rc = VINF_SUCCESS;
                }
                else
                    rc = VERR_SVM_DISABLED;
            }
            else
                rc = VERR_SVM_NO_SVM;
        }
    }

    RTThreadPreemptRestore(&PreemptState);
    /* The warning is only printed after preemption has been restored. */
    if (fIsSmxModeAmbiguous)
        SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    return rc;
}
3923
3924
3925/**
3926 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3927 * updating.
3928 *
3929 * @param pGipCpu The per CPU structure for this CPU.
3930 * @param u64NanoTS The current time.
3931 */
3932static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3933{
3934 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3935 pGipCpu->u64NanoTS = u64NanoTS;
3936}
3937
3938
3939/**
3940 * Set the current TSC and NanoTS value for the CPU.
3941 *
3942 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3943 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3944 * @param pvUser2 Pointer to the variable holding the current time.
3945 */
3946static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3947{
3948 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3949 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3950
3951 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3952 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3953
3954 NOREF(pvUser2);
3955 NOREF(idCpu);
3956}
3957
3958
3959/**
3960 * Maps the GIP into userspace and/or get the physical address of the GIP.
3961 *
3962 * @returns IPRT status code.
3963 * @param pSession Session to which the GIP mapping should belong.
3964 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3965 * @param pHCPhysGip Where to store the physical address. (optional)
3966 *
3967 * @remark There is no reference counting on the mapping, so one call to this function
3968 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3969 * and remove the session as a GIP user.
3970 */
3971SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3972{
3973 int rc;
3974 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3975 RTR3PTR pGipR3 = NIL_RTR3PTR;
3976 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3977 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3978
3979 /*
3980 * Validate
3981 */
3982 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3983 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
3984 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
3985
3986#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3987 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
3988#else
3989 RTSemFastMutexRequest(pDevExt->mtxGip);
3990#endif
3991 if (pDevExt->pGip)
3992 {
3993 /*
3994 * Map it?
3995 */
3996 rc = VINF_SUCCESS;
3997 if (ppGipR3)
3998 {
3999 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4000 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4001 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4002 if (RT_SUCCESS(rc))
4003 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4004 }
4005
4006 /*
4007 * Get physical address.
4008 */
4009 if (pHCPhysGip && RT_SUCCESS(rc))
4010 HCPhys = pDevExt->HCPhysGip;
4011
4012 /*
4013 * Reference globally.
4014 */
4015 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4016 {
4017 pSession->fGipReferenced = 1;
4018 pDevExt->cGipUsers++;
4019 if (pDevExt->cGipUsers == 1)
4020 {
4021 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4022 uint64_t u64NanoTS;
4023 uint32_t u32SystemResolution;
4024 unsigned i;
4025
4026 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4027
4028 /*
4029 * Try bump up the system timer resolution.
4030 * The more interrupts the better...
4031 */
4032 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
4033 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
4034 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
4035 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
4036 )
4037 {
4038 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
4039 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
4040 }
4041
4042 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4043 {
4044 for (i = 0; i < pGipR0->cCpus; i++)
4045 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4046 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4047 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4048 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4049 }
4050
4051 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4052 if ( pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4053 || RTMpGetOnlineCount() == 1)
4054 supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
4055 else
4056 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4057
4058#ifndef DO_NOT_START_GIP
4059 rc = RTTimerStart(pDevExt->pGipTimer, 0); AssertRC(rc);
4060#endif
4061 rc = VINF_SUCCESS;
4062 }
4063 }
4064 }
4065 else
4066 {
4067 rc = VERR_GENERAL_FAILURE;
4068 Log(("SUPR0GipMap: GIP is not available!\n"));
4069 }
4070#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4071 RTSemMutexRelease(pDevExt->mtxGip);
4072#else
4073 RTSemFastMutexRelease(pDevExt->mtxGip);
4074#endif
4075
4076 /*
4077 * Write returns.
4078 */
4079 if (pHCPhysGip)
4080 *pHCPhysGip = HCPhys;
4081 if (ppGipR3)
4082 *ppGipR3 = pGipR3;
4083
4084#ifdef DEBUG_DARWIN_GIP
4085 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4086#else
4087 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4088#endif
4089 return rc;
4090}
4091
4092
4093/**
4094 * Unmaps any user mapping of the GIP and terminates all GIP access
4095 * from this session.
4096 *
4097 * @returns IPRT status code.
4098 * @param pSession Session to which the GIP mapping should belong.
4099 */
4100SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4101{
4102 int rc = VINF_SUCCESS;
4103 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4104#ifdef DEBUG_DARWIN_GIP
4105 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4106 pSession,
4107 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4108 pSession->GipMapObjR3));
4109#else
4110 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4111#endif
4112 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4113
4114#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4115 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4116#else
4117 RTSemFastMutexRequest(pDevExt->mtxGip);
4118#endif
4119
4120 /*
4121 * Unmap anything?
4122 */
4123 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4124 {
4125 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4126 AssertRC(rc);
4127 if (RT_SUCCESS(rc))
4128 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4129 }
4130
4131 /*
4132 * Dereference global GIP.
4133 */
4134 if (pSession->fGipReferenced && !rc)
4135 {
4136 pSession->fGipReferenced = 0;
4137 if ( pDevExt->cGipUsers > 0
4138 && !--pDevExt->cGipUsers)
4139 {
4140 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4141#ifndef DO_NOT_START_GIP
4142 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4143#endif
4144
4145 if (pDevExt->u32SystemTimerGranularityGrant)
4146 {
4147 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4148 AssertRC(rc2);
4149 pDevExt->u32SystemTimerGranularityGrant = 0;
4150 }
4151 }
4152 }
4153
4154#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4155 RTSemMutexRelease(pDevExt->mtxGip);
4156#else
4157 RTSemFastMutexRelease(pDevExt->mtxGip);
4158#endif
4159
4160 return rc;
4161}
4162
4163
4164/**
4165 * Gets the GIP pointer.
4166 *
4167 * @returns Pointer to the GIP or NULL.
4168 */
4169SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4170{
4171 return g_pSUPGlobalInfoPage;
4172}
4173
4174
4175/**
4176 * Register a component factory with the support driver.
4177 *
4178 * This is currently restricted to kernel sessions only.
4179 *
4180 * @returns VBox status code.
4181 * @retval VINF_SUCCESS on success.
4182 * @retval VERR_NO_MEMORY if we're out of memory.
4183 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4184 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4185 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4186 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4187 *
4188 * @param pSession The SUPDRV session (must be a ring-0 session).
4189 * @param pFactory Pointer to the component factory registration structure.
4190 *
4191 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4192 */
4193SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4194{
4195 PSUPDRVFACTORYREG pNewReg;
4196 const char *psz;
4197 int rc;
4198
4199 /*
4200 * Validate parameters.
4201 */
4202 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4203 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4204 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4205 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4206 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4207 AssertReturn(psz, VERR_INVALID_PARAMETER);
4208
4209 /*
4210 * Allocate and initialize a new registration structure.
4211 */
4212 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4213 if (pNewReg)
4214 {
4215 pNewReg->pNext = NULL;
4216 pNewReg->pFactory = pFactory;
4217 pNewReg->pSession = pSession;
4218 pNewReg->cchName = psz - &pFactory->szName[0];
4219
4220 /*
4221 * Add it to the tail of the list after checking for prior registration.
4222 */
4223 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4224 if (RT_SUCCESS(rc))
4225 {
4226 PSUPDRVFACTORYREG pPrev = NULL;
4227 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4228 while (pCur && pCur->pFactory != pFactory)
4229 {
4230 pPrev = pCur;
4231 pCur = pCur->pNext;
4232 }
4233 if (!pCur)
4234 {
4235 if (pPrev)
4236 pPrev->pNext = pNewReg;
4237 else
4238 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4239 rc = VINF_SUCCESS;
4240 }
4241 else
4242 rc = VERR_ALREADY_EXISTS;
4243
4244 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4245 }
4246
4247 if (RT_FAILURE(rc))
4248 RTMemFree(pNewReg);
4249 }
4250 else
4251 rc = VERR_NO_MEMORY;
4252 return rc;
4253}
4254
4255
4256/**
4257 * Deregister a component factory.
4258 *
4259 * @returns VBox status code.
4260 * @retval VINF_SUCCESS on success.
4261 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4262 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4263 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4264 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4265 *
4266 * @param pSession The SUPDRV session (must be a ring-0 session).
4267 * @param pFactory Pointer to the component factory registration structure
4268 * previously passed SUPR0ComponentRegisterFactory().
4269 *
4270 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4271 */
4272SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4273{
4274 int rc;
4275
4276 /*
4277 * Validate parameters.
4278 */
4279 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4280 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4281 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4282
4283 /*
4284 * Take the lock and look for the registration record.
4285 */
4286 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4287 if (RT_SUCCESS(rc))
4288 {
4289 PSUPDRVFACTORYREG pPrev = NULL;
4290 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4291 while (pCur && pCur->pFactory != pFactory)
4292 {
4293 pPrev = pCur;
4294 pCur = pCur->pNext;
4295 }
4296 if (pCur)
4297 {
4298 if (!pPrev)
4299 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4300 else
4301 pPrev->pNext = pCur->pNext;
4302
4303 pCur->pNext = NULL;
4304 pCur->pFactory = NULL;
4305 pCur->pSession = NULL;
4306 rc = VINF_SUCCESS;
4307 }
4308 else
4309 rc = VERR_NOT_FOUND;
4310
4311 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4312
4313 RTMemFree(pCur);
4314 }
4315 return rc;
4316}
4317
4318
4319/**
4320 * Queries a component factory.
4321 *
4322 * @returns VBox status code.
4323 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4324 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4325 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4326 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4327 *
4328 * @param pSession The SUPDRV session.
4329 * @param pszName The name of the component factory.
4330 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4331 * @param ppvFactoryIf Where to store the factory interface.
4332 */
4333SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4334{
4335 const char *pszEnd;
4336 size_t cchName;
4337 int rc;
4338
4339 /*
4340 * Validate parameters.
4341 */
4342 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4343
4344 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4345 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4346 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4347 cchName = pszEnd - pszName;
4348
4349 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4350 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4351 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4352
4353 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4354 *ppvFactoryIf = NULL;
4355
4356 /*
4357 * Take the lock and try all factories by this name.
4358 */
4359 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4360 if (RT_SUCCESS(rc))
4361 {
4362 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4363 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4364 while (pCur)
4365 {
4366 if ( pCur->cchName == cchName
4367 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4368 {
4369 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4370 if (pvFactory)
4371 {
4372 *ppvFactoryIf = pvFactory;
4373 rc = VINF_SUCCESS;
4374 break;
4375 }
4376 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4377 }
4378
4379 /* next */
4380 pCur = pCur->pNext;
4381 }
4382
4383 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4384 }
4385 return rc;
4386}
4387
4388
4389/**
4390 * Adds a memory object to the session.
4391 *
4392 * @returns IPRT status code.
4393 * @param pMem Memory tracking structure containing the
4394 * information to track.
4395 * @param pSession The session.
4396 */
4397static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4398{
4399 PSUPDRVBUNDLE pBundle;
4400
4401 /*
4402 * Find free entry and record the allocation.
4403 */
4404 RTSpinlockAcquire(pSession->Spinlock);
4405 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4406 {
4407 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4408 {
4409 unsigned i;
4410 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4411 {
4412 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4413 {
4414 pBundle->cUsed++;
4415 pBundle->aMem[i] = *pMem;
4416 RTSpinlockRelease(pSession->Spinlock);
4417 return VINF_SUCCESS;
4418 }
4419 }
4420 AssertFailed(); /* !!this can't be happening!!! */
4421 }
4422 }
4423 RTSpinlockRelease(pSession->Spinlock);
4424
4425 /*
4426 * Need to allocate a new bundle.
4427 * Insert into the last entry in the bundle.
4428 */
4429 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4430 if (!pBundle)
4431 return VERR_NO_MEMORY;
4432
4433 /* take last entry. */
4434 pBundle->cUsed++;
4435 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4436
4437 /* insert into list. */
4438 RTSpinlockAcquire(pSession->Spinlock);
4439 pBundle->pNext = pSession->Bundle.pNext;
4440 pSession->Bundle.pNext = pBundle;
4441 RTSpinlockRelease(pSession->Spinlock);
4442
4443 return VINF_SUCCESS;
4444}
4445
4446
4447/**
4448 * Releases a memory object referenced by pointer and type.
4449 *
4450 * @returns IPRT status code.
4451 * @param pSession Session data.
4452 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4453 * @param eType Memory type.
4454 */
4455static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4456{
4457 PSUPDRVBUNDLE pBundle;
4458
4459 /*
4460 * Validate input.
4461 */
4462 if (!uPtr)
4463 {
4464 Log(("Illegal address %p\n", (void *)uPtr));
4465 return VERR_INVALID_PARAMETER;
4466 }
4467
4468 /*
4469 * Search for the address.
4470 */
4471 RTSpinlockAcquire(pSession->Spinlock);
4472 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4473 {
4474 if (pBundle->cUsed > 0)
4475 {
4476 unsigned i;
4477 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4478 {
4479 if ( pBundle->aMem[i].eType == eType
4480 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4481 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4482 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4483 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4484 )
4485 {
4486 /* Make a copy of it and release it outside the spinlock. */
4487 SUPDRVMEMREF Mem = pBundle->aMem[i];
4488 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4489 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4490 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4491 RTSpinlockRelease(pSession->Spinlock);
4492
4493 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4494 {
4495 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4496 AssertRC(rc); /** @todo figure out how to handle this. */
4497 }
4498 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4499 {
4500 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4501 AssertRC(rc); /** @todo figure out how to handle this. */
4502 }
4503 return VINF_SUCCESS;
4504 }
4505 }
4506 }
4507 }
4508 RTSpinlockRelease(pSession->Spinlock);
4509 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4510 return VERR_INVALID_PARAMETER;
4511}
4512
4513
4514/**
4515 * Opens an image. If it's the first time it's opened the call must upload
4516 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4517 *
4518 * This is the 1st step of the loading.
4519 *
4520 * @returns IPRT status code.
4521 * @param pDevExt Device globals.
4522 * @param pSession Session data.
4523 * @param pReq The open request.
4524 */
4525static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4526{
4527 int rc;
4528 PSUPDRVLDRIMAGE pImage;
4529 void *pv;
4530 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4531 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4532
4533 /*
4534 * Check if we got an instance of the image already.
4535 */
4536 supdrvLdrLock(pDevExt);
4537 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4538 {
4539 if ( pImage->szName[cchName] == '\0'
4540 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4541 {
4542 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4543 {
4544 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4545 pImage->cUsage++;
4546 pReq->u.Out.pvImageBase = pImage->pvImage;
4547 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4548 pReq->u.Out.fNativeLoader = pImage->fNative;
4549 supdrvLdrAddUsage(pSession, pImage);
4550 supdrvLdrUnlock(pDevExt);
4551 return VINF_SUCCESS;
4552 }
4553 supdrvLdrUnlock(pDevExt);
4554 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4555 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4556 }
4557 }
4558 /* (not found - add it!) */
4559
4560 /*
4561 * Allocate memory.
4562 */
4563 Assert(cchName < sizeof(pImage->szName));
4564 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4565 if (!pv)
4566 {
4567 supdrvLdrUnlock(pDevExt);
4568 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4569 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4570 }
4571
4572 /*
4573 * Setup and link in the LDR stuff.
4574 */
4575 pImage = (PSUPDRVLDRIMAGE)pv;
4576 pImage->pvImage = NULL;
4577 pImage->pvImageAlloc = NULL;
4578 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4579 pImage->cbImageBits = pReq->u.In.cbImageBits;
4580 pImage->cSymbols = 0;
4581 pImage->paSymbols = NULL;
4582 pImage->pachStrTab = NULL;
4583 pImage->cbStrTab = 0;
4584 pImage->pfnModuleInit = NULL;
4585 pImage->pfnModuleTerm = NULL;
4586 pImage->pfnServiceReqHandler = NULL;
4587 pImage->uState = SUP_IOCTL_LDR_OPEN;
4588 pImage->cUsage = 1;
4589 pImage->pDevExt = pDevExt;
4590 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4591
4592 /*
4593 * Try load it using the native loader, if that isn't supported, fall back
4594 * on the older method.
4595 */
4596 pImage->fNative = true;
4597 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4598 if (rc == VERR_NOT_SUPPORTED)
4599 {
4600 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4601 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4602 pImage->fNative = false;
4603 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4604 }
4605 if (RT_FAILURE(rc))
4606 {
4607 supdrvLdrUnlock(pDevExt);
4608 RTMemFree(pImage);
4609 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4610 return rc;
4611 }
4612 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4613
4614 /*
4615 * Link it.
4616 */
4617 pImage->pNext = pDevExt->pLdrImages;
4618 pDevExt->pLdrImages = pImage;
4619
4620 supdrvLdrAddUsage(pSession, pImage);
4621
4622 pReq->u.Out.pvImageBase = pImage->pvImage;
4623 pReq->u.Out.fNeedsLoading = true;
4624 pReq->u.Out.fNativeLoader = pImage->fNative;
4625 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4626
4627 supdrvLdrUnlock(pDevExt);
4628 return VINF_SUCCESS;
4629}
4630
4631
4632/**
4633 * Worker that validates a pointer to an image entrypoint.
4634 *
4635 * @returns IPRT status code.
4636 * @param pDevExt The device globals.
4637 * @param pImage The loader image.
4638 * @param pv The pointer into the image.
4639 * @param fMayBeNull Whether it may be NULL.
4640 * @param pszWhat What is this entrypoint? (for logging)
4641 * @param pbImageBits The image bits prepared by ring-3.
4642 *
4643 * @remarks Will leave the lock on failure.
4644 */
4645static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4646 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4647{
4648 if (!fMayBeNull || pv)
4649 {
4650 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4651 {
4652 supdrvLdrUnlock(pDevExt);
4653 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4654 return VERR_INVALID_PARAMETER;
4655 }
4656
4657 if (pImage->fNative)
4658 {
4659 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4660 if (RT_FAILURE(rc))
4661 {
4662 supdrvLdrUnlock(pDevExt);
4663 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4664 return rc;
4665 }
4666 }
4667 }
4668 return VINF_SUCCESS;
4669}
4670
4671
/**
 * Loads the image bits.
 *
 * This is the 2nd step of the loading.  The image must have been opened by
 * the calling session and still be in the SUP_IOCTL_LDR_OPEN state.  After
 * validating the entry points the symbol and string tables are copied, the
 * bits are copied (or the native load is completed), the entry points are
 * registered and finally the module init function, if any, is called.  On
 * failure after validation the image is put back into the open state.
 *
 * @returns IPRT status code.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int             rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));

    /*
     * Find the ldr image.  Only images referenced by the calling session are
     * considered; the image base address serves as the handle.
     */
    supdrvLdrLock(pDevExt);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;

    /*
     * Validate input.  The sizes must match what was given when opening.
     */
    if (   pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
        || pImage->cbImageBits     != pReq->u.In.cbImageBits)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
             pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
        return VERR_INVALID_HANDLE;
    }

    /* Anything but the open state means the load step cannot be (re)done. */
    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        supdrvLdrUnlock(pDevExt);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return VERR_ALREADY_LOADED;
    }

    /*
     * Validate the entry points for the requested entry point type.
     * Note: supdrvLdrValidatePointer releases the loader lock on failure,
     *       which is why the failure paths below return without unlocking.
     */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;

        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrValidatePointer(    pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0,          false, pReq->u.In.abImage, "pvVMMR0");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,  false, pReq->u.In.abImage, "pvVMMR0EntryInt");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx,   false, pReq->u.In.abImage, "pvVMMR0EntryEx");
            if (RT_FAILURE(rc))
                return rc;
            break;

        case SUPLDRLOADEP_SERVICE:
            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
            if (RT_FAILURE(rc))
                return rc;
            /* The reserved members must be zero. */
            if (    pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
            {
                supdrvLdrUnlock(pDevExt);
                Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
                     pImage->pvImage, pReq->u.In.cbImageWithTabs,
                     pReq->u.In.EP.Service.apvReserved[0],
                     pReq->u.In.EP.Service.apvReserved[1],
                     pReq->u.In.EP.Service.apvReserved[2]));
                return VERR_INVALID_PARAMETER;
            }
            break;

        default:
            supdrvLdrUnlock(pDevExt);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }

    /* The module init and term functions are optional (may be NULL). */
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
    if (RT_FAILURE(rc))
        return rc;
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
    if (RT_FAILURE(rc))
        return rc;

    /*
     * Allocate and copy the tables.
     * (No need to do try/except as this is a buffered request.)
     */
    pImage->cbStrTab = pReq->u.In.cbStrTab;
    if (pImage->cbStrTab)
    {
        pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
        if (pImage->pachStrTab)
            memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
    }

    pImage->cSymbols = pReq->u.In.cSymbols;
    if (RT_SUCCESS(rc) && pImage->cSymbols)
    {
        size_t  cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
        pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
        if (pImage->paSymbols)
            memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
    }

    /*
     * Copy the bits / complete native loading.
     */
    if (RT_SUCCESS(rc))
    {
        pImage->uState          = SUP_IOCTL_LDR_LOAD;
        pImage->pfnModuleInit   = pReq->u.In.pfnModuleInit;
        pImage->pfnModuleTerm   = pReq->u.In.pfnModuleTerm;

        if (pImage->fNative)
            rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
        else
        {
            memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
            Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
        }
    }

    /*
     * Update any entry points.
     */
    if (RT_SUCCESS(rc))
    {
        switch (pReq->u.In.eEPType)
        {
            default:
            case SUPLDRLOADEP_NOTHING:
                rc = VINF_SUCCESS;
                break;
            case SUPLDRLOADEP_VMMR0:
                rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                          pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
                break;
            case SUPLDRLOADEP_SERVICE:
                pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
                rc = VINF_SUCCESS;
                break;
        }
    }

    /*
     * On success call the module initialization.  The image and the calling
     * thread are recorded in the device extension for the duration of the
     * call and cleared again afterwards.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        pDevExt->pLdrInitImage  = pImage;
        pDevExt->hLdrInitThread = RTThreadNativeSelf();
        rc = pImage->pfnModuleInit(pImage);
        pDevExt->pLdrInitImage  = NULL;
        pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
        /* Undo the VMMR0 entry point registration if it refers to this image. */
        if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetVMMR0EPs(pDevExt);
    }
    /* Note: this is printed regardless of whether the load succeeded. */
    SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);

    /*
     * On failure, revert the image to the open state and drop the tables.
     */
    if (RT_FAILURE(rc))
    {
        /* Inform the tracing component in case ModuleInit registered TPs. */
        supdrvTracerModuleUnloading(pDevExt, pImage);

        pImage->uState              = SUP_IOCTL_LDR_OPEN;
        pImage->pfnModuleInit       = NULL;
        pImage->pfnModuleTerm       = NULL;
        pImage->pfnServiceReqHandler= NULL;
        pImage->cbStrTab            = 0;
        RTMemFree(pImage->pachStrTab);
        pImage->pachStrTab          = NULL;
        RTMemFree(pImage->paSymbols);
        pImage->paSymbols           = NULL;
        pImage->cSymbols            = 0;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
4875
4876
4877/**
4878 * Frees a previously loaded (prep'ed) image.
4879 *
4880 * @returns IPRT status code.
4881 * @param pDevExt Device globals.
4882 * @param pSession Session data.
4883 * @param pReq The request.
4884 */
4885static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4886{
4887 int rc;
4888 PSUPDRVLDRUSAGE pUsagePrev;
4889 PSUPDRVLDRUSAGE pUsage;
4890 PSUPDRVLDRIMAGE pImage;
4891 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4892
4893 /*
4894 * Find the ldr image.
4895 */
4896 supdrvLdrLock(pDevExt);
4897 pUsagePrev = NULL;
4898 pUsage = pSession->pLdrUsage;
4899 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4900 {
4901 pUsagePrev = pUsage;
4902 pUsage = pUsage->pNext;
4903 }
4904 if (!pUsage)
4905 {
4906 supdrvLdrUnlock(pDevExt);
4907 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4908 return VERR_INVALID_HANDLE;
4909 }
4910
4911 /*
4912 * Check if we can remove anything.
4913 */
4914 rc = VINF_SUCCESS;
4915 pImage = pUsage->pImage;
4916 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4917 {
4918 /*
4919 * Check if there are any objects with destructors in the image, if
4920 * so leave it for the session cleanup routine so we get a chance to
4921 * clean things up in the right order and not leave them all dangling.
4922 */
4923 RTSpinlockAcquire(pDevExt->Spinlock);
4924 if (pImage->cUsage <= 1)
4925 {
4926 PSUPDRVOBJ pObj;
4927 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4928 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4929 {
4930 rc = VERR_DANGLING_OBJECTS;
4931 break;
4932 }
4933 }
4934 else
4935 {
4936 PSUPDRVUSAGE pGenUsage;
4937 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4938 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4939 {
4940 rc = VERR_DANGLING_OBJECTS;
4941 break;
4942 }
4943 }
4944 RTSpinlockRelease(pDevExt->Spinlock);
4945 if (rc == VINF_SUCCESS)
4946 {
4947 /* unlink it */
4948 if (pUsagePrev)
4949 pUsagePrev->pNext = pUsage->pNext;
4950 else
4951 pSession->pLdrUsage = pUsage->pNext;
4952
4953 /* free it */
4954 pUsage->pImage = NULL;
4955 pUsage->pNext = NULL;
4956 RTMemFree(pUsage);
4957
4958 /*
4959 * Dereference the image.
4960 */
4961 if (pImage->cUsage <= 1)
4962 supdrvLdrFree(pDevExt, pImage);
4963 else
4964 pImage->cUsage--;
4965 }
4966 else
4967 {
4968 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4969 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4970 }
4971 }
4972 else
4973 {
4974 /*
4975 * Dereference both image and usage.
4976 */
4977 pImage->cUsage--;
4978 pUsage->cUsage--;
4979 }
4980
4981 supdrvLdrUnlock(pDevExt);
4982 return rc;
4983}
4984
4985
4986/**
4987 * Gets the address of a symbol in an open image.
4988 *
4989 * @returns IPRT status code.
4990 * @param pDevExt Device globals.
4991 * @param pSession Session data.
4992 * @param pReq The request buffer.
4993 */
4994static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4995{
4996 PSUPDRVLDRIMAGE pImage;
4997 PSUPDRVLDRUSAGE pUsage;
4998 uint32_t i;
4999 PSUPLDRSYM paSyms;
5000 const char *pchStrings;
5001 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5002 void *pvSymbol = NULL;
5003 int rc = VERR_GENERAL_FAILURE;
5004 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5005
5006 /*
5007 * Find the ldr image.
5008 */
5009 supdrvLdrLock(pDevExt);
5010 pUsage = pSession->pLdrUsage;
5011 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5012 pUsage = pUsage->pNext;
5013 if (!pUsage)
5014 {
5015 supdrvLdrUnlock(pDevExt);
5016 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5017 return VERR_INVALID_HANDLE;
5018 }
5019 pImage = pUsage->pImage;
5020 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5021 {
5022 unsigned uState = pImage->uState;
5023 supdrvLdrUnlock(pDevExt);
5024 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5025 return VERR_ALREADY_LOADED;
5026 }
5027
5028 /*
5029 * Search the symbol strings.
5030 *
5031 * Note! The int32_t is for native loading on solaris where the data
5032 * and text segments are in very different places.
5033 */
5034 pchStrings = pImage->pachStrTab;
5035 paSyms = pImage->paSymbols;
5036 for (i = 0; i < pImage->cSymbols; i++)
5037 {
5038 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5039 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5040 {
5041 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5042 rc = VINF_SUCCESS;
5043 break;
5044 }
5045 }
5046 supdrvLdrUnlock(pDevExt);
5047 pReq->u.Out.pvSymbol = pvSymbol;
5048 return rc;
5049}
5050
5051
5052/**
5053 * Gets the address of a symbol in an open image or the support driver.
5054 *
5055 * @returns VINF_SUCCESS on success.
5056 * @returns
5057 * @param pDevExt Device globals.
5058 * @param pSession Session data.
5059 * @param pReq The request buffer.
5060 */
5061static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5062{
5063 int rc = VINF_SUCCESS;
5064 const char *pszSymbol = pReq->u.In.pszSymbol;
5065 const char *pszModule = pReq->u.In.pszModule;
5066 size_t cbSymbol;
5067 char const *pszEnd;
5068 uint32_t i;
5069
5070 /*
5071 * Input validation.
5072 */
5073 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5074 pszEnd = RTStrEnd(pszSymbol, 512);
5075 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5076 cbSymbol = pszEnd - pszSymbol + 1;
5077
5078 if (pszModule)
5079 {
5080 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5081 pszEnd = RTStrEnd(pszModule, 64);
5082 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5083 }
5084 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5085
5086
5087 if ( !pszModule
5088 || !strcmp(pszModule, "SupDrv"))
5089 {
5090 /*
5091 * Search the support driver export table.
5092 */
5093 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5094 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5095 {
5096 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5097 break;
5098 }
5099 }
5100 else
5101 {
5102 /*
5103 * Find the loader image.
5104 */
5105 PSUPDRVLDRIMAGE pImage;
5106
5107 supdrvLdrLock(pDevExt);
5108
5109 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5110 if (!strcmp(pImage->szName, pszModule))
5111 break;
5112 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5113 {
5114 /*
5115 * Search the symbol strings.
5116 */
5117 const char *pchStrings = pImage->pachStrTab;
5118 PCSUPLDRSYM paSyms = pImage->paSymbols;
5119 for (i = 0; i < pImage->cSymbols; i++)
5120 {
5121 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5122 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5123 {
5124 /*
5125 * Found it! Calc the symbol address and add a reference to the module.
5126 */
5127 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5128 rc = supdrvLdrAddUsage(pSession, pImage);
5129 break;
5130 }
5131 }
5132 }
5133 else
5134 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5135
5136 supdrvLdrUnlock(pDevExt);
5137 }
5138 return rc;
5139}
5140
5141
5142/**
5143 * Updates the VMMR0 entry point pointers.
5144 *
5145 * @returns IPRT status code.
5146 * @param pDevExt Device globals.
5147 * @param pSession Session data.
5148 * @param pVMMR0 VMMR0 image handle.
5149 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5150 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5151 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5152 * @remark Caller must own the loader mutex.
5153 */
5154static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5155{
5156 int rc = VINF_SUCCESS;
5157 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5158
5159
5160 /*
5161 * Check if not yet set.
5162 */
5163 if (!pDevExt->pvVMMR0)
5164 {
5165 pDevExt->pvVMMR0 = pvVMMR0;
5166 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5167 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5168 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5169 }
5170 else
5171 {
5172 /*
5173 * Return failure or success depending on whether the values match or not.
5174 */
5175 if ( pDevExt->pvVMMR0 != pvVMMR0
5176 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5177 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5178 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5179 {
5180 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5181 rc = VERR_INVALID_PARAMETER;
5182 }
5183 }
5184 return rc;
5185}
5186
5187
5188/**
5189 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5190 *
5191 * @param pDevExt Device globals.
5192 */
5193static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5194{
5195 pDevExt->pvVMMR0 = NULL;
5196 pDevExt->pfnVMMR0EntryInt = NULL;
5197 pDevExt->pfnVMMR0EntryFast = NULL;
5198 pDevExt->pfnVMMR0EntryEx = NULL;
5199}
5200
5201
5202/**
5203 * Adds a usage reference in the specified session of an image.
5204 *
5205 * Called while owning the loader semaphore.
5206 *
5207 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5208 * @param pSession Session in question.
5209 * @param pImage Image which the session is using.
5210 */
5211static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5212{
5213 PSUPDRVLDRUSAGE pUsage;
5214 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5215
5216 /*
5217 * Referenced it already?
5218 */
5219 pUsage = pSession->pLdrUsage;
5220 while (pUsage)
5221 {
5222 if (pUsage->pImage == pImage)
5223 {
5224 pUsage->cUsage++;
5225 return VINF_SUCCESS;
5226 }
5227 pUsage = pUsage->pNext;
5228 }
5229
5230 /*
5231 * Allocate new usage record.
5232 */
5233 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5234 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5235 pUsage->cUsage = 1;
5236 pUsage->pImage = pImage;
5237 pUsage->pNext = pSession->pLdrUsage;
5238 pSession->pLdrUsage = pUsage;
5239 return VINF_SUCCESS;
5240}
5241
5242
5243/**
5244 * Frees a load image.
5245 *
5246 * @param pDevExt Pointer to device extension.
5247 * @param pImage Pointer to the image we're gonna free.
5248 * This image must exit!
5249 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5250 */
5251static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5252{
5253 PSUPDRVLDRIMAGE pImagePrev;
5254 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5255
5256 /* find it - arg. should've used doubly linked list. */
5257 Assert(pDevExt->pLdrImages);
5258 pImagePrev = NULL;
5259 if (pDevExt->pLdrImages != pImage)
5260 {
5261 pImagePrev = pDevExt->pLdrImages;
5262 while (pImagePrev->pNext != pImage)
5263 pImagePrev = pImagePrev->pNext;
5264 Assert(pImagePrev->pNext == pImage);
5265 }
5266
5267 /* unlink */
5268 if (pImagePrev)
5269 pImagePrev->pNext = pImage->pNext;
5270 else
5271 pDevExt->pLdrImages = pImage->pNext;
5272
5273 /* check if this is VMMR0.r0 unset its entry point pointers. */
5274 if (pDevExt->pvVMMR0 == pImage->pvImage)
5275 supdrvLdrUnsetVMMR0EPs(pDevExt);
5276
5277 /* check for objects with destructors in this image. (Shouldn't happen.) */
5278 if (pDevExt->pObjs)
5279 {
5280 unsigned cObjs = 0;
5281 PSUPDRVOBJ pObj;
5282 RTSpinlockAcquire(pDevExt->Spinlock);
5283 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5284 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5285 {
5286 pObj->pfnDestructor = NULL;
5287 cObjs++;
5288 }
5289 RTSpinlockRelease(pDevExt->Spinlock);
5290 if (cObjs)
5291 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5292 }
5293
5294 /* call termination function if fully loaded. */
5295 if ( pImage->pfnModuleTerm
5296 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5297 {
5298 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5299 pImage->pfnModuleTerm(pImage);
5300 }
5301
5302 /* Inform the tracing component. */
5303 supdrvTracerModuleUnloading(pDevExt, pImage);
5304
5305 /* do native unload if appropriate. */
5306 if (pImage->fNative)
5307 supdrvOSLdrUnload(pDevExt, pImage);
5308
5309 /* free the image */
5310 pImage->cUsage = 0;
5311 pImage->pDevExt = NULL;
5312 pImage->pNext = NULL;
5313 pImage->uState = SUP_IOCTL_LDR_FREE;
5314 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5315 pImage->pvImageAlloc = NULL;
5316 RTMemFree(pImage->pachStrTab);
5317 pImage->pachStrTab = NULL;
5318 RTMemFree(pImage->paSymbols);
5319 pImage->paSymbols = NULL;
5320 RTMemFree(pImage);
5321}
5322
5323
5324/**
5325 * Acquires the loader lock.
5326 *
5327 * @returns IPRT status code.
5328 * @param pDevExt The device extension.
5329 */
5330DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5331{
5332#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5333 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5334#else
5335 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5336#endif
5337 AssertRC(rc);
5338 return rc;
5339}
5340
5341
5342/**
5343 * Releases the loader lock.
5344 *
5345 * @returns IPRT status code.
5346 * @param pDevExt The device extension.
5347 */
5348DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5349{
5350#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5351 return RTSemMutexRelease(pDevExt->mtxLdr);
5352#else
5353 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5354#endif
5355}
5356
5357
5358/**
5359 * Implements the service call request.
5360 *
5361 * @returns VBox status code.
5362 * @param pDevExt The device extension.
5363 * @param pSession The calling session.
5364 * @param pReq The request packet, valid.
5365 */
5366static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5367{
5368#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5369 int rc;
5370
5371 /*
5372 * Find the module first in the module referenced by the calling session.
5373 */
5374 rc = supdrvLdrLock(pDevExt);
5375 if (RT_SUCCESS(rc))
5376 {
5377 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5378 PSUPDRVLDRUSAGE pUsage;
5379
5380 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5381 if ( pUsage->pImage->pfnServiceReqHandler
5382 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5383 {
5384 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5385 break;
5386 }
5387 supdrvLdrUnlock(pDevExt);
5388
5389 if (pfnServiceReqHandler)
5390 {
5391 /*
5392 * Call it.
5393 */
5394 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5395 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5396 else
5397 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5398 }
5399 else
5400 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5401 }
5402
5403 /* log it */
5404 if ( RT_FAILURE(rc)
5405 && rc != VERR_INTERRUPTED
5406 && rc != VERR_TIMEOUT)
5407 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5408 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5409 else
5410 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5411 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5412 return rc;
5413#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5414 return VERR_NOT_IMPLEMENTED;
5415#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5416}
5417
5418
5419/**
5420 * Implements the logger settings request.
5421 *
5422 * @returns VBox status code.
5423 * @param pDevExt The device extension.
5424 * @param pSession The caller's session.
5425 * @param pReq The request.
5426 */
5427static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5428{
5429 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5430 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5431 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5432 PRTLOGGER pLogger = NULL;
5433 int rc;
5434
5435 /*
5436 * Some further validation.
5437 */
5438 switch (pReq->u.In.fWhat)
5439 {
5440 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5441 case SUPLOGGERSETTINGS_WHAT_CREATE:
5442 break;
5443
5444 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5445 if (*pszGroup || *pszFlags || *pszDest)
5446 return VERR_INVALID_PARAMETER;
5447 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5448 return VERR_ACCESS_DENIED;
5449 break;
5450
5451 default:
5452 return VERR_INTERNAL_ERROR;
5453 }
5454
5455 /*
5456 * Get the logger.
5457 */
5458 switch (pReq->u.In.fWhich)
5459 {
5460 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5461 pLogger = RTLogGetDefaultInstance();
5462 break;
5463
5464 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5465 pLogger = RTLogRelDefaultInstance();
5466 break;
5467
5468 default:
5469 return VERR_INTERNAL_ERROR;
5470 }
5471
5472 /*
5473 * Do the job.
5474 */
5475 switch (pReq->u.In.fWhat)
5476 {
5477 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5478 if (pLogger)
5479 {
5480 rc = RTLogFlags(pLogger, pszFlags);
5481 if (RT_SUCCESS(rc))
5482 rc = RTLogGroupSettings(pLogger, pszGroup);
5483 NOREF(pszDest);
5484 }
5485 else
5486 rc = VERR_NOT_FOUND;
5487 break;
5488
5489 case SUPLOGGERSETTINGS_WHAT_CREATE:
5490 {
5491 if (pLogger)
5492 rc = VERR_ALREADY_EXISTS;
5493 else
5494 {
5495 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5496
5497 rc = RTLogCreate(&pLogger,
5498 0 /* fFlags */,
5499 pszGroup,
5500 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5501 ? "VBOX_LOG"
5502 : "VBOX_RELEASE_LOG",
5503 RT_ELEMENTS(s_apszGroups),
5504 s_apszGroups,
5505 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5506 NULL);
5507 if (RT_SUCCESS(rc))
5508 {
5509 rc = RTLogFlags(pLogger, pszFlags);
5510 NOREF(pszDest);
5511 if (RT_SUCCESS(rc))
5512 {
5513 switch (pReq->u.In.fWhich)
5514 {
5515 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5516 pLogger = RTLogSetDefaultInstance(pLogger);
5517 break;
5518 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5519 pLogger = RTLogRelSetDefaultInstance(pLogger);
5520 break;
5521 }
5522 }
5523 RTLogDestroy(pLogger);
5524 }
5525 }
5526 break;
5527 }
5528
5529 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5530 switch (pReq->u.In.fWhich)
5531 {
5532 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5533 pLogger = RTLogSetDefaultInstance(NULL);
5534 break;
5535 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5536 pLogger = RTLogRelSetDefaultInstance(NULL);
5537 break;
5538 }
5539 rc = RTLogDestroy(pLogger);
5540 break;
5541
5542 default:
5543 {
5544 rc = VERR_INTERNAL_ERROR;
5545 break;
5546 }
5547 }
5548
5549 return rc;
5550}
5551
5552
5553/**
5554 * Implements the MSR prober operations.
5555 *
5556 * @returns VBox status code.
5557 * @param pDevExt The device extension.
5558 * @param pReq The request.
5559 */
5560static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5561{
5562#ifdef SUPDRV_WITH_MSR_PROBER
5563 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5564 int rc;
5565
5566 switch (pReq->u.In.enmOp)
5567 {
5568 case SUPMSRPROBEROP_READ:
5569 {
5570 uint64_t uValue;
5571 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5572 if (RT_SUCCESS(rc))
5573 {
5574 pReq->u.Out.uResults.Read.uValue = uValue;
5575 pReq->u.Out.uResults.Read.fGp = false;
5576 }
5577 else if (rc == VERR_ACCESS_DENIED)
5578 {
5579 pReq->u.Out.uResults.Read.uValue = 0;
5580 pReq->u.Out.uResults.Read.fGp = true;
5581 rc = VINF_SUCCESS;
5582 }
5583 break;
5584 }
5585
5586 case SUPMSRPROBEROP_WRITE:
5587 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5588 if (RT_SUCCESS(rc))
5589 pReq->u.Out.uResults.Write.fGp = false;
5590 else if (rc == VERR_ACCESS_DENIED)
5591 {
5592 pReq->u.Out.uResults.Write.fGp = true;
5593 rc = VINF_SUCCESS;
5594 }
5595 break;
5596
5597 case SUPMSRPROBEROP_MODIFY:
5598 case SUPMSRPROBEROP_MODIFY_FASTER:
5599 rc = supdrvOSMsrProberModify(idCpu, pReq);
5600 break;
5601
5602 default:
5603 return VERR_INVALID_FUNCTION;
5604 }
5605 return rc;
5606#else
5607 return VERR_NOT_IMPLEMENTED;
5608#endif
5609}
5610
5611
5612#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5613/**
5614 * Switches the TSC-delta measurement thread into the butchered state.
5615 *
5616 * @returns VBox status code.
5617 * @param pDevExt Pointer to the device instance data.
5618 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5619 * @param pszFailed An error message to log.
5620 * @param rcFailed The error code to exit the thread with.
5621 */
5622static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5623{
5624 if (!fSpinlockHeld)
5625 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5626
5627 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5628 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5629 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5630 return rcFailed;
5631}
5632
5633
5634/**
5635 * The TSC-delta measurement thread.
5636 *
5637 * @returns VBox status code.
5638 * @param hThread The thread handle.
5639 * @param pvUser Opaque pointer to the device instance data.
5640 */
5641static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5642{
5643 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5644 static uint32_t cTimesMeasured = 0;
5645 uint32_t cConsecutiveTimeouts = 0;
5646 int rc = VERR_INTERNAL_ERROR_2;
5647 for (;;)
5648 {
5649 /*
5650 * Switch on the current state.
5651 */
5652 SUPDRVTSCDELTASTATE enmState;
5653 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5654 enmState = pDevExt->enmTscDeltaState;
5655 switch (enmState)
5656 {
5657 case kSupDrvTscDeltaState_Creating:
5658 {
5659 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5660 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5661 if (RT_FAILURE(rc))
5662 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5663 /* fall thru */
5664 }
5665
5666 case kSupDrvTscDeltaState_Listening:
5667 {
5668 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5669
5670 /* Simple adaptive timeout. */
5671 if (cConsecutiveTimeouts++ == 10)
5672 {
5673 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5674 pDevExt->cMsTscDeltaTimeout = 10;
5675 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5676 pDevExt->cMsTscDeltaTimeout = 100;
5677 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5678 pDevExt->cMsTscDeltaTimeout = 500;
5679 cConsecutiveTimeouts = 0;
5680 }
5681 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5682 if ( RT_FAILURE(rc)
5683 && rc != VERR_TIMEOUT)
5684 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5685 break;
5686 }
5687
5688 case kSupDrvTscDeltaState_WaitAndMeasure:
5689 {
5690 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5691 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5692 if (RT_FAILURE(rc))
5693 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5694 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5695 pDevExt->cMsTscDeltaTimeout = 1;
5696 RTThreadSleep(10);
5697 /* fall thru */
5698 }
5699
5700 case kSupDrvTscDeltaState_Measuring:
5701 {
5702 cConsecutiveTimeouts = 0;
5703 if (!cTimesMeasured++)
5704 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5705 else
5706 {
5707 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5708 unsigned iCpu;
5709
5710 if (cTimesMeasured == UINT32_MAX)
5711 cTimesMeasured = 1;
5712
5713 /* Measure TSC-deltas only for the CPUs that are in the set. */
5714 rc = VINF_SUCCESS;
5715 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5716 {
5717 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5718 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5719 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5720 {
5721 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5722 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5723 }
5724 }
5725 }
5726 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5727 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5728 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5729 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5730 pDevExt->rcTscDelta = rc;
5731 break;
5732 }
5733
5734 case kSupDrvTscDeltaState_Terminating:
5735 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5736 return VINF_SUCCESS;
5737
5738 case kSupDrvTscDeltaState_Butchered:
5739 default:
5740 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5741 }
5742 }
5743
5744 return rc;
5745}
5746
5747
5748/**
5749 * Waits for the TSC-delta measurement thread to respond to a state change.
5750 *
5751 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5752 * other error code on internal error.
5753 *
5754 * @param pThis Pointer to the grant service instance data.
5755 * @param enmCurState The current state.
5756 * @param enmNewState The new state we're waiting for it to enter.
5757 */
5758static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5759{
5760 /*
5761 * Wait a short while for the expected state transition.
5762 */
5763 int rc;
5764 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5765 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5766 if (pDevExt->enmTscDeltaState == enmNewState)
5767 {
5768 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5769 rc = VINF_SUCCESS;
5770 }
5771 else if (pDevExt->enmTscDeltaState == enmCurState)
5772 {
5773 /*
5774 * Wait longer if the state has not yet transitioned to the one we want.
5775 */
5776 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5777 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5778 if ( RT_SUCCESS(rc)
5779 || rc == VERR_TIMEOUT)
5780 {
5781 /*
5782 * Check the state whether we've succeeded.
5783 */
5784 SUPDRVTSCDELTASTATE enmState;
5785 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5786 enmState = pDevExt->enmTscDeltaState;
5787 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5788 if (enmState == enmNewState)
5789 rc = VINF_SUCCESS;
5790 else if (enmState == enmCurState)
5791 {
5792 rc = VERR_TIMEOUT;
5793 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5794 enmNewState));
5795 }
5796 else
5797 {
5798 rc = VERR_INTERNAL_ERROR;
5799 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5800 enmState, enmNewState));
5801 }
5802 }
5803 else
5804 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5805 }
5806 else
5807 {
5808 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5809 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5810 rc = VERR_INTERNAL_ERROR;
5811 }
5812
5813 return rc;
5814}
5815
5816
5817/**
5818 * Terminates the TSC-delta measurement thread.
5819 *
5820 * @param pDevExt Pointer to the device instance data.
5821 */
5822static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5823{
5824 int rc;
5825 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5826 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5827 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5828 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5829 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5830 if (RT_FAILURE(rc))
5831 {
5832 /* Signal a few more times before giving up. */
5833 int cTries = 5;
5834 while (--cTries > 0)
5835 {
5836 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5837 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5838 if (rc != VERR_TIMEOUT)
5839 break;
5840 }
5841 }
5842}
5843
5844
5845/**
5846 * Initializes and spawns the TSC-delta measurement thread.
5847 *
5848 * A thread is required for servicing re-measurement requests from events like
5849 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5850 * under all contexts on all OSs.
5851 *
5852 * @returns VBox status code.
5853 * @param pDevExt Pointer to the device instance data.
5854 *
5855 * @remarks Must only be called -after- initializing GIP and setting up MP
5856 * notifications!
5857 */
5858static int supdrvTscDeltaInit(PSUPDRVDEVEXT pDevExt)
5859{
5860 Assert(!g_fOsTscDeltasInSync);
5861 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5862 if (RT_SUCCESS(rc))
5863 {
5864 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5865 if (RT_SUCCESS(rc))
5866 {
5867 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5868 pDevExt->cMsTscDeltaTimeout = 1;
5869 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5870 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5871 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5872 if (RT_SUCCESS(rc))
5873 {
5874 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5875 if (RT_SUCCESS(rc))
5876 {
5877 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5878 return rc;
5879 }
5880
5881 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5882 supdrvTscDeltaThreadTerminate(pDevExt);
5883 }
5884 else
5885 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5886 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5887 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5888 }
5889 else
5890 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5891 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5892 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5893 }
5894 else
5895 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5896
5897 return rc;
5898}
5899
5900
5901/**
5902 * Terminates the TSC-delta measurement thread and cleanup.
5903 *
5904 * @param pDevExt Pointer to the device instance data.
5905 */
5906static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5907{
5908 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5909 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5910 {
5911 supdrvTscDeltaThreadTerminate(pDevExt);
5912 }
5913
5914 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5915 {
5916 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5917 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5918 }
5919
5920 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5921 {
5922 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5923 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5924 }
5925
5926 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5927}
5928#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5929
5930
5931/**
5932 * Measures the TSC frequency of the system.
5933 *
5934 * Uses a busy-wait method for the async. case as it is intended to help push
5935 * the CPU frequency up, while for the invariant cases using a sleeping method.
5936 *
5937 * The TSC frequency can vary on systems that are not reported as invariant.
5938 * However, on such systems the object of this function is to find out what the
5939 * nominal, maximum TSC frequency under normal CPU operation.
5940 *
5941 * @returns VBox status code.
5942 * @param pGip Pointer to the GIP.
5943 *
5944 * @remarks Must be called only after measuring the TSC deltas.
5945 */
5946static int supdrvGipMeasureTscFreq(PSUPGLOBALINFOPAGE pGip)
5947{
5948 int cTriesLeft = 4;
5949
5950 /* Assert order. */
5951 AssertReturn(pGip, VERR_INVALID_PARAMETER);
5952 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
5953
5954 while (cTriesLeft-- > 0)
5955 {
5956 RTCCUINTREG uFlags;
5957 uint64_t u64NanoTsBefore;
5958 uint64_t u64NanoTsAfter;
5959 uint64_t u64TscBefore;
5960 uint64_t u64TscAfter;
5961 uint8_t idApicBefore;
5962 uint8_t idApicAfter;
5963
5964 /*
5965 * Synchronize with the host OS clock tick before reading the TSC.
5966 * Especially important on Windows where the granularity is terrible.
5967 */
5968 u64NanoTsBefore = RTTimeSystemNanoTS();
5969 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
5970 ASMNopPause();
5971
5972 uFlags = ASMIntDisableFlags();
5973 idApicBefore = ASMGetApicId();
5974 u64TscBefore = ASMReadTSC();
5975 u64NanoTsBefore = RTTimeSystemNanoTS();
5976 ASMSetFlags(uFlags);
5977
5978 /* Activate this when implemented invariant TSC GIP mode. Otherwise systems that are really invariant
5979 which get detected as async will break. */
5980#if 0
5981 if (supdrvIsInvariantTsc())
5982 {
5983 /*
5984 * Sleep wait since the TSC frequency is constant, eases host load.
5985 * Shorter interval produces more variance in the frequency (esp. Windows).
5986 */
5987 RTThreadSleep(200);
5988 u64NanoTsAfter = RTTimeSystemNanoTS();
5989 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
5990 ASMNopPause();
5991 u64NanoTsAfter = RTTimeSystemNanoTS();
5992 }
5993 else
5994#endif
5995 {
5996 /* Busy-wait keeping the frequency up and measure. */
5997 for (;;)
5998 {
5999 u64NanoTsAfter = RTTimeSystemNanoTS();
6000 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6001 ASMNopPause();
6002 else
6003 break;
6004 }
6005 }
6006
6007 uFlags = ASMIntDisableFlags();
6008 idApicAfter = ASMGetApicId();
6009 u64TscAfter = ASMReadTSC();
6010 ASMSetFlags(uFlags);
6011
6012 /* Activate this when implemented invariant TSC GIP mode. Otherwise systems that are really invariant
6013 which get detected as async will break. */
6014#if 0
6015 if (supdrvIsInvariantTsc()) /** @todo replace with enum check. */
6016 {
6017 int rc;
6018 bool fAppliedBefore;
6019 bool fAppliedAfter;
6020 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6021 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6022
6023 if ( !fAppliedBefore
6024 || !fAppliedAfter)
6025 {
6026 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6027 idApicBefore, idApicAfter, cTriesLeft);
6028 continue;
6029 }
6030 }
6031#endif
6032
6033 /*
6034 * Update GIP.
6035 */
6036 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6037 return VINF_SUCCESS;
6038 }
6039
6040 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6041}
6042
6043
6044/**
6045 * Creates the GIP.
6046 *
6047 * @returns VBox status code.
6048 * @param pDevExt Instance data. GIP stuff may be updated.
6049 */
6050static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6051{
6052 PSUPGLOBALINFOPAGE pGip;
6053 RTHCPHYS HCPhysGip;
6054 uint32_t u32SystemResolution;
6055 uint32_t u32Interval;
6056 uint32_t u32MinInterval;
6057 uint32_t uMod;
6058 unsigned cCpus;
6059 int rc;
6060
6061 LogFlow(("supdrvGipCreate:\n"));
6062
6063 /* Assert order. */
6064 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6065 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6066 Assert(!pDevExt->pGipTimer);
6067
6068 /*
6069 * Check the CPU count.
6070 */
6071 cCpus = RTMpGetArraySize();
6072 if ( cCpus > RTCPUSET_MAX_CPUS
6073 || cCpus > 256 /* ApicId is used for the mappings */)
6074 {
6075 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6076 return VERR_TOO_MANY_CPUS;
6077 }
6078
6079 /*
6080 * Allocate a contiguous set of pages with a default kernel mapping.
6081 */
6082 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6083 if (RT_FAILURE(rc))
6084 {
6085 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6086 return rc;
6087 }
6088 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6089 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6090
6091 /*
6092 * Find a reasonable update interval and initialize the structure.
6093 */
6094 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6095 * See @bugref{6710}. */
6096 u32MinInterval = RT_NS_10MS;
6097 u32SystemResolution = RTTimerGetSystemGranularity();
6098 u32Interval = u32MinInterval;
6099 uMod = u32MinInterval % u32SystemResolution;
6100 if (uMod)
6101 u32Interval += u32SystemResolution - uMod;
6102
6103 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6104
6105 if (RT_UNLIKELY( g_fOsTscDeltasInSync
6106 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6107 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6108 {
6109 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6110 return VERR_INTERNAL_ERROR_2;
6111 }
6112
6113#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6114 if (!g_fOsTscDeltasInSync)
6115 {
6116 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6117 rc = supdrvTscDeltaInit(pDevExt);
6118 }
6119#endif
6120 if (RT_SUCCESS(rc))
6121 {
6122 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6123 if (RT_SUCCESS(rc))
6124 {
6125 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6126 if (RT_SUCCESS(rc))
6127 {
6128#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6129 uint16_t iCpu;
6130 if (!g_fOsTscDeltasInSync)
6131 {
6132 /*
6133 * Measure the TSC deltas now that we have MP notifications.
6134 */
6135 int cTries = 5;
6136 do
6137 {
6138 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6139 if (rc != VERR_TRY_AGAIN)
6140 break;
6141 } while (--cTries > 0);
6142 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6143 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6144 }
6145 else
6146 {
6147 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6148 Assert(!pGip->aCPUs[iCpu].i64TSCDelta);
6149 }
6150#endif
6151 if (RT_SUCCESS(rc))
6152 {
6153 rc = supdrvGipMeasureTscFreq(pGip);
6154 if (RT_SUCCESS(rc))
6155 {
6156 if (supdrvIsInvariantTsc())
6157 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6158
6159 /*
6160 * Create the timer.
6161 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6162 */
6163 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6164 {
6165 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6166 pDevExt);
6167 if (rc == VERR_NOT_SUPPORTED)
6168 {
6169 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6170 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6171 }
6172 }
6173 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6174 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6175 if (RT_SUCCESS(rc))
6176 {
6177 /*
6178 * We're good.
6179 */
6180 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6181 g_pSUPGlobalInfoPage = pGip;
6182 return VINF_SUCCESS;
6183 }
6184 else
6185 {
6186 OSDBGPRINT(("supdrvGipCreate: RTTimerCreateEx failed (%u ns interval). rc=%Rrc\n", u32Interval, rc));
6187 Assert(!pDevExt->pGipTimer);
6188 }
6189 }
6190 else
6191 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6192 }
6193 else
6194 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6195 }
6196 else
6197 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6198 }
6199 else
6200 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6201 }
6202 else
6203 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6204
6205 supdrvGipDestroy(pDevExt);
6206 return rc;
6207}
6208
6209
6210/**
6211 * Terminates the GIP.
6212 *
6213 * @param pDevExt Instance data. GIP stuff may be updated.
6214 */
6215static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6216{
6217 int rc;
6218#ifdef DEBUG_DARWIN_GIP
6219 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6220 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6221 pDevExt->pGipTimer, pDevExt->GipMemObj));
6222#endif
6223
6224 /*
6225 * Stop receiving MP notifications before tearing anything else down.
6226 */
6227 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6228
6229#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6230 /*
6231 * Terminate the TSC-delta measurement thread and resources.
6232 */
6233 supdrvTscDeltaTerm(pDevExt);
6234#endif
6235
6236 /*
6237 * Invalid the GIP data.
6238 */
6239 if (pDevExt->pGip)
6240 {
6241 supdrvGipTerm(pDevExt->pGip);
6242 pDevExt->pGip = NULL;
6243 }
6244 g_pSUPGlobalInfoPage = NULL;
6245
6246 /*
6247 * Destroy the timer and free the GIP memory object.
6248 */
6249 if (pDevExt->pGipTimer)
6250 {
6251 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6252 pDevExt->pGipTimer = NULL;
6253 }
6254
6255 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6256 {
6257 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6258 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6259 }
6260
6261 /*
6262 * Finally, make sure we've release the system timer resolution request
6263 * if one actually succeeded and is still pending.
6264 */
6265 if (pDevExt->u32SystemTimerGranularityGrant)
6266 {
6267 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
6268 pDevExt->u32SystemTimerGranularityGrant = 0;
6269 }
6270}
6271
6272
6273/**
6274 * Timer callback function sync GIP mode.
6275 * @param pTimer The timer.
6276 * @param pvUser The device extension.
6277 */
6278static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6279{
6280 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6281 uint64_t u64TSC = ASMReadTSC();
6282 uint64_t NanoTS = RTTimeSystemNanoTS();
6283 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6284
6285 if (supdrvIsInvariantTsc())
6286 {
6287 PSUPGIPCPU pGipCpu;
6288 unsigned iCpu;
6289 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6290 uint8_t idApic = ASMGetApicId();
6291
6292 iCpu = pGip->aiCpuFromApicId[idApic];
6293 Assert(iCpu < pGip->cCpus);
6294 pGipCpu = &pGip->aCPUs[iCpu];
6295 Assert(pGipCpu->idCpu == RTMpCpuId());
6296
6297 /*
6298 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6299 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6300 * affected a bit until we get proper TSC deltas than implementing options like
6301 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6302 *
6303 * The likely hood of this happening is really low. On Windows, Linux timers
6304 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6305 */
6306 if (pGipCpu->i64TSCDelta != INT64_MAX)
6307 u64TSC -= pGipCpu->i64TSCDelta;
6308 }
6309
6310 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, NIL_RTCPUID, iTick);
6311
6312 ASMSetFlags(fOldFlags);
6313
6314 if (supdrvIsInvariantTsc())
6315 {
6316 /*
6317 * Refine the TSC frequency measurement over a longer interval. Ideally, we want to keep the
6318 * interval as small as possible while gaining the most consistent and accurate frequency
6319 * (compared to what the host OS might have measured).
6320 *
6321 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6322 * same TSC frequency whenever possible so we need to keep the interval short.
6323 */
6324 uint8_t idApic;
6325 uint64_t u64NanoTS;
6326 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6327 const int cSeconds = 3;
6328 if (RT_UNLIKELY(iTick == 3)) /* Helps with more consistent values across multiple runs (esp. Windows). */
6329 {
6330 u64NanoTS = RTTimeSystemNanoTS();
6331 while (RTTimeSystemNanoTS() == u64NanoTS)
6332 ASMNopPause();
6333 fOldFlags = ASMIntDisableFlags();
6334 idApic = ASMGetApicId();
6335 g_u64TSCAnchor = ASMReadTSC();
6336 g_u64NanoTSAnchor = RTTimeSystemNanoTS();
6337 ASMSetFlags(fOldFlags);
6338 SUPTscDeltaApply(pGip, &g_u64TSCAnchor, idApic, NULL /* pfDeltaApplied */);
6339 ++g_u64TSCAnchor;
6340 }
6341 else if (g_u64TSCAnchor)
6342 {
6343 uint64_t u64DeltaNanoTS;
6344 u64NanoTS = RTTimeSystemNanoTS();
6345 while (RTTimeSystemNanoTS() == u64NanoTS)
6346 ASMNopPause();
6347 fOldFlags = ASMIntDisableFlags();
6348 idApic = ASMGetApicId();
6349 u64TSC = ASMReadTSC();
6350 u64NanoTS = RTTimeSystemNanoTS();
6351 ASMSetFlags(fOldFlags);
6352 SUPTscDeltaApply(pGip, &u64TSC, idApic, NULL /* pfDeltaApplied */);
6353 u64DeltaNanoTS = u64NanoTS - g_u64NanoTSAnchor;
6354 if (u64DeltaNanoTS >= cSeconds * RT_NS_1SEC_64)
6355 {
6356 uint16_t iCpu;
6357 if (u64DeltaNanoTS < UINT32_MAX)
6358 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64TSC - g_u64TSCAnchor, RT_NS_1SEC, u64DeltaNanoTS);
6359 else
6360 pGip->u64CpuHz = (u64TSC - g_u64TSCAnchor) / (u64DeltaNanoTS / RT_NS_1SEC);
6361
6362 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6363 g_u64TSCAnchor = 0;
6364 }
6365 }
6366 }
6367}
6368
6369
6370/**
6371 * Timer callback function for async GIP mode.
6372 * @param pTimer The timer.
6373 * @param pvUser The device extension.
6374 */
6375static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6376{
6377 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6378 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6379 RTCPUID idCpu = RTMpCpuId();
6380 uint64_t u64TSC = ASMReadTSC();
6381 uint64_t NanoTS = RTTimeSystemNanoTS();
6382
6383 /** @todo reset the transaction number and whatnot when iTick == 1. */
6384 if (pDevExt->idGipMaster == idCpu)
6385 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6386 else
6387 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6388
6389 ASMSetFlags(fOldFlags);
6390}
6391
6392
6393/**
6394 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6395 *
6396 * @returns Index of the CPU in the cache set.
6397 * @param pGip The GIP.
6398 * @param idCpu The CPU ID.
6399 */
6400static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6401{
6402 uint32_t i, cTries;
6403
6404 /*
6405 * ASSUMES that CPU IDs are constant.
6406 */
6407 for (i = 0; i < pGip->cCpus; i++)
6408 if (pGip->aCPUs[i].idCpu == idCpu)
6409 return i;
6410
6411 cTries = 0;
6412 do
6413 {
6414 for (i = 0; i < pGip->cCpus; i++)
6415 {
6416 bool fRc;
6417 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6418 if (fRc)
6419 return i;
6420 }
6421 } while (cTries++ < 32);
6422 AssertReleaseFailed();
6423 return i - 1;
6424}
6425
6426
6427/**
6428 * The calling CPU should be accounted as online, update GIP accordingly.
6429 *
6430 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6431 *
6432 * @param pDevExt The device extension.
6433 * @param idCpu The CPU ID.
6434 */
6435static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6436{
6437 int iCpuSet = 0;
6438 uint16_t idApic = UINT16_MAX;
6439 uint32_t i = 0;
6440 uint64_t u64NanoTS = 0;
6441 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6442
6443 AssertPtrReturnVoid(pGip);
6444 AssertRelease(idCpu == RTMpCpuId());
6445 Assert(pGip->cPossibleCpus == RTMpGetCount());
6446
6447 /*
6448 * Do this behind a spinlock with interrupts disabled as this can fire
6449 * on all CPUs simultaneously, see @bugref{6110}.
6450 */
6451 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6452
6453 /*
6454 * Update the globals.
6455 */
6456 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6457 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6458 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6459 if (iCpuSet >= 0)
6460 {
6461 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6462 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6463 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6464 }
6465
6466 /*
6467 * Update the entry.
6468 */
6469 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6470 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6471 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
6472 idApic = ASMGetApicId();
6473 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6474 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6475 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6476
6477 /*
6478 * Update the APIC ID and CPU set index mappings.
6479 */
6480 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6481 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6482
6483 /* Update the Mp online/offline counter. */
6484 ASMAtomicIncU32(&g_cMpOnOffEvents);
6485
6486#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6487 /*
6488 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6489 *
6490 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6491 * update the state and it'll get serviced when the thread's listening interval times out.
6492 */
6493 if ( !g_fOsTscDeltasInSync
6494 && supdrvIsInvariantTsc())
6495 {
6496 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6497 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6498 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6499 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6500 {
6501 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6502 }
6503 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6504 }
6505#endif
6506
6507 /* commit it */
6508 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6509
6510 RTSpinlockRelease(pDevExt->hGipSpinlock);
6511}
6512
6513
6514/**
6515 * The CPU should be accounted as offline, update the GIP accordingly.
6516 *
6517 * This is used by supdrvGipMpEvent.
6518 *
6519 * @param pDevExt The device extension.
6520 * @param idCpu The CPU ID.
6521 */
6522static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6523{
6524 int iCpuSet;
6525 unsigned i;
6526
6527 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6528
6529 AssertPtrReturnVoid(pGip);
6530 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6531
6532 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6533 AssertReturnVoid(iCpuSet >= 0);
6534
6535 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6536 AssertReturnVoid(i < pGip->cCpus);
6537 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6538
6539 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6540 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6541
6542 /* Update the Mp online/offline counter. */
6543 ASMAtomicIncU32(&g_cMpOnOffEvents);
6544
6545 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6546 if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
6547 {
6548 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6549 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6550 }
6551
6552 /* Reset the TSC delta (if required), we will recalculate it lazily. */
6553 if (!g_fOsTscDeltasInSync)
6554 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6555
6556 /* commit it */
6557 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6558
6559 RTSpinlockRelease(pDevExt->hGipSpinlock);
6560}
6561
6562
6563/**
6564 * Multiprocessor event notification callback.
6565 *
6566 * This is used to make sure that the GIP master gets passed on to
6567 * another CPU. It also updates the associated CPU data.
6568 *
6569 * @param enmEvent The event.
6570 * @param idCpu The cpu it applies to.
6571 * @param pvUser Pointer to the device extension.
6572 *
6573 * @remarks This function -must- fire on the newly online'd CPU for the
6574 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6575 * RTMPEVENT_OFFLINE case.
6576 */
6577static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6578{
6579 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6580 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6581
6582 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6583
6584 /*
6585 * Update the GIP CPU data.
6586 */
6587 if (pGip)
6588 {
6589 switch (enmEvent)
6590 {
6591 case RTMPEVENT_ONLINE:
6592 AssertRelease(idCpu == RTMpCpuId());
6593 supdrvGipMpEventOnline(pDevExt, idCpu);
6594 break;
6595 case RTMPEVENT_OFFLINE:
6596 supdrvGipMpEventOffline(pDevExt, idCpu);
6597 break;
6598 }
6599 }
6600
6601 /*
6602 * Make sure there is a master GIP.
6603 */
6604 if (enmEvent == RTMPEVENT_OFFLINE)
6605 {
6606 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6607 if (idGipMaster == idCpu)
6608 {
6609 /*
6610 * Find a new GIP master.
6611 */
6612 bool fIgnored;
6613 unsigned i;
6614 int64_t iTSCDelta;
6615 uint32_t idxNewGipMaster;
6616 RTCPUID idNewGipMaster = NIL_RTCPUID;
6617 RTCPUSET OnlineCpus;
6618 RTMpGetOnlineSet(&OnlineCpus);
6619
6620 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6621 {
6622 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6623 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6624 && idCurCpu != idGipMaster)
6625 {
6626 idNewGipMaster = idCurCpu;
6627 break;
6628 }
6629 }
6630
6631 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6632 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6633 NOREF(fIgnored);
6634
6635 /*
6636 * Adjust all the TSC deltas against the new GIP master.
6637 */
6638 if (pGip)
6639 {
6640 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6641 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6642 Assert(iTSCDelta != INT64_MAX);
6643 for (i = 0; i < pGip->cCpus; i++)
6644 {
6645 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6646 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6647 if (iWorkerDelta != INT64_MAX)
6648 iWorkerDelta -= iTSCDelta;
6649 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6650 }
6651 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6652 }
6653 }
6654 }
6655}
6656
6657
6658/**
6659 * Returns whether the host CPU sports an invariant TSC or not.
6660 *
6661 * @returns true if invariant TSC is supported, false otherwise.
6662 */
6663static bool supdrvIsInvariantTsc(void)
6664{
6665 static bool s_fQueried = false;
6666 static bool s_fIsInvariantTsc = false;
6667 if (!s_fQueried)
6668 {
6669 uint32_t uEax, uEbx, uEcx, uEdx;
6670 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
6671 if (uEax >= 0x80000007)
6672 {
6673 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
6674 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
6675 s_fIsInvariantTsc = true;
6676 }
6677 s_fQueried = true;
6678 }
6679
6680 return s_fIsInvariantTsc;
6681}
6682
6683
6684/**
6685 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6686 * compute the delta between them.
6687 *
 * The master is the CPU recorded in g_idTscDeltaInitiator, the worker is the
 * CPU whose ID pvUser2 points to. Any other CPU returns immediately. Master
 * and worker run the loops below in lock-step, handing over control through
 * g_pTscDeltaSync->u. The result (worker TSC sample minus master TSC sample)
 * is stored by the master in the worker's i64TSCDelta GIP entry, which stays
 * INT64_MAX when no usable sample pair was obtained.
 *
6688 * @param idCpu The CPU we are currently scheduled on.
6689 * @param pvUser1 Opaque pointer to the GIP.
6690 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6691 *
6692 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6693 * read the TSC at exactly the same time on both the master and the worker
6694 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6695 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6696 * try to minimize the measurement error by computing the minimum read time
6697 * of the compare statement in the worker by taking TSC measurements across
6698 * it.
6699 *
6700 * We ignore the first few runs of the loop in order to prime the cache.
6701 * Also, be careful about using 'pause' instruction in critical busy-wait
6702 * loops in this code - it can cause undesired behaviour with
6703 * hyperthreading.
6704 *
6705 * It must be noted that the computed minimum read time is mostly to
6706 * eliminate huge deltas when the worker is too early and doesn't by itself
6707 * help produce more accurate deltas. We allow two times the computed
6708 * minimum as an arbitrary acceptable threshold. Therefore, it is still
6709 * possible to get negative deltas where there are none when the worker is
6710 * earlier. As long as these occasional negative deltas are lower than the
6711 * time it takes to exit guest-context and the OS to reschedule EMT on a
6712 * different CPU we won't expose a TSC that jumped backwards. It is because
6713 * of the existence of the negative deltas we don't recompute the delta with
6714 * the master and worker interchanged to eliminate the remaining measurement
6715 * error.
6716 */
6717static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6718{
6719 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
6720 uint32_t *pidWorker = (uint32_t *)pvUser2;
6721 RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
6722 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6723 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6724 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6725 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
 /* Upper bound on measurement rounds; the retry loop below stops early once a delta has been recorded. */
6726 int cTriesLeft = 12;
6727
 /* Only the master and the designated worker take part in the measurement. */
6728 if ( idCpu != idMaster
6729 && idCpu != *pidWorker)
6730 return;
6731
6732 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6733 with a timeout to avoid deadlocking the entire system. */
6734 if (!RTMpOnAllIsConcurrentSafe())
6735 {
6736 /** @todo This was introduced for Windows, but since Windows doesn't use this
6737 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6738 * see @bugref{6710} comment 81), eventually phase it out. */
 /* NOTE(review): a timeout only makes the timed-out CPU return. If the peer
    gets past its own wait afterwards, it looks like it would enter the
    measurement loop below and spin without a partner - confirm this cannot
    happen in practice. */
6739 uint64_t uTscNow;
6740 uint64_t uTscStart;
6741 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6742
6743 ASMSerializeInstruction();
6744 uTscStart = ASMReadTSC();
6745 if (idCpu == idMaster)
6746 {
 /* Master: announce presence, then wait (bounded by cWaitTicks) for the worker's reply. */
6747 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6748 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6749 {
6750 ASMSerializeInstruction();
6751 uTscNow = ASMReadTSC();
6752 if (uTscNow - uTscStart > cWaitTicks)
6753 {
6754 /* Set the worker delta to indicate failure, not the master. */
6755 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6756 return;
6757 }
6758
6759 ASMNopPause();
6760 }
6761 }
6762 else
6763 {
 /* Worker: wait (bounded by cWaitTicks) for the master's announcement, then acknowledge it. */
6764 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6765 {
6766 ASMSerializeInstruction();
6767 uTscNow = ASMReadTSC();
6768 if (uTscNow - uTscStart > cWaitTicks)
6769 {
6770 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6771 return;
6772 }
6773
6774 ASMNopPause();
6775 }
6776 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6777 }
6778 }
6779
6780 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6781 while (cTriesLeft-- > 0)
6782 {
6783 unsigned i;
 /* Smallest compare/read time seen so far in this try; only the worker branch updates and consults it. */
6784 uint64_t uMinCmpReadTime = UINT64_MAX;
6785 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6786 {
6787 if (idCpu == idMaster)
6788 {
6789 /*
6790 * The master.
6791 */
6792 RTCCUINTREG uFlags;
6793 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
 /* Open the iteration; the worker answers by changing the sync value to WORKER_READY. */
6794 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6795
6796 /* Disable interrupts only in the master for as short a period
6797 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6798 uFlags = ASMIntDisableFlags();
6799
 /* Spin until the sync value is no longer START. */
6800 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6801 ;
6802
 /* Publish the master's TSC reading; repeat if the value read happens to equal the RSVD sentinel. */
6803 do
6804 {
6805 ASMSerializeInstruction();
6806 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6807 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6808
6809 ASMSetFlags(uFlags);
6810
 /* Wait for the worker to finish its side of this iteration. */
6811 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6812 ;
6813
 /* Iterations up to and including the primer and read-time loops never contribute to the delta. */
6814 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6815 {
 /* The worker leaves RSVD in its sample when it rejected this iteration. */
6816 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6817 {
 /* Keep the smallest signed delta seen; the delta is INT64_MAX on entry (asserted above). */
6818 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6819 if (iDelta < pGipCpuWorker->i64TSCDelta)
6820 pGipCpuWorker->i64TSCDelta = iDelta;
6821 }
6822 }
6823
 /* Re-arm for the next iteration: clear the master sample and release the worker with STOP. */
6824 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6825 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6826 }
6827 else
6828 {
6829 /*
6830 * The worker.
6831 */
6832 uint64_t uTscWorker;
6833 uint64_t uTscWorkerFlushed;
6834 uint64_t uCmpReadTime;
6835
6836 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
 /* Wait for the master to open the iteration, then signal readiness. */
6837 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
6838 ;
6839 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6840 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
6841
6842 /*
6843 * Keep reading the TSC until we notice that the master has read his. Reading
6844 * the TSC -after- the master has updated the memory is way too late. We thus
6845 * compensate by trying to measure how long it took for the worker to notice
6846 * the memory flushed from the master.
6847 */
6848 do
6849 {
6850 ASMSerializeInstruction();
6851 uTscWorker = ASMReadTSC();
6852 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6853 ASMSerializeInstruction();
6854 uTscWorkerFlushed = ASMReadTSC();
6855
 /* TSC ticks between the last worker reading and noticing the master's sample. */
6856 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
6857 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6858 {
6859 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
 /* Accept the sample only if noticing took less than twice the minimum seen; otherwise publish RSVD so the master skips it. */
6860 if (uCmpReadTime < (uMinCmpReadTime << 1))
6861 {
6862 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
6863 if (uCmpReadTime < uMinCmpReadTime)
6864 uMinCmpReadTime = uCmpReadTime;
6865 }
6866 else
6867 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
6868 }
6869 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
6870 {
 /* Read-time calibration iterations: only track the minimum, publish nothing. */
6871 if (uCmpReadTime < uMinCmpReadTime)
6872 uMinCmpReadTime = uCmpReadTime;
6873 }
6874
 /* Hand control back to the master and wait until it has changed the sync value again. */
6875 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
6876 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
6877 ASMNopPause();
6878 }
6879 }
6880
 /* Both CPUs evaluate this after each try and stop retrying once a delta has been recorded. */
6881 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
6882 break;
6883 }
6884}
6885
6886
6887/**
6888 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
6889 * synchronization variable. Optionally also clears the deltas on the per-CPU
6890 * GIP struct. as well.
6891 *
6892 * @param pGip Pointer to the GIP.
6893 * @param fClearDeltas Whether the deltas are also to be cleared.
6894 */
6895DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
6896{
6897 unsigned iCpu;
6898 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6899 {
6900 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6901 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
6902 if (fClearDeltas)
6903 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
6904 }
6905 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6906}
6907
6908
6909/**
6910 * Measures the TSC delta between the master GIP CPU and one specified worker
6911 * CPU.
6912 *
6913 * @returns VBox status code.
6914 * @param pDevExt Pointer to the device instance data.
6915 * @param idxWorker The index of the worker CPU from the GIP's array of
6916 * CPUs.
6917 *
6918 * @remarks This can be called with preemption disabled!
6919 */
6920static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
6921{
6922 int rc;
6923 PSUPGLOBALINFOPAGE pGip;
6924 PSUPGIPCPU pGipCpuWorker;
6925 RTCPUID idMaster;
6926
6927 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
6928 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
6929 Assert(!g_fOsTscDeltasInSync);
6930
6931 pGip = pDevExt->pGip;
6932 idMaster = pDevExt->idGipMaster;
6933 pGipCpuWorker = &pGip->aCPUs[idxWorker];
6934
6935 if (pGipCpuWorker->idCpu == idMaster)
6936 {
6937 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
6938 return VINF_SUCCESS;
6939 }
6940
6941 /* Set the master TSC as the initiator. */
6942 while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
6943 {
6944 /*
6945 * Sleep here rather than spin as there is a parallel measurement
6946 * being executed and that can take a good while to be done.
6947 */
6948 RTThreadSleep(1);
6949 }
6950
6951 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
6952 {
6953 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
6954 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6955 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6956 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
6957 if (RT_SUCCESS(rc))
6958 {
6959 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
6960 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
6961 }
6962 }
6963 else
6964 rc = VERR_CPU_OFFLINE;
6965
6966 ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
6967 return rc;
6968}
6969
6970
6971/**
6972 * Measures the TSC deltas between CPUs.
6973 *
6974 * @param pDevExt Pointer to the device instance data.
6975 * @param pidxMaster Where to store the index of the chosen master TSC if we
6976 * managed to determine the TSC deltas successfully.
6977 * Optional, can be NULL.
6978 *
6979 * @returns VBox status code.
6980 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
6981 * idCpu, GIP's online CPU set which are populated in
6982 * supdrvGipInitOnCpu().
6983 */
6984static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
6985{
6986 PSUPGIPCPU pGipCpuMaster;
6987 unsigned iCpu;
6988 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6989 uint32_t idxMaster = UINT32_MAX;
6990 int rc = VINF_SUCCESS;
6991 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
6992 uint32_t cOnlineCpus = pGip->cOnlineCpus;
6993
6994 Assert(!g_fOsTscDeltasInSync);
6995
6996 /*
6997 * If we determined the TSC is async., don't bother with measuring deltas.
6998 */
6999 if (RT_UNLIKELY(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC))
7000 return VINF_SUCCESS;
7001
7002 /*
7003 * Pick the first CPU online as the master TSC and make it the new GIP master based
7004 * on the APIC ID.
7005 *
7006 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7007 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7008 * master as this point since the sync/async timer isn't created yet.
7009 */
7010 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
7011 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7012 {
7013 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7014 if (idxCpu != UINT16_MAX)
7015 {
7016 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7017 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7018 {
7019 idxMaster = idxCpu;
7020 pGipCpu->i64TSCDelta = 0;
7021 break;
7022 }
7023 }
7024 }
7025 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7026 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7027 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7028
7029 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7030 if (pGip->cOnlineCpus <= 1)
7031 {
7032 if (pidxMaster)
7033 *pidxMaster = idxMaster;
7034 return VINF_SUCCESS;
7035 }
7036
7037 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7038 {
7039 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7040 if ( iCpu != idxMaster
7041 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7042 {
7043 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7044 if (RT_FAILURE(rc))
7045 {
7046 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7047 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7048 break;
7049 }
7050
7051 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
7052 {
7053 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7054 rc = VERR_TRY_AGAIN;
7055 break;
7056 }
7057 }
7058 }
7059
7060 if ( RT_SUCCESS(rc)
7061 && !pGipCpuMaster->i64TSCDelta
7062 && pidxMaster)
7063 {
7064 *pidxMaster = idxMaster;
7065 }
7066 return rc;
7067}
7068
7069
7070/**
7071 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7072 *
7073 * @param idCpu Ignored.
7074 * @param pvUser1 Where to put the TSC.
7075 * @param pvUser2 Ignored.
7076 */
7077static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7078{
7079 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7080}
7081
7082
7083/**
7084 * Determine if Async GIP mode is required because of TSC drift.
7085 *
7086 * When using the default/normal timer code it is essential that the time stamp counter
7087 * (TSC) runs never backwards, that is, a read operation to the counter should return
7088 * a bigger value than any previous read operation. This is guaranteed by the latest
7089 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7090 * case we have to choose the asynchronous timer mode.
7091 *
7092 * @param poffMin Pointer to the determined difference between different
7093 * cores (optional, can be NULL).
7094 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7095 */
7096static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7097{
7098 /*
7099 * Just iterate all the cpus 8 times and make sure that the TSC is
7100 * ever increasing. We don't bother taking TSC rollover into account.
7101 */
7102 int iEndCpu = RTMpGetArraySize();
7103 int iCpu;
7104 int cLoops = 8;
7105 bool fAsync = false;
7106 int rc = VINF_SUCCESS;
7107 uint64_t offMax = 0;
7108 uint64_t offMin = ~(uint64_t)0;
7109 uint64_t PrevTsc = ASMReadTSC();
7110
7111 while (cLoops-- > 0)
7112 {
7113 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7114 {
7115 uint64_t CurTsc;
7116 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7117 if (RT_SUCCESS(rc))
7118 {
7119 if (CurTsc <= PrevTsc)
7120 {
7121 fAsync = true;
7122 offMin = offMax = PrevTsc - CurTsc;
7123 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7124 iCpu, cLoops, CurTsc, PrevTsc));
7125 break;
7126 }
7127
7128 /* Gather statistics (except the first time). */
7129 if (iCpu != 0 || cLoops != 7)
7130 {
7131 uint64_t off = CurTsc - PrevTsc;
7132 if (off < offMin)
7133 offMin = off;
7134 if (off > offMax)
7135 offMax = off;
7136 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7137 }
7138
7139 /* Next */
7140 PrevTsc = CurTsc;
7141 }
7142 else if (rc == VERR_NOT_SUPPORTED)
7143 break;
7144 else
7145 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7146 }
7147
7148 /* broke out of the loop. */
7149 if (iCpu < iEndCpu)
7150 break;
7151 }
7152
7153 if (poffMin)
7154 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7155 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7156 fAsync, iEndCpu, rc, offMin, offMax));
7157#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7158 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7159#endif
7160 return fAsync;
7161}
7162
7163
7164/**
7165 * Determine the GIP TSC mode.
7166 *
7167 * @returns The most suitable TSC mode.
7168 * @param pDevExt Pointer to the device instance data.
7169 */
7170static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7171{
7172#if 0
7173 if (supdrvIsInvariantTsc())
7174 return SUPGIPMODE_SYNC_TSC; /** @todo Switch to SUPGIPMODE_INVARIANT_TSC later. */
7175#endif
7176
7177 /*
7178 * On SMP we're faced with two problems:
7179 * (1) There might be a skew between the CPU, so that cpu0
7180 * returns a TSC that is slightly different from cpu1.
7181 * (2) Power management (and other things) may cause the TSC
7182 * to run at a non-constant speed, and cause the speed
7183 * to be different on the cpus. This will result in (1).
7184 *
7185 * So, on SMP systems we'll have to select the ASYNC update method
7186 * if there are symptoms of these problems.
7187 */
7188 if (RTMpGetCount() > 1)
7189 {
7190 uint32_t uEAX, uEBX, uECX, uEDX;
7191 uint64_t u64DiffCoresIgnored;
7192
7193 /* Permit the user and/or the OS specific bits to force async mode. */
7194 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7195 return SUPGIPMODE_ASYNC_TSC;
7196
7197 /* Try check for current differences between the cpus. */
7198 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7199 return SUPGIPMODE_ASYNC_TSC;
7200
7201 /*
7202 * If the CPU supports power management and is an AMD one we
7203 * won't trust it unless it has the TscInvariant bit is set.
7204 */
7205 /* Check for "AuthenticAMD" */
7206 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7207 if ( uEAX >= 1
7208 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7209 {
7210 /* Check for APM support and that TscInvariant is cleared. */
7211 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7212 if (uEAX >= 0x80000007)
7213 {
7214 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7215 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
7216 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7217 return SUPGIPMODE_ASYNC_TSC;
7218 }
7219 }
7220 }
7221 return SUPGIPMODE_SYNC_TSC;
7222}
7223
7224
7225/**
7226 * Initializes per-CPU GIP information.
7227 *
7228 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7229 * @param pCpu Pointer to which GIP CPU to initalize.
7230 * @param u64NanoTS The current nanosecond timestamp.
7231 */
7232static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7233{
7234 pCpu->u32TransactionId = 2;
7235 pCpu->u64NanoTS = u64NanoTS;
7236 pCpu->u64TSC = ASMReadTSC();
7237 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7238 pCpu->i64TSCDelta = g_fOsTscDeltasInSync ? 0 : INT64_MAX;
7239
7240 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7241 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7242 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7243 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7244
7245 /*
7246 * We don't know the following values until we've executed updates.
7247 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7248 * the 2nd timer callout.
7249 */
7250 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7251 pCpu->u32UpdateIntervalTSC
7252 = pCpu->au32TSCHistory[0]
7253 = pCpu->au32TSCHistory[1]
7254 = pCpu->au32TSCHistory[2]
7255 = pCpu->au32TSCHistory[3]
7256 = pCpu->au32TSCHistory[4]
7257 = pCpu->au32TSCHistory[5]
7258 = pCpu->au32TSCHistory[6]
7259 = pCpu->au32TSCHistory[7]
7260 = (uint32_t)(_4G / pGip->u32UpdateHz);
7261}
7262
7263
7264/**
7265 * Initializes the GIP data.
7266 *
7267 * @param pDevExt Pointer to the device instance data.
7268 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7269 * @param HCPhys The physical address of the GIP.
7270 * @param u64NanoTS The current nanosecond timestamp.
7271 * @param uUpdateHz The update frequency.
7272 * @param uUpdateIntervalNS The update interval in nanoseconds.
7273 * @param cCpus The CPU count.
7274 */
7275static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7276 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7277{
7278 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7279 unsigned i;
7280#ifdef DEBUG_DARWIN_GIP
7281 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7282#else
7283 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7284#endif
7285
7286 /*
7287 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7288 * We only bother with TSC-deltas only on invariant CPUs for now.
7289 */
7290 g_fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7291
7292 /*
7293 * Initialize the structure.
7294 */
7295 memset(pGip, 0, cbGip);
7296 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7297 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7298 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7299 pGip->cCpus = (uint16_t)cCpus;
7300 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7301 pGip->u32UpdateHz = uUpdateHz;
7302 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7303 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7304 RTCpuSetEmpty(&pGip->PresentCpuSet);
7305 RTMpGetSet(&pGip->PossibleCpuSet);
7306 pGip->cOnlineCpus = RTMpGetOnlineCount();
7307 pGip->cPresentCpus = RTMpGetPresentCount();
7308 pGip->cPossibleCpus = RTMpGetCount();
7309 pGip->idCpuMax = RTMpGetMaxCpuId();
7310 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7311 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7312 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7313 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7314
7315 for (i = 0; i < cCpus; i++)
7316 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
7317
7318 /*
7319 * Link it to the device extension.
7320 */
7321 pDevExt->pGip = pGip;
7322 pDevExt->HCPhysGip = HCPhys;
7323 pDevExt->cGipUsers = 0;
7324
7325 /*
7326 * Allocate the TSC delta sync. struct. on a separate cache line.
7327 */
7328 g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
7329 g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
7330 Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
7331}
7332
7333
7334/**
7335 * On CPU initialization callback for RTMpOnAll.
7336 *
7337 * @param idCpu The CPU ID.
7338 * @param pvUser1 The device extension.
7339 * @param pvUser2 The GIP.
7340 */
7341static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7342{
7343 /* This is good enough, even though it will update some of the globals a
7344 bit to much. */
7345 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7346}
7347
7348
7349/**
7350 * Invalidates the GIP data upon termination.
7351 *
7352 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7353 */
7354static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7355{
7356 unsigned i;
7357 pGip->u32Magic = 0;
7358 for (i = 0; i < pGip->cCpus; i++)
7359 {
7360 pGip->aCPUs[i].u64NanoTS = 0;
7361 pGip->aCPUs[i].u64TSC = 0;
7362 pGip->aCPUs[i].iTSCHistoryHead = 0;
7363 pGip->aCPUs[i].u64TSCSample = 0;
7364 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7365 }
7366
7367 if (g_pvTscDeltaSync)
7368 {
7369 RTMemFree(g_pvTscDeltaSync);
7370 g_pTscDeltaSync = NULL;
7371 g_pvTscDeltaSync = NULL;
7372 }
7373}
7374
7375
7376/**
7377 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7378 * updates all the per cpu data except the transaction id.
7379 *
7380 * @param pDevExt The device extension.
7381 * @param pGipCpu Pointer to the per cpu data.
7382 * @param u64NanoTS The current time stamp.
7383 * @param u64TSC The current TSC.
7384 * @param iTick The current timer tick.
7385 *
7386 * @remarks Can be called with interrupts disabled!
7387 */
7388static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7389{
7390 uint64_t u64TSCDelta;
7391 uint32_t u32UpdateIntervalTSC;
7392 uint32_t u32UpdateIntervalTSCSlack;
7393 unsigned iTSCHistoryHead;
7394 uint64_t u64CpuHz;
7395 uint32_t u32TransactionId;
7396
7397 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7398 AssertPtrReturnVoid(pGip);
7399
7400 /* Delta between this and the previous update. */
7401 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7402
7403 /*
7404 * Update the NanoTS.
7405 */
7406 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7407
7408 /*
7409 * Calc TSC delta.
7410 */
7411 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
7412 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7413 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7414
7415 if (u64TSCDelta >> 32)
7416 {
7417 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7418 pGipCpu->cErrors++;
7419 }
7420
7421 /*
7422 * On the 2nd and 3rd callout, reset the history with the current TSC
7423 * interval since the values entered by supdrvGipInit are totally off.
7424 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7425 * better, while the 3rd should be most reliable.
7426 */
7427 u32TransactionId = pGipCpu->u32TransactionId;
7428 if (RT_UNLIKELY( ( u32TransactionId == 5
7429 || u32TransactionId == 7)
7430 && ( iTick == 2
7431 || iTick == 3) ))
7432 {
7433 unsigned i;
7434 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7435 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7436 }
7437
7438 /*
7439 * TSC History.
7440 */
7441 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7442 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7443 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7444 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7445
7446 /*
7447 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7448 *
7449 * On Windows, we have an occasional (but recurring) sour value that messed up
7450 * the history but taking only 1 interval reduces the precision overall.
7451 * However, this problem existed before the invariant mode was introduced.
7452 */
7453 if ( supdrvIsInvariantTsc()
7454 || pGip->u32UpdateHz >= 1000)
7455 {
7456 uint32_t u32;
7457 u32 = pGipCpu->au32TSCHistory[0];
7458 u32 += pGipCpu->au32TSCHistory[1];
7459 u32 += pGipCpu->au32TSCHistory[2];
7460 u32 += pGipCpu->au32TSCHistory[3];
7461 u32 >>= 2;
7462 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7463 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7464 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7465 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7466 u32UpdateIntervalTSC >>= 2;
7467 u32UpdateIntervalTSC += u32;
7468 u32UpdateIntervalTSC >>= 1;
7469
7470 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7471 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7472 }
7473 else if (pGip->u32UpdateHz >= 90)
7474 {
7475 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7476 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7477 u32UpdateIntervalTSC >>= 1;
7478
7479 /* value chosen on a 2GHz thinkpad running windows */
7480 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7481 }
7482 else
7483 {
7484 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7485
7486 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7487 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7488 }
7489 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7490
7491 if (supdrvIsInvariantTsc())
7492 return;
7493
7494 /*
7495 * CpuHz.
7496 */
7497 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7498 u64CpuHz /= pGip->u32UpdateIntervalNS;
7499 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7500}
7501
7502
7503/**
7504 * Updates the GIP.
7505 *
7506 * @param pDevExt The device extension.
7507 * @param u64NanoTS The current nanosecond timesamp.
7508 * @param u64TSC The current TSC timesamp.
7509 * @param idCpu The CPU ID.
7510 * @param iTick The current timer tick.
7511 *
7512 * @remarks Can be called with interrupts disabled!
7513 */
7514static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7515{
7516 /*
7517 * Determine the relevant CPU data.
7518 */
7519 PSUPGIPCPU pGipCpu;
7520 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7521 AssertPtrReturnVoid(pGip);
7522
7523 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7524 pGipCpu = &pGip->aCPUs[0];
7525 else
7526 {
7527 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7528 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7529 return;
7530 pGipCpu = &pGip->aCPUs[iCpu];
7531 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7532 return;
7533 }
7534
7535 /*
7536 * Start update transaction.
7537 */
7538 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7539 {
7540 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7541 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7542 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7543 pGipCpu->cErrors++;
7544 return;
7545 }
7546
7547 /*
7548 * Recalc the update frequency every 0x800th time.
7549 */
7550 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7551 {
7552 if (pGip->u64NanoTSLastUpdateHz)
7553 {
7554#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7555 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7556 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7557 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7558 {
7559 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7560 * calculation on non-invariant hosts if it changes the history decision
7561 * taken in supdrvGipDoUpdateCpu(). */
7562 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7563 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7564 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7565 }
7566#endif
7567 }
7568 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS + 1);
7569 }
7570
7571 /*
7572 * Update the data.
7573 */
7574 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7575
7576 /*
7577 * Complete transaction.
7578 */
7579 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7580}
7581
7582
7583/**
7584 * Updates the per cpu GIP data for the calling cpu.
7585 *
7586 * @param pDevExt The device extension.
7587 * @param u64NanoTS The current nanosecond timesamp.
7588 * @param u64TSC The current TSC timesamp.
7589 * @param idCpu The CPU ID.
7590 * @param idApic The APIC id for the CPU index.
7591 * @param iTick The current timer tick.
7592 *
7593 * @remarks Can be called with interrupts disabled!
7594 */
7595static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7596 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7597{
7598 uint32_t iCpu;
7599 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7600
7601 /*
7602 * Avoid a potential race when a CPU online notification doesn't fire on
7603 * the onlined CPU but the tick creeps in before the event notification is
7604 * run.
7605 */
7606 if (RT_UNLIKELY(iTick == 1))
7607 {
7608 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7609 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7610 supdrvGipMpEventOnline(pDevExt, idCpu);
7611 }
7612
7613 iCpu = pGip->aiCpuFromApicId[idApic];
7614 if (RT_LIKELY(iCpu < pGip->cCpus))
7615 {
7616 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7617 if (pGipCpu->idCpu == idCpu)
7618 {
7619 /*
7620 * Start update transaction.
7621 */
7622 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7623 {
7624 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7625 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7626 pGipCpu->cErrors++;
7627 return;
7628 }
7629
7630 /*
7631 * Update the data.
7632 */
7633 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7634
7635 /*
7636 * Complete transaction.
7637 */
7638 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7639 }
7640 }
7641}
7642
7643
7644/**
7645 * Resume built-in keyboard on MacBook Air and Pro hosts.
7646 * If there is no built-in keyboard device, return success anyway.
7647 *
7648 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7649 */
7650static int supdrvIOCtl_ResumeSuspendedKbds(void)
7651{
7652#if defined(RT_OS_DARWIN)
7653 return supdrvDarwinResumeSuspendedKbds();
7654#else
7655 return VERR_NOT_IMPLEMENTED;
7656#endif
7657}
7658
7659
7660/**
7661 * Service a TSC-delta measurement request.
7662 *
7663 * @returns VBox status code.
7664 * @param pDevExt Pointer to the device instance data.
7665 * @param pReq Pointer to the TSC-delta measurement request.
7666 */
7667static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7668{
7669 PSUPGLOBALINFOPAGE pGip;
7670 RTCPUID idCpuWorker;
7671 int rc = VERR_CPU_NOT_FOUND;
7672 int16_t cTries;
7673 RTMSINTERVAL cMsWaitRetry;
7674 uint16_t iCpu;
7675
7676 /*
7677 * Validate.
7678 */
7679 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7680 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7681 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7682 idCpuWorker = pReq->u.In.idCpu;
7683 if (idCpuWorker == NIL_RTCPUID)
7684 return VERR_INVALID_CPU_ID;
7685
7686 if (g_fOsTscDeltasInSync)
7687 return VINF_SUCCESS;
7688
7689 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7690 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7691 pGip = pDevExt->pGip;
7692 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7693 {
7694 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7695 if (pGipCpuWorker->idCpu == idCpuWorker)
7696 {
7697 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7698 && !pReq->u.In.fForce)
7699 return VINF_SUCCESS;
7700
7701#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7702 if (pReq->u.In.fAsync)
7703 {
7704 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7705 * to pass those options to the thread somehow and implement it in the
7706 * thread. Check if anyone uses/needs fAsync before implementing this. */
7707 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
7708 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7709 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7710 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7711 {
7712 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7713 }
7714 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7715 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7716 return VINF_SUCCESS;
7717 }
7718#endif
7719
7720 while (cTries--)
7721 {
7722 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7723 if (RT_SUCCESS(rc))
7724 {
7725 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7726 break;
7727 }
7728
7729 if (cMsWaitRetry)
7730 RTThreadSleep(cMsWaitRetry);
7731 }
7732
7733 break;
7734 }
7735 }
7736 return rc;
7737}
7738
7739
7740/**
7741 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7742 *
7743 * @returns VBox status code.
7744 * @param pDevExt Pointer to the device instance data.
7745 * @param pReq Pointer to the TSC-read request.
7746 */
7747static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7748{
7749 uint64_t uTsc;
7750 uint16_t idApic;
7751 int16_t cTries;
7752 PSUPGLOBALINFOPAGE pGip;
7753 int rc;
7754
7755 /*
7756 * Validate.
7757 */
7758 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7759 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7760 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7761 pGip = pDevExt->pGip;
7762
7763 cTries = 4;
7764 while (cTries-- > 0)
7765 {
7766 rc = SUPReadTsc(&uTsc, &idApic);
7767 if (RT_SUCCESS(rc))
7768 {
7769 pReq->u.Out.u64AdjustedTsc = uTsc;
7770 pReq->u.Out.idApic = idApic;
7771 return VINF_SUCCESS;
7772 }
7773 else
7774 {
7775 int rc2;
7776 uint16_t iCpu;
7777
7778 /* If we failed to have a delta, measurement the delta and retry. */
7779 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7780 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7781 iCpu = pGip->aiCpuFromApicId[idApic];
7782 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7783
7784 Assert(!g_fOsTscDeltasInSync);
7785 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7786 if (RT_SUCCESS(rc2))
7787 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7788 }
7789 }
7790
7791 return rc;
7792}
7793
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette