VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53571

Last change on this file since 53571 was 53568, checked in by vboxsync, 10 years ago

SUPDrv.c: Disabled annoying assertion in i64TSCDelta init code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 296.5 KB
Line 
1/* $Id: SUPDrv.c 53568 2014-12-18 21:09:54Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency by which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops until we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrent safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
128AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
129
130/** @def VBOX_SVN_REV
131 * The makefile should define this if it can. */
132#ifndef VBOX_SVN_REV
133# define VBOX_SVN_REV 0
134#endif
135
136#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
137# define DO_NOT_START_GIP
138#endif
139
/** Whether the application of TSC-deltas is required.
 *
 * Evaluates to true when the GIP of the given device extension is in
 * SUPGIPMODE_INVARIANT_TSC mode and the device extension's fOsTscDeltasInSync
 * flag is clear.
 *
 * @param   a_pDevExt   The device extension; its pGip member is dereferenced,
 *                      so it must point to a GIP when the macro is used. */
#define GIP_ARE_TSC_DELTAS_APPLICABLE(a_pDevExt) \
    ((a_pDevExt)->pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC && !((a_pDevExt)->fOsTscDeltasInSync))
143
144
145/*******************************************************************************
146* Internal Functions *
147*******************************************************************************/
148static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
149static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
150static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
151static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
152static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
153static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
154static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
155static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
156static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
157static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
158static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
159static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
160static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
161DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
162DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
163static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
164static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
165static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
166static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
167static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
168static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
169static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
170static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
171static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
172static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
173static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
174 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
175static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
176static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
177static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
178static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
179 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
180static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
181static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
182static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
183static int supdrvIOCtl_ResumeSuspendedKbds(void);
184
185
186/*******************************************************************************
187* Global Variables *
188*******************************************************************************/
189DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
190
191
/**
 * Array of the R0 SUP API.
 *
 * Each entry pairs an exported symbol name with its address.  The first ten
 * entries (the SUPR0Abs* symbols) are placeholders set to 0 here;
 * supdrvInitDevExt() overwrites their pfn members at runtime and addresses
 * them by array index (0 thru 9), so nothing may be inserted before or
 * between them and their order must not change.
 *
 * NOTE(review): the "SED:" marker comments look like anchors for an external
 * script that processes this table - presumably they must be kept intact;
 * confirm against the build files before touching them.
 */
static SUPFUNC g_aFunctions[] =
{
/* SED: START */
    /* name function */
    /* Entries with absolute addresses determined at runtime, fixup
       code makes ugly ASSUMPTIONS about the order here: */
    { "SUPR0AbsIs64bit", (void *)0 },
    { "SUPR0Abs64bitKernelCS", (void *)0 },
    { "SUPR0Abs64bitKernelSS", (void *)0 },
    { "SUPR0Abs64bitKernelDS", (void *)0 },
    { "SUPR0AbsKernelCS", (void *)0 },
    { "SUPR0AbsKernelSS", (void *)0 },
    { "SUPR0AbsKernelDS", (void *)0 },
    { "SUPR0AbsKernelES", (void *)0 },
    { "SUPR0AbsKernelFS", (void *)0 },
    { "SUPR0AbsKernelGS", (void *)0 },
    /* Normal function pointers: */
    { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
    { "SUPGetGIP", (void *)SUPGetGIP },
    { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
    { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
    { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
    { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
    { "SUPR0ContFree", (void *)SUPR0ContFree },
    { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
    { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
    { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
    { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
    { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
    { "SUPR0LockMem", (void *)SUPR0LockMem },
    { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
    { "SUPR0LowFree", (void *)SUPR0LowFree },
    { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
    { "SUPR0MemFree", (void *)SUPR0MemFree },
    { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
    { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
    { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
    { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
    { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
    { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
    { "SUPR0PageFree", (void *)SUPR0PageFree },
    { "SUPR0Printf", (void *)SUPR0Printf },
    { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
    { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
    { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
    { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
    { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
    { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
    { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
    { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
    { "SUPSemEventClose", (void *)SUPSemEventClose },
    { "SUPSemEventCreate", (void *)SUPSemEventCreate },
    { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
    { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
    { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
    { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
    { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
    { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
    { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
    { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
    { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
    { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
    { "SUPSemEventSignal", (void *)SUPSemEventSignal },
    { "SUPSemEventWait", (void *)SUPSemEventWait },
    { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
    { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
    { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },

    { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
    { "RTAssertMayPanic", (void *)RTAssertMayPanic },
    { "RTAssertMsg1", (void *)RTAssertMsg1 },
    { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
    { "RTAssertMsg2V", (void *)RTAssertMsg2V },
    { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
    { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
    { "RTCrc32", (void *)RTCrc32 },
    { "RTCrc32Finish", (void *)RTCrc32Finish },
    { "RTCrc32Process", (void *)RTCrc32Process },
    { "RTCrc32Start", (void *)RTCrc32Start },
    { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
    { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
    { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
    { "RTHandleTableCreate", (void *)RTHandleTableCreate },
    { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
    { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
    { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
    { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
    { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
    { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
    { "RTLogLoggerExV", (void *)RTLogLoggerExV },
    { "RTLogPrintfV", (void *)RTLogPrintfV },
    { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
    { "RTMemAllocExTag", (void *)RTMemAllocExTag },
    { "RTMemAllocTag", (void *)RTMemAllocTag },
    { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
    { "RTMemAllocZTag", (void *)RTMemAllocZTag },
    { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
    { "RTMemDupExTag", (void *)RTMemDupExTag },
    { "RTMemDupTag", (void *)RTMemDupTag },
    { "RTMemFree", (void *)RTMemFree },
    { "RTMemFreeEx", (void *)RTMemFreeEx },
    { "RTMemReallocTag", (void *)RTMemReallocTag },
    { "RTMpCpuId", (void *)RTMpCpuId },
    { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
    { "RTMpGetArraySize", (void *)RTMpGetArraySize },
    { "RTMpGetCount", (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
    { "RTMpGetSet", (void *)RTMpGetSet },
    { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
    { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
    { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
    { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
    { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
    { "RTMpOnAll", (void *)RTMpOnAll },
    { "RTMpOnOthers", (void *)RTMpOnOthers },
    { "RTMpOnSpecific", (void *)RTMpOnSpecific },
    { "RTMpPokeCpu", (void *)RTMpPokeCpu },
    { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
    { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
    { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
    { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
    { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
    { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
    { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
    { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
    { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
    { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
    { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
    { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
    { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
    { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
    { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
    { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
    { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
    { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
    { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
    { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
    { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
    { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
    { "RTProcSelf", (void *)RTProcSelf },
    { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
    { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
    { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
    { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
    { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
    { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
    { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
    { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
    { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
    { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
    { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
    { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
    { "RTR0MemObjFree", (void *)RTR0MemObjFree },
    { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
    { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
    { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
    { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
    { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
    { "RTR0MemObjSize", (void *)RTR0MemObjSize },
    { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
    { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
    { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
    { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
    { "RTSemEventCreate", (void *)RTSemEventCreate },
    { "RTSemEventDestroy", (void *)RTSemEventDestroy },
    { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
    { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
    { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
    { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
    { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
    { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
    { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventSignal", (void *)RTSemEventSignal },
    { "RTSemEventWait", (void *)RTSemEventWait },
    { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
    { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
    { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
    { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
    { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
    { "RTSemMutexCreate", (void *)RTSemMutexCreate },
    { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
    { "RTSemMutexRelease", (void *)RTSemMutexRelease },
    { "RTSemMutexRequest", (void *)RTSemMutexRequest },
    { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
    { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
    { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
    { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
    { "RTSpinlockCreate", (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
    { "RTSpinlockRelease", (void *)RTSpinlockRelease },
    { "RTStrCopy", (void *)RTStrCopy },
    { "RTStrDupTag", (void *)RTStrDupTag },
    { "RTStrFormat", (void *)RTStrFormat },
    { "RTStrFormatNumber", (void *)RTStrFormatNumber },
    { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
    { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
    { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
    { "RTStrFormatV", (void *)RTStrFormatV },
    { "RTStrFree", (void *)RTStrFree },
    { "RTStrNCmp", (void *)RTStrNCmp },
    { "RTStrPrintf", (void *)RTStrPrintf },
    { "RTStrPrintfEx", (void *)RTStrPrintfEx },
    { "RTStrPrintfExV", (void *)RTStrPrintfExV },
    { "RTStrPrintfV", (void *)RTStrPrintfV },
    { "RTThreadCreate", (void *)RTThreadCreate },
    { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
    { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
    { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
    { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
    { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
    { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
    { "RTThreadGetName", (void *)RTThreadGetName },
    { "RTThreadGetNative", (void *)RTThreadGetNative },
    { "RTThreadGetType", (void *)RTThreadGetType },
    { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
    { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
    { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
    { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
    { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
    { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
    { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
    { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
    { "RTThreadSelf", (void *)RTThreadSelf },
    { "RTThreadSelfName", (void *)RTThreadSelfName },
    { "RTThreadSleep", (void *)RTThreadSleep },
    { "RTThreadUserReset", (void *)RTThreadUserReset },
    { "RTThreadUserSignal", (void *)RTThreadUserSignal },
    { "RTThreadUserWait", (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
    { "RTThreadWait", (void *)RTThreadWait },
    { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
    { "RTThreadYield", (void *)RTThreadYield },
    { "RTTimeMilliTS", (void *)RTTimeMilliTS },
    { "RTTimeNanoTS", (void *)RTTimeNanoTS },
    { "RTTimeNow", (void *)RTTimeNow },
    { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
    { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
    { "RTTimerCreate", (void *)RTTimerCreate },
    { "RTTimerCreateEx", (void *)RTTimerCreateEx },
    { "RTTimerDestroy", (void *)RTTimerDestroy },
    { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
    { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
    { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
    { "RTTimerStart", (void *)RTTimerStart },
    { "RTTimerStop", (void *)RTTimerStop },
    { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
    { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
    { "RTUuidCompare", (void *)RTUuidCompare },
    { "RTUuidCompareStr", (void *)RTUuidCompareStr },
    { "RTUuidFromStr", (void *)RTUuidFromStr },
/* SED: END */
};
461
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
/**
 * Drag in the rest of IPRT since we share it with the
 * rest of the kernel modules on darwin.
 *
 * NULL-terminated list of IPRT functions that this file does not call itself;
 * taking their addresses here creates a reference to each of them.  The
 * per-entry comments name the module each function is listed for.
 */
PFNRT g_apfnVBoxDrvIPRTDeps[] =
{
    /* VBoxNetAdp */
    (PFNRT)RTRandBytes,
    /* VBoxUSB */
    (PFNRT)RTPathStripFilename,
    NULL
};
#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
476
477
478/**
479 * Initializes the device extentsion structure.
480 *
481 * @returns IPRT status code.
482 * @param pDevExt The device extension to initialize.
483 * @param cbSession The size of the session structure. The size of
484 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
485 * defined because we're skipping the OS specific members
486 * then.
487 */
488int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
489{
490 int rc;
491
492#ifdef SUPDRV_WITH_RELEASE_LOGGER
493 /*
494 * Create the release log.
495 */
496 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
497 PRTLOGGER pRelLogger;
498 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
499 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
500 if (RT_SUCCESS(rc))
501 RTLogRelSetDefaultInstance(pRelLogger);
502 /** @todo Add native hook for getting logger config parameters and setting
503 * them. On linux we should use the module parameter stuff... */
504#endif
505
506 /*
507 * Initialize it.
508 */
509 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
510 pDevExt->Spinlock = NIL_RTSPINLOCK;
511 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
512 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
513 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
514 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
515 if (RT_SUCCESS(rc))
516 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
517 if (RT_SUCCESS(rc))
518 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
519
520 if (RT_SUCCESS(rc))
521#ifdef SUPDRV_USE_MUTEX_FOR_LDR
522 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
523#else
524 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
525#endif
526 if (RT_SUCCESS(rc))
527 {
528 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
529 if (RT_SUCCESS(rc))
530 {
531#ifdef SUPDRV_USE_MUTEX_FOR_LDR
532 rc = RTSemMutexCreate(&pDevExt->mtxGip);
533#else
534 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
535#endif
536 if (RT_SUCCESS(rc))
537 {
538 rc = supdrvGipCreate(pDevExt);
539 if (RT_SUCCESS(rc))
540 {
541 rc = supdrvTracerInit(pDevExt);
542 if (RT_SUCCESS(rc))
543 {
544 pDevExt->pLdrInitImage = NULL;
545 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
546 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
547 pDevExt->cbSession = (uint32_t)cbSession;
548
549 /*
550 * Fixup the absolute symbols.
551 *
552 * Because of the table indexing assumptions we'll have a little #ifdef orgy
553 * here rather than distributing this to OS specific files. At least for now.
554 */
555#ifdef RT_OS_DARWIN
556# if ARCH_BITS == 32
557 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
558 {
559 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
560 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
561 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
562 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
563 }
564 else
565 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
566 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
567 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
568 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
569 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
570 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
571 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
572# else /* 64-bit darwin: */
573 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
574 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
575 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
576 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
577 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
578 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
579 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
580 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
581 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
582 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
583
584# endif
585#else /* !RT_OS_DARWIN */
586# if ARCH_BITS == 64
587 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
588 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
589 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
590 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
591# else
592 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
593# endif
594 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
595 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
596 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
597 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
598 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
599 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
600#endif /* !RT_OS_DARWIN */
601 return VINF_SUCCESS;
602 }
603
604 supdrvGipDestroy(pDevExt);
605 }
606
607#ifdef SUPDRV_USE_MUTEX_FOR_GIP
608 RTSemMutexDestroy(pDevExt->mtxGip);
609 pDevExt->mtxGip = NIL_RTSEMMUTEX;
610#else
611 RTSemFastMutexDestroy(pDevExt->mtxGip);
612 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
613#endif
614 }
615 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
616 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
617 }
618#ifdef SUPDRV_USE_MUTEX_FOR_LDR
619 RTSemMutexDestroy(pDevExt->mtxLdr);
620 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
621#else
622 RTSemFastMutexDestroy(pDevExt->mtxLdr);
623 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
624#endif
625 }
626
627 RTSpinlockDestroy(pDevExt->Spinlock);
628 pDevExt->Spinlock = NIL_RTSPINLOCK;
629 RTSpinlockDestroy(pDevExt->hGipSpinlock);
630 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
631 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
632 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
633
634#ifdef SUPDRV_WITH_RELEASE_LOGGER
635 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
636 RTLogDestroy(RTLogSetDefaultInstance(NULL));
637#endif
638
639 return rc;
640}
641
642
643/**
644 * Delete the device extension (e.g. cleanup members).
645 *
646 * @param pDevExt The device extension to delete.
647 */
648void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
649{
650 PSUPDRVOBJ pObj;
651 PSUPDRVUSAGE pUsage;
652
653 /*
654 * Kill mutexes and spinlocks.
655 */
656#ifdef SUPDRV_USE_MUTEX_FOR_GIP
657 RTSemMutexDestroy(pDevExt->mtxGip);
658 pDevExt->mtxGip = NIL_RTSEMMUTEX;
659#else
660 RTSemFastMutexDestroy(pDevExt->mtxGip);
661 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
662#endif
663#ifdef SUPDRV_USE_MUTEX_FOR_LDR
664 RTSemMutexDestroy(pDevExt->mtxLdr);
665 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
666#else
667 RTSemFastMutexDestroy(pDevExt->mtxLdr);
668 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
669#endif
670 RTSpinlockDestroy(pDevExt->Spinlock);
671 pDevExt->Spinlock = NIL_RTSPINLOCK;
672 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
673 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
674 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
675 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
676
677 /*
678 * Free lists.
679 */
680 /* objects. */
681 pObj = pDevExt->pObjs;
682 Assert(!pObj); /* (can trigger on forced unloads) */
683 pDevExt->pObjs = NULL;
684 while (pObj)
685 {
686 void *pvFree = pObj;
687 pObj = pObj->pNext;
688 RTMemFree(pvFree);
689 }
690
691 /* usage records. */
692 pUsage = pDevExt->pUsageFree;
693 pDevExt->pUsageFree = NULL;
694 while (pUsage)
695 {
696 void *pvFree = pUsage;
697 pUsage = pUsage->pNext;
698 RTMemFree(pvFree);
699 }
700
701 /* kill the GIP. */
702 supdrvGipDestroy(pDevExt);
703 RTSpinlockDestroy(pDevExt->hGipSpinlock);
704 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
705
706 supdrvTracerTerm(pDevExt);
707
708#ifdef SUPDRV_WITH_RELEASE_LOGGER
709 /* destroy the loggers. */
710 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
711 RTLogDestroy(RTLogSetDefaultInstance(NULL));
712#endif
713}
714
715
716/**
717 * Create session.
718 *
719 * @returns IPRT status code.
720 * @param pDevExt Device extension.
721 * @param fUser Flag indicating whether this is a user or kernel
722 * session.
723 * @param fUnrestricted Unrestricted access (system) or restricted access
724 * (user)?
725 * @param ppSession Where to store the pointer to the session data.
726 */
727int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
728{
729 int rc;
730 PSUPDRVSESSION pSession;
731
732 if (!SUP_IS_DEVEXT_VALID(pDevExt))
733 return VERR_INVALID_PARAMETER;
734
735 /*
736 * Allocate memory for the session data.
737 */
738 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
739 if (pSession)
740 {
741 /* Initialize session data. */
742 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
743 if (!rc)
744 {
745 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
746 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
747 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
748 if (RT_SUCCESS(rc))
749 {
750 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
751 pSession->pDevExt = pDevExt;
752 pSession->u32Cookie = BIRD_INV;
753 pSession->fUnrestricted = fUnrestricted;
754 /*pSession->fInHashTable = false; */
755 pSession->cRefs = 1;
756 /*pSession->pCommonNextHash = NULL;
757 pSession->ppOsSessionPtr = NULL; */
758 if (fUser)
759 {
760 pSession->Process = RTProcSelf();
761 pSession->R0Process = RTR0ProcHandleSelf();
762 }
763 else
764 {
765 pSession->Process = NIL_RTPROCESS;
766 pSession->R0Process = NIL_RTR0PROCESS;
767 }
768 /*pSession->pLdrUsage = NULL;
769 pSession->pVM = NULL;
770 pSession->pUsage = NULL;
771 pSession->pGip = NULL;
772 pSession->fGipReferenced = false;
773 pSession->Bundle.cUsed = 0; */
774 pSession->Uid = NIL_RTUID;
775 pSession->Gid = NIL_RTGID;
776 /*pSession->uTracerData = 0;*/
777 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
778 RTListInit(&pSession->TpProviders);
779 /*pSession->cTpProviders = 0;*/
780 /*pSession->cTpProbesFiring = 0;*/
781 RTListInit(&pSession->TpUmods);
782 /*RT_ZERO(pSession->apTpLookupTable);*/
783
784 VBOXDRV_SESSION_CREATE(pSession, fUser);
785 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
786 return VINF_SUCCESS;
787 }
788
789 RTSpinlockDestroy(pSession->Spinlock);
790 }
791 RTMemFree(pSession);
792 *ppSession = NULL;
793 Log(("Failed to create spinlock, rc=%d!\n", rc));
794 }
795 else
796 rc = VERR_NO_MEMORY;
797
798 return rc;
799}
800
801
802/**
803 * Cleans up the session in the context of the process to which it belongs, the
804 * caller will free the session and the session spinlock.
805 *
806 * This should normally occur when the session is closed or as the process
807 * exits. Careful reference counting in the OS specfic code makes sure that
808 * there cannot be any races between process/handle cleanup callbacks and
809 * threads doing I/O control calls.
810 *
811 * @param pDevExt The device extension.
812 * @param pSession Session data.
813 */
814static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
815{
816 int rc;
817 PSUPDRVBUNDLE pBundle;
818 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
819
820 Assert(!pSession->fInHashTable);
821 Assert(!pSession->ppOsSessionPtr);
822 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
823 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
824
825 /*
826 * Remove logger instances related to this session.
827 */
828 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
829
830 /*
831 * Destroy the handle table.
832 */
833 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
834 AssertRC(rc);
835 pSession->hHandleTable = NIL_RTHANDLETABLE;
836
837 /*
838 * Release object references made in this session.
839 * In theory there should be noone racing us in this session.
840 */
841 Log2(("release objects - start\n"));
842 if (pSession->pUsage)
843 {
844 PSUPDRVUSAGE pUsage;
845 RTSpinlockAcquire(pDevExt->Spinlock);
846
847 while ((pUsage = pSession->pUsage) != NULL)
848 {
849 PSUPDRVOBJ pObj = pUsage->pObj;
850 pSession->pUsage = pUsage->pNext;
851
852 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
853 if (pUsage->cUsage < pObj->cUsage)
854 {
855 pObj->cUsage -= pUsage->cUsage;
856 RTSpinlockRelease(pDevExt->Spinlock);
857 }
858 else
859 {
860 /* Destroy the object and free the record. */
861 if (pDevExt->pObjs == pObj)
862 pDevExt->pObjs = pObj->pNext;
863 else
864 {
865 PSUPDRVOBJ pObjPrev;
866 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
867 if (pObjPrev->pNext == pObj)
868 {
869 pObjPrev->pNext = pObj->pNext;
870 break;
871 }
872 Assert(pObjPrev);
873 }
874 RTSpinlockRelease(pDevExt->Spinlock);
875
876 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
877 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
878 if (pObj->pfnDestructor)
879 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
880 RTMemFree(pObj);
881 }
882
883 /* free it and continue. */
884 RTMemFree(pUsage);
885
886 RTSpinlockAcquire(pDevExt->Spinlock);
887 }
888
889 RTSpinlockRelease(pDevExt->Spinlock);
890 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
891 }
892 Log2(("release objects - done\n"));
893
894 /*
895 * Do tracer cleanups related to this session.
896 */
897 Log2(("release tracer stuff - start\n"));
898 supdrvTracerCleanupSession(pDevExt, pSession);
899 Log2(("release tracer stuff - end\n"));
900
901 /*
902 * Release memory allocated in the session.
903 *
904 * We do not serialize this as we assume that the application will
905 * not allocated memory while closing the file handle object.
906 */
907 Log2(("freeing memory:\n"));
908 pBundle = &pSession->Bundle;
909 while (pBundle)
910 {
911 PSUPDRVBUNDLE pToFree;
912 unsigned i;
913
914 /*
915 * Check and unlock all entries in the bundle.
916 */
917 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
918 {
919 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
920 {
921 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
922 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
923 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
924 {
925 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
926 AssertRC(rc); /** @todo figure out how to handle this. */
927 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
928 }
929 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
930 AssertRC(rc); /** @todo figure out how to handle this. */
931 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
932 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
933 }
934 }
935
936 /*
937 * Advance and free previous bundle.
938 */
939 pToFree = pBundle;
940 pBundle = pBundle->pNext;
941
942 pToFree->pNext = NULL;
943 pToFree->cUsed = 0;
944 if (pToFree != &pSession->Bundle)
945 RTMemFree(pToFree);
946 }
947 Log2(("freeing memory - done\n"));
948
949 /*
950 * Deregister component factories.
951 */
952 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
953 Log2(("deregistering component factories:\n"));
954 if (pDevExt->pComponentFactoryHead)
955 {
956 PSUPDRVFACTORYREG pPrev = NULL;
957 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
958 while (pCur)
959 {
960 if (pCur->pSession == pSession)
961 {
962 /* unlink it */
963 PSUPDRVFACTORYREG pNext = pCur->pNext;
964 if (pPrev)
965 pPrev->pNext = pNext;
966 else
967 pDevExt->pComponentFactoryHead = pNext;
968
969 /* free it */
970 pCur->pNext = NULL;
971 pCur->pSession = NULL;
972 pCur->pFactory = NULL;
973 RTMemFree(pCur);
974
975 /* next */
976 pCur = pNext;
977 }
978 else
979 {
980 /* next */
981 pPrev = pCur;
982 pCur = pCur->pNext;
983 }
984 }
985 }
986 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
987 Log2(("deregistering component factories - done\n"));
988
989 /*
990 * Loaded images needs to be dereferenced and possibly freed up.
991 */
992 supdrvLdrLock(pDevExt);
993 Log2(("freeing images:\n"));
994 if (pSession->pLdrUsage)
995 {
996 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
997 pSession->pLdrUsage = NULL;
998 while (pUsage)
999 {
1000 void *pvFree = pUsage;
1001 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1002 if (pImage->cUsage > pUsage->cUsage)
1003 pImage->cUsage -= pUsage->cUsage;
1004 else
1005 supdrvLdrFree(pDevExt, pImage);
1006 pUsage->pImage = NULL;
1007 pUsage = pUsage->pNext;
1008 RTMemFree(pvFree);
1009 }
1010 }
1011 supdrvLdrUnlock(pDevExt);
1012 Log2(("freeing images - done\n"));
1013
1014 /*
1015 * Unmap the GIP.
1016 */
1017 Log2(("umapping GIP:\n"));
1018 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1019 {
1020 SUPR0GipUnmap(pSession);
1021 pSession->fGipReferenced = 0;
1022 }
1023 Log2(("umapping GIP - done\n"));
1024}
1025
1026
1027/**
1028 * Common code for freeing a session when the reference count reaches zero.
1029 *
1030 * @param pDevExt Device extension.
1031 * @param pSession Session data.
1032 * This data will be freed by this routine.
1033 */
1034static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1035{
1036 VBOXDRV_SESSION_CLOSE(pSession);
1037
1038 /*
1039 * Cleanup the session first.
1040 */
1041 supdrvCleanupSession(pDevExt, pSession);
1042 supdrvOSCleanupSession(pDevExt, pSession);
1043
1044 /*
1045 * Free the rest of the session stuff.
1046 */
1047 RTSpinlockDestroy(pSession->Spinlock);
1048 pSession->Spinlock = NIL_RTSPINLOCK;
1049 pSession->pDevExt = NULL;
1050 RTMemFree(pSession);
1051 LogFlow(("supdrvDestroySession: returns\n"));
1052}
1053
1054
1055/**
1056 * Inserts the session into the global hash table.
1057 *
1058 * @retval VINF_SUCCESS on success.
1059 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1060 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1061 * session (asserted).
1062 * @retval VERR_DUPLICATE if there is already a session for that pid.
1063 *
1064 * @param pDevExt The device extension.
1065 * @param pSession The session.
1066 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1067 * available and used. This will set to point to the
1068 * session while under the protection of the session
1069 * hash table spinlock. It will also be kept in
1070 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1071 * cleanup use.
1072 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1073 */
1074int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1075 void *pvUser)
1076{
1077 PSUPDRVSESSION pCur;
1078 unsigned iHash;
1079
1080 /*
1081 * Validate input.
1082 */
1083 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1084 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1085
1086 /*
1087 * Calculate the hash table index and acquire the spinlock.
1088 */
1089 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1090
1091 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1092
1093 /*
1094 * If there are a collisions, we need to carefully check if we got a
1095 * duplicate. There can only be one open session per process.
1096 */
1097 pCur = pDevExt->apSessionHashTab[iHash];
1098 if (pCur)
1099 {
1100 while (pCur && pCur->Process != pSession->Process)
1101 pCur = pCur->pCommonNextHash;
1102
1103 if (pCur)
1104 {
1105 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1106 if (pCur == pSession)
1107 {
1108 Assert(pSession->fInHashTable);
1109 AssertFailed();
1110 return VERR_WRONG_ORDER;
1111 }
1112 Assert(!pSession->fInHashTable);
1113 if (pCur->R0Process == pSession->R0Process)
1114 return VERR_RESOURCE_IN_USE;
1115 return VERR_DUPLICATE;
1116 }
1117 }
1118 Assert(!pSession->fInHashTable);
1119 Assert(!pSession->ppOsSessionPtr);
1120
1121 /*
1122 * Insert it, doing a callout to the OS specific code in case it has
1123 * anything it wishes to do while we're holding the spinlock.
1124 */
1125 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1126 pDevExt->apSessionHashTab[iHash] = pSession;
1127 pSession->fInHashTable = true;
1128 ASMAtomicIncS32(&pDevExt->cSessions);
1129
1130 pSession->ppOsSessionPtr = ppOsSessionPtr;
1131 if (ppOsSessionPtr)
1132 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1133
1134 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1135
1136 /*
1137 * Retain a reference for the pointer in the session table.
1138 */
1139 ASMAtomicIncU32(&pSession->cRefs);
1140
1141 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1142 return VINF_SUCCESS;
1143}
1144
1145
1146/**
1147 * Removes the session from the global hash table.
1148 *
1149 * @retval VINF_SUCCESS on success.
1150 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1151 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1152 * session (asserted).
1153 *
1154 * @param pDevExt The device extension.
1155 * @param pSession The session. The caller is expected to have a reference
1156 * to this so it won't croak on us when we release the hash
1157 * table reference.
1158 * @param pvUser OS specific context value for the
1159 * supdrvOSSessionHashTabInserted callback.
1160 */
1161int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1162{
1163 PSUPDRVSESSION pCur;
1164 unsigned iHash;
1165 int32_t cRefs;
1166
1167 /*
1168 * Validate input.
1169 */
1170 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1171 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1172
1173 /*
1174 * Calculate the hash table index and acquire the spinlock.
1175 */
1176 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1177
1178 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1179
1180 /*
1181 * Unlink it.
1182 */
1183 pCur = pDevExt->apSessionHashTab[iHash];
1184 if (pCur == pSession)
1185 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1186 else
1187 {
1188 PSUPDRVSESSION pPrev = pCur;
1189 while (pCur && pCur != pSession)
1190 {
1191 pPrev = pCur;
1192 pCur = pCur->pCommonNextHash;
1193 }
1194 if (pCur)
1195 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1196 else
1197 {
1198 Assert(!pSession->fInHashTable);
1199 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1200 return VERR_NOT_FOUND;
1201 }
1202 }
1203
1204 pSession->pCommonNextHash = NULL;
1205 pSession->fInHashTable = false;
1206
1207 ASMAtomicDecS32(&pDevExt->cSessions);
1208
1209 /*
1210 * Clear OS specific session pointer if available and do the OS callback.
1211 */
1212 if (pSession->ppOsSessionPtr)
1213 {
1214 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1215 pSession->ppOsSessionPtr = NULL;
1216 }
1217
1218 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1219
1220 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1221
1222 /*
1223 * Drop the reference the hash table had to the session. This shouldn't
1224 * be the last reference!
1225 */
1226 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1227 Assert(cRefs > 0 && cRefs < _1M);
1228 if (cRefs == 0)
1229 supdrvDestroySession(pDevExt, pSession);
1230
1231 return VINF_SUCCESS;
1232}
1233
1234
1235/**
1236 * Looks up the session for the current process in the global hash table or in
1237 * OS specific pointer.
1238 *
1239 * @returns Pointer to the session with a reference that the caller must
1240 * release. If no valid session was found, NULL is returned.
1241 *
1242 * @param pDevExt The device extension.
1243 * @param Process The process ID.
1244 * @param R0Process The ring-0 process handle.
1245 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1246 * this is used instead of the hash table. For
1247 * additional safety it must then be equal to the
1248 * SUPDRVSESSION::ppOsSessionPtr member.
1249 * This can be NULL even if the OS has a session
1250 * pointer.
1251 */
1252PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1253 PSUPDRVSESSION *ppOsSessionPtr)
1254{
1255 PSUPDRVSESSION pCur;
1256 unsigned iHash;
1257
1258 /*
1259 * Validate input.
1260 */
1261 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1262
1263 /*
1264 * Calculate the hash table index and acquire the spinlock.
1265 */
1266 iHash = SUPDRV_SESSION_HASH(Process);
1267
1268 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1269
1270 /*
1271 * If an OS session pointer is provided, always use it.
1272 */
1273 if (ppOsSessionPtr)
1274 {
1275 pCur = *ppOsSessionPtr;
1276 if ( pCur
1277 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1278 || pCur->Process != Process
1279 || pCur->R0Process != R0Process) )
1280 pCur = NULL;
1281 }
1282 else
1283 {
1284 /*
1285 * Otherwise, do the hash table lookup.
1286 */
1287 pCur = pDevExt->apSessionHashTab[iHash];
1288 while ( pCur
1289 && ( pCur->Process != Process
1290 || pCur->R0Process != R0Process) )
1291 pCur = pCur->pCommonNextHash;
1292 }
1293
1294 /*
1295 * Retain the session.
1296 */
1297 if (pCur)
1298 {
1299 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1300 NOREF(cRefs);
1301 Assert(cRefs > 1 && cRefs < _1M);
1302 }
1303
1304 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1305
1306 return pCur;
1307}
1308
1309
1310/**
1311 * Retain a session to make sure it doesn't go away while it is in use.
1312 *
1313 * @returns New reference count on success, UINT32_MAX on failure.
1314 * @param pSession Session data.
1315 */
1316uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1317{
1318 uint32_t cRefs;
1319 AssertPtrReturn(pSession, UINT32_MAX);
1320 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1321
1322 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1323 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1324 return cRefs;
1325}
1326
1327
1328/**
1329 * Releases a given session.
1330 *
1331 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1332 * @param pSession Session data.
1333 */
1334uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1335{
1336 uint32_t cRefs;
1337 AssertPtrReturn(pSession, UINT32_MAX);
1338 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1339
1340 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1341 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1342 if (cRefs == 0)
1343 supdrvDestroySession(pSession->pDevExt, pSession);
1344 return cRefs;
1345}
1346
1347
1348/**
1349 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1350 *
1351 * @returns IPRT status code, see SUPR0ObjAddRef.
1352 * @param hHandleTable The handle table handle. Ignored.
1353 * @param pvObj The object pointer.
1354 * @param pvCtx Context, the handle type. Ignored.
1355 * @param pvUser Session pointer.
1356 */
1357static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1358{
1359 NOREF(pvCtx);
1360 NOREF(hHandleTable);
1361 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1362}
1363
1364
1365/**
1366 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1367 *
1368 * @param hHandleTable The handle table handle. Ignored.
1369 * @param h The handle value. Ignored.
1370 * @param pvObj The object pointer.
1371 * @param pvCtx Context, the handle type. Ignored.
1372 * @param pvUser Session pointer.
1373 */
1374static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1375{
1376 NOREF(pvCtx);
1377 NOREF(h);
1378 NOREF(hHandleTable);
1379 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1380}
1381
1382
1383/**
1384 * Fast path I/O Control worker.
1385 *
1386 * @returns VBox status code that should be passed down to ring-3 unchanged.
1387 * @param uIOCtl Function number.
1388 * @param idCpu VMCPU id.
1389 * @param pDevExt Device extention.
1390 * @param pSession Session data.
1391 */
1392int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1393{
1394 /*
1395 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1396 */
1397 if (RT_LIKELY( RT_VALID_PTR(pSession)
1398 && pSession->pVM
1399 && pDevExt->pfnVMMR0EntryFast))
1400 {
1401 switch (uIOCtl)
1402 {
1403 case SUP_IOCTL_FAST_DO_RAW_RUN:
1404 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1405 break;
1406 case SUP_IOCTL_FAST_DO_HM_RUN:
1407 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1408 break;
1409 case SUP_IOCTL_FAST_DO_NOP:
1410 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1411 break;
1412 default:
1413 return VERR_INTERNAL_ERROR;
1414 }
1415 return VINF_SUCCESS;
1416 }
1417 return VERR_INTERNAL_ERROR;
1418}
1419
1420
/**
 * Helper for supdrvIOCtl.  Checks whether pszStr contains at least one of the
 * characters listed in pszChars.
 *
 * This is a hand-rolled stand-in for strpbrk, which would be used here if it
 * were on the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check (zero terminated).
 * @param   pszChars    Character set (zero terminated).
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        /* Compare the current character against every member of the set. */
        const char *pszCandidate;
        for (pszCandidate = pszChars; *pszCandidate != '\0'; pszCandidate++)
            if (*pszCandidate == *pszCur)
                return 1;
    }
    return 0;
}
1444
1445
1446
1447/**
1448 * I/O Control inner worker (tracing reasons).
1449 *
1450 * @returns IPRT status code.
1451 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1452 *
1453 * @param uIOCtl Function number.
1454 * @param pDevExt Device extention.
1455 * @param pSession Session data.
1456 * @param pReqHdr The request header.
1457 */
1458static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1459{
1460 /*
1461 * Validation macros
1462 */
1463#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1464 do { \
1465 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1466 { \
1467 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1468 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1469 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1470 } \
1471 } while (0)
1472
1473#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1474
1475#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1476 do { \
1477 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1478 { \
1479 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1480 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1481 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1482 } \
1483 } while (0)
1484
1485#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1486 do { \
1487 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1488 { \
1489 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1490 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1491 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1492 } \
1493 } while (0)
1494
1495#define REQ_CHECK_EXPR(Name, expr) \
1496 do { \
1497 if (RT_UNLIKELY(!(expr))) \
1498 { \
1499 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1500 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1501 } \
1502 } while (0)
1503
1504#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1505 do { \
1506 if (RT_UNLIKELY(!(expr))) \
1507 { \
1508 OSDBGPRINT( fmt ); \
1509 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1510 } \
1511 } while (0)
1512
1513 /*
1514 * The switch.
1515 */
1516 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1517 {
1518 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1519 {
1520 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1521 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1522 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1523 {
1524 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1525 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1526 return 0;
1527 }
1528
1529#if 0
1530 /*
1531 * Call out to the OS specific code and let it do permission checks on the
1532 * client process.
1533 */
1534 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1535 {
1536 pReq->u.Out.u32Cookie = 0xffffffff;
1537 pReq->u.Out.u32SessionCookie = 0xffffffff;
1538 pReq->u.Out.u32SessionVersion = 0xffffffff;
1539 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1540 pReq->u.Out.pSession = NULL;
1541 pReq->u.Out.cFunctions = 0;
1542 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1543 return 0;
1544 }
1545#endif
1546
1547 /*
1548 * Match the version.
1549 * The current logic is very simple, match the major interface version.
1550 */
1551 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1552 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1553 {
1554 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1555 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1556 pReq->u.Out.u32Cookie = 0xffffffff;
1557 pReq->u.Out.u32SessionCookie = 0xffffffff;
1558 pReq->u.Out.u32SessionVersion = 0xffffffff;
1559 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1560 pReq->u.Out.pSession = NULL;
1561 pReq->u.Out.cFunctions = 0;
1562 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1563 return 0;
1564 }
1565
1566 /*
1567 * Fill in return data and be gone.
1568 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1569 * u32SessionVersion <= u32ReqVersion!
1570 */
1571 /** @todo Somehow validate the client and negotiate a secure cookie... */
1572 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1573 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1574 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1575 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1576 pReq->u.Out.pSession = pSession;
1577 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1578 pReq->Hdr.rc = VINF_SUCCESS;
1579 return 0;
1580 }
1581
1582 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1583 {
1584 /* validate */
1585 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1586 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1587
1588 /* execute */
1589 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1590 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1591 pReq->Hdr.rc = VINF_SUCCESS;
1592 return 0;
1593 }
1594
1595 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1596 {
1597 /* validate */
1598 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1599 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1600 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1601 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1602 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1603
1604 /* execute */
1605 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1606 if (RT_FAILURE(pReq->Hdr.rc))
1607 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1608 return 0;
1609 }
1610
1611 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1612 {
1613 /* validate */
1614 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1615 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1616
1617 /* execute */
1618 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1619 return 0;
1620 }
1621
1622 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1623 {
1624 /* validate */
1625 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1626 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1627
1628 /* execute */
1629 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1630 if (RT_FAILURE(pReq->Hdr.rc))
1631 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1632 return 0;
1633 }
1634
1635 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1636 {
1637 /* validate */
1638 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1639 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1640
1641 /* execute */
1642 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1643 return 0;
1644 }
1645
1646 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1647 {
1648 /* validate */
1649 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1650 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1651 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1652 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1653 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1654 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1655 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1656 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1657 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1658 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1659 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1660
1661 /* execute */
1662 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1663 return 0;
1664 }
1665
1666 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1667 {
1668 /* validate */
1669 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1670 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1671 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1672 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1673 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1674 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1675 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1676 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1677 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1678 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1679 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1680 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1681 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1682 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1683 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1684
1685 if (pReq->u.In.cSymbols)
1686 {
1687 uint32_t i;
1688 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1689 for (i = 0; i < pReq->u.In.cSymbols; i++)
1690 {
1691 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1692 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1693 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1694 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1695 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1696 pReq->u.In.cbStrTab - paSyms[i].offName),
1697 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1698 }
1699 }
1700
1701 /* execute */
1702 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1703 return 0;
1704 }
1705
1706 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1707 {
1708 /* validate */
1709 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1710 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1711
1712 /* execute */
1713 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1714 return 0;
1715 }
1716
1717 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1718 {
1719 /* validate */
1720 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1721 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1722 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1723
1724 /* execute */
1725 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1726 return 0;
1727 }
1728
1729 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1730 {
1731 /* validate */
1732 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1733 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1734 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1735
1736 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1737 {
1738 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1739
1740 /* execute */
1741 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1742 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1743 else
1744 pReq->Hdr.rc = VERR_WRONG_ORDER;
1745 }
1746 else
1747 {
1748 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1749 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1750 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1751 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1752 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1753
1754 /* execute */
1755 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1756 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1757 else
1758 pReq->Hdr.rc = VERR_WRONG_ORDER;
1759 }
1760
1761 if ( RT_FAILURE(pReq->Hdr.rc)
1762 && pReq->Hdr.rc != VERR_INTERRUPTED
1763 && pReq->Hdr.rc != VERR_TIMEOUT)
1764 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1765 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1766 else
1767 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1768 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1769 return 0;
1770 }
1771
1772 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1773 {
1774 /* validate */
1775 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1776 PSUPVMMR0REQHDR pVMMReq;
1777 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1778 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1779
1780 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1781 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1782 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1783 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1784 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1785
1786 /* execute */
1787 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1788 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1789 else
1790 pReq->Hdr.rc = VERR_WRONG_ORDER;
1791
1792 if ( RT_FAILURE(pReq->Hdr.rc)
1793 && pReq->Hdr.rc != VERR_INTERRUPTED
1794 && pReq->Hdr.rc != VERR_TIMEOUT)
1795 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1796 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1797 else
1798 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1799 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1800 return 0;
1801 }
1802
1803 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1804 {
1805 /* validate */
1806 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1807 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1808
1809 /* execute */
1810 pReq->Hdr.rc = VINF_SUCCESS;
1811 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1812 return 0;
1813 }
1814
1815 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1816 {
1817 /* validate */
1818 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1819 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1820 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1821
1822 /* execute */
1823 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1824 if (RT_FAILURE(pReq->Hdr.rc))
1825 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1826 return 0;
1827 }
1828
1829 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1830 {
1831 /* validate */
1832 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1833 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1834
1835 /* execute */
1836 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1837 return 0;
1838 }
1839
1840 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1841 {
1842 /* validate */
1843 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1844 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1845
1846 /* execute */
1847 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1848 if (RT_SUCCESS(pReq->Hdr.rc))
1849 pReq->u.Out.pGipR0 = pDevExt->pGip;
1850 return 0;
1851 }
1852
1853 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1854 {
1855 /* validate */
1856 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1857 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1858
1859 /* execute */
1860 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1861 return 0;
1862 }
1863
1864 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1865 {
1866 /* validate */
1867 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1868 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1869 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1870 || ( VALID_PTR(pReq->u.In.pVMR0)
1871 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1872 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1873 /* execute */
1874 pSession->pVM = pReq->u.In.pVMR0;
1875 pReq->Hdr.rc = VINF_SUCCESS;
1876 return 0;
1877 }
1878
1879 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1880 {
1881 /* validate */
1882 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1883 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1884 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1885 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1886 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1887 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1888 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1889 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1890 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1891
1892 /* execute */
1893 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1894 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1895 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1896 &pReq->u.Out.aPages[0]);
1897 if (RT_FAILURE(pReq->Hdr.rc))
1898 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1899 return 0;
1900 }
1901
1902 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1903 {
1904 /* validate */
1905 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1906 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1907 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1908 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1909 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1910 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1911
1912 /* execute */
1913 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1914 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1915 if (RT_FAILURE(pReq->Hdr.rc))
1916 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1917 return 0;
1918 }
1919
1920 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1921 {
1922 /* validate */
1923 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1924 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1925 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1926 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1927 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1928 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1929 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1930
1931 /* execute */
1932 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1933 return 0;
1934 }
1935
1936 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1937 {
1938 /* validate */
1939 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1940 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1941
1942 /* execute */
1943 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1944 return 0;
1945 }
1946
1947 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1948 {
1949 /* validate */
1950 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1951 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1952 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1953
1954 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1955 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1956 else
1957 {
1958 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1959 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1960 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1961 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1962 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1963 }
1964 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1965
1966 /* execute */
1967 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1968 return 0;
1969 }
1970
1971 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1972 {
1973 /* validate */
1974 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1975 size_t cbStrTab;
1976 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1977 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1978 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1979 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1980 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1981 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1982 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1983 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1984 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1985 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1986 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1987
1988 /* execute */
1989 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
1990 return 0;
1991 }
1992
1993 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
1994 {
1995 /* validate */
1996 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
1997 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
1998 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
1999
2000 /* execute */
2001 switch (pReq->u.In.uType)
2002 {
2003 case SUP_SEM_TYPE_EVENT:
2004 {
2005 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2006 switch (pReq->u.In.uOp)
2007 {
2008 case SUPSEMOP2_WAIT_MS_REL:
2009 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2010 break;
2011 case SUPSEMOP2_WAIT_NS_ABS:
2012 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2013 break;
2014 case SUPSEMOP2_WAIT_NS_REL:
2015 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2016 break;
2017 case SUPSEMOP2_SIGNAL:
2018 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2019 break;
2020 case SUPSEMOP2_CLOSE:
2021 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2022 break;
2023 case SUPSEMOP2_RESET:
2024 default:
2025 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2026 break;
2027 }
2028 break;
2029 }
2030
2031 case SUP_SEM_TYPE_EVENT_MULTI:
2032 {
2033 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2034 switch (pReq->u.In.uOp)
2035 {
2036 case SUPSEMOP2_WAIT_MS_REL:
2037 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2038 break;
2039 case SUPSEMOP2_WAIT_NS_ABS:
2040 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2041 break;
2042 case SUPSEMOP2_WAIT_NS_REL:
2043 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2044 break;
2045 case SUPSEMOP2_SIGNAL:
2046 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2047 break;
2048 case SUPSEMOP2_CLOSE:
2049 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2050 break;
2051 case SUPSEMOP2_RESET:
2052 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2053 break;
2054 default:
2055 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2056 break;
2057 }
2058 break;
2059 }
2060
2061 default:
2062 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2063 break;
2064 }
2065 return 0;
2066 }
2067
2068 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2069 {
2070 /* validate */
2071 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2072 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2073 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2074
2075 /* execute */
2076 switch (pReq->u.In.uType)
2077 {
2078 case SUP_SEM_TYPE_EVENT:
2079 {
2080 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2081 switch (pReq->u.In.uOp)
2082 {
2083 case SUPSEMOP3_CREATE:
2084 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2085 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2086 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2087 break;
2088 case SUPSEMOP3_GET_RESOLUTION:
2089 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2090 pReq->Hdr.rc = VINF_SUCCESS;
2091 pReq->Hdr.cbOut = sizeof(*pReq);
2092 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2093 break;
2094 default:
2095 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2096 break;
2097 }
2098 break;
2099 }
2100
2101 case SUP_SEM_TYPE_EVENT_MULTI:
2102 {
2103 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2104 switch (pReq->u.In.uOp)
2105 {
2106 case SUPSEMOP3_CREATE:
2107 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2108 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2109 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2110 break;
2111 case SUPSEMOP3_GET_RESOLUTION:
2112 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2113 pReq->Hdr.rc = VINF_SUCCESS;
2114 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2115 break;
2116 default:
2117 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2118 break;
2119 }
2120 break;
2121 }
2122
2123 default:
2124 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2125 break;
2126 }
2127 return 0;
2128 }
2129
2130 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2131 {
2132 /* validate */
2133 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2134 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2135
2136 /* execute */
2137 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2138 if (RT_FAILURE(pReq->Hdr.rc))
2139 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2140 return 0;
2141 }
2142
2143 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2144 {
2145 /* validate */
2146 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2147 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2148
2149 /* execute */
2150 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2151 return 0;
2152 }
2153
2154 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2155 {
2156 /* validate */
2157 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2158
2159 /* execute */
2160 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2161 return 0;
2162 }
2163
2164 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2165 {
2166 /* validate */
2167 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2168 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2169
2170 /* execute */
2171 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2172 return 0;
2173 }
2174
2175 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2176 {
2177 /* validate */
2178 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2179 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2180 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2181 return VERR_INVALID_PARAMETER;
2182
2183 /* execute */
2184 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2185 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2186 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2187 pReq->u.In.szName, pReq->u.In.fFlags);
2188 return 0;
2189 }
2190
2191 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2192 {
2193 /* validate */
2194 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2195 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2196
2197 /* execute */
2198 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2199 return 0;
2200 }
2201
2202 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2203 {
2204 /* validate */
2205 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2206 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2207
2208 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2209 pReqHdr->rc = VINF_SUCCESS;
2210 return 0;
2211 }
2212
2213 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2214 {
2215 /* validate */
2216 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2217 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2218 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2219 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2220
2221 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2222 return 0;
2223 }
2224
2225 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2226 {
2227 /* validate */
2228 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2229
2230 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2231 return 0;
2232 }
2233
2234 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2235 {
2236 /* validate */
2237 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2238 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2239
2240 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2241 return 0;
2242 }
2243
2244 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2245 {
2246 /* validate */
2247 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2248 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2249
2250 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2251 return 0;
2252 }
2253
2254 default:
2255 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2256 break;
2257 }
2258 return VERR_GENERAL_FAILURE;
2259}
2260
2261
2262/**
2263 * I/O Control inner worker for the restricted operations.
2264 *
2265 * @returns IPRT status code.
2266 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2267 *
2268 * @param uIOCtl Function number.
2269 * @param pDevExt Device extention.
2270 * @param pSession Session data.
2271 * @param pReqHdr The request header.
2272 */
2273static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2274{
2275 /*
2276 * The switch.
2277 */
2278 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2279 {
2280 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2281 {
2282 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2283 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2284 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2285 {
2286 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2287 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2288 return 0;
2289 }
2290
2291 /*
2292 * Match the version.
2293 * The current logic is very simple, match the major interface version.
2294 */
2295 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2296 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2297 {
2298 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2299 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2300 pReq->u.Out.u32Cookie = 0xffffffff;
2301 pReq->u.Out.u32SessionCookie = 0xffffffff;
2302 pReq->u.Out.u32SessionVersion = 0xffffffff;
2303 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2304 pReq->u.Out.pSession = NULL;
2305 pReq->u.Out.cFunctions = 0;
2306 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2307 return 0;
2308 }
2309
2310 /*
2311 * Fill in return data and be gone.
2312 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2313 * u32SessionVersion <= u32ReqVersion!
2314 */
2315 /** @todo Somehow validate the client and negotiate a secure cookie... */
2316 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2317 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2318 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2319 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2320 pReq->u.Out.pSession = pSession;
2321 pReq->u.Out.cFunctions = 0;
2322 pReq->Hdr.rc = VINF_SUCCESS;
2323 return 0;
2324 }
2325
2326 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2327 {
2328 /* validate */
2329 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2330 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2331
2332 /* execute */
2333 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2334 if (RT_FAILURE(pReq->Hdr.rc))
2335 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2336 return 0;
2337 }
2338
2339 default:
2340 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2341 break;
2342 }
2343 return VERR_GENERAL_FAILURE;
2344}
2345
2346
2347/**
2348 * I/O Control worker.
2349 *
2350 * @returns IPRT status code.
2351 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2352 *
2353 * @param uIOCtl Function number.
2354 * @param pDevExt Device extention.
2355 * @param pSession Session data.
2356 * @param pReqHdr The request header.
2357 */
2358int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2359{
2360 int rc;
2361 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2362
2363 /*
2364 * Validate the request.
2365 */
2366 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2367 {
2368 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2369 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2370 return VERR_INVALID_PARAMETER;
2371 }
2372 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2373 || pReqHdr->cbIn < sizeof(*pReqHdr)
2374 || pReqHdr->cbIn > cbReq
2375 || pReqHdr->cbOut < sizeof(*pReqHdr)
2376 || pReqHdr->cbOut > cbReq))
2377 {
2378 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2379 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2380 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2381 return VERR_INVALID_PARAMETER;
2382 }
2383 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2384 {
2385 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2386 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2387 return VERR_INVALID_PARAMETER;
2388 }
2389 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2390 {
2391 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2392 {
2393 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2394 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2395 return VERR_INVALID_PARAMETER;
2396 }
2397 }
2398 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2399 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2400 {
2401 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2402 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2403 return VERR_INVALID_PARAMETER;
2404 }
2405
2406 /*
2407 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2408 */
2409 if (pSession->fUnrestricted)
2410 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2411 else
2412 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2413
2414 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2415 return rc;
2416}
2417
2418
2419/**
2420 * Inter-Driver Communication (IDC) worker.
2421 *
2422 * @returns VBox status code.
2423 * @retval VINF_SUCCESS on success.
2424 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2425 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2426 *
2427 * @param uReq The request (function) code.
2428 * @param pDevExt Device extention.
2429 * @param pSession Session data.
2430 * @param pReqHdr The request header.
2431 */
2432int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2433{
2434 /*
2435 * The OS specific code has already validated the pSession
2436 * pointer, and the request size being greater or equal to
2437 * size of the header.
2438 *
2439 * So, just check that pSession is a kernel context session.
2440 */
2441 if (RT_UNLIKELY( pSession
2442 && pSession->R0Process != NIL_RTR0PROCESS))
2443 return VERR_INVALID_PARAMETER;
2444
2445/*
2446 * Validation macro.
2447 */
2448#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2449 do { \
2450 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2451 { \
2452 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2453 (long)pReqHdr->cb, (long)(cbExpect))); \
2454 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2455 } \
2456 } while (0)
2457
2458 switch (uReq)
2459 {
2460 case SUPDRV_IDC_REQ_CONNECT:
2461 {
2462 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2463 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2464
2465 /*
2466 * Validate the cookie and other input.
2467 */
2468 if (pReq->Hdr.pSession != NULL)
2469 {
2470 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2471 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2472 }
2473 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2474 {
2475 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2476 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2477 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2478 }
2479 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2480 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2481 {
2482 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2483 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2484 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2485 }
2486 if (pSession != NULL)
2487 {
2488 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2489 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2490 }
2491
2492 /*
2493 * Match the version.
2494 * The current logic is very simple, match the major interface version.
2495 */
2496 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2497 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2498 {
2499 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2500 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2501 pReq->u.Out.pSession = NULL;
2502 pReq->u.Out.uSessionVersion = 0xffffffff;
2503 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2504 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2505 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2506 return VINF_SUCCESS;
2507 }
2508
2509 pReq->u.Out.pSession = NULL;
2510 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2511 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2512 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2513
2514 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2515 if (RT_FAILURE(pReq->Hdr.rc))
2516 {
2517 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2518 return VINF_SUCCESS;
2519 }
2520
2521 pReq->u.Out.pSession = pSession;
2522 pReq->Hdr.pSession = pSession;
2523
2524 return VINF_SUCCESS;
2525 }
2526
2527 case SUPDRV_IDC_REQ_DISCONNECT:
2528 {
2529 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2530
2531 supdrvSessionRelease(pSession);
2532 return pReqHdr->rc = VINF_SUCCESS;
2533 }
2534
2535 case SUPDRV_IDC_REQ_GET_SYMBOL:
2536 {
2537 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2538 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2539
2540 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2541 return VINF_SUCCESS;
2542 }
2543
2544 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2545 {
2546 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2547 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2548
2549 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2550 return VINF_SUCCESS;
2551 }
2552
2553 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2554 {
2555 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2556 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2557
2558 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2559 return VINF_SUCCESS;
2560 }
2561
2562 default:
2563 Log(("Unknown IDC %#lx\n", (long)uReq));
2564 break;
2565 }
2566
2567#undef REQ_CHECK_IDC_SIZE
2568 return VERR_NOT_SUPPORTED;
2569}
2570
2571
2572/**
2573 * Register a object for reference counting.
2574 * The object is registered with one reference in the specified session.
2575 *
2576 * @returns Unique identifier on success (pointer).
2577 * All future reference must use this identifier.
2578 * @returns NULL on failure.
2579 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2580 * @param pvUser1 The first user argument.
2581 * @param pvUser2 The second user argument.
2582 */
2583SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2584{
2585 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2586 PSUPDRVOBJ pObj;
2587 PSUPDRVUSAGE pUsage;
2588
2589 /*
2590 * Validate the input.
2591 */
2592 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2593 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2594 AssertPtrReturn(pfnDestructor, NULL);
2595
2596 /*
2597 * Allocate and initialize the object.
2598 */
2599 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2600 if (!pObj)
2601 return NULL;
2602 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2603 pObj->enmType = enmType;
2604 pObj->pNext = NULL;
2605 pObj->cUsage = 1;
2606 pObj->pfnDestructor = pfnDestructor;
2607 pObj->pvUser1 = pvUser1;
2608 pObj->pvUser2 = pvUser2;
2609 pObj->CreatorUid = pSession->Uid;
2610 pObj->CreatorGid = pSession->Gid;
2611 pObj->CreatorProcess= pSession->Process;
2612 supdrvOSObjInitCreator(pObj, pSession);
2613
2614 /*
2615 * Allocate the usage record.
2616 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2617 */
2618 RTSpinlockAcquire(pDevExt->Spinlock);
2619
2620 pUsage = pDevExt->pUsageFree;
2621 if (pUsage)
2622 pDevExt->pUsageFree = pUsage->pNext;
2623 else
2624 {
2625 RTSpinlockRelease(pDevExt->Spinlock);
2626 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2627 if (!pUsage)
2628 {
2629 RTMemFree(pObj);
2630 return NULL;
2631 }
2632 RTSpinlockAcquire(pDevExt->Spinlock);
2633 }
2634
2635 /*
2636 * Insert the object and create the session usage record.
2637 */
2638 /* The object. */
2639 pObj->pNext = pDevExt->pObjs;
2640 pDevExt->pObjs = pObj;
2641
2642 /* The session record. */
2643 pUsage->cUsage = 1;
2644 pUsage->pObj = pObj;
2645 pUsage->pNext = pSession->pUsage;
2646 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2647 pSession->pUsage = pUsage;
2648
2649 RTSpinlockRelease(pDevExt->Spinlock);
2650
2651 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2652 return pObj;
2653}
2654
2655
2656/**
2657 * Increment the reference counter for the object associating the reference
2658 * with the specified session.
2659 *
2660 * @returns IPRT status code.
2661 * @param pvObj The identifier returned by SUPR0ObjRegister().
2662 * @param pSession The session which is referencing the object.
2663 *
2664 * @remarks The caller should not own any spinlocks and must carefully protect
2665 * itself against potential race with the destructor so freed memory
2666 * isn't accessed here.
2667 */
2668SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2669{
2670 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2671}
2672
2673
2674/**
2675 * Increment the reference counter for the object associating the reference
2676 * with the specified session.
2677 *
2678 * @returns IPRT status code.
2679 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2680 * couldn't be allocated. (If you see this you're not doing the right
2681 * thing and it won't ever work reliably.)
2682 *
2683 * @param pvObj The identifier returned by SUPR0ObjRegister().
2684 * @param pSession The session which is referencing the object.
2685 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2686 * first reference to an object in a session with this
2687 * argument set.
2688 *
2689 * @remarks The caller should not own any spinlocks and must carefully protect
2690 * itself against potential race with the destructor so freed memory
2691 * isn't accessed here.
2692 */
2693SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2694{
2695 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2696 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2697 int rc = VINF_SUCCESS;
2698 PSUPDRVUSAGE pUsagePre;
2699 PSUPDRVUSAGE pUsage;
2700
2701 /*
2702 * Validate the input.
2703 * Be ready for the destruction race (someone might be stuck in the
2704 * destructor waiting a lock we own).
2705 */
2706 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2707 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2708 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2709 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2710 VERR_INVALID_PARAMETER);
2711
2712 RTSpinlockAcquire(pDevExt->Spinlock);
2713
2714 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2715 {
2716 RTSpinlockRelease(pDevExt->Spinlock);
2717
2718 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2719 return VERR_WRONG_ORDER;
2720 }
2721
2722 /*
2723 * Preallocate the usage record if we can.
2724 */
2725 pUsagePre = pDevExt->pUsageFree;
2726 if (pUsagePre)
2727 pDevExt->pUsageFree = pUsagePre->pNext;
2728 else if (!fNoBlocking)
2729 {
2730 RTSpinlockRelease(pDevExt->Spinlock);
2731 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2732 if (!pUsagePre)
2733 return VERR_NO_MEMORY;
2734
2735 RTSpinlockAcquire(pDevExt->Spinlock);
2736 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2737 {
2738 RTSpinlockRelease(pDevExt->Spinlock);
2739
2740 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2741 return VERR_WRONG_ORDER;
2742 }
2743 }
2744
2745 /*
2746 * Reference the object.
2747 */
2748 pObj->cUsage++;
2749
2750 /*
2751 * Look for the session record.
2752 */
2753 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2754 {
2755 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2756 if (pUsage->pObj == pObj)
2757 break;
2758 }
2759 if (pUsage)
2760 pUsage->cUsage++;
2761 else if (pUsagePre)
2762 {
2763 /* create a new session record. */
2764 pUsagePre->cUsage = 1;
2765 pUsagePre->pObj = pObj;
2766 pUsagePre->pNext = pSession->pUsage;
2767 pSession->pUsage = pUsagePre;
2768 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2769
2770 pUsagePre = NULL;
2771 }
2772 else
2773 {
2774 pObj->cUsage--;
2775 rc = VERR_TRY_AGAIN;
2776 }
2777
2778 /*
2779 * Put any unused usage record into the free list..
2780 */
2781 if (pUsagePre)
2782 {
2783 pUsagePre->pNext = pDevExt->pUsageFree;
2784 pDevExt->pUsageFree = pUsagePre;
2785 }
2786
2787 RTSpinlockRelease(pDevExt->Spinlock);
2788
2789 return rc;
2790}
2791
2792
2793/**
2794 * Decrement / destroy a reference counter record for an object.
2795 *
2796 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2797 *
2798 * @returns IPRT status code.
2799 * @retval VINF_SUCCESS if not destroyed.
2800 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2801 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2802 * string builds.
2803 *
2804 * @param pvObj The identifier returned by SUPR0ObjRegister().
2805 * @param pSession The session which is referencing the object.
2806 */
2807SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2808{
2809 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2810 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2811 int rc = VERR_INVALID_PARAMETER;
2812 PSUPDRVUSAGE pUsage;
2813 PSUPDRVUSAGE pUsagePrev;
2814
2815 /*
2816 * Validate the input.
2817 */
2818 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2819 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2820 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2821 VERR_INVALID_PARAMETER);
2822
2823 /*
2824 * Acquire the spinlock and look for the usage record.
2825 */
2826 RTSpinlockAcquire(pDevExt->Spinlock);
2827
2828 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2829 pUsage;
2830 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2831 {
2832 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2833 if (pUsage->pObj == pObj)
2834 {
2835 rc = VINF_SUCCESS;
2836 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2837 if (pUsage->cUsage > 1)
2838 {
2839 pObj->cUsage--;
2840 pUsage->cUsage--;
2841 }
2842 else
2843 {
2844 /*
2845 * Free the session record.
2846 */
2847 if (pUsagePrev)
2848 pUsagePrev->pNext = pUsage->pNext;
2849 else
2850 pSession->pUsage = pUsage->pNext;
2851 pUsage->pNext = pDevExt->pUsageFree;
2852 pDevExt->pUsageFree = pUsage;
2853
2854 /* What about the object? */
2855 if (pObj->cUsage > 1)
2856 pObj->cUsage--;
2857 else
2858 {
2859 /*
2860 * Object is to be destroyed, unlink it.
2861 */
2862 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2863 rc = VINF_OBJECT_DESTROYED;
2864 if (pDevExt->pObjs == pObj)
2865 pDevExt->pObjs = pObj->pNext;
2866 else
2867 {
2868 PSUPDRVOBJ pObjPrev;
2869 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2870 if (pObjPrev->pNext == pObj)
2871 {
2872 pObjPrev->pNext = pObj->pNext;
2873 break;
2874 }
2875 Assert(pObjPrev);
2876 }
2877 }
2878 }
2879 break;
2880 }
2881 }
2882
2883 RTSpinlockRelease(pDevExt->Spinlock);
2884
2885 /*
2886 * Call the destructor and free the object if required.
2887 */
2888 if (rc == VINF_OBJECT_DESTROYED)
2889 {
2890 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2891 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2892 if (pObj->pfnDestructor)
2893 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2894 RTMemFree(pObj);
2895 }
2896
2897 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2898 return rc;
2899}
2900
2901
2902/**
2903 * Verifies that the current process can access the specified object.
2904 *
2905 * @returns The following IPRT status code:
2906 * @retval VINF_SUCCESS if access was granted.
2907 * @retval VERR_PERMISSION_DENIED if denied access.
2908 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2909 *
2910 * @param pvObj The identifier returned by SUPR0ObjRegister().
2911 * @param pSession The session which wishes to access the object.
2912 * @param pszObjName Object string name. This is optional and depends on the object type.
2913 *
2914 * @remark The caller is responsible for making sure the object isn't removed while
2915 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2916 */
2917SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2918{
2919 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2920 int rc;
2921
2922 /*
2923 * Validate the input.
2924 */
2925 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2926 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2927 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2928 VERR_INVALID_PARAMETER);
2929
2930 /*
2931 * Check access. (returns true if a decision has been made.)
2932 */
2933 rc = VERR_INTERNAL_ERROR;
2934 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2935 return rc;
2936
2937 /*
2938 * Default policy is to allow the user to access his own
2939 * stuff but nothing else.
2940 */
2941 if (pObj->CreatorUid == pSession->Uid)
2942 return VINF_SUCCESS;
2943 return VERR_PERMISSION_DENIED;
2944}
2945
2946
2947/**
2948 * Lock pages.
2949 *
2950 * @returns IPRT status code.
2951 * @param pSession Session to which the locked memory should be associated.
2952 * @param pvR3 Start of the memory range to lock.
2953 * This must be page aligned.
2954 * @param cPages Number of pages to lock.
2955 * @param paPages Where to put the physical addresses of locked memory.
2956 */
2957SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2958{
2959 int rc;
2960 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2961 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2962 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2963
2964 /*
2965 * Verify input.
2966 */
2967 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2968 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2969 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2970 || !pvR3)
2971 {
2972 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2973 return VERR_INVALID_PARAMETER;
2974 }
2975
2976 /*
2977 * Let IPRT do the job.
2978 */
2979 Mem.eType = MEMREF_TYPE_LOCKED;
2980 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2981 if (RT_SUCCESS(rc))
2982 {
2983 uint32_t iPage = cPages;
2984 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2985 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2986
2987 while (iPage-- > 0)
2988 {
2989 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2990 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2991 {
2992 AssertMsgFailed(("iPage=%d\n", iPage));
2993 rc = VERR_INTERNAL_ERROR;
2994 break;
2995 }
2996 }
2997 if (RT_SUCCESS(rc))
2998 rc = supdrvMemAdd(&Mem, pSession);
2999 if (RT_FAILURE(rc))
3000 {
3001 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3002 AssertRC(rc2);
3003 }
3004 }
3005
3006 return rc;
3007}
3008
3009
3010/**
3011 * Unlocks the memory pointed to by pv.
3012 *
3013 * @returns IPRT status code.
3014 * @param pSession Session to which the memory was locked.
3015 * @param pvR3 Memory to unlock.
3016 */
3017SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3018{
3019 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3020 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3021 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3022}
3023
3024
3025/**
3026 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3027 * backing.
3028 *
3029 * @returns IPRT status code.
3030 * @param pSession Session data.
3031 * @param cPages Number of pages to allocate.
3032 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3033 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3034 * @param pHCPhys Where to put the physical address of allocated memory.
3035 */
3036SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3037{
3038 int rc;
3039 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3040 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3041
3042 /*
3043 * Validate input.
3044 */
3045 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3046 if (!ppvR3 || !ppvR0 || !pHCPhys)
3047 {
3048 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3049 pSession, ppvR0, ppvR3, pHCPhys));
3050 return VERR_INVALID_PARAMETER;
3051
3052 }
3053 if (cPages < 1 || cPages >= 256)
3054 {
3055 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3056 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3057 }
3058
3059 /*
3060 * Let IPRT do the job.
3061 */
3062 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3063 if (RT_SUCCESS(rc))
3064 {
3065 int rc2;
3066 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3067 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3068 if (RT_SUCCESS(rc))
3069 {
3070 Mem.eType = MEMREF_TYPE_CONT;
3071 rc = supdrvMemAdd(&Mem, pSession);
3072 if (!rc)
3073 {
3074 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3075 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3076 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3077 return 0;
3078 }
3079
3080 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3081 AssertRC(rc2);
3082 }
3083 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3084 AssertRC(rc2);
3085 }
3086
3087 return rc;
3088}
3089
3090
3091/**
3092 * Frees memory allocated using SUPR0ContAlloc().
3093 *
3094 * @returns IPRT status code.
3095 * @param pSession The session to which the memory was allocated.
3096 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3097 */
3098SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3099{
3100 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3101 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3102 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3103}
3104
3105
3106/**
3107 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3108 *
3109 * The memory isn't zeroed.
3110 *
3111 * @returns IPRT status code.
3112 * @param pSession Session data.
3113 * @param cPages Number of pages to allocate.
3114 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3115 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3116 * @param paPages Where to put the physical addresses of allocated memory.
3117 */
3118SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3119{
3120 unsigned iPage;
3121 int rc;
3122 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3123 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3124
3125 /*
3126 * Validate input.
3127 */
3128 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3129 if (!ppvR3 || !ppvR0 || !paPages)
3130 {
3131 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3132 pSession, ppvR3, ppvR0, paPages));
3133 return VERR_INVALID_PARAMETER;
3134
3135 }
3136 if (cPages < 1 || cPages >= 256)
3137 {
3138 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3139 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3140 }
3141
3142 /*
3143 * Let IPRT do the work.
3144 */
3145 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3146 if (RT_SUCCESS(rc))
3147 {
3148 int rc2;
3149 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3150 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3151 if (RT_SUCCESS(rc))
3152 {
3153 Mem.eType = MEMREF_TYPE_LOW;
3154 rc = supdrvMemAdd(&Mem, pSession);
3155 if (!rc)
3156 {
3157 for (iPage = 0; iPage < cPages; iPage++)
3158 {
3159 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3160 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3161 }
3162 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3163 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3164 return 0;
3165 }
3166
3167 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3168 AssertRC(rc2);
3169 }
3170
3171 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3172 AssertRC(rc2);
3173 }
3174
3175 return rc;
3176}
3177
3178
3179/**
3180 * Frees memory allocated using SUPR0LowAlloc().
3181 *
3182 * @returns IPRT status code.
3183 * @param pSession The session to which the memory was allocated.
3184 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3185 */
3186SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3187{
3188 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3189 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3190 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3191}
3192
3193
3194
3195/**
3196 * Allocates a chunk of memory with both R0 and R3 mappings.
3197 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3198 *
3199 * @returns IPRT status code.
3200 * @param pSession The session to associated the allocation with.
3201 * @param cb Number of bytes to allocate.
3202 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3203 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3204 */
3205SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3206{
3207 int rc;
3208 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3209 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3210
3211 /*
3212 * Validate input.
3213 */
3214 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3215 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3216 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3217 if (cb < 1 || cb >= _4M)
3218 {
3219 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3220 return VERR_INVALID_PARAMETER;
3221 }
3222
3223 /*
3224 * Let IPRT do the work.
3225 */
3226 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3227 if (RT_SUCCESS(rc))
3228 {
3229 int rc2;
3230 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3231 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3232 if (RT_SUCCESS(rc))
3233 {
3234 Mem.eType = MEMREF_TYPE_MEM;
3235 rc = supdrvMemAdd(&Mem, pSession);
3236 if (!rc)
3237 {
3238 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3239 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3240 return VINF_SUCCESS;
3241 }
3242
3243 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3244 AssertRC(rc2);
3245 }
3246
3247 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3248 AssertRC(rc2);
3249 }
3250
3251 return rc;
3252}
3253
3254
3255/**
3256 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3257 *
3258 * @returns IPRT status code.
3259 * @param pSession The session to which the memory was allocated.
3260 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3261 * @param paPages Where to store the physical addresses.
3262 */
3263SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3264{
3265 PSUPDRVBUNDLE pBundle;
3266 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3267
3268 /*
3269 * Validate input.
3270 */
3271 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3272 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3273 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3274
3275 /*
3276 * Search for the address.
3277 */
3278 RTSpinlockAcquire(pSession->Spinlock);
3279 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3280 {
3281 if (pBundle->cUsed > 0)
3282 {
3283 unsigned i;
3284 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3285 {
3286 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3287 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3288 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3289 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3290 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3291 )
3292 )
3293 {
3294 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3295 size_t iPage;
3296 for (iPage = 0; iPage < cPages; iPage++)
3297 {
3298 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3299 paPages[iPage].uReserved = 0;
3300 }
3301 RTSpinlockRelease(pSession->Spinlock);
3302 return VINF_SUCCESS;
3303 }
3304 }
3305 }
3306 }
3307 RTSpinlockRelease(pSession->Spinlock);
3308 Log(("Failed to find %p!!!\n", (void *)uPtr));
3309 return VERR_INVALID_PARAMETER;
3310}
3311
3312
3313/**
3314 * Free memory allocated by SUPR0MemAlloc().
3315 *
3316 * @returns IPRT status code.
3317 * @param pSession The session owning the allocation.
3318 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3319 */
3320SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3321{
3322 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3323 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3324 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3325}
3326
3327
3328/**
3329 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3330 *
3331 * The memory is fixed and it's possible to query the physical addresses using
3332 * SUPR0MemGetPhys().
3333 *
3334 * @returns IPRT status code.
3335 * @param pSession The session to associated the allocation with.
3336 * @param cPages The number of pages to allocate.
3337 * @param fFlags Flags, reserved for the future. Must be zero.
3338 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3339 * NULL if no ring-3 mapping.
3340 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3341 * NULL if no ring-0 mapping.
3342 * @param paPages Where to store the addresses of the pages. Optional.
3343 */
3344SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3345{
3346 int rc;
3347 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3348 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3349
3350 /*
3351 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3352 */
3353 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3354 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3355 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3356 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3357 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3358 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3359 {
3360 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3361 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3362 }
3363
3364 /*
3365 * Let IPRT do the work.
3366 */
3367 if (ppvR0)
3368 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3369 else
3370 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3371 if (RT_SUCCESS(rc))
3372 {
3373 int rc2;
3374 if (ppvR3)
3375 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3376 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3377 else
3378 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3379 if (RT_SUCCESS(rc))
3380 {
3381 Mem.eType = MEMREF_TYPE_PAGE;
3382 rc = supdrvMemAdd(&Mem, pSession);
3383 if (!rc)
3384 {
3385 if (ppvR3)
3386 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3387 if (ppvR0)
3388 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3389 if (paPages)
3390 {
3391 uint32_t iPage = cPages;
3392 while (iPage-- > 0)
3393 {
3394 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3395 Assert(paPages[iPage] != NIL_RTHCPHYS);
3396 }
3397 }
3398 return VINF_SUCCESS;
3399 }
3400
3401 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3402 AssertRC(rc2);
3403 }
3404
3405 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3406 AssertRC(rc2);
3407 }
3408 return rc;
3409}
3410
3411
3412/**
3413 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3414 * space.
3415 *
3416 * @returns IPRT status code.
3417 * @param pSession The session to associated the allocation with.
3418 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3419 * @param offSub Where to start mapping. Must be page aligned.
3420 * @param cbSub How much to map. Must be page aligned.
3421 * @param fFlags Flags, MBZ.
3422 * @param ppvR0 Where to return the address of the ring-0 mapping on
3423 * success.
3424 */
3425SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3426 uint32_t fFlags, PRTR0PTR ppvR0)
3427{
3428 int rc;
3429 PSUPDRVBUNDLE pBundle;
3430 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3431 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3432
3433 /*
3434 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3435 */
3436 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3437 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3438 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3439 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3440 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3441 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3442
3443 /*
3444 * Find the memory object.
3445 */
3446 RTSpinlockAcquire(pSession->Spinlock);
3447 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3448 {
3449 if (pBundle->cUsed > 0)
3450 {
3451 unsigned i;
3452 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3453 {
3454 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3455 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3456 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3457 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3458 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3459 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3460 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3461 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3462 {
3463 hMemObj = pBundle->aMem[i].MemObj;
3464 break;
3465 }
3466 }
3467 }
3468 }
3469 RTSpinlockRelease(pSession->Spinlock);
3470
3471 rc = VERR_INVALID_PARAMETER;
3472 if (hMemObj != NIL_RTR0MEMOBJ)
3473 {
3474 /*
3475 * Do some further input validations before calling IPRT.
3476 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3477 */
3478 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3479 if ( offSub < cbMemObj
3480 && cbSub <= cbMemObj
3481 && offSub + cbSub <= cbMemObj)
3482 {
3483 RTR0MEMOBJ hMapObj;
3484 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3485 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3486 if (RT_SUCCESS(rc))
3487 *ppvR0 = RTR0MemObjAddress(hMapObj);
3488 }
3489 else
3490 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3491
3492 }
3493 return rc;
3494}
3495
3496
3497/**
3498 * Changes the page level protection of one or more pages previously allocated
3499 * by SUPR0PageAllocEx.
3500 *
3501 * @returns IPRT status code.
3502 * @param pSession The session to associated the allocation with.
3503 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3504 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3505 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3506 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3507 * @param offSub Where to start changing. Must be page aligned.
3508 * @param cbSub How much to change. Must be page aligned.
3509 * @param fProt The new page level protection, see RTMEM_PROT_*.
3510 */
3511SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3512{
3513 int rc;
3514 PSUPDRVBUNDLE pBundle;
3515 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3516 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3517 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3518
3519 /*
3520 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3521 */
3522 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3523 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3524 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3525 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3526 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3527
3528 /*
3529 * Find the memory object.
3530 */
3531 RTSpinlockAcquire(pSession->Spinlock);
3532 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3533 {
3534 if (pBundle->cUsed > 0)
3535 {
3536 unsigned i;
3537 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3538 {
3539 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3540 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3541 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3542 || pvR3 == NIL_RTR3PTR)
3543 && ( pvR0 == NIL_RTR0PTR
3544 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3545 && ( pvR3 == NIL_RTR3PTR
3546 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3547 {
3548 if (pvR0 != NIL_RTR0PTR)
3549 hMemObjR0 = pBundle->aMem[i].MemObj;
3550 if (pvR3 != NIL_RTR3PTR)
3551 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3552 break;
3553 }
3554 }
3555 }
3556 }
3557 RTSpinlockRelease(pSession->Spinlock);
3558
3559 rc = VERR_INVALID_PARAMETER;
3560 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3561 || hMemObjR3 != NIL_RTR0MEMOBJ)
3562 {
3563 /*
3564 * Do some further input validations before calling IPRT.
3565 */
3566 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3567 if ( offSub < cbMemObj
3568 && cbSub <= cbMemObj
3569 && offSub + cbSub <= cbMemObj)
3570 {
3571 rc = VINF_SUCCESS;
3572 if (hMemObjR3 != NIL_RTR0PTR)
3573 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3574 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3575 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3576 }
3577 else
3578 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3579
3580 }
3581 return rc;
3582
3583}
3584
3585
3586/**
3587 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3588 *
3589 * @returns IPRT status code.
3590 * @param pSession The session owning the allocation.
3591 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3592 * SUPR0PageAllocEx().
3593 */
3594SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3595{
3596 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3597 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3598 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3599}
3600
3601
3602/**
3603 * Gets the paging mode of the current CPU.
3604 *
3605 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3606 */
3607SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3608{
3609 SUPPAGINGMODE enmMode;
3610
3611 RTR0UINTREG cr0 = ASMGetCR0();
3612 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3613 enmMode = SUPPAGINGMODE_INVALID;
3614 else
3615 {
3616 RTR0UINTREG cr4 = ASMGetCR4();
3617 uint32_t fNXEPlusLMA = 0;
3618 if (cr4 & X86_CR4_PAE)
3619 {
3620 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3621 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3622 {
3623 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3624 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3625 fNXEPlusLMA |= RT_BIT(0);
3626 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3627 fNXEPlusLMA |= RT_BIT(1);
3628 }
3629 }
3630
3631 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3632 {
3633 case 0:
3634 enmMode = SUPPAGINGMODE_32_BIT;
3635 break;
3636
3637 case X86_CR4_PGE:
3638 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3639 break;
3640
3641 case X86_CR4_PAE:
3642 enmMode = SUPPAGINGMODE_PAE;
3643 break;
3644
3645 case X86_CR4_PAE | RT_BIT(0):
3646 enmMode = SUPPAGINGMODE_PAE_NX;
3647 break;
3648
3649 case X86_CR4_PAE | X86_CR4_PGE:
3650 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3651 break;
3652
3653 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3654 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3655 break;
3656
3657 case RT_BIT(1) | X86_CR4_PAE:
3658 enmMode = SUPPAGINGMODE_AMD64;
3659 break;
3660
3661 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3662 enmMode = SUPPAGINGMODE_AMD64_NX;
3663 break;
3664
3665 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3666 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3667 break;
3668
3669 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3670 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3671 break;
3672
3673 default:
3674 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3675 enmMode = SUPPAGINGMODE_INVALID;
3676 break;
3677 }
3678 }
3679 return enmMode;
3680}
3681
3682
3683/**
3684 * Enables or disabled hardware virtualization extensions using native OS APIs.
3685 *
3686 * @returns VBox status code.
3687 * @retval VINF_SUCCESS on success.
3688 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3689 *
3690 * @param fEnable Whether to enable or disable.
3691 */
3692SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3693{
3694#ifdef RT_OS_DARWIN
3695 return supdrvOSEnableVTx(fEnable);
3696#else
3697 return VERR_NOT_SUPPORTED;
3698#endif
3699}
3700
3701
3702/**
3703 * Suspends hardware virtualization extensions using the native OS API.
3704 *
3705 * This is called prior to entering raw-mode context.
3706 *
3707 * @returns @c true if suspended, @c false if not.
3708 */
3709SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3710{
3711#ifdef RT_OS_DARWIN
3712 return supdrvOSSuspendVTxOnCpu();
3713#else
3714 return false;
3715#endif
3716}
3717
3718
3719/**
3720 * Resumes hardware virtualization extensions using the native OS API.
3721 *
3722 * This is called after to entering raw-mode context.
3723 *
3724 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3725 */
3726SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3727{
3728#ifdef RT_OS_DARWIN
3729 supdrvOSResumeVTxOnCpu(fSuspended);
3730#else
3731 Assert(!fSuspended);
3732#endif
3733}
3734
3735
3736/**
3737 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3738 *
3739 * @returns VBox status code.
3740 * @retval VERR_VMX_NO_VMX
3741 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3742 * @retval VERR_VMX_MSR_VMXON_DISABLED
3743 * @retval VERR_VMX_MSR_LOCKING_FAILED
3744 * @retval VERR_SVM_NO_SVM
3745 * @retval VERR_SVM_DISABLED
3746 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3747 * (centaur) CPU.
3748 *
3749 * @param pSession The session handle.
3750 * @param pfCaps Where to store the capabilities.
3751 */
3752SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3753{
3754 int rc = VERR_UNSUPPORTED_CPU;
3755 bool fIsSmxModeAmbiguous = false;
3756 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3757
3758 /*
3759 * Input validation.
3760 */
3761 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3762 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3763
3764 *pfCaps = 0;
3765 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3766 RTThreadPreemptDisable(&PreemptState);
3767 if (ASMHasCpuId())
3768 {
3769 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3770 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3771
3772 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3773 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3774
3775 if ( ASMIsValidStdRange(uMaxId)
3776 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3777 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3778 )
3779 {
3780 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3781 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3782 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3783 )
3784 {
3785 /** @todo Unify code with hmR0InitIntelCpu(). */
3786 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3787 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3788 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3789 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3790 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3791
3792 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3793 if (fMsrLocked)
3794 {
3795 if (fVmxAllowed && fSmxVmxAllowed)
3796 rc = VINF_SUCCESS;
3797 else if (!fVmxAllowed && !fSmxVmxAllowed)
3798 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3799 else if (!fMaybeSmxMode)
3800 {
3801 if (fVmxAllowed)
3802 rc = VINF_SUCCESS;
3803 else
3804 rc = VERR_VMX_MSR_VMXON_DISABLED;
3805 }
3806 else
3807 {
3808 /*
3809 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3810 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3811 * See @bugref{6873}.
3812 */
3813 Assert(fMaybeSmxMode == true);
3814 fIsSmxModeAmbiguous = true;
3815 rc = VINF_SUCCESS;
3816 }
3817 }
3818 else
3819 {
3820 /*
3821 * MSR is not yet locked; we can change it ourselves here.
3822 * Once the lock bit is set, this MSR can no longer be modified.
3823 *
3824 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3825 * accurately. See @bugref{6873}.
3826 */
3827 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3828 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3829 | MSR_IA32_FEATURE_CONTROL_VMXON;
3830 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3831
3832 /* Verify. */
3833 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3834 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3835 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3836 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3837 if (fSmxVmxAllowed && fVmxAllowed)
3838 rc = VINF_SUCCESS;
3839 else
3840 rc = VERR_VMX_MSR_LOCKING_FAILED;
3841 }
3842
3843 if (rc == VINF_SUCCESS)
3844 {
3845 VMXCAPABILITY vtCaps;
3846
3847 *pfCaps |= SUPVTCAPS_VT_X;
3848
3849 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3850 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3851 {
3852 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3853 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3854 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3855 }
3856 }
3857 }
3858 else
3859 rc = VERR_VMX_NO_VMX;
3860 }
3861 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3862 && ASMIsValidStdRange(uMaxId))
3863 {
3864 uint32_t fExtFeaturesEcx, uExtMaxId;
3865 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3866 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3867 if ( ASMIsValidExtRange(uExtMaxId)
3868 && uExtMaxId >= 0x8000000a
3869 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3870 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3871 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3872 )
3873 {
3874 /* Check if SVM is disabled */
3875 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3876 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3877 {
3878 uint32_t fSvmFeatures;
3879 *pfCaps |= SUPVTCAPS_AMD_V;
3880
3881 /* Query AMD-V features. */
3882 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3883 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3884 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3885
3886 rc = VINF_SUCCESS;
3887 }
3888 else
3889 rc = VERR_SVM_DISABLED;
3890 }
3891 else
3892 rc = VERR_SVM_NO_SVM;
3893 }
3894 }
3895
3896 RTThreadPreemptRestore(&PreemptState);
3897 if (fIsSmxModeAmbiguous)
3898 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3899 return rc;
3900}
3901
3902
3903/**
3904 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3905 * updating.
3906 *
3907 * @param pGip Pointer to the GIP.
3908 * @param pGipCpu The per CPU structure for this CPU.
3909 * @param u64NanoTS The current time.
3910 */
3911static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3912{
3913 pGipCpu->u64TSC = SUPReadTsc() - pGipCpu->u32UpdateIntervalTSC;
3914 pGipCpu->u64NanoTS = u64NanoTS;
3915}
3916
3917
3918/**
3919 * Set the current TSC and NanoTS value for the CPU.
3920 *
3921 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3922 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3923 * @param pvUser2 Pointer to the variable holding the current time.
3924 */
3925static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3926{
3927 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3928 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3929
3930 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3931 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3932
3933 NOREF(pvUser2);
3934 NOREF(idCpu);
3935}
3936
3937
3938/**
3939 * Increase the timer freqency on hosts where this is possible (NT).
3940 *
3941 * The idea is that more interrupts is better for us... Also, it's better than
3942 * we increase the timer frequence, because we might end up getting inaccurate
3943 * callbacks if someone else does it.
3944 *
3945 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
3946 */
3947static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3948{
3949 if (pDevExt->u32SystemTimerGranularityGrant == 0)
3950 {
3951 uint32_t u32SystemResolution;
3952 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3953 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3954 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3955 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3956 )
3957 {
3958 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3959 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3960 }
3961 }
3962}
3963
3964
3965/**
3966 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
3967 *
3968 * @param pDevExt Clears u32SystemTimerGranularityGrant.
3969 */
3970static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3971{
3972 if (pDevExt->u32SystemTimerGranularityGrant)
3973 {
3974 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
3975 AssertRC(rc2);
3976 pDevExt->u32SystemTimerGranularityGrant = 0;
3977 }
3978}
3979
3980
3981/**
3982 * Maps the GIP into userspace and/or get the physical address of the GIP.
3983 *
3984 * @returns IPRT status code.
3985 * @param pSession Session to which the GIP mapping should belong.
3986 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3987 * @param pHCPhysGip Where to store the physical address. (optional)
3988 *
3989 * @remark There is no reference counting on the mapping, so one call to this function
3990 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3991 * and remove the session as a GIP user.
3992 */
3993SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3994{
3995 int rc;
3996 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3997 RTR3PTR pGipR3 = NIL_RTR3PTR;
3998 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3999 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
4000
4001 /*
4002 * Validate
4003 */
4004 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4005 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4006 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4007
4008#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4009 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4010#else
4011 RTSemFastMutexRequest(pDevExt->mtxGip);
4012#endif
4013 if (pDevExt->pGip)
4014 {
4015 /*
4016 * Map it?
4017 */
4018 rc = VINF_SUCCESS;
4019 if (ppGipR3)
4020 {
4021 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4022 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4023 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4024 if (RT_SUCCESS(rc))
4025 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4026 }
4027
4028 /*
4029 * Get physical address.
4030 */
4031 if (pHCPhysGip && RT_SUCCESS(rc))
4032 HCPhys = pDevExt->HCPhysGip;
4033
4034 /*
4035 * Reference globally.
4036 */
4037 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4038 {
4039 pSession->fGipReferenced = 1;
4040 pDevExt->cGipUsers++;
4041 if (pDevExt->cGipUsers == 1)
4042 {
4043 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4044 uint64_t u64NanoTS;
4045
4046 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4047
4048 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4049
4050 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4051 {
4052 unsigned i;
4053 for (i = 0; i < pGipR0->cCpus; i++)
4054 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4055 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4056 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4057 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4058 }
4059
4060 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4061 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4062 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4063 || RTMpGetOnlineCount() == 1)
4064 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4065 else
4066 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4067
4068#ifndef DO_NOT_START_GIP
4069 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4070#endif
4071 rc = VINF_SUCCESS;
4072 }
4073 }
4074 }
4075 else
4076 {
4077 rc = VERR_GENERAL_FAILURE;
4078 Log(("SUPR0GipMap: GIP is not available!\n"));
4079 }
4080#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4081 RTSemMutexRelease(pDevExt->mtxGip);
4082#else
4083 RTSemFastMutexRelease(pDevExt->mtxGip);
4084#endif
4085
4086 /*
4087 * Write returns.
4088 */
4089 if (pHCPhysGip)
4090 *pHCPhysGip = HCPhys;
4091 if (ppGipR3)
4092 *ppGipR3 = pGipR3;
4093
4094#ifdef DEBUG_DARWIN_GIP
4095 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4096#else
4097 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4098#endif
4099 return rc;
4100}
4101
4102
4103/**
4104 * Unmaps any user mapping of the GIP and terminates all GIP access
4105 * from this session.
4106 *
4107 * @returns IPRT status code.
4108 * @param pSession Session to which the GIP mapping should belong.
4109 */
4110SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4111{
4112 int rc = VINF_SUCCESS;
4113 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4114#ifdef DEBUG_DARWIN_GIP
4115 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4116 pSession,
4117 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4118 pSession->GipMapObjR3));
4119#else
4120 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4121#endif
4122 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4123
4124#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4125 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4126#else
4127 RTSemFastMutexRequest(pDevExt->mtxGip);
4128#endif
4129
4130 /*
4131 * Unmap anything?
4132 */
4133 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4134 {
4135 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4136 AssertRC(rc);
4137 if (RT_SUCCESS(rc))
4138 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4139 }
4140
4141 /*
4142 * Dereference global GIP.
4143 */
4144 if (pSession->fGipReferenced && !rc)
4145 {
4146 pSession->fGipReferenced = 0;
4147 if ( pDevExt->cGipUsers > 0
4148 && !--pDevExt->cGipUsers)
4149 {
4150 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4151#ifndef DO_NOT_START_GIP
4152 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4153#endif
4154 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4155 }
4156 }
4157
4158#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4159 RTSemMutexRelease(pDevExt->mtxGip);
4160#else
4161 RTSemFastMutexRelease(pDevExt->mtxGip);
4162#endif
4163
4164 return rc;
4165}
4166
4167
4168/**
4169 * Gets the GIP pointer.
4170 *
4171 * @returns Pointer to the GIP or NULL.
4172 */
4173SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4174{
4175 return g_pSUPGlobalInfoPage;
4176}
4177
4178
4179/**
4180 * Register a component factory with the support driver.
4181 *
4182 * This is currently restricted to kernel sessions only.
4183 *
4184 * @returns VBox status code.
4185 * @retval VINF_SUCCESS on success.
4186 * @retval VERR_NO_MEMORY if we're out of memory.
4187 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4188 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4189 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4190 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4191 *
4192 * @param pSession The SUPDRV session (must be a ring-0 session).
4193 * @param pFactory Pointer to the component factory registration structure.
4194 *
4195 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4196 */
4197SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4198{
4199 PSUPDRVFACTORYREG pNewReg;
4200 const char *psz;
4201 int rc;
4202
4203 /*
4204 * Validate parameters.
4205 */
4206 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4207 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4208 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4209 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4210 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4211 AssertReturn(psz, VERR_INVALID_PARAMETER);
4212
4213 /*
4214 * Allocate and initialize a new registration structure.
4215 */
4216 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4217 if (pNewReg)
4218 {
4219 pNewReg->pNext = NULL;
4220 pNewReg->pFactory = pFactory;
4221 pNewReg->pSession = pSession;
4222 pNewReg->cchName = psz - &pFactory->szName[0];
4223
4224 /*
4225 * Add it to the tail of the list after checking for prior registration.
4226 */
4227 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4228 if (RT_SUCCESS(rc))
4229 {
4230 PSUPDRVFACTORYREG pPrev = NULL;
4231 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4232 while (pCur && pCur->pFactory != pFactory)
4233 {
4234 pPrev = pCur;
4235 pCur = pCur->pNext;
4236 }
4237 if (!pCur)
4238 {
4239 if (pPrev)
4240 pPrev->pNext = pNewReg;
4241 else
4242 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4243 rc = VINF_SUCCESS;
4244 }
4245 else
4246 rc = VERR_ALREADY_EXISTS;
4247
4248 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4249 }
4250
4251 if (RT_FAILURE(rc))
4252 RTMemFree(pNewReg);
4253 }
4254 else
4255 rc = VERR_NO_MEMORY;
4256 return rc;
4257}
4258
4259
4260/**
4261 * Deregister a component factory.
4262 *
4263 * @returns VBox status code.
4264 * @retval VINF_SUCCESS on success.
4265 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4266 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4267 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4268 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4269 *
4270 * @param pSession The SUPDRV session (must be a ring-0 session).
4271 * @param pFactory Pointer to the component factory registration structure
4272 * previously passed SUPR0ComponentRegisterFactory().
4273 *
4274 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4275 */
4276SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4277{
4278 int rc;
4279
4280 /*
4281 * Validate parameters.
4282 */
4283 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4284 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4285 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4286
4287 /*
4288 * Take the lock and look for the registration record.
4289 */
4290 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4291 if (RT_SUCCESS(rc))
4292 {
4293 PSUPDRVFACTORYREG pPrev = NULL;
4294 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4295 while (pCur && pCur->pFactory != pFactory)
4296 {
4297 pPrev = pCur;
4298 pCur = pCur->pNext;
4299 }
4300 if (pCur)
4301 {
4302 if (!pPrev)
4303 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4304 else
4305 pPrev->pNext = pCur->pNext;
4306
4307 pCur->pNext = NULL;
4308 pCur->pFactory = NULL;
4309 pCur->pSession = NULL;
4310 rc = VINF_SUCCESS;
4311 }
4312 else
4313 rc = VERR_NOT_FOUND;
4314
4315 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4316
4317 RTMemFree(pCur);
4318 }
4319 return rc;
4320}
4321
4322
4323/**
4324 * Queries a component factory.
4325 *
4326 * @returns VBox status code.
4327 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4328 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4329 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4330 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4331 *
4332 * @param pSession The SUPDRV session.
4333 * @param pszName The name of the component factory.
4334 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4335 * @param ppvFactoryIf Where to store the factory interface.
4336 */
4337SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4338{
4339 const char *pszEnd;
4340 size_t cchName;
4341 int rc;
4342
4343 /*
4344 * Validate parameters.
4345 */
4346 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4347
4348 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4349 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4350 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4351 cchName = pszEnd - pszName;
4352
4353 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4354 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4355 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4356
4357 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4358 *ppvFactoryIf = NULL;
4359
4360 /*
4361 * Take the lock and try all factories by this name.
4362 */
4363 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4364 if (RT_SUCCESS(rc))
4365 {
4366 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4367 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4368 while (pCur)
4369 {
4370 if ( pCur->cchName == cchName
4371 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4372 {
4373 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4374 if (pvFactory)
4375 {
4376 *ppvFactoryIf = pvFactory;
4377 rc = VINF_SUCCESS;
4378 break;
4379 }
4380 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4381 }
4382
4383 /* next */
4384 pCur = pCur->pNext;
4385 }
4386
4387 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4388 }
4389 return rc;
4390}
4391
4392
4393/**
4394 * Adds a memory object to the session.
4395 *
4396 * @returns IPRT status code.
4397 * @param pMem Memory tracking structure containing the
4398 * information to track.
4399 * @param pSession The session.
4400 */
4401static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4402{
4403 PSUPDRVBUNDLE pBundle;
4404
4405 /*
4406 * Find free entry and record the allocation.
4407 */
4408 RTSpinlockAcquire(pSession->Spinlock);
4409 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4410 {
4411 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4412 {
4413 unsigned i;
4414 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4415 {
4416 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4417 {
4418 pBundle->cUsed++;
4419 pBundle->aMem[i] = *pMem;
4420 RTSpinlockRelease(pSession->Spinlock);
4421 return VINF_SUCCESS;
4422 }
4423 }
4424 AssertFailed(); /* !!this can't be happening!!! */
4425 }
4426 }
4427 RTSpinlockRelease(pSession->Spinlock);
4428
4429 /*
4430 * Need to allocate a new bundle.
4431 * Insert into the last entry in the bundle.
4432 */
4433 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4434 if (!pBundle)
4435 return VERR_NO_MEMORY;
4436
4437 /* take last entry. */
4438 pBundle->cUsed++;
4439 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4440
4441 /* insert into list. */
4442 RTSpinlockAcquire(pSession->Spinlock);
4443 pBundle->pNext = pSession->Bundle.pNext;
4444 pSession->Bundle.pNext = pBundle;
4445 RTSpinlockRelease(pSession->Spinlock);
4446
4447 return VINF_SUCCESS;
4448}
4449
4450
4451/**
4452 * Releases a memory object referenced by pointer and type.
4453 *
4454 * @returns IPRT status code.
4455 * @param pSession Session data.
4456 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4457 * @param eType Memory type.
4458 */
4459static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4460{
4461 PSUPDRVBUNDLE pBundle;
4462
4463 /*
4464 * Validate input.
4465 */
4466 if (!uPtr)
4467 {
4468 Log(("Illegal address %p\n", (void *)uPtr));
4469 return VERR_INVALID_PARAMETER;
4470 }
4471
4472 /*
4473 * Search for the address.
4474 */
4475 RTSpinlockAcquire(pSession->Spinlock);
4476 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4477 {
4478 if (pBundle->cUsed > 0)
4479 {
4480 unsigned i;
4481 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4482 {
4483 if ( pBundle->aMem[i].eType == eType
4484 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4485 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4486 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4487 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4488 )
4489 {
4490 /* Make a copy of it and release it outside the spinlock. */
4491 SUPDRVMEMREF Mem = pBundle->aMem[i];
4492 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4493 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4494 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4495 RTSpinlockRelease(pSession->Spinlock);
4496
4497 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4498 {
4499 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4500 AssertRC(rc); /** @todo figure out how to handle this. */
4501 }
4502 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4503 {
4504 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4505 AssertRC(rc); /** @todo figure out how to handle this. */
4506 }
4507 return VINF_SUCCESS;
4508 }
4509 }
4510 }
4511 }
4512 RTSpinlockRelease(pSession->Spinlock);
4513 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4514 return VERR_INVALID_PARAMETER;
4515}
4516
4517
4518/**
4519 * Opens an image. If it's the first time it's opened the call must upload
4520 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4521 *
4522 * This is the 1st step of the loading.
4523 *
4524 * @returns IPRT status code.
4525 * @param pDevExt Device globals.
4526 * @param pSession Session data.
4527 * @param pReq The open request.
4528 */
4529static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4530{
4531 int rc;
4532 PSUPDRVLDRIMAGE pImage;
4533 void *pv;
4534 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4535 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4536
4537 /*
4538 * Check if we got an instance of the image already.
4539 */
4540 supdrvLdrLock(pDevExt);
4541 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4542 {
4543 if ( pImage->szName[cchName] == '\0'
4544 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4545 {
4546 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4547 {
4548 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4549 pImage->cUsage++;
4550 pReq->u.Out.pvImageBase = pImage->pvImage;
4551 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4552 pReq->u.Out.fNativeLoader = pImage->fNative;
4553 supdrvLdrAddUsage(pSession, pImage);
4554 supdrvLdrUnlock(pDevExt);
4555 return VINF_SUCCESS;
4556 }
4557 supdrvLdrUnlock(pDevExt);
4558 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4559 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4560 }
4561 }
4562 /* (not found - add it!) */
4563
4564 /*
4565 * Allocate memory.
4566 */
4567 Assert(cchName < sizeof(pImage->szName));
4568 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4569 if (!pv)
4570 {
4571 supdrvLdrUnlock(pDevExt);
4572 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4573 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4574 }
4575
4576 /*
4577 * Setup and link in the LDR stuff.
4578 */
4579 pImage = (PSUPDRVLDRIMAGE)pv;
4580 pImage->pvImage = NULL;
4581 pImage->pvImageAlloc = NULL;
4582 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4583 pImage->cbImageBits = pReq->u.In.cbImageBits;
4584 pImage->cSymbols = 0;
4585 pImage->paSymbols = NULL;
4586 pImage->pachStrTab = NULL;
4587 pImage->cbStrTab = 0;
4588 pImage->pfnModuleInit = NULL;
4589 pImage->pfnModuleTerm = NULL;
4590 pImage->pfnServiceReqHandler = NULL;
4591 pImage->uState = SUP_IOCTL_LDR_OPEN;
4592 pImage->cUsage = 1;
4593 pImage->pDevExt = pDevExt;
4594 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4595
4596 /*
4597 * Try load it using the native loader, if that isn't supported, fall back
4598 * on the older method.
4599 */
4600 pImage->fNative = true;
4601 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4602 if (rc == VERR_NOT_SUPPORTED)
4603 {
4604 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4605 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4606 pImage->fNative = false;
4607 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4608 }
4609 if (RT_FAILURE(rc))
4610 {
4611 supdrvLdrUnlock(pDevExt);
4612 RTMemFree(pImage);
4613 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4614 return rc;
4615 }
4616 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4617
4618 /*
4619 * Link it.
4620 */
4621 pImage->pNext = pDevExt->pLdrImages;
4622 pDevExt->pLdrImages = pImage;
4623
4624 supdrvLdrAddUsage(pSession, pImage);
4625
4626 pReq->u.Out.pvImageBase = pImage->pvImage;
4627 pReq->u.Out.fNeedsLoading = true;
4628 pReq->u.Out.fNativeLoader = pImage->fNative;
4629 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4630
4631 supdrvLdrUnlock(pDevExt);
4632 return VINF_SUCCESS;
4633}
4634
4635
4636/**
4637 * Worker that validates a pointer to an image entrypoint.
4638 *
4639 * @returns IPRT status code.
4640 * @param pDevExt The device globals.
4641 * @param pImage The loader image.
4642 * @param pv The pointer into the image.
4643 * @param fMayBeNull Whether it may be NULL.
4644 * @param pszWhat What is this entrypoint? (for logging)
4645 * @param pbImageBits The image bits prepared by ring-3.
4646 *
4647 * @remarks Will leave the lock on failure.
4648 */
4649static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4650 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4651{
4652 if (!fMayBeNull || pv)
4653 {
4654 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4655 {
4656 supdrvLdrUnlock(pDevExt);
4657 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4658 return VERR_INVALID_PARAMETER;
4659 }
4660
4661 if (pImage->fNative)
4662 {
4663 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4664 if (RT_FAILURE(rc))
4665 {
4666 supdrvLdrUnlock(pDevExt);
4667 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4668 return rc;
4669 }
4670 }
4671 }
4672 return VINF_SUCCESS;
4673}
4674
4675
/**
 * Loads the image bits.
 *
 * This is the 2nd step of the loading.
 *
 * The image is looked up among the images referenced by the session, the
 * request is checked against what was given when the image was opened, the
 * entry points are validated, the string and symbol tables are copied, the
 * image bits are copied (or handed to the native loader) and finally the
 * module init function, if any, is called.  Should any step after the
 * validation fail, the image is put back into the SUP_IOCTL_LDR_OPEN state
 * and the tables are freed again.
 *
 * @returns IPRT status code.  VERR_INVALID_HANDLE if the image isn't found
 *          or the sizes don't match those recorded for the image,
 *          VERR_ALREADY_LOADED if the image isn't in the open state.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int             rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));

    /*
     * Find the ldr image.
     * Only images the session holds a usage record for are considered.
     */
    supdrvLdrLock(pDevExt);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;

    /*
     * Validate input.
     * The sizes must be the ones recorded for the image.
     */
    if (   pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
        || pImage->cbImageBits     != pReq->u.In.cbImageBits)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
             pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
        return VERR_INVALID_HANDLE;
    }

    /* The image must still be waiting for its bits; anything but "already loaded" is unexpected. */
    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        supdrvLdrUnlock(pDevExt);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return VERR_ALREADY_LOADED;
    }

    /*
     * Validate the entry points.
     * Note! supdrvLdrValidatePointer releases the loader lock on failure,
     *       which is why the failure paths below return without unlocking.
     */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;

        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrValidatePointer(    pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0,          false, pReq->u.In.abImage, "pvVMMR0");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,  false, pReq->u.In.abImage, "pvVMMR0EntryInt");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx,   false, pReq->u.In.abImage, "pvVMMR0EntryEx");
            if (RT_FAILURE(rc))
                return rc;
            break;

        case SUPLDRLOADEP_SERVICE:
            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
            if (RT_FAILURE(rc))
                return rc;
            /* The reserved members must be zero. */
            if (    pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
            {
                supdrvLdrUnlock(pDevExt);
                Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
                     pImage->pvImage, pReq->u.In.cbImageWithTabs,
                     pReq->u.In.EP.Service.apvReserved[0],
                     pReq->u.In.EP.Service.apvReserved[1],
                     pReq->u.In.EP.Service.apvReserved[2]));
                return VERR_INVALID_PARAMETER;
            }
            break;

        default:
            supdrvLdrUnlock(pDevExt);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }

    /* The module init and term functions are optional (fMayBeNull = true). */
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
    if (RT_FAILURE(rc))
        return rc;
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
    if (RT_FAILURE(rc))
        return rc;

    /*
     * Allocate and copy the tables.
     * (No need to do try/except as this is a buffered request.)
     *
     * NOTE(review): offStrTab/cbStrTab and offSymbols/cSymbols are not range
     * checked here; presumably the caller has validated them - confirm.
     */
    pImage->cbStrTab = pReq->u.In.cbStrTab;
    if (pImage->cbStrTab)
    {
        pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
        if (pImage->pachStrTab)
            memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
    }

    pImage->cSymbols = pReq->u.In.cSymbols;
    if (RT_SUCCESS(rc) && pImage->cSymbols)
    {
        size_t  cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
        pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
        if (pImage->paSymbols)
            memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
    }

    /*
     * Copy the bits / complete native loading.
     */
    if (RT_SUCCESS(rc))
    {
        pImage->uState = SUP_IOCTL_LDR_LOAD;
        pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
        pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;

        if (pImage->fNative)
            rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
        else
        {
            memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
            Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
        }
    }

    /*
     * Update any entry points.
     */
    if (RT_SUCCESS(rc))
    {
        switch (pReq->u.In.eEPType)
        {
            default:
            case SUPLDRLOADEP_NOTHING:
                rc = VINF_SUCCESS;
                break;
            case SUPLDRLOADEP_VMMR0:
                rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                          pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
                break;
            case SUPLDRLOADEP_SERVICE:
                pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
                rc = VINF_SUCCESS;
                break;
        }
    }

    /*
     * On success call the module initialization.
     * The image and calling thread are recorded in the device extension for
     * the duration of the call and cleared again afterwards.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        pDevExt->pLdrInitImage  = pImage;
        pDevExt->hLdrInitThread = RTThreadNativeSelf();
        rc = pImage->pfnModuleInit(pImage);
        pDevExt->pLdrInitImage  = NULL;
        pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
        /* Withdraw the VMMR0 entry points again if they refer to this image. */
        if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetVMMR0EPs(pDevExt);
    }
    /* Note: printed regardless of whether the load succeeded. */
    SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);

    /*
     * On failure, roll the image back to the open state and drop the tables.
     */
    if (RT_FAILURE(rc))
    {
        /* Inform the tracing component in case ModuleInit registered TPs. */
        supdrvTracerModuleUnloading(pDevExt, pImage);

        pImage->uState              = SUP_IOCTL_LDR_OPEN;
        pImage->pfnModuleInit       = NULL;
        pImage->pfnModuleTerm       = NULL;
        pImage->pfnServiceReqHandler= NULL;
        pImage->cbStrTab            = 0;
        RTMemFree(pImage->pachStrTab);
        pImage->pachStrTab          = NULL;
        RTMemFree(pImage->paSymbols);
        pImage->paSymbols           = NULL;
        pImage->cSymbols            = 0;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
4879
4880
4881/**
4882 * Frees a previously loaded (prep'ed) image.
4883 *
4884 * @returns IPRT status code.
4885 * @param pDevExt Device globals.
4886 * @param pSession Session data.
4887 * @param pReq The request.
4888 */
4889static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4890{
4891 int rc;
4892 PSUPDRVLDRUSAGE pUsagePrev;
4893 PSUPDRVLDRUSAGE pUsage;
4894 PSUPDRVLDRIMAGE pImage;
4895 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4896
4897 /*
4898 * Find the ldr image.
4899 */
4900 supdrvLdrLock(pDevExt);
4901 pUsagePrev = NULL;
4902 pUsage = pSession->pLdrUsage;
4903 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4904 {
4905 pUsagePrev = pUsage;
4906 pUsage = pUsage->pNext;
4907 }
4908 if (!pUsage)
4909 {
4910 supdrvLdrUnlock(pDevExt);
4911 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4912 return VERR_INVALID_HANDLE;
4913 }
4914
4915 /*
4916 * Check if we can remove anything.
4917 */
4918 rc = VINF_SUCCESS;
4919 pImage = pUsage->pImage;
4920 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4921 {
4922 /*
4923 * Check if there are any objects with destructors in the image, if
4924 * so leave it for the session cleanup routine so we get a chance to
4925 * clean things up in the right order and not leave them all dangling.
4926 */
4927 RTSpinlockAcquire(pDevExt->Spinlock);
4928 if (pImage->cUsage <= 1)
4929 {
4930 PSUPDRVOBJ pObj;
4931 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4932 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4933 {
4934 rc = VERR_DANGLING_OBJECTS;
4935 break;
4936 }
4937 }
4938 else
4939 {
4940 PSUPDRVUSAGE pGenUsage;
4941 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4942 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4943 {
4944 rc = VERR_DANGLING_OBJECTS;
4945 break;
4946 }
4947 }
4948 RTSpinlockRelease(pDevExt->Spinlock);
4949 if (rc == VINF_SUCCESS)
4950 {
4951 /* unlink it */
4952 if (pUsagePrev)
4953 pUsagePrev->pNext = pUsage->pNext;
4954 else
4955 pSession->pLdrUsage = pUsage->pNext;
4956
4957 /* free it */
4958 pUsage->pImage = NULL;
4959 pUsage->pNext = NULL;
4960 RTMemFree(pUsage);
4961
4962 /*
4963 * Dereference the image.
4964 */
4965 if (pImage->cUsage <= 1)
4966 supdrvLdrFree(pDevExt, pImage);
4967 else
4968 pImage->cUsage--;
4969 }
4970 else
4971 {
4972 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4973 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4974 }
4975 }
4976 else
4977 {
4978 /*
4979 * Dereference both image and usage.
4980 */
4981 pImage->cUsage--;
4982 pUsage->cUsage--;
4983 }
4984
4985 supdrvLdrUnlock(pDevExt);
4986 return rc;
4987}
4988
4989
4990/**
4991 * Gets the address of a symbol in an open image.
4992 *
4993 * @returns IPRT status code.
4994 * @param pDevExt Device globals.
4995 * @param pSession Session data.
4996 * @param pReq The request buffer.
4997 */
4998static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4999{
5000 PSUPDRVLDRIMAGE pImage;
5001 PSUPDRVLDRUSAGE pUsage;
5002 uint32_t i;
5003 PSUPLDRSYM paSyms;
5004 const char *pchStrings;
5005 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5006 void *pvSymbol = NULL;
5007 int rc = VERR_GENERAL_FAILURE;
5008 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5009
5010 /*
5011 * Find the ldr image.
5012 */
5013 supdrvLdrLock(pDevExt);
5014 pUsage = pSession->pLdrUsage;
5015 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5016 pUsage = pUsage->pNext;
5017 if (!pUsage)
5018 {
5019 supdrvLdrUnlock(pDevExt);
5020 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5021 return VERR_INVALID_HANDLE;
5022 }
5023 pImage = pUsage->pImage;
5024 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5025 {
5026 unsigned uState = pImage->uState;
5027 supdrvLdrUnlock(pDevExt);
5028 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5029 return VERR_ALREADY_LOADED;
5030 }
5031
5032 /*
5033 * Search the symbol strings.
5034 *
5035 * Note! The int32_t is for native loading on solaris where the data
5036 * and text segments are in very different places.
5037 */
5038 pchStrings = pImage->pachStrTab;
5039 paSyms = pImage->paSymbols;
5040 for (i = 0; i < pImage->cSymbols; i++)
5041 {
5042 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5043 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5044 {
5045 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5046 rc = VINF_SUCCESS;
5047 break;
5048 }
5049 }
5050 supdrvLdrUnlock(pDevExt);
5051 pReq->u.Out.pvSymbol = pvSymbol;
5052 return rc;
5053}
5054
5055
5056/**
5057 * Gets the address of a symbol in an open image or the support driver.
5058 *
5059 * @returns VINF_SUCCESS on success.
5060 * @returns
5061 * @param pDevExt Device globals.
5062 * @param pSession Session data.
5063 * @param pReq The request buffer.
5064 */
5065static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5066{
5067 int rc = VINF_SUCCESS;
5068 const char *pszSymbol = pReq->u.In.pszSymbol;
5069 const char *pszModule = pReq->u.In.pszModule;
5070 size_t cbSymbol;
5071 char const *pszEnd;
5072 uint32_t i;
5073
5074 /*
5075 * Input validation.
5076 */
5077 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5078 pszEnd = RTStrEnd(pszSymbol, 512);
5079 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5080 cbSymbol = pszEnd - pszSymbol + 1;
5081
5082 if (pszModule)
5083 {
5084 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5085 pszEnd = RTStrEnd(pszModule, 64);
5086 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5087 }
5088 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5089
5090
5091 if ( !pszModule
5092 || !strcmp(pszModule, "SupDrv"))
5093 {
5094 /*
5095 * Search the support driver export table.
5096 */
5097 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5098 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5099 {
5100 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5101 break;
5102 }
5103 }
5104 else
5105 {
5106 /*
5107 * Find the loader image.
5108 */
5109 PSUPDRVLDRIMAGE pImage;
5110
5111 supdrvLdrLock(pDevExt);
5112
5113 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5114 if (!strcmp(pImage->szName, pszModule))
5115 break;
5116 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5117 {
5118 /*
5119 * Search the symbol strings.
5120 */
5121 const char *pchStrings = pImage->pachStrTab;
5122 PCSUPLDRSYM paSyms = pImage->paSymbols;
5123 for (i = 0; i < pImage->cSymbols; i++)
5124 {
5125 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5126 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5127 {
5128 /*
5129 * Found it! Calc the symbol address and add a reference to the module.
5130 */
5131 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5132 rc = supdrvLdrAddUsage(pSession, pImage);
5133 break;
5134 }
5135 }
5136 }
5137 else
5138 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5139
5140 supdrvLdrUnlock(pDevExt);
5141 }
5142 return rc;
5143}
5144
5145
5146/**
5147 * Updates the VMMR0 entry point pointers.
5148 *
5149 * @returns IPRT status code.
5150 * @param pDevExt Device globals.
5151 * @param pSession Session data.
5152 * @param pVMMR0 VMMR0 image handle.
5153 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5154 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5155 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5156 * @remark Caller must own the loader mutex.
5157 */
5158static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5159{
5160 int rc = VINF_SUCCESS;
5161 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5162
5163
5164 /*
5165 * Check if not yet set.
5166 */
5167 if (!pDevExt->pvVMMR0)
5168 {
5169 pDevExt->pvVMMR0 = pvVMMR0;
5170 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5171 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5172 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5173 }
5174 else
5175 {
5176 /*
5177 * Return failure or success depending on whether the values match or not.
5178 */
5179 if ( pDevExt->pvVMMR0 != pvVMMR0
5180 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5181 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5182 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5183 {
5184 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5185 rc = VERR_INVALID_PARAMETER;
5186 }
5187 }
5188 return rc;
5189}
5190
5191
/**
 * Unsets the VMMR0 entry points installed by supdrvLdrSetVMMR0EPs.
 *
 * Resets the VMMR0 image handle and the three entry point pointers in the
 * device extension to NULL.
 *
 * @param   pDevExt     Device globals.
 */
static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
{
    pDevExt->pvVMMR0            = NULL;
    pDevExt->pfnVMMR0EntryInt   = NULL;
    pDevExt->pfnVMMR0EntryFast  = NULL;
    pDevExt->pfnVMMR0EntryEx    = NULL;
}
5204
5205
5206/**
5207 * Adds a usage reference in the specified session of an image.
5208 *
5209 * Called while owning the loader semaphore.
5210 *
5211 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5212 * @param pSession Session in question.
5213 * @param pImage Image which the session is using.
5214 */
5215static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5216{
5217 PSUPDRVLDRUSAGE pUsage;
5218 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5219
5220 /*
5221 * Referenced it already?
5222 */
5223 pUsage = pSession->pLdrUsage;
5224 while (pUsage)
5225 {
5226 if (pUsage->pImage == pImage)
5227 {
5228 pUsage->cUsage++;
5229 return VINF_SUCCESS;
5230 }
5231 pUsage = pUsage->pNext;
5232 }
5233
5234 /*
5235 * Allocate new usage record.
5236 */
5237 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5238 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5239 pUsage->cUsage = 1;
5240 pUsage->pImage = pImage;
5241 pUsage->pNext = pSession->pLdrUsage;
5242 pSession->pLdrUsage = pUsage;
5243 return VINF_SUCCESS;
5244}
5245
5246
5247/**
5248 * Frees a load image.
5249 *
5250 * @param pDevExt Pointer to device extension.
5251 * @param pImage Pointer to the image we're gonna free.
5252 * This image must exit!
5253 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5254 */
5255static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5256{
5257 PSUPDRVLDRIMAGE pImagePrev;
5258 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5259
5260 /* find it - arg. should've used doubly linked list. */
5261 Assert(pDevExt->pLdrImages);
5262 pImagePrev = NULL;
5263 if (pDevExt->pLdrImages != pImage)
5264 {
5265 pImagePrev = pDevExt->pLdrImages;
5266 while (pImagePrev->pNext != pImage)
5267 pImagePrev = pImagePrev->pNext;
5268 Assert(pImagePrev->pNext == pImage);
5269 }
5270
5271 /* unlink */
5272 if (pImagePrev)
5273 pImagePrev->pNext = pImage->pNext;
5274 else
5275 pDevExt->pLdrImages = pImage->pNext;
5276
5277 /* check if this is VMMR0.r0 unset its entry point pointers. */
5278 if (pDevExt->pvVMMR0 == pImage->pvImage)
5279 supdrvLdrUnsetVMMR0EPs(pDevExt);
5280
5281 /* check for objects with destructors in this image. (Shouldn't happen.) */
5282 if (pDevExt->pObjs)
5283 {
5284 unsigned cObjs = 0;
5285 PSUPDRVOBJ pObj;
5286 RTSpinlockAcquire(pDevExt->Spinlock);
5287 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5288 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5289 {
5290 pObj->pfnDestructor = NULL;
5291 cObjs++;
5292 }
5293 RTSpinlockRelease(pDevExt->Spinlock);
5294 if (cObjs)
5295 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5296 }
5297
5298 /* call termination function if fully loaded. */
5299 if ( pImage->pfnModuleTerm
5300 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5301 {
5302 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5303 pImage->pfnModuleTerm(pImage);
5304 }
5305
5306 /* Inform the tracing component. */
5307 supdrvTracerModuleUnloading(pDevExt, pImage);
5308
5309 /* do native unload if appropriate. */
5310 if (pImage->fNative)
5311 supdrvOSLdrUnload(pDevExt, pImage);
5312
5313 /* free the image */
5314 pImage->cUsage = 0;
5315 pImage->pDevExt = NULL;
5316 pImage->pNext = NULL;
5317 pImage->uState = SUP_IOCTL_LDR_FREE;
5318 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5319 pImage->pvImageAlloc = NULL;
5320 RTMemFree(pImage->pachStrTab);
5321 pImage->pachStrTab = NULL;
5322 RTMemFree(pImage->paSymbols);
5323 pImage->paSymbols = NULL;
5324 RTMemFree(pImage);
5325}
5326
5327
5328/**
5329 * Acquires the loader lock.
5330 *
5331 * @returns IPRT status code.
5332 * @param pDevExt The device extension.
5333 */
5334DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5335{
5336#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5337 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5338#else
5339 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5340#endif
5341 AssertRC(rc);
5342 return rc;
5343}
5344
5345
5346/**
5347 * Releases the loader lock.
5348 *
5349 * @returns IPRT status code.
5350 * @param pDevExt The device extension.
5351 */
5352DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5353{
5354#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5355 return RTSemMutexRelease(pDevExt->mtxLdr);
5356#else
5357 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5358#endif
5359}
5360
5361
5362/**
5363 * Implements the service call request.
5364 *
5365 * @returns VBox status code.
5366 * @param pDevExt The device extension.
5367 * @param pSession The calling session.
5368 * @param pReq The request packet, valid.
5369 */
5370static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5371{
5372#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5373 int rc;
5374
5375 /*
5376 * Find the module first in the module referenced by the calling session.
5377 */
5378 rc = supdrvLdrLock(pDevExt);
5379 if (RT_SUCCESS(rc))
5380 {
5381 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5382 PSUPDRVLDRUSAGE pUsage;
5383
5384 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5385 if ( pUsage->pImage->pfnServiceReqHandler
5386 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5387 {
5388 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5389 break;
5390 }
5391 supdrvLdrUnlock(pDevExt);
5392
5393 if (pfnServiceReqHandler)
5394 {
5395 /*
5396 * Call it.
5397 */
5398 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5399 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5400 else
5401 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5402 }
5403 else
5404 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5405 }
5406
5407 /* log it */
5408 if ( RT_FAILURE(rc)
5409 && rc != VERR_INTERRUPTED
5410 && rc != VERR_TIMEOUT)
5411 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5412 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5413 else
5414 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5415 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5416 return rc;
5417#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5418 return VERR_NOT_IMPLEMENTED;
5419#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5420}
5421
5422
/**
 * Implements the logger settings request.
 *
 * Depending on pReq->u.In.fWhat, the settings of the debug or release logger
 * selected by pReq->u.In.fWhich are changed, or the logger is created or
 * destroyed.  The group, flag and destination strings are taken from
 * pReq->u.In.szStrings at the offsets given in the request; the destination
 * string is currently not applied (see the NOREF statements).
 *
 * @returns VBox status code.  VERR_INTERNAL_ERROR for unknown fWhat / fWhich
 *          values, VERR_ACCESS_DENIED when asked to destroy the release
 *          logger, VERR_NOT_FOUND when changing settings of a logger that
 *          doesn't exist, VERR_ALREADY_EXISTS when creating one that does.
 * @param   pDevExt     The device extension.
 * @param   pSession    The caller's session.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
{
    /* NOTE(review): the offsets are used unchecked here; presumably the caller validated them - confirm. */
    const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
    const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
    const char *pszDest  = &pReq->u.In.szStrings[pReq->u.In.offDestination];
    PRTLOGGER   pLogger  = NULL;
    int         rc;

    /*
     * Some further validation.
     */
    switch (pReq->u.In.fWhat)
    {
        case SUPLOGGERSETTINGS_WHAT_SETTINGS:
        case SUPLOGGERSETTINGS_WHAT_CREATE:
            break;

        case SUPLOGGERSETTINGS_WHAT_DESTROY:
            /* Destroying takes no strings, and the release logger may not be destroyed. */
            if (*pszGroup || *pszFlags || *pszDest)
                return VERR_INVALID_PARAMETER;
            if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
                return VERR_ACCESS_DENIED;
            break;

        default:
            return VERR_INTERNAL_ERROR;
    }

    /*
     * Get the logger.
     */
    switch (pReq->u.In.fWhich)
    {
        case SUPLOGGERSETTINGS_WHICH_DEBUG:
            pLogger = RTLogGetDefaultInstance();
            break;

        case SUPLOGGERSETTINGS_WHICH_RELEASE:
            pLogger = RTLogRelDefaultInstance();
            break;

        default:
            return VERR_INTERNAL_ERROR;
    }

    /*
     * Do the job.
     */
    switch (pReq->u.In.fWhat)
    {
        case SUPLOGGERSETTINGS_WHAT_SETTINGS:
            /* Apply the flags first and the group settings only if that worked. */
            if (pLogger)
            {
                rc = RTLogFlags(pLogger, pszFlags);
                if (RT_SUCCESS(rc))
                    rc = RTLogGroupSettings(pLogger, pszGroup);
                NOREF(pszDest);
            }
            else
                rc = VERR_NOT_FOUND;
            break;

        case SUPLOGGERSETTINGS_WHAT_CREATE:
        {
            if (pLogger)
                rc = VERR_ALREADY_EXISTS;
            else
            {
                static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;

                rc = RTLogCreate(&pLogger,
                                 0 /* fFlags */,
                                 pszGroup,
                                 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
                                 ? "VBOX_LOG"
                                 : "VBOX_RELEASE_LOG",
                                 RT_ELEMENTS(s_apszGroups),
                                 s_apszGroups,
                                 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
                                 NULL);
                if (RT_SUCCESS(rc))
                {
                    rc = RTLogFlags(pLogger, pszFlags);
                    NOREF(pszDest);
                    if (RT_SUCCESS(rc))
                    {
                        /* Install the new logger; pLogger is replaced by the
                           setter's return value - presumably the previously
                           installed instance (TODO confirm). */
                        switch (pReq->u.In.fWhich)
                        {
                            case SUPLOGGERSETTINGS_WHICH_DEBUG:
                                pLogger = RTLogSetDefaultInstance(pLogger);
                                break;
                            case SUPLOGGERSETTINGS_WHICH_RELEASE:
                                pLogger = RTLogRelSetDefaultInstance(pLogger);
                                break;
                        }
                    }
                    /* Destroys whatever pLogger refers to now: the new logger
                       if RTLogFlags failed, otherwise the setter's return value. */
                    RTLogDestroy(pLogger);
                }
            }
            break;
        }

        case SUPLOGGERSETTINGS_WHAT_DESTROY:
            /* Uninstall the logger and destroy what the setter hands back.
               (The release case was rejected with VERR_ACCESS_DENIED above.) */
            switch (pReq->u.In.fWhich)
            {
                case SUPLOGGERSETTINGS_WHICH_DEBUG:
                    pLogger = RTLogSetDefaultInstance(NULL);
                    break;
                case SUPLOGGERSETTINGS_WHICH_RELEASE:
                    pLogger = RTLogRelSetDefaultInstance(NULL);
                    break;
            }
            rc = RTLogDestroy(pLogger);
            break;

        default:
        {
            rc = VERR_INTERNAL_ERROR;
            break;
        }
    }

    return rc;
}
5555
5556
5557/**
5558 * Implements the MSR prober operations.
5559 *
5560 * @returns VBox status code.
5561 * @param pDevExt The device extension.
5562 * @param pReq The request.
5563 */
5564static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5565{
5566#ifdef SUPDRV_WITH_MSR_PROBER
5567 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5568 int rc;
5569
5570 switch (pReq->u.In.enmOp)
5571 {
5572 case SUPMSRPROBEROP_READ:
5573 {
5574 uint64_t uValue;
5575 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5576 if (RT_SUCCESS(rc))
5577 {
5578 pReq->u.Out.uResults.Read.uValue = uValue;
5579 pReq->u.Out.uResults.Read.fGp = false;
5580 }
5581 else if (rc == VERR_ACCESS_DENIED)
5582 {
5583 pReq->u.Out.uResults.Read.uValue = 0;
5584 pReq->u.Out.uResults.Read.fGp = true;
5585 rc = VINF_SUCCESS;
5586 }
5587 break;
5588 }
5589
5590 case SUPMSRPROBEROP_WRITE:
5591 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5592 if (RT_SUCCESS(rc))
5593 pReq->u.Out.uResults.Write.fGp = false;
5594 else if (rc == VERR_ACCESS_DENIED)
5595 {
5596 pReq->u.Out.uResults.Write.fGp = true;
5597 rc = VINF_SUCCESS;
5598 }
5599 break;
5600
5601 case SUPMSRPROBEROP_MODIFY:
5602 case SUPMSRPROBEROP_MODIFY_FASTER:
5603 rc = supdrvOSMsrProberModify(idCpu, pReq);
5604 break;
5605
5606 default:
5607 return VERR_INVALID_FUNCTION;
5608 }
5609 return rc;
5610#else
5611 return VERR_NOT_IMPLEMENTED;
5612#endif
5613}
5614
5615
5616/**
5617 * Returns whether the host CPU sports an invariant TSC or not.
5618 *
5619 * @returns true if invariant TSC is supported, false otherwise.
5620 */
5621static bool supdrvIsInvariantTsc(void)
5622{
5623 static bool s_fQueried = false;
5624 static bool s_fIsInvariantTsc = false;
5625 if (!s_fQueried)
5626 {
5627 if (ASMHasCpuId())
5628 {
5629 uint32_t uEax, uEbx, uEcx, uEdx;
5630 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5631 if (uEax >= 0x80000007)
5632 {
5633 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5634 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5635 s_fIsInvariantTsc = true;
5636 }
5637 }
5638 s_fQueried = true;
5639 }
5640
5641 return s_fIsInvariantTsc;
5642}
5643
5644
5645#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5646/**
5647 * Switches the TSC-delta measurement thread into the butchered state.
5648 *
5649 * @returns VBox status code.
5650 * @param pDevExt Pointer to the device instance data.
5651 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5652 * @param pszFailed An error message to log.
5653 * @param rcFailed The error code to exit the thread with.
5654 */
5655static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5656{
5657 if (!fSpinlockHeld)
5658 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5659
5660 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5661 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5662 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5663 return rcFailed;
5664}
5665
5666
5667/**
5668 * The TSC-delta measurement thread.
5669 *
5670 * @returns VBox status code.
5671 * @param hThread The thread handle.
5672 * @param pvUser Opaque pointer to the device instance data.
5673 */
5674static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5675{
5676 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5677 static uint32_t cTimesMeasured = 0;
5678 uint32_t cConsecutiveTimeouts = 0;
5679 int rc = VERR_INTERNAL_ERROR_2;
5680 for (;;)
5681 {
5682 /*
5683 * Switch on the current state.
5684 */
5685 SUPDRVTSCDELTASTATE enmState;
5686 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5687 enmState = pDevExt->enmTscDeltaState;
5688 switch (enmState)
5689 {
5690 case kSupDrvTscDeltaState_Creating:
5691 {
5692 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5693 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5694 if (RT_FAILURE(rc))
5695 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5696 /* fall thru */
5697 }
5698
5699 case kSupDrvTscDeltaState_Listening:
5700 {
5701 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5702
5703 /* Simple adaptive timeout. */
5704 if (cConsecutiveTimeouts++ == 10)
5705 {
5706 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5707 pDevExt->cMsTscDeltaTimeout = 10;
5708 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5709 pDevExt->cMsTscDeltaTimeout = 100;
5710 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5711 pDevExt->cMsTscDeltaTimeout = 500;
5712 cConsecutiveTimeouts = 0;
5713 }
5714 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5715 if ( RT_FAILURE(rc)
5716 && rc != VERR_TIMEOUT)
5717 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5718 break;
5719 }
5720
5721 case kSupDrvTscDeltaState_WaitAndMeasure:
5722 {
5723 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5724 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5725 if (RT_FAILURE(rc))
5726 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5727 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5728 pDevExt->cMsTscDeltaTimeout = 1;
5729 RTThreadSleep(10);
5730 /* fall thru */
5731 }
5732
5733 case kSupDrvTscDeltaState_Measuring:
5734 {
5735 cConsecutiveTimeouts = 0;
5736 if (!cTimesMeasured++)
5737 {
5738 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5739 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5740 }
5741 else
5742 {
5743 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5744 unsigned iCpu;
5745
5746 if (cTimesMeasured == UINT32_MAX)
5747 cTimesMeasured = 1;
5748
5749 /* Measure TSC-deltas only for the CPUs that are in the set. */
5750 rc = VINF_SUCCESS;
5751 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5752 {
5753 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5754 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5755 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5756 {
5757 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5758 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5759 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5760 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5761 }
5762 }
5763 }
5764 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5765 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5766 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5767 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5768 pDevExt->rcTscDelta = rc;
5769 break;
5770 }
5771
5772 case kSupDrvTscDeltaState_Terminating:
5773 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5774 return VINF_SUCCESS;
5775
5776 case kSupDrvTscDeltaState_Butchered:
5777 default:
5778 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5779 }
5780 }
5781
5782 return rc;
5783}
5784
5785
5786/**
5787 * Waits for the TSC-delta measurement thread to respond to a state change.
5788 *
5789 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5790 * other error code on internal error.
5791 *
5792 * @param pThis Pointer to the grant service instance data.
5793 * @param enmCurState The current state.
5794 * @param enmNewState The new state we're waiting for it to enter.
5795 */
5796static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5797{
5798 /*
5799 * Wait a short while for the expected state transition.
5800 */
5801 int rc;
5802 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5803 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5804 if (pDevExt->enmTscDeltaState == enmNewState)
5805 {
5806 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5807 rc = VINF_SUCCESS;
5808 }
5809 else if (pDevExt->enmTscDeltaState == enmCurState)
5810 {
5811 /*
5812 * Wait longer if the state has not yet transitioned to the one we want.
5813 */
5814 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5815 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5816 if ( RT_SUCCESS(rc)
5817 || rc == VERR_TIMEOUT)
5818 {
5819 /*
5820 * Check the state whether we've succeeded.
5821 */
5822 SUPDRVTSCDELTASTATE enmState;
5823 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5824 enmState = pDevExt->enmTscDeltaState;
5825 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5826 if (enmState == enmNewState)
5827 rc = VINF_SUCCESS;
5828 else if (enmState == enmCurState)
5829 {
5830 rc = VERR_TIMEOUT;
5831 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5832 enmNewState));
5833 }
5834 else
5835 {
5836 rc = VERR_INTERNAL_ERROR;
5837 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5838 enmState, enmNewState));
5839 }
5840 }
5841 else
5842 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5843 }
5844 else
5845 {
5846 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5847 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5848 rc = VERR_INTERNAL_ERROR;
5849 }
5850
5851 return rc;
5852}
5853
5854
5855/**
5856 * Terminates the TSC-delta measurement thread.
5857 *
5858 * @param pDevExt Pointer to the device instance data.
5859 */
5860static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5861{
5862 int rc;
5863 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5864 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5865 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5866 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5867 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5868 if (RT_FAILURE(rc))
5869 {
5870 /* Signal a few more times before giving up. */
5871 int cTriesLeft = 5;
5872 while (--cTriesLeft > 0)
5873 {
5874 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5875 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5876 if (rc != VERR_TIMEOUT)
5877 break;
5878 }
5879 }
5880}
5881
5882
5883/**
5884 * Initializes and spawns the TSC-delta measurement thread.
5885 *
5886 * A thread is required for servicing re-measurement requests from events like
5887 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5888 * under all contexts on all OSs.
5889 *
5890 * @returns VBox status code.
5891 * @param pDevExt Pointer to the device instance data.
5892 *
5893 * @remarks Must only be called -after- initializing GIP and setting up MP
5894 * notifications!
5895 */
5896static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5897{
5898 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
5899
5900 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5901 if (RT_SUCCESS(rc))
5902 {
5903 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5904 if (RT_SUCCESS(rc))
5905 {
5906 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5907 pDevExt->cMsTscDeltaTimeout = 1;
5908 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5909 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5910 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5911 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5912 if (RT_SUCCESS(rc))
5913 {
5914 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5915 if (RT_SUCCESS(rc))
5916 {
5917 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5918 return rc;
5919 }
5920
5921 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5922 supdrvTscDeltaThreadTerminate(pDevExt);
5923 }
5924 else
5925 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5926 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5927 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5928 }
5929 else
5930 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5931 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5932 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5933 }
5934 else
5935 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5936
5937 return rc;
5938}
5939
5940
5941/**
5942 * Terminates the TSC-delta measurement thread and cleanup.
5943 *
5944 * @param pDevExt Pointer to the device instance data.
5945 */
5946static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5947{
5948 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5949 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5950 {
5951 supdrvTscDeltaThreadTerminate(pDevExt);
5952 }
5953
5954 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5955 {
5956 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5957 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5958 }
5959
5960 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5961 {
5962 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5963 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5964 }
5965
5966 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5967}
5968
5969
5970/**
5971 * Waits for TSC-delta measurements to be completed for all online CPUs.
5972 *
5973 * @returns VBox status code.
5974 * @param pDevExt Pointer to the device instance data.
5975 */
5976static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
5977{
5978 int cTriesLeft = 5;
5979 int cMsTotalWait;
5980 int cMsWaited = 0;
5981 int cMsWaitGranularity = 1;
5982
5983 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5984 AssertReturn(pGip, VERR_INVALID_POINTER);
5985
5986 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
5987 while (cTriesLeft-- > 0)
5988 {
5989 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
5990 return VINF_SUCCESS;
5991 RTThreadSleep(cMsWaitGranularity);
5992 cMsWaited += cMsWaitGranularity;
5993 if (cMsWaited >= cMsTotalWait)
5994 break;
5995 }
5996
5997 return VERR_TIMEOUT;
5998}
5999#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6000
6001
6002/**
6003 * Measures the TSC frequency of the system.
6004 *
6005 * Uses a busy-wait method for the async. case as it is intended to help push
6006 * the CPU frequency up, while for the invariant cases using a sleeping method.
6007 *
6008 * The TSC frequency can vary on systems which are not reported as invariant.
6009 * On such systems the object of this function is to find out what the nominal,
6010 * maximum TSC frequency under 'normal' CPU operation.
6011 *
6012 * @returns VBox status code.
6013 * @param pDevExt Pointer to the device instance.
6014 *
6015 * @remarks Must be called only -after- measuring the TSC deltas.
6016 */
6017static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6018{
6019 int cTriesLeft = 4;
6020 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6021
6022 /* Assert order. */
6023 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6024 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6025
6026 while (cTriesLeft-- > 0)
6027 {
6028 RTCCUINTREG uFlags;
6029 uint64_t u64NanoTsBefore;
6030 uint64_t u64NanoTsAfter;
6031 uint64_t u64TscBefore;
6032 uint64_t u64TscAfter;
6033 uint8_t idApicBefore;
6034 uint8_t idApicAfter;
6035
6036 /*
6037 * Synchronize with the host OS clock tick before reading the TSC.
6038 * Especially important on older Windows version where the granularity is terrible.
6039 */
6040 u64NanoTsBefore = RTTimeSystemNanoTS();
6041 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6042 ASMNopPause();
6043
6044 uFlags = ASMIntDisableFlags();
6045 idApicBefore = ASMGetApicId();
6046 u64TscBefore = ASMReadTSC();
6047 u64NanoTsBefore = RTTimeSystemNanoTS();
6048 ASMSetFlags(uFlags);
6049
6050 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6051 {
6052 /*
6053 * Sleep-wait since the TSC frequency is constant, it eases host load.
6054 * Shorter interval produces more variance in the frequency (esp. Windows).
6055 */
6056 RTThreadSleep(200);
6057 u64NanoTsAfter = RTTimeSystemNanoTS();
6058 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6059 ASMNopPause();
6060 u64NanoTsAfter = RTTimeSystemNanoTS();
6061 }
6062 else
6063 {
6064 /* Busy-wait keeping the frequency up and measure. */
6065 for (;;)
6066 {
6067 u64NanoTsAfter = RTTimeSystemNanoTS();
6068 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6069 ASMNopPause();
6070 else
6071 break;
6072 }
6073 }
6074
6075 uFlags = ASMIntDisableFlags();
6076 idApicAfter = ASMGetApicId();
6077 u64TscAfter = ASMReadTSC();
6078 ASMSetFlags(uFlags);
6079
6080 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6081 {
6082 int rc;
6083 bool fAppliedBefore;
6084 bool fAppliedAfter;
6085 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6086 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6087
6088 if ( !fAppliedBefore
6089 || !fAppliedAfter)
6090 {
6091#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6092 /*
6093 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6094 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6095 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6096 * proceed. This should be triggered just once if we're rather unlucky.
6097 */
6098 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6099 if (rc == VERR_TIMEOUT)
6100 {
6101 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6102 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6103 }
6104#else
6105 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6106 idApicBefore, idApicAfter, cTriesLeft);
6107#endif
6108 continue;
6109 }
6110 }
6111
6112 /*
6113 * Update GIP.
6114 */
6115 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6116 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6117 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6118 return VINF_SUCCESS;
6119 }
6120
6121 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6122}
6123
6124
6125/**
6126 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6127 *
6128 * @param pTimer The timer.
6129 * @param pvUser Opaque pointer to the device instance data.
6130 * @param iTick The timer tick.
6131 */
6132static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6133{
6134 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6135 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6136 bool fDeltaApplied = false;
6137 uint8_t idApic;
6138 uint64_t u64DeltaNanoTS;
6139 uint64_t u64DeltaTsc;
6140 uint64_t u64NanoTS;
6141 uint64_t u64Tsc;
6142 RTCCUINTREG uFlags;
6143
6144 /* Paranoia. */
6145 Assert(pGip);
6146 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6147
6148#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6149 u64NanoTS = RTTimeSystemNanoTS();
6150 while (RTTimeSystemNanoTS() == u64NanoTS)
6151 ASMNopPause();
6152#endif
6153 uFlags = ASMIntDisableFlags();
6154 idApic = ASMGetApicId();
6155 u64Tsc = ASMReadTSC();
6156 u64NanoTS = RTTimeSystemNanoTS();
6157 ASMSetFlags(uFlags);
6158 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6159 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6160 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6161
6162 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6163 && !fDeltaApplied)
6164 {
6165 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6166 GIP_TSC_REFINE_INTERVAL);
6167 return;
6168 }
6169
6170 /* Calculate the TSC frequency. */
6171 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6172 && u64DeltaNanoTS < UINT32_MAX)
6173 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6174 else
6175 {
6176#if 1
6177 RTUINT128U CpuHz, Tmp, Divisor;
6178 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6179 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6180 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6181 pGip->u64CpuHz = CpuHz.s.Lo;
6182#else
6183 /** @todo remove later */
6184 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6185 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6186 && u64DeltaNanoTS / 10 < UINT32_MAX)
6187 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, (uint32_t)(u64DeltaNanoTS / 10));
6188 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6189 && u64DeltaNanoTS / 100 < UINT32_MAX)
6190 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, (uint32_t)(u64DeltaNanoTS / 100));
6191 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6192 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6193 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, (uint32_t)(u64DeltaNanoTS / 1000));
6194 else /* Screw it. */
6195 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6196#endif
6197 }
6198
6199 /* Update rest of GIP. */
6200 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6201 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6202}
6203
6204
6205/**
6206 * Starts the TSC-frequency refinement phase asynchronously.
6207 *
6208 * @param pDevExt Pointer to the device instance data.
6209 */
6210static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6211{
6212 uint64_t u64NanoTS;
6213 RTCCUINTREG uFlags;
6214 uint8_t idApic;
6215 int rc;
6216 bool fDeltaApplied = false;
6217 PSUPGLOBALINFOPAGE pGip;
6218
6219 /* Validate. */
6220 Assert(pDevExt);
6221 Assert(pDevExt->pGip);
6222
6223 pGip = pDevExt->pGip;
6224 u64NanoTS = RTTimeSystemNanoTS();
6225 while (RTTimeSystemNanoTS() == u64NanoTS)
6226 ASMNopPause();
6227 uFlags = ASMIntDisableFlags();
6228 idApic = ASMGetApicId();
6229 pDevExt->u64TscAnchor = ASMReadTSC();
6230 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6231 ASMSetFlags(uFlags);
6232 SUPTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, &fDeltaApplied);
6233
6234#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6235 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6236 && !fDeltaApplied)
6237 {
6238 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6239 if (rc == VERR_TIMEOUT)
6240 {
6241 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6242 return;
6243 }
6244 }
6245#endif
6246
6247 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6248 if (RT_SUCCESS(rc))
6249 {
6250 /*
6251 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6252 * interval as small as possible while gaining the most consistent and accurate frequency
6253 * (compared to what the host OS might have measured).
6254 *
6255 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6256 * same TSC frequency whenever possible so we need to keep the interval short.
6257 */
6258 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6259 AssertRC(rc);
6260 }
6261 else
6262 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6263}
6264
6265
6266/**
6267 * Creates the GIP.
6268 *
6269 * @returns VBox status code.
6270 * @param pDevExt Instance data. GIP stuff may be updated.
6271 */
6272static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6273{
6274 PSUPGLOBALINFOPAGE pGip;
6275 RTHCPHYS HCPhysGip;
6276 uint32_t u32SystemResolution;
6277 uint32_t u32Interval;
6278 uint32_t u32MinInterval;
6279 uint32_t uMod;
6280 unsigned cCpus;
6281 int rc;
6282
6283 LogFlow(("supdrvGipCreate:\n"));
6284
6285 /* Assert order. */
6286 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6287 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6288 Assert(!pDevExt->pGipTimer);
6289
6290 /*
6291 * Check the CPU count.
6292 */
6293 cCpus = RTMpGetArraySize();
6294 if ( cCpus > RTCPUSET_MAX_CPUS
6295 || cCpus > 256 /* ApicId is used for the mappings */)
6296 {
6297 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6298 return VERR_TOO_MANY_CPUS;
6299 }
6300
6301 /*
6302 * Allocate a contiguous set of pages with a default kernel mapping.
6303 */
6304 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6305 if (RT_FAILURE(rc))
6306 {
6307 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6308 return rc;
6309 }
6310 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6311 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6312
6313 /*
6314 * Allocate the TSC-delta sync struct on a separate cache line.
6315 */
6316 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6317 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6318 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6319
6320 /*
6321 * Find a reasonable update interval and initialize the structure.
6322 */
6323 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6324 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6325 * See @bugref{6710}. */
6326 u32MinInterval = RT_NS_10MS;
6327 u32SystemResolution = RTTimerGetSystemGranularity();
6328 u32Interval = u32MinInterval;
6329 uMod = u32MinInterval % u32SystemResolution;
6330 if (uMod)
6331 u32Interval += u32SystemResolution - uMod;
6332
6333 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6334
6335 if (RT_UNLIKELY( pDevExt->fOsTscDeltasInSync
6336 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6337 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6338 {
6339 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6340 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6341 return VERR_INTERNAL_ERROR_2;
6342 }
6343
6344#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6345 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6346 {
6347 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6348 rc = supdrvTscDeltaThreadInit(pDevExt);
6349 }
6350#endif
6351 if (RT_SUCCESS(rc))
6352 {
6353 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6354 if (RT_SUCCESS(rc))
6355 {
6356 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6357 if (RT_SUCCESS(rc))
6358 {
6359 uint16_t iCpu;
6360#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6361 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6362 {
6363 /*
6364 * Measure the TSC deltas now that we have MP notifications.
6365 */
6366 int cTries = 5;
6367 do
6368 {
6369 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6370 if (rc != VERR_TRY_AGAIN)
6371 break;
6372 } while (--cTries > 0);
6373 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6374 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6375 }
6376 else
6377 {
6378#if 0 /** @todo Hitting this on mac pro runing maverics. panicing on driver load is annoying.*/
6379 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6380 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("i=%u iCpu=%u %lld mode=%d\n", i, iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->enmMode);
6381#endif
6382 }
6383#endif
6384 if (RT_SUCCESS(rc))
6385 {
6386 rc = supdrvGipMeasureTscFreq(pDevExt);
6387 if (RT_SUCCESS(rc))
6388 {
6389 /*
6390 * Create the timer.
6391 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6392 */
6393 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6394 {
6395 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6396 pDevExt);
6397 if (rc == VERR_NOT_SUPPORTED)
6398 {
6399 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6400 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6401 }
6402 }
6403 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6404 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6405 if (RT_SUCCESS(rc))
6406 {
6407 /*
6408 * We're good.
6409 */
6410 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6411 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6412
6413 g_pSUPGlobalInfoPage = pGip;
6414 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6415 supdrvRefineTscFreq(pDevExt);
6416 return VINF_SUCCESS;
6417 }
6418
6419 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6420 Assert(!pDevExt->pGipTimer);
6421 }
6422 else
6423 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6424 }
6425 else
6426 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6427 }
6428 else
6429 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6430 }
6431 else
6432 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6433 }
6434 else
6435 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6436
6437 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6438 return rc;
6439}
6440
6441
6442/**
6443 * Terminates the GIP.
6444 *
6445 * @param pDevExt Instance data. GIP stuff may be updated.
6446 */
6447static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6448{
6449 int rc;
6450#ifdef DEBUG_DARWIN_GIP
6451 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6452 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6453 pDevExt->pGipTimer, pDevExt->GipMemObj));
6454#endif
6455
6456 /*
6457 * Stop receiving MP notifications before tearing anything else down.
6458 */
6459 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6460
6461#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6462 /*
6463 * Terminate the TSC-delta measurement thread and resources.
6464 */
6465 supdrvTscDeltaTerm(pDevExt);
6466#endif
6467
6468 /*
6469 * Destroy the TSC-refinement one-shot timer.
6470 */
6471 if (pDevExt->pTscRefineTimer)
6472 {
6473 RTTimerDestroy(pDevExt->pTscRefineTimer);
6474 pDevExt->pTscRefineTimer = NULL;
6475 }
6476
6477 if (pDevExt->pvTscDeltaSync)
6478 {
6479 RTMemFree(pDevExt->pvTscDeltaSync);
6480 pDevExt->pTscDeltaSync = NULL;
6481 pDevExt->pvTscDeltaSync = NULL;
6482 }
6483
6484 /*
6485 * Invalid the GIP data.
6486 */
6487 if (pDevExt->pGip)
6488 {
6489 supdrvGipTerm(pDevExt->pGip);
6490 pDevExt->pGip = NULL;
6491 }
6492 g_pSUPGlobalInfoPage = NULL;
6493
6494 /*
6495 * Destroy the timer and free the GIP memory object.
6496 */
6497 if (pDevExt->pGipTimer)
6498 {
6499 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6500 pDevExt->pGipTimer = NULL;
6501 }
6502
6503 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6504 {
6505 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6506 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6507 }
6508
6509 /*
6510 * Finally, make sure we've release the system timer resolution request
6511 * if one actually succeeded and is still pending.
6512 */
6513 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6514}
6515
6516
6517/**
6518 * Timer callback function sync GIP mode.
6519 * @param pTimer The timer.
6520 * @param pvUser Opaque pointer to the device extension.
6521 * @param iTick The timer tick.
6522 */
6523static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6524{
6525 RTCCUINTREG uFlags;
6526 uint64_t u64TSC;
6527 uint64_t u64NanoTS;
6528 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6529 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6530
6531 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6532 u64TSC = ASMReadTSC();
6533 u64NanoTS = RTTimeSystemNanoTS();
6534
6535 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6536 {
6537 /*
6538 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6539 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6540 * affected a bit until we get proper TSC deltas than implementing options like
6541 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6542 *
6543 * The likely hood of this happening is really low. On Windows, Linux timers
6544 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6545 */
6546 Assert(!ASMIntAreEnabled());
6547 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6548 }
6549
6550 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6551
6552 ASMSetFlags(uFlags);
6553}
6554
6555
6556/**
6557 * Timer callback function for async GIP mode.
6558 * @param pTimer The timer.
6559 * @param pvUser Opaque pointer to the device extension.
6560 * @param iTick The timer tick.
6561 */
6562static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6563{
6564 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6565 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6566 RTCPUID idCpu = RTMpCpuId();
6567 uint64_t u64TSC = ASMReadTSC();
6568 uint64_t NanoTS = RTTimeSystemNanoTS();
6569
6570 /** @todo reset the transaction number and whatnot when iTick == 1. */
6571 if (pDevExt->idGipMaster == idCpu)
6572 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6573 else
6574 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6575
6576 ASMSetFlags(fOldFlags);
6577}
6578
6579
6580/**
6581 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6582 *
6583 * @returns Index of the CPU in the cache set.
6584 * @param pGip The GIP.
6585 * @param idCpu The CPU ID.
6586 */
6587static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6588{
6589 uint32_t i, cTries;
6590
6591 /*
6592 * ASSUMES that CPU IDs are constant.
6593 */
6594 for (i = 0; i < pGip->cCpus; i++)
6595 if (pGip->aCPUs[i].idCpu == idCpu)
6596 return i;
6597
6598 cTries = 0;
6599 do
6600 {
6601 for (i = 0; i < pGip->cCpus; i++)
6602 {
6603 bool fRc;
6604 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6605 if (fRc)
6606 return i;
6607 }
6608 } while (cTries++ < 32);
6609 AssertReleaseFailed();
6610 return i - 1;
6611}
6612
6613
6614/**
6615 * The calling CPU should be accounted as online, update GIP accordingly.
6616 *
6617 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6618 *
6619 * @param pDevExt The device extension.
6620 * @param idCpu The CPU ID.
6621 */
6622static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6623{
6624 int iCpuSet = 0;
6625 uint16_t idApic = UINT16_MAX;
6626 uint32_t i = 0;
6627 uint64_t u64NanoTS = 0;
6628 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6629
6630 AssertPtrReturnVoid(pGip);
6631 AssertRelease(idCpu == RTMpCpuId());
6632 Assert(pGip->cPossibleCpus == RTMpGetCount());
6633
6634 /*
6635 * Do this behind a spinlock with interrupts disabled as this can fire
6636 * on all CPUs simultaneously, see @bugref{6110}.
6637 */
6638 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6639
6640 /*
6641 * Update the globals.
6642 */
6643 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6644 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6645 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6646 if (iCpuSet >= 0)
6647 {
6648 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6649 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6650 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6651 }
6652
6653 /*
6654 * Update the entry.
6655 */
6656 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6657 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6658 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
6659 idApic = ASMGetApicId();
6660 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6661 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6662 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6663
6664 /*
6665 * Update the APIC ID and CPU set index mappings.
6666 */
6667 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6668 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6669
6670 /* Update the Mp online/offline counter. */
6671 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6672
6673#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6674 /*
6675 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6676 *
6677 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6678 * update the state and it'll get serviced when the thread's listening interval times out.
6679 */
6680 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6681 {
6682 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6683 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6684 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6685 {
6686 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6687 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6688 }
6689 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6690 }
6691#endif
6692
6693 /* commit it */
6694 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6695
6696 RTSpinlockRelease(pDevExt->hGipSpinlock);
6697}
6698
6699
6700/**
6701 * The CPU should be accounted as offline, update the GIP accordingly.
6702 *
6703 * This is used by supdrvGipMpEvent.
6704 *
6705 * @param pDevExt The device extension.
6706 * @param idCpu The CPU ID.
6707 */
6708static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6709{
6710 int iCpuSet;
6711 unsigned i;
6712
6713 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6714
6715 AssertPtrReturnVoid(pGip);
6716 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6717
6718 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6719 AssertReturnVoid(iCpuSet >= 0);
6720
6721 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6722 AssertReturnVoid(i < pGip->cCpus);
6723 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6724
6725 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6726 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6727
6728 /* Update the Mp online/offline counter. */
6729 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6730
6731 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6732 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
6733 {
6734 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6735 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6736 }
6737
6738 /* Reset the TSC delta, we will recalculate it lazily. */
6739 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6740 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6741
6742#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6743 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6744 if (supdrvIsInvariantTsc())
6745 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6746#endif
6747
6748 /* commit it */
6749 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6750
6751 RTSpinlockRelease(pDevExt->hGipSpinlock);
6752}
6753
6754
6755/**
6756 * Multiprocessor event notification callback.
6757 *
6758 * This is used to make sure that the GIP master gets passed on to
6759 * another CPU. It also updates the associated CPU data.
6760 *
6761 * @param enmEvent The event.
6762 * @param idCpu The cpu it applies to.
6763 * @param pvUser Pointer to the device extension.
6764 *
6765 * @remarks This function -must- fire on the newly online'd CPU for the
6766 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6767 * RTMPEVENT_OFFLINE case.
6768 */
6769static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6770{
6771 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6772 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6773
6774 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6775
6776 /*
6777 * Update the GIP CPU data.
6778 */
6779 if (pGip)
6780 {
6781 switch (enmEvent)
6782 {
6783 case RTMPEVENT_ONLINE:
6784 AssertRelease(idCpu == RTMpCpuId());
6785 supdrvGipMpEventOnline(pDevExt, idCpu);
6786 break;
6787 case RTMPEVENT_OFFLINE:
6788 supdrvGipMpEventOffline(pDevExt, idCpu);
6789 break;
6790 }
6791 }
6792
6793 /*
6794 * Make sure there is a master GIP.
6795 */
6796 if (enmEvent == RTMPEVENT_OFFLINE)
6797 {
6798 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6799 if (idGipMaster == idCpu)
6800 {
6801 /*
6802 * Find a new GIP master.
6803 */
6804 bool fIgnored;
6805 unsigned i;
6806 int64_t iTSCDelta;
6807 uint32_t idxNewGipMaster;
6808 RTCPUID idNewGipMaster = NIL_RTCPUID;
6809 RTCPUSET OnlineCpus;
6810 RTMpGetOnlineSet(&OnlineCpus);
6811
6812 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6813 {
6814 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6815 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6816 && idCurCpu != idGipMaster)
6817 {
6818 idNewGipMaster = idCurCpu;
6819 break;
6820 }
6821 }
6822
6823 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6824 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6825 NOREF(fIgnored);
6826
6827 /*
6828 * Adjust all the TSC deltas against the new GIP master.
6829 */
6830 if (pGip)
6831 {
6832 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6833 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6834 Assert(iTSCDelta != INT64_MAX);
6835 for (i = 0; i < pGip->cCpus; i++)
6836 {
6837 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6838 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6839 if (iWorkerDelta != INT64_MAX)
6840 iWorkerDelta -= iTSCDelta;
6841 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6842 }
6843 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6844 }
6845 }
6846 }
6847}
6848
6849
/**
 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
 * compute the delta between them.
 *
 * The callback is a no-op on every CPU other than the master (the TSC delta
 * initiator recorded in the device extension) and the worker.  Master and
 * worker run the loops below in lock-step, using pDevExt->pTscDeltaSync->u
 * as the handshake variable.  The result is stored in the worker's
 * i64TSCDelta GIP field, which stays INT64_MAX if no usable sample was
 * obtained.
 *
 * @param   idCpu       The CPU we are current scheduled on.
 * @param   pvUser1     Opaque pointer to the device instance data.
 * @param   pvUser2     Opaque pointer to the worker Cpu Id.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *          read the TSC at exactly the same time on both the master and the worker
 *          CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
 *          pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
 *          try to minimize the measurement error by computing the minimum read time
 *          of the compare statement in the worker by taking TSC measurements across
 *          it.
 *
 *          We ignore the first few runs of the loop in order to prime the cache.
 *          Also, be careful about using 'pause' instruction in critical busy-wait
 *          loops in this code - it can cause undesired behaviour with
 *          hyperthreading.
 *
 *          It must be noted that the computed minimum read time is mostly to
 *          eliminate huge deltas when the worker is too early and doesn't by itself
 *          help produce more accurate deltas. We allow two times the computed
 *          minimum as an arbitrary acceptable threshold. Therefore, it is still
 *          possible to get negative deltas where there are none when the worker is
 *          earlier. As long as these occasional negative deltas are lower than the
 *          time it takes to exit guest-context and the OS to reschedule EMT on a
 *          different CPU we won't expose a TSC that jumped backwards. It is because
 *          of the existence of the negative deltas we don't recompute the delta with
 *          the master and worker interchanged to eliminate the remaining measurement
 *          error.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT      pDevExt          = (PSUPDRVDEVEXT)pvUser1;
    PSUPGLOBALINFOPAGE pGip             = pDevExt->pGip;
    uint32_t          *pidWorker        = (uint32_t *)pvUser2;
    RTCPUID            idMaster         = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
    unsigned           idxMaster        = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
    unsigned           idxWorker        = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
    PSUPGIPCPU         pGipCpuMaster    = &pGip->aCPUs[idxMaster];
    PSUPGIPCPU         pGipCpuWorker    = &pGip->aCPUs[idxWorker];
    int                cTriesLeft       = 12;

    /* Only the master and the designated worker take part in the measurement. */
    if (   idCpu != idMaster
        && idCpu != *pidWorker)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        /** @todo This was introduced for Windows, but since Windows doesn't use this
         *        code path any longer (as DPC timeouts BSOD regardless of interrupts,
         *        see @bugref{6710} comment 81), eventually phase it out. */
        uint64_t       uTscNow;
        uint64_t       uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master: announce ourselves, then wait (bounded by cWaitTicks TSC ticks) for the worker's reply. */
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker: wait (same timeout) for the master's announcement, then acknowledge it. */
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Timed out: flag the measurement as failed via the worker's delta. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    /*
     * Up to 12 attempts of GIP_TSC_DELTA_LOOPS rounds each; we stop as soon
     * as an attempt has produced a delta (see the check at the bottom).
     */
    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    while (cTriesLeft-- > 0)
    {
        unsigned i;
        uint64_t uMinCmpReadTime = UINT64_MAX;  /* Worker only: smallest compare/read time seen in this attempt. */
        for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
        {
            if (idCpu == idMaster)
            {
                /*
                 * The master.
                 */
                RTCCUINTREG uFlags;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);

                /* Disable interrupts only in the master for as short a period
                   as possible, thanks again to Windows. See @bugref{6710} comment #73. */
                uFlags = ASMIntDisableFlags();

                /* Spin until the sync variable leaves the START state (the worker sets WORKER_READY). */
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
                    ;

                /* Publish our TSC reading; retry should it collide with the reserved "no sample" value. */
                do
                {
                    ASMSerializeInstruction();
                    ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

                ASMSetFlags(uFlags);

                /* Wait for the worker to finish its part of this round. */
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ;

                /* Past the primer and read-time rounds: keep the smallest delta for which the worker supplied a sample. */
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                    {
                        int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
                        if (iDelta < pGipCpuWorker->i64TSCDelta)
                            pGipCpuWorker->i64TSCDelta = iDelta;
                    }
                }

                /* Reset our sample and release the worker for the next round. */
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
            }
            else
            {
                /*
                 * The worker.
                 */
                uint64_t uTscWorker;
                uint64_t uTscWorkerFlushed;
                uint64_t uCmpReadTime;

                ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
                    ;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);

                /*
                 * Keep reading the TSC until we notice that the master has read his. Reading
                 * the TSC -after- the master has updated the memory is way too late. We thus
                 * compensate by trying to measure how long it took for the worker to notice
                 * the memory flushed from the master.
                 */
                do
                {
                    ASMSerializeInstruction();
                    uTscWorker = ASMReadTSC();
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMSerializeInstruction();
                uTscWorkerFlushed = ASMReadTSC();

                /* TSC ticks between our last reading inside the loop and the first one after it. */
                uCmpReadTime = uTscWorkerFlushed - uTscWorker;
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                    /* Only publish the sample when the read time is below twice the minimum seen so far. */
                    if (uCmpReadTime < (uMinCmpReadTime << 1))
                    {
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                        if (uCmpReadTime < uMinCmpReadTime)
                            uMinCmpReadTime = uCmpReadTime;
                    }
                    else
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
                }
                else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
                {
                    /* Read-time rounds: only track the minimum, no sample is published. */
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }

                /* Signal completion and wait for the master to move the sync variable on. */
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ASMNopPause();
            }
        }

        /* Done as soon as an attempt has yielded a delta. */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
            break;
    }
}
7052
7053
7054/**
7055 * Clears TSC delta related variables.
7056 *
7057 * Clears all TSC samples as well as the delta synchronization variable on the
7058 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
7059 *
7060 * @param pDevExt Pointer to the device instance data.
7061 * @param fClearDeltas Whether the deltas are also to be cleared.
7062 */
7063DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7064{
7065 unsigned iCpu;
7066 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7067 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7068 {
7069 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7070 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7071 if (fClearDeltas)
7072 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7073 }
7074 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7075}
7076
7077
7078/**
7079 * Measures the TSC delta between the master GIP CPU and one specified worker
7080 * CPU.
7081 *
7082 * @returns VBox status code.
7083 * @param pDevExt Pointer to the device instance data.
7084 * @param idxWorker The index of the worker CPU from the GIP's array of
7085 * CPUs.
7086 *
7087 * @remarks This can be called with preemption disabled!
7088 */
7089static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7090{
7091 int rc;
7092 PSUPGLOBALINFOPAGE pGip;
7093 PSUPGIPCPU pGipCpuWorker;
7094 RTCPUID idMaster;
7095
7096 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7097 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7098
7099 pGip = pDevExt->pGip;
7100 idMaster = pDevExt->idGipMaster;
7101 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7102
7103 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7104
7105 if (pGipCpuWorker->idCpu == idMaster)
7106 {
7107 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7108 return VINF_SUCCESS;
7109 }
7110
7111 /* Set the master TSC as the initiator. */
7112 while (ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7113 {
7114 /*
7115 * Sleep here rather than spin as there is a parallel measurement
7116 * being executed and that can take a good while to be done.
7117 */
7118 RTThreadSleep(1);
7119 }
7120
7121 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7122 {
7123 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7124 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7125 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7126 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7127 if (RT_SUCCESS(rc))
7128 {
7129 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7130 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7131 }
7132 }
7133 else
7134 rc = VERR_CPU_OFFLINE;
7135
7136 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7137 return rc;
7138}
7139
7140
7141/**
7142 * Measures the TSC deltas between CPUs.
7143 *
7144 * @param pDevExt Pointer to the device instance data.
7145 * @param pidxMaster Where to store the index of the chosen master TSC if we
7146 * managed to determine the TSC deltas successfully.
7147 * Optional, can be NULL.
7148 *
7149 * @returns VBox status code.
7150 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7151 * idCpu, GIP's online CPU set which are populated in
7152 * supdrvGipInitOnCpu().
7153 */
7154static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7155{
7156 PSUPGIPCPU pGipCpuMaster;
7157 unsigned iCpu;
7158 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7159 uint32_t idxMaster = UINT32_MAX;
7160 int rc = VINF_SUCCESS;
7161 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7162 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7163
7164 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7165
7166 /*
7167 * Pick the first CPU online as the master TSC and make it the new GIP master based
7168 * on the APIC ID.
7169 *
7170 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7171 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7172 * master as this point since the sync/async timer isn't created yet.
7173 */
7174 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7175 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7176 {
7177 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7178 if (idxCpu != UINT16_MAX)
7179 {
7180 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7181 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7182 {
7183 idxMaster = idxCpu;
7184 pGipCpu->i64TSCDelta = 0;
7185 break;
7186 }
7187 }
7188 }
7189 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7190 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7191 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7192
7193 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7194 if (pGip->cOnlineCpus <= 1)
7195 {
7196 if (pidxMaster)
7197 *pidxMaster = idxMaster;
7198 return VINF_SUCCESS;
7199 }
7200
7201 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7202 {
7203 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7204 if ( iCpu != idxMaster
7205 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7206 {
7207 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7208 if (RT_FAILURE(rc))
7209 {
7210 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7211 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7212 break;
7213 }
7214
7215 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7216 {
7217 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7218 rc = VERR_TRY_AGAIN;
7219 break;
7220 }
7221 }
7222 }
7223
7224 if ( RT_SUCCESS(rc)
7225 && !pGipCpuMaster->i64TSCDelta
7226 && pidxMaster)
7227 {
7228 *pidxMaster = idxMaster;
7229 }
7230 return rc;
7231}
7232
7233
7234/**
7235 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7236 *
7237 * @param idCpu Ignored.
7238 * @param pvUser1 Where to put the TSC.
7239 * @param pvUser2 Ignored.
7240 */
7241static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7242{
7243 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7244}
7245
7246
7247/**
7248 * Determine if Async GIP mode is required because of TSC drift.
7249 *
7250 * When using the default/normal timer code it is essential that the time stamp counter
7251 * (TSC) runs never backwards, that is, a read operation to the counter should return
7252 * a bigger value than any previous read operation. This is guaranteed by the latest
7253 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7254 * case we have to choose the asynchronous timer mode.
7255 *
7256 * @param poffMin Pointer to the determined difference between different
7257 * cores (optional, can be NULL).
7258 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7259 */
7260static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7261{
7262 /*
7263 * Just iterate all the cpus 8 times and make sure that the TSC is
7264 * ever increasing. We don't bother taking TSC rollover into account.
7265 */
7266 int iEndCpu = RTMpGetArraySize();
7267 int iCpu;
7268 int cLoops = 8;
7269 bool fAsync = false;
7270 int rc = VINF_SUCCESS;
7271 uint64_t offMax = 0;
7272 uint64_t offMin = ~(uint64_t)0;
7273 uint64_t PrevTsc = ASMReadTSC();
7274
7275 while (cLoops-- > 0)
7276 {
7277 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7278 {
7279 uint64_t CurTsc;
7280 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7281 if (RT_SUCCESS(rc))
7282 {
7283 if (CurTsc <= PrevTsc)
7284 {
7285 fAsync = true;
7286 offMin = offMax = PrevTsc - CurTsc;
7287 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7288 iCpu, cLoops, CurTsc, PrevTsc));
7289 break;
7290 }
7291
7292 /* Gather statistics (except the first time). */
7293 if (iCpu != 0 || cLoops != 7)
7294 {
7295 uint64_t off = CurTsc - PrevTsc;
7296 if (off < offMin)
7297 offMin = off;
7298 if (off > offMax)
7299 offMax = off;
7300 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7301 }
7302
7303 /* Next */
7304 PrevTsc = CurTsc;
7305 }
7306 else if (rc == VERR_NOT_SUPPORTED)
7307 break;
7308 else
7309 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7310 }
7311
7312 /* broke out of the loop. */
7313 if (iCpu < iEndCpu)
7314 break;
7315 }
7316
7317 if (poffMin)
7318 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7319 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7320 fAsync, iEndCpu, rc, offMin, offMax));
7321#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7322 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7323#endif
7324 return fAsync;
7325}
7326
7327
7328/**
7329 * Determine the GIP TSC mode.
7330 *
7331 * @returns The most suitable TSC mode.
7332 * @param pDevExt Pointer to the device instance data.
7333 */
7334static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7335{
7336 /* Trust CPUs that declare their TSC to be invariant. */
7337#if 0 /** @todo this cannot be enabled until Michal's AMD laptop with insane deltas are working. */
7338 if (supdrvIsInvariantTsc())
7339 return SUPGIPMODE_INVARIANT_TSC;
7340#endif
7341
7342 /*
7343 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7344 * (1) There might be a skew between the CPU, so that cpu0
7345 * returns a TSC that is slightly different from cpu1.
7346 * (2) Power management (and other things) may cause the TSC
7347 * to run at a non-constant speed, and cause the speed
7348 * to be different on the cpus. This will result in (1).
7349 *
7350 * So, on SMP systems we'll have to select the ASYNC update method
7351 * if there are symptoms of these problems.
7352 */
7353 if (RTMpGetCount() > 1)
7354 {
7355 uint32_t uEAX, uEBX, uECX, uEDX;
7356 uint64_t u64DiffCoresIgnored;
7357
7358 /* Permit the user and/or the OS specific bits to force async mode. */
7359 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7360 return SUPGIPMODE_ASYNC_TSC;
7361
7362 /* Try check for current differences between the cpus. */
7363 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7364 return SUPGIPMODE_ASYNC_TSC;
7365
7366 /*
7367 * If the CPU supports power management and is an AMD one we
7368 * won't trust it unless it has the TscInvariant bit is set.
7369 */
7370 /** @todo this is now redundant. remove later. */
7371 /* Check for "AuthenticAMD" */
7372 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7373 if ( uEAX >= 1
7374 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7375 {
7376 /* Check for APM support and that TscInvariant is cleared. */
7377 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7378 if (uEAX >= 0x80000007)
7379 {
7380 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7381 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7382 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7383 return SUPGIPMODE_ASYNC_TSC;
7384 }
7385 }
7386 }
7387
7388 /** @todo later remove this when the above todo with AMD laptop is done (i.e.
7389 * TSC deltas handled everywhere). */
7390 if (supdrvIsInvariantTsc())
7391 return SUPGIPMODE_INVARIANT_TSC;
7392 return SUPGIPMODE_SYNC_TSC;
7393}
7394
7395
7396/**
7397 * Initializes per-CPU GIP information.
7398 *
7399 * @param pDevExt Pointer to the device instance data.
7400 * @param pGip Pointer to the GIP.
7401 * @param pCpu Pointer to which GIP CPU to initalize.
7402 * @param u64NanoTS The current nanosecond timestamp.
7403 */
7404static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7405{
7406 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7407 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7408 pCpu->u32TransactionId = 2;
7409 pCpu->u64NanoTS = u64NanoTS;
7410 pCpu->u64TSC = ASMReadTSC();
7411 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7412 pCpu->i64TSCDelta = pDevExt->fOsTscDeltasInSync ? 0 : INT64_MAX;
7413
7414 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7415 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7416 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7417 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7418
7419 /*
7420 * We don't know the following values until we've executed updates.
7421 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7422 * the 2nd timer callout.
7423 */
7424 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7425 pCpu->u32UpdateIntervalTSC
7426 = pCpu->au32TSCHistory[0]
7427 = pCpu->au32TSCHistory[1]
7428 = pCpu->au32TSCHistory[2]
7429 = pCpu->au32TSCHistory[3]
7430 = pCpu->au32TSCHistory[4]
7431 = pCpu->au32TSCHistory[5]
7432 = pCpu->au32TSCHistory[6]
7433 = pCpu->au32TSCHistory[7]
7434 = (uint32_t)(_4G / pGip->u32UpdateHz);
7435}
7436
7437
7438/**
7439 * Initializes the GIP data.
7440 *
7441 * @param pDevExt Pointer to the device instance data.
7442 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7443 * @param HCPhys The physical address of the GIP.
7444 * @param u64NanoTS The current nanosecond timestamp.
7445 * @param uUpdateHz The update frequency.
7446 * @param uUpdateIntervalNS The update interval in nanoseconds.
7447 * @param cCpus The CPU count.
7448 */
7449static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7450 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7451{
7452 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7453 unsigned i;
7454#ifdef DEBUG_DARWIN_GIP
7455 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7456#else
7457 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7458#endif
7459
7460 /*
7461 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7462 * We only bother with TSC-deltas on invariant CPUs for now.
7463 */
7464 pDevExt->fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7465
7466 /*
7467 * Initialize the structure.
7468 */
7469 memset(pGip, 0, cbGip);
7470 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7471 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7472 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7473 pGip->cCpus = (uint16_t)cCpus;
7474 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7475 pGip->u32UpdateHz = uUpdateHz;
7476 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7477 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7478 RTCpuSetEmpty(&pGip->PresentCpuSet);
7479 RTMpGetSet(&pGip->PossibleCpuSet);
7480 pGip->cOnlineCpus = RTMpGetOnlineCount();
7481 pGip->cPresentCpus = RTMpGetPresentCount();
7482 pGip->cPossibleCpus = RTMpGetCount();
7483 pGip->idCpuMax = RTMpGetMaxCpuId();
7484 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7485 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7486 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7487 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7488
7489 for (i = 0; i < cCpus; i++)
7490 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7491
7492 /*
7493 * Link it to the device extension.
7494 */
7495 pDevExt->pGip = pGip;
7496 pDevExt->HCPhysGip = HCPhys;
7497 pDevExt->cGipUsers = 0;
7498}
7499
7500
7501/**
7502 * On CPU initialization callback for RTMpOnAll.
7503 *
7504 * @param idCpu The CPU ID.
7505 * @param pvUser1 The device extension.
7506 * @param pvUser2 The GIP.
7507 */
7508static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7509{
7510 /* This is good enough, even though it will update some of the globals a
7511 bit to much. */
7512 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7513}
7514
7515
7516/**
7517 * Invalidates the GIP data upon termination.
7518 *
7519 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7520 */
7521static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7522{
7523 unsigned i;
7524 pGip->u32Magic = 0;
7525 for (i = 0; i < pGip->cCpus; i++)
7526 {
7527 pGip->aCPUs[i].u64NanoTS = 0;
7528 pGip->aCPUs[i].u64TSC = 0;
7529 pGip->aCPUs[i].iTSCHistoryHead = 0;
7530 pGip->aCPUs[i].u64TSCSample = 0;
7531 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7532 }
7533}
7534
7535
7536/**
7537 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7538 * updates all the per cpu data except the transaction id.
7539 *
7540 * @param pDevExt The device extension.
7541 * @param pGipCpu Pointer to the per cpu data.
7542 * @param u64NanoTS The current time stamp.
7543 * @param u64TSC The current TSC.
7544 * @param iTick The current timer tick.
7545 *
7546 * @remarks Can be called with interrupts disabled!
7547 */
7548static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7549{
7550 uint64_t u64TSCDelta;
7551 uint32_t u32UpdateIntervalTSC;
7552 uint32_t u32UpdateIntervalTSCSlack;
7553 unsigned iTSCHistoryHead;
7554 uint64_t u64CpuHz;
7555 uint32_t u32TransactionId;
7556
7557 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7558 AssertPtrReturnVoid(pGip);
7559
7560 /* Delta between this and the previous update. */
7561 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7562
7563 /*
7564 * Update the NanoTS.
7565 */
7566 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7567
7568 /*
7569 * Calc TSC delta.
7570 */
7571 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7572 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7573
7574 /* We don't need to keep realculating the frequency when it's invariant. */
7575 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
7576 return;
7577
7578 if (u64TSCDelta >> 32)
7579 {
7580 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7581 pGipCpu->cErrors++;
7582 }
7583
7584 /*
7585 * On the 2nd and 3rd callout, reset the history with the current TSC
7586 * interval since the values entered by supdrvGipInit are totally off.
7587 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7588 * better, while the 3rd should be most reliable.
7589 */
7590 u32TransactionId = pGipCpu->u32TransactionId;
7591 if (RT_UNLIKELY( ( u32TransactionId == 5
7592 || u32TransactionId == 7)
7593 && ( iTick == 2
7594 || iTick == 3) ))
7595 {
7596 unsigned i;
7597 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7598 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7599 }
7600
7601 /*
7602 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
7603 * Wait until we have at least one full history since the above history reset. The
7604 * assumption is that the majority of the previous history values will be tolerable.
7605 * See @bugref{6710} comment #67.
7606 */
7607 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
7608 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7609 {
7610 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
7611 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
7612 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
7613 {
7614 uint32_t u32;
7615 u32 = pGipCpu->au32TSCHistory[0];
7616 u32 += pGipCpu->au32TSCHistory[1];
7617 u32 += pGipCpu->au32TSCHistory[2];
7618 u32 += pGipCpu->au32TSCHistory[3];
7619 u32 >>= 2;
7620 u64TSCDelta = pGipCpu->au32TSCHistory[4];
7621 u64TSCDelta += pGipCpu->au32TSCHistory[5];
7622 u64TSCDelta += pGipCpu->au32TSCHistory[6];
7623 u64TSCDelta += pGipCpu->au32TSCHistory[7];
7624 u64TSCDelta >>= 2;
7625 u64TSCDelta += u32;
7626 u64TSCDelta >>= 1;
7627 }
7628 }
7629
7630
7631 /*
7632 * TSC History.
7633 */
7634 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7635 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7636 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7637 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7638
7639 /*
7640 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7641 *
7642 * On Windows, we have an occasional (but recurring) sour value that messed up
7643 * the history but taking only 1 interval reduces the precision overall.
7644 * However, this problem existed before the invariant mode was introduced.
7645 */
7646 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7647 || pGip->u32UpdateHz >= 1000)
7648 {
7649 uint32_t u32;
7650 u32 = pGipCpu->au32TSCHistory[0];
7651 u32 += pGipCpu->au32TSCHistory[1];
7652 u32 += pGipCpu->au32TSCHistory[2];
7653 u32 += pGipCpu->au32TSCHistory[3];
7654 u32 >>= 2;
7655 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7656 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7657 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7658 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7659 u32UpdateIntervalTSC >>= 2;
7660 u32UpdateIntervalTSC += u32;
7661 u32UpdateIntervalTSC >>= 1;
7662
7663 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7664 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7665 }
7666 else if (pGip->u32UpdateHz >= 90)
7667 {
7668 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7669 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7670 u32UpdateIntervalTSC >>= 1;
7671
7672 /* value chosen on a 2GHz thinkpad running windows */
7673 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7674 }
7675 else
7676 {
7677 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7678
7679 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7680 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7681 }
7682 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7683
7684 /*
7685 * CpuHz.
7686 */
7687 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7688 u64CpuHz /= pGip->u32UpdateIntervalNS;
7689 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7690}
7691
7692
7693/**
7694 * Updates the GIP.
7695 *
7696 * @param pDevExt The device extension.
7697 * @param u64NanoTS The current nanosecond timesamp.
7698 * @param u64TSC The current TSC timesamp.
7699 * @param idCpu The CPU ID.
7700 * @param iTick The current timer tick.
7701 *
7702 * @remarks Can be called with interrupts disabled!
7703 */
7704static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7705{
7706 /*
7707 * Determine the relevant CPU data.
7708 */
7709 PSUPGIPCPU pGipCpu;
7710 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7711 AssertPtrReturnVoid(pGip);
7712
7713 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7714 pGipCpu = &pGip->aCPUs[0];
7715 else
7716 {
7717 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7718 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7719 return;
7720 pGipCpu = &pGip->aCPUs[iCpu];
7721 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7722 return;
7723 }
7724
7725 /*
7726 * Start update transaction.
7727 */
7728 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7729 {
7730 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7731 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7732 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7733 pGipCpu->cErrors++;
7734 return;
7735 }
7736
7737 /*
7738 * Recalc the update frequency every 0x800th time.
7739 */
7740 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7741 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7742 {
7743 if (pGip->u64NanoTSLastUpdateHz)
7744 {
7745#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7746 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7747 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7748 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7749 {
7750 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7751 * calculation on non-invariant hosts if it changes the history decision
7752 * taken in supdrvGipDoUpdateCpu(). */
7753 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7754 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7755 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7756 }
7757#endif
7758 }
7759 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7760 }
7761
7762 /*
7763 * Update the data.
7764 */
7765 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7766
7767 /*
7768 * Complete transaction.
7769 */
7770 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7771}
7772
7773
7774/**
7775 * Updates the per cpu GIP data for the calling cpu.
7776 *
7777 * @param pDevExt The device extension.
7778 * @param u64NanoTS The current nanosecond timesamp.
7779 * @param u64TSC The current TSC timesamp.
7780 * @param idCpu The CPU ID.
7781 * @param idApic The APIC id for the CPU index.
7782 * @param iTick The current timer tick.
7783 *
7784 * @remarks Can be called with interrupts disabled!
7785 */
7786static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7787 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7788{
7789 uint32_t iCpu;
7790 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7791
7792 /*
7793 * Avoid a potential race when a CPU online notification doesn't fire on
7794 * the onlined CPU but the tick creeps in before the event notification is
7795 * run.
7796 */
7797 if (RT_UNLIKELY(iTick == 1))
7798 {
7799 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7800 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7801 supdrvGipMpEventOnline(pDevExt, idCpu);
7802 }
7803
7804 iCpu = pGip->aiCpuFromApicId[idApic];
7805 if (RT_LIKELY(iCpu < pGip->cCpus))
7806 {
7807 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7808 if (pGipCpu->idCpu == idCpu)
7809 {
7810 /*
7811 * Start update transaction.
7812 */
7813 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7814 {
7815 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7816 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7817 pGipCpu->cErrors++;
7818 return;
7819 }
7820
7821 /*
7822 * Update the data.
7823 */
7824 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7825
7826 /*
7827 * Complete transaction.
7828 */
7829 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7830 }
7831 }
7832}
7833
7834
7835/**
7836 * Resume built-in keyboard on MacBook Air and Pro hosts.
7837 * If there is no built-in keyboard device, return success anyway.
7838 *
7839 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7840 */
7841static int supdrvIOCtl_ResumeSuspendedKbds(void)
7842{
7843#if defined(RT_OS_DARWIN)
7844 return supdrvDarwinResumeSuspendedKbds();
7845#else
7846 return VERR_NOT_IMPLEMENTED;
7847#endif
7848}
7849
7850
7851/**
7852 * Service a TSC-delta measurement request.
7853 *
7854 * @returns VBox status code.
7855 * @param pDevExt Pointer to the device instance data.
7856 * @param pReq Pointer to the TSC-delta measurement request.
7857 */
7858static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7859{
7860 PSUPGLOBALINFOPAGE pGip;
7861 RTCPUID idCpuWorker;
7862 int rc = VERR_CPU_NOT_FOUND;
7863 int16_t cTries;
7864 RTMSINTERVAL cMsWaitRetry;
7865 uint16_t iCpu;
7866
7867 /*
7868 * Validate.
7869 */
7870 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7871 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7872 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7873 idCpuWorker = pReq->u.In.idCpu;
7874 if (idCpuWorker == NIL_RTCPUID)
7875 return VERR_INVALID_CPU_ID;
7876
7877 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7878 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7879 pGip = pDevExt->pGip;
7880
7881 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
7882 return VINF_SUCCESS;
7883
7884 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7885 {
7886 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7887 if (pGipCpuWorker->idCpu == idCpuWorker)
7888 {
7889 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7890 && !pReq->u.In.fForce)
7891 return VINF_SUCCESS;
7892
7893#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7894 if (pReq->u.In.fAsync)
7895 {
7896 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7897 * to pass those options to the thread somehow and implement it in the
7898 * thread. Check if anyone uses/needs fAsync before implementing this. */
7899 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7900 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7901 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7902 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7903 {
7904 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7905 }
7906 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7907 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7908 return VINF_SUCCESS;
7909 }
7910#endif
7911
7912 while (cTries-- > 0)
7913 {
7914 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7915 if (RT_SUCCESS(rc))
7916 {
7917 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7918 break;
7919 }
7920
7921 if (cMsWaitRetry)
7922 RTThreadSleep(cMsWaitRetry);
7923 }
7924
7925 break;
7926 }
7927 }
7928 return rc;
7929}
7930
7931
7932/**
7933 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7934 *
7935 * @returns VBox status code.
7936 * @param pDevExt Pointer to the device instance data.
7937 * @param pReq Pointer to the TSC-read request.
7938 */
7939static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7940{
7941 uint64_t uTsc;
7942 uint16_t idApic;
7943 int16_t cTries;
7944 PSUPGLOBALINFOPAGE pGip;
7945 int rc;
7946
7947 /*
7948 * Validate.
7949 */
7950 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7951 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7952 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7953 pGip = pDevExt->pGip;
7954
7955 cTries = 4;
7956 while (cTries-- > 0)
7957 {
7958 int rc2;
7959 uint16_t iCpu;
7960
7961 rc = SUPGetTsc(&uTsc, &idApic);
7962 if (RT_SUCCESS(rc))
7963 {
7964 pReq->u.Out.u64AdjustedTsc = uTsc;
7965 pReq->u.Out.idApic = idApic;
7966 return VINF_SUCCESS;
7967 }
7968
7969 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
7970 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7971 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7972 iCpu = pGip->aiCpuFromApicId[idApic];
7973 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7974
7975 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7976 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7977 if (RT_SUCCESS(rc2))
7978 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7979 }
7980
7981 return rc;
7982}
7983
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette