VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 54181

Last change on this file since 54181 was 54181, checked in by vboxsync, 10 years ago

HostDrivers/support: fix reinit of GipCpu data where TSC-delta isn't really relevant.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 298.2 KB
Line 
1/* $Id: SUPDrv.c 54181 2015-02-12 17:34:24Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency by which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops until we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrent safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126
/* Compile-time checks on the loop counts above: there must be fewer primer
 * loops than read-time loops, and the two together must be fewer than the
 * total number of measurement loops. */
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
128AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
129
130/** @def VBOX_SVN_REV
131 * The makefile should define this if it can; it falls back to 0 otherwise. */
132#ifndef VBOX_SVN_REV
133# define VBOX_SVN_REV 0
134#endif
135
136#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
137# define DO_NOT_START_GIP
138#endif
139
140/*******************************************************************************
141* Internal Functions *
142*******************************************************************************/
143static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
144static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
145static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
146static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
147static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
148static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
149static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
150static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt);
151static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
152static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
153static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
154static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
155static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
156static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
157DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
158DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
159static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
160static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
161static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
162static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
163static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
164static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
165static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
166static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
167static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
168static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
169static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
170 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
171static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
172static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
173static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
174static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
175 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
176static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
177static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
178static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
179static int supdrvIOCtl_ResumeSuspendedKbds(void);
180
181
182/*******************************************************************************
183* Global Variables *
184*******************************************************************************/
185DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
186
187
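188/**
189 * Array of the R0 SUP API (symbol name / address pairs). The leading SUPR0Abs* entries are patched by array index in supdrvInitDevExt(), so their order must not change.
190 */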
191static SUPFUNC g_aFunctions[] =
192{
193/* SED: START */
194 /* name function */
195 /* Entries with absolute addresses determined at runtime, fixup
196 code makes ugly ASSUMPTIONS about the order here: */
197 { "SUPR0AbsIs64bit", (void *)0 },
198 { "SUPR0Abs64bitKernelCS", (void *)0 },
199 { "SUPR0Abs64bitKernelSS", (void *)0 },
200 { "SUPR0Abs64bitKernelDS", (void *)0 },
201 { "SUPR0AbsKernelCS", (void *)0 },
202 { "SUPR0AbsKernelSS", (void *)0 },
203 { "SUPR0AbsKernelDS", (void *)0 },
204 { "SUPR0AbsKernelES", (void *)0 },
205 { "SUPR0AbsKernelFS", (void *)0 },
206 { "SUPR0AbsKernelGS", (void *)0 },
207 /* Normal function pointers: */
208 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
209 { "SUPGetGIP", (void *)SUPGetGIP },
210 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
211 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
212 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
213 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
214 { "SUPR0ContFree", (void *)SUPR0ContFree },
215 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
216 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
217 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
218 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
219 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
220 { "SUPR0LockMem", (void *)SUPR0LockMem },
221 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
222 { "SUPR0LowFree", (void *)SUPR0LowFree },
223 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
224 { "SUPR0MemFree", (void *)SUPR0MemFree },
225 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
226 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
227 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
228 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
229 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
230 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
231 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
232 { "SUPR0PageFree", (void *)SUPR0PageFree },
233 { "SUPR0Printf", (void *)SUPR0Printf },
234 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
235 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
236 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
237 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
238 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
239 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
240 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
241 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
242 { "SUPSemEventClose", (void *)SUPSemEventClose },
243 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
244 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
245 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
246 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
247 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
248 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
249 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
250 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
251 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
252 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
253 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
254 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
255 { "SUPSemEventWait", (void *)SUPSemEventWait },
256 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
257 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
258 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
259
260 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
261 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
262 { "RTAssertMsg1", (void *)RTAssertMsg1 },
263 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
264 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
265 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
266 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
267 { "RTCrc32", (void *)RTCrc32 },
268 { "RTCrc32Finish", (void *)RTCrc32Finish },
269 { "RTCrc32Process", (void *)RTCrc32Process },
270 { "RTCrc32Start", (void *)RTCrc32Start },
271 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
272 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
273 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
274 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
275 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
276 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
277 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
278 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
279 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
280 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
281 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
282 { "RTLogPrintfV", (void *)RTLogPrintfV },
283 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
284 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
285 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
286 { "RTMemAllocTag", (void *)RTMemAllocTag },
287 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
288 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
289 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
290 { "RTMemDupExTag", (void *)RTMemDupExTag },
291 { "RTMemDupTag", (void *)RTMemDupTag },
292 { "RTMemFree", (void *)RTMemFree },
293 { "RTMemFreeEx", (void *)RTMemFreeEx },
294 { "RTMemReallocTag", (void *)RTMemReallocTag },
295 { "RTMpCpuId", (void *)RTMpCpuId },
296 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
297 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
298 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
299 { "RTMpGetCount", (void *)RTMpGetCount },
300 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
301 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
302 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
303 { "RTMpGetSet", (void *)RTMpGetSet },
304 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
305 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
306 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
307 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
308 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
309 { "RTMpOnAll", (void *)RTMpOnAll },
310 { "RTMpOnOthers", (void *)RTMpOnOthers },
311 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
312 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
313 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
314 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
315 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
316 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
317 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
318 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
319 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
320 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
321 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
322 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
323 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
324 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
325 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
326 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
327 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
328 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
329 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
330 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
331 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
332 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
333 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
334 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
335 { "RTProcSelf", (void *)RTProcSelf },
336 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
337 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
338 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
339 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
340 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
341 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
342 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
343 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
344 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
345 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
346 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
347 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
348 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
349 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
350 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
351 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
352 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
353 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
354 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
355 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
356 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
357 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
358 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
359 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
360 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
361 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
362 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
363 { "RTSemEventCreate", (void *)RTSemEventCreate },
364 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
365 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
366 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
367 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
368 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
369 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
370 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
371 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
372 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
373 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
374 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
375 { "RTSemEventSignal", (void *)RTSemEventSignal },
376 { "RTSemEventWait", (void *)RTSemEventWait },
377 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
378 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
379 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
380 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
381 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
382 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
383 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
384 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
385 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
386 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
387 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
388 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
389 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
390 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
391 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
392 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
393 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
394 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
395 { "RTStrCopy", (void *)RTStrCopy },
396 { "RTStrDupTag", (void *)RTStrDupTag },
397 { "RTStrFormat", (void *)RTStrFormat },
398 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
399 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
400 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
401 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
402 { "RTStrFormatV", (void *)RTStrFormatV },
403 { "RTStrFree", (void *)RTStrFree },
404 { "RTStrNCmp", (void *)RTStrNCmp },
405 { "RTStrPrintf", (void *)RTStrPrintf },
406 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
407 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
408 { "RTStrPrintfV", (void *)RTStrPrintfV },
409 { "RTThreadCreate", (void *)RTThreadCreate },
410 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
411 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
412 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
413 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
414 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
415 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
416 { "RTThreadGetName", (void *)RTThreadGetName },
417 { "RTThreadGetNative", (void *)RTThreadGetNative },
418 { "RTThreadGetType", (void *)RTThreadGetType },
419 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
420 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
421 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
422 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
423 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
424 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
425 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
426 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
427 { "RTThreadSelf", (void *)RTThreadSelf },
428 { "RTThreadSelfName", (void *)RTThreadSelfName },
429 { "RTThreadSleep", (void *)RTThreadSleep },
430 { "RTThreadUserReset", (void *)RTThreadUserReset },
431 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
432 { "RTThreadUserWait", (void *)RTThreadUserWait },
433 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
434 { "RTThreadWait", (void *)RTThreadWait },
435 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
436 { "RTThreadYield", (void *)RTThreadYield },
437 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
438 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
439 { "RTTimeNow", (void *)RTTimeNow },
440 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
441 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
442 { "RTTimerCreate", (void *)RTTimerCreate },
443 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
444 { "RTTimerDestroy", (void *)RTTimerDestroy },
445 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
446 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
447 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
448 { "RTTimerStart", (void *)RTTimerStart },
449 { "RTTimerStop", (void *)RTTimerStop },
450 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
451 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
452 { "RTUuidCompare", (void *)RTUuidCompare },
453 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
454 { "RTUuidFromStr", (void *)RTUuidFromStr },
455/* SED: END */
456};
457
458#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
459/**
460 * Drag in the rest of IPRT since we share it with the
461 * rest of the kernel modules (built on Darwin, Solaris and FreeBSD only).
 *
 * Each entry is tagged with the module it is kept for; the list is
 * NULL-terminated.
462 */
463PFNRT g_apfnVBoxDrvIPRTDeps[] =
464{
465 /* VBoxNetAdp */
466 (PFNRT)RTRandBytes,
467 /* VBoxUSB */
468 (PFNRT)RTPathStripFilename,
469 NULL
470};
471#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
472
473
474/**
475 * Initializes the device extentsion structure.
476 *
477 * @returns IPRT status code.
478 * @param pDevExt The device extension to initialize.
479 * @param cbSession The size of the session structure. The size of
480 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
481 * defined because we're skipping the OS specific members
482 * then.
483 */
484int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
485{
486 int rc;
487
488#ifdef SUPDRV_WITH_RELEASE_LOGGER
489 /*
490 * Create the release log.
491 */
492 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
493 PRTLOGGER pRelLogger;
494 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
495 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
496 if (RT_SUCCESS(rc))
497 RTLogRelSetDefaultInstance(pRelLogger);
498 /** @todo Add native hook for getting logger config parameters and setting
499 * them. On linux we should use the module parameter stuff... */
500#endif
501
502 /*
503 * Initialize it.
504 */
505 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
506 pDevExt->Spinlock = NIL_RTSPINLOCK;
507 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
508 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
509 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
510 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
511 if (RT_SUCCESS(rc))
512 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
513 if (RT_SUCCESS(rc))
514 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
515
516 if (RT_SUCCESS(rc))
517#ifdef SUPDRV_USE_MUTEX_FOR_LDR
518 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
519#else
520 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
521#endif
522 if (RT_SUCCESS(rc))
523 {
524 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
525 if (RT_SUCCESS(rc))
526 {
527#ifdef SUPDRV_USE_MUTEX_FOR_LDR
528 rc = RTSemMutexCreate(&pDevExt->mtxGip);
529#else
530 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
531#endif
532 if (RT_SUCCESS(rc))
533 {
534 rc = supdrvGipCreate(pDevExt);
535 if (RT_SUCCESS(rc))
536 {
537 rc = supdrvTracerInit(pDevExt);
538 if (RT_SUCCESS(rc))
539 {
540 pDevExt->pLdrInitImage = NULL;
541 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
542 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
543 pDevExt->cbSession = (uint32_t)cbSession;
544
545 /*
546 * Fixup the absolute symbols.
547 *
548 * Because of the table indexing assumptions we'll have a little #ifdef orgy
549 * here rather than distributing this to OS specific files. At least for now.
550 */
551#ifdef RT_OS_DARWIN
552# if ARCH_BITS == 32
553 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
554 {
555 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
556 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
557 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
558 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
559 }
560 else
561 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
562 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
563 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
564 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
565 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
566 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
567 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
568# else /* 64-bit darwin: */
569 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
570 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
571 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
572 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
573 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
574 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
575 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
576 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
577 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
578 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
579
580# endif
581#else /* !RT_OS_DARWIN */
582# if ARCH_BITS == 64
583 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
584 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
585 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
586 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
587# else
588 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
589# endif
590 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
591 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
592 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
593 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
594 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
595 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
596#endif /* !RT_OS_DARWIN */
597 return VINF_SUCCESS;
598 }
599
600 supdrvGipDestroy(pDevExt);
601 }
602
603#ifdef SUPDRV_USE_MUTEX_FOR_GIP
604 RTSemMutexDestroy(pDevExt->mtxGip);
605 pDevExt->mtxGip = NIL_RTSEMMUTEX;
606#else
607 RTSemFastMutexDestroy(pDevExt->mtxGip);
608 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
609#endif
610 }
611 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
612 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
613 }
614#ifdef SUPDRV_USE_MUTEX_FOR_LDR
615 RTSemMutexDestroy(pDevExt->mtxLdr);
616 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
617#else
618 RTSemFastMutexDestroy(pDevExt->mtxLdr);
619 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
620#endif
621 }
622
623 RTSpinlockDestroy(pDevExt->Spinlock);
624 pDevExt->Spinlock = NIL_RTSPINLOCK;
625 RTSpinlockDestroy(pDevExt->hGipSpinlock);
626 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
627 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
628 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
629
630#ifdef SUPDRV_WITH_RELEASE_LOGGER
631 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
632 RTLogDestroy(RTLogSetDefaultInstance(NULL));
633#endif
634
635 return rc;
636}
637
638
639/**
640 * Delete the device extension (e.g. cleanup members).
641 *
642 * @param pDevExt The device extension to delete.
643 */
644void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
645{
646 PSUPDRVOBJ pObj;
647 PSUPDRVUSAGE pUsage;
648
649 /*
650 * Kill mutexes and spinlocks.
651 */
652#ifdef SUPDRV_USE_MUTEX_FOR_GIP
653 RTSemMutexDestroy(pDevExt->mtxGip);
654 pDevExt->mtxGip = NIL_RTSEMMUTEX;
655#else
656 RTSemFastMutexDestroy(pDevExt->mtxGip);
657 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
658#endif
659#ifdef SUPDRV_USE_MUTEX_FOR_LDR
660 RTSemMutexDestroy(pDevExt->mtxLdr);
661 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
662#else
663 RTSemFastMutexDestroy(pDevExt->mtxLdr);
664 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
665#endif
666 RTSpinlockDestroy(pDevExt->Spinlock);
667 pDevExt->Spinlock = NIL_RTSPINLOCK;
668 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
669 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
670 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
671 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
672
673 /*
674 * Free lists.
675 */
676 /* objects. */
677 pObj = pDevExt->pObjs;
678 Assert(!pObj); /* (can trigger on forced unloads) */
679 pDevExt->pObjs = NULL;
680 while (pObj)
681 {
682 void *pvFree = pObj;
683 pObj = pObj->pNext;
684 RTMemFree(pvFree);
685 }
686
687 /* usage records. */
688 pUsage = pDevExt->pUsageFree;
689 pDevExt->pUsageFree = NULL;
690 while (pUsage)
691 {
692 void *pvFree = pUsage;
693 pUsage = pUsage->pNext;
694 RTMemFree(pvFree);
695 }
696
697 /* kill the GIP. */
698 supdrvGipDestroy(pDevExt);
699 RTSpinlockDestroy(pDevExt->hGipSpinlock);
700 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
701
702 supdrvTracerTerm(pDevExt);
703
704#ifdef SUPDRV_WITH_RELEASE_LOGGER
705 /* destroy the loggers. */
706 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
707 RTLogDestroy(RTLogSetDefaultInstance(NULL));
708#endif
709}
710
711
712/**
713 * Create session.
714 *
715 * @returns IPRT status code.
716 * @param pDevExt Device extension.
717 * @param fUser Flag indicating whether this is a user or kernel
718 * session.
719 * @param fUnrestricted Unrestricted access (system) or restricted access
720 * (user)?
721 * @param ppSession Where to store the pointer to the session data.
722 */
723int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
724{
725 int rc;
726 PSUPDRVSESSION pSession;
727
728 if (!SUP_IS_DEVEXT_VALID(pDevExt))
729 return VERR_INVALID_PARAMETER;
730
731 /*
732 * Allocate memory for the session data.
733 */
734 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
735 if (pSession)
736 {
737 /* Initialize session data. */
738 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
739 if (!rc)
740 {
741 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
742 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
743 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
744 if (RT_SUCCESS(rc))
745 {
746 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
747 pSession->pDevExt = pDevExt;
748 pSession->u32Cookie = BIRD_INV;
749 pSession->fUnrestricted = fUnrestricted;
750 /*pSession->fInHashTable = false; */
751 pSession->cRefs = 1;
752 /*pSession->pCommonNextHash = NULL;
753 pSession->ppOsSessionPtr = NULL; */
754 if (fUser)
755 {
756 pSession->Process = RTProcSelf();
757 pSession->R0Process = RTR0ProcHandleSelf();
758 }
759 else
760 {
761 pSession->Process = NIL_RTPROCESS;
762 pSession->R0Process = NIL_RTR0PROCESS;
763 }
764 /*pSession->pLdrUsage = NULL;
765 pSession->pVM = NULL;
766 pSession->pUsage = NULL;
767 pSession->pGip = NULL;
768 pSession->fGipReferenced = false;
769 pSession->Bundle.cUsed = 0; */
770 pSession->Uid = NIL_RTUID;
771 pSession->Gid = NIL_RTGID;
772 /*pSession->uTracerData = 0;*/
773 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
774 RTListInit(&pSession->TpProviders);
775 /*pSession->cTpProviders = 0;*/
776 /*pSession->cTpProbesFiring = 0;*/
777 RTListInit(&pSession->TpUmods);
778 /*RT_ZERO(pSession->apTpLookupTable);*/
779
780 VBOXDRV_SESSION_CREATE(pSession, fUser);
781 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
782 return VINF_SUCCESS;
783 }
784
785 RTSpinlockDestroy(pSession->Spinlock);
786 }
787 RTMemFree(pSession);
788 *ppSession = NULL;
789 Log(("Failed to create spinlock, rc=%d!\n", rc));
790 }
791 else
792 rc = VERR_NO_MEMORY;
793
794 return rc;
795}
796
797
798/**
799 * Cleans up the session in the context of the process to which it belongs, the
800 * caller will free the session and the session spinlock.
801 *
802 * This should normally occur when the session is closed or as the process
803 * exits. Careful reference counting in the OS specfic code makes sure that
804 * there cannot be any races between process/handle cleanup callbacks and
805 * threads doing I/O control calls.
806 *
807 * @param pDevExt The device extension.
808 * @param pSession Session data.
809 */
810static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
811{
812 int rc;
813 PSUPDRVBUNDLE pBundle;
814 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
815
816 Assert(!pSession->fInHashTable);
817 Assert(!pSession->ppOsSessionPtr);
818 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
819 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
820
821 /*
822 * Remove logger instances related to this session.
823 */
824 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
825
826 /*
827 * Destroy the handle table.
828 */
829 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
830 AssertRC(rc);
831 pSession->hHandleTable = NIL_RTHANDLETABLE;
832
833 /*
834 * Release object references made in this session.
835 * In theory there should be noone racing us in this session.
836 */
837 Log2(("release objects - start\n"));
838 if (pSession->pUsage)
839 {
840 PSUPDRVUSAGE pUsage;
841 RTSpinlockAcquire(pDevExt->Spinlock);
842
843 while ((pUsage = pSession->pUsage) != NULL)
844 {
845 PSUPDRVOBJ pObj = pUsage->pObj;
846 pSession->pUsage = pUsage->pNext;
847
848 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
849 if (pUsage->cUsage < pObj->cUsage)
850 {
851 pObj->cUsage -= pUsage->cUsage;
852 RTSpinlockRelease(pDevExt->Spinlock);
853 }
854 else
855 {
856 /* Destroy the object and free the record. */
857 if (pDevExt->pObjs == pObj)
858 pDevExt->pObjs = pObj->pNext;
859 else
860 {
861 PSUPDRVOBJ pObjPrev;
862 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
863 if (pObjPrev->pNext == pObj)
864 {
865 pObjPrev->pNext = pObj->pNext;
866 break;
867 }
868 Assert(pObjPrev);
869 }
870 RTSpinlockRelease(pDevExt->Spinlock);
871
872 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
873 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
874 if (pObj->pfnDestructor)
875 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
876 RTMemFree(pObj);
877 }
878
879 /* free it and continue. */
880 RTMemFree(pUsage);
881
882 RTSpinlockAcquire(pDevExt->Spinlock);
883 }
884
885 RTSpinlockRelease(pDevExt->Spinlock);
886 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
887 }
888 Log2(("release objects - done\n"));
889
890 /*
891 * Do tracer cleanups related to this session.
892 */
893 Log2(("release tracer stuff - start\n"));
894 supdrvTracerCleanupSession(pDevExt, pSession);
895 Log2(("release tracer stuff - end\n"));
896
897 /*
898 * Release memory allocated in the session.
899 *
900 * We do not serialize this as we assume that the application will
901 * not allocated memory while closing the file handle object.
902 */
903 Log2(("freeing memory:\n"));
904 pBundle = &pSession->Bundle;
905 while (pBundle)
906 {
907 PSUPDRVBUNDLE pToFree;
908 unsigned i;
909
910 /*
911 * Check and unlock all entries in the bundle.
912 */
913 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
914 {
915 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
916 {
917 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
918 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
919 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
920 {
921 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
922 AssertRC(rc); /** @todo figure out how to handle this. */
923 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
924 }
925 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
926 AssertRC(rc); /** @todo figure out how to handle this. */
927 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
928 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
929 }
930 }
931
932 /*
933 * Advance and free previous bundle.
934 */
935 pToFree = pBundle;
936 pBundle = pBundle->pNext;
937
938 pToFree->pNext = NULL;
939 pToFree->cUsed = 0;
940 if (pToFree != &pSession->Bundle)
941 RTMemFree(pToFree);
942 }
943 Log2(("freeing memory - done\n"));
944
945 /*
946 * Deregister component factories.
947 */
948 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
949 Log2(("deregistering component factories:\n"));
950 if (pDevExt->pComponentFactoryHead)
951 {
952 PSUPDRVFACTORYREG pPrev = NULL;
953 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
954 while (pCur)
955 {
956 if (pCur->pSession == pSession)
957 {
958 /* unlink it */
959 PSUPDRVFACTORYREG pNext = pCur->pNext;
960 if (pPrev)
961 pPrev->pNext = pNext;
962 else
963 pDevExt->pComponentFactoryHead = pNext;
964
965 /* free it */
966 pCur->pNext = NULL;
967 pCur->pSession = NULL;
968 pCur->pFactory = NULL;
969 RTMemFree(pCur);
970
971 /* next */
972 pCur = pNext;
973 }
974 else
975 {
976 /* next */
977 pPrev = pCur;
978 pCur = pCur->pNext;
979 }
980 }
981 }
982 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
983 Log2(("deregistering component factories - done\n"));
984
985 /*
986 * Loaded images needs to be dereferenced and possibly freed up.
987 */
988 supdrvLdrLock(pDevExt);
989 Log2(("freeing images:\n"));
990 if (pSession->pLdrUsage)
991 {
992 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
993 pSession->pLdrUsage = NULL;
994 while (pUsage)
995 {
996 void *pvFree = pUsage;
997 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
998 if (pImage->cUsage > pUsage->cUsage)
999 pImage->cUsage -= pUsage->cUsage;
1000 else
1001 supdrvLdrFree(pDevExt, pImage);
1002 pUsage->pImage = NULL;
1003 pUsage = pUsage->pNext;
1004 RTMemFree(pvFree);
1005 }
1006 }
1007 supdrvLdrUnlock(pDevExt);
1008 Log2(("freeing images - done\n"));
1009
1010 /*
1011 * Unmap the GIP.
1012 */
1013 Log2(("umapping GIP:\n"));
1014 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1015 {
1016 SUPR0GipUnmap(pSession);
1017 pSession->fGipReferenced = 0;
1018 }
1019 Log2(("umapping GIP - done\n"));
1020}
1021
1022
1023/**
1024 * Common code for freeing a session when the reference count reaches zero.
1025 *
1026 * @param pDevExt Device extension.
1027 * @param pSession Session data.
1028 * This data will be freed by this routine.
1029 */
1030static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1031{
1032 VBOXDRV_SESSION_CLOSE(pSession);
1033
1034 /*
1035 * Cleanup the session first.
1036 */
1037 supdrvCleanupSession(pDevExt, pSession);
1038 supdrvOSCleanupSession(pDevExt, pSession);
1039
1040 /*
1041 * Free the rest of the session stuff.
1042 */
1043 RTSpinlockDestroy(pSession->Spinlock);
1044 pSession->Spinlock = NIL_RTSPINLOCK;
1045 pSession->pDevExt = NULL;
1046 RTMemFree(pSession);
1047 LogFlow(("supdrvDestroySession: returns\n"));
1048}
1049
1050
1051/**
1052 * Inserts the session into the global hash table.
1053 *
1054 * @retval VINF_SUCCESS on success.
1055 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1056 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1057 * session (asserted).
1058 * @retval VERR_DUPLICATE if there is already a session for that pid.
1059 *
1060 * @param pDevExt The device extension.
1061 * @param pSession The session.
1062 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1063 * available and used. This will set to point to the
1064 * session while under the protection of the session
1065 * hash table spinlock. It will also be kept in
1066 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1067 * cleanup use.
1068 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1069 */
1070int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1071 void *pvUser)
1072{
1073 PSUPDRVSESSION pCur;
1074 unsigned iHash;
1075
1076 /*
1077 * Validate input.
1078 */
1079 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1080 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1081
1082 /*
1083 * Calculate the hash table index and acquire the spinlock.
1084 */
1085 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1086
1087 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1088
1089 /*
1090 * If there are a collisions, we need to carefully check if we got a
1091 * duplicate. There can only be one open session per process.
1092 */
1093 pCur = pDevExt->apSessionHashTab[iHash];
1094 if (pCur)
1095 {
1096 while (pCur && pCur->Process != pSession->Process)
1097 pCur = pCur->pCommonNextHash;
1098
1099 if (pCur)
1100 {
1101 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1102 if (pCur == pSession)
1103 {
1104 Assert(pSession->fInHashTable);
1105 AssertFailed();
1106 return VERR_WRONG_ORDER;
1107 }
1108 Assert(!pSession->fInHashTable);
1109 if (pCur->R0Process == pSession->R0Process)
1110 return VERR_RESOURCE_IN_USE;
1111 return VERR_DUPLICATE;
1112 }
1113 }
1114 Assert(!pSession->fInHashTable);
1115 Assert(!pSession->ppOsSessionPtr);
1116
1117 /*
1118 * Insert it, doing a callout to the OS specific code in case it has
1119 * anything it wishes to do while we're holding the spinlock.
1120 */
1121 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1122 pDevExt->apSessionHashTab[iHash] = pSession;
1123 pSession->fInHashTable = true;
1124 ASMAtomicIncS32(&pDevExt->cSessions);
1125
1126 pSession->ppOsSessionPtr = ppOsSessionPtr;
1127 if (ppOsSessionPtr)
1128 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1129
1130 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1131
1132 /*
1133 * Retain a reference for the pointer in the session table.
1134 */
1135 ASMAtomicIncU32(&pSession->cRefs);
1136
1137 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1138 return VINF_SUCCESS;
1139}
1140
1141
1142/**
1143 * Removes the session from the global hash table.
1144 *
1145 * @retval VINF_SUCCESS on success.
1146 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1147 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1148 * session (asserted).
1149 *
1150 * @param pDevExt The device extension.
1151 * @param pSession The session. The caller is expected to have a reference
1152 * to this so it won't croak on us when we release the hash
1153 * table reference.
1154 * @param pvUser OS specific context value for the
1155 * supdrvOSSessionHashTabInserted callback.
1156 */
1157int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1158{
1159 PSUPDRVSESSION pCur;
1160 unsigned iHash;
1161 int32_t cRefs;
1162
1163 /*
1164 * Validate input.
1165 */
1166 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1167 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1168
1169 /*
1170 * Calculate the hash table index and acquire the spinlock.
1171 */
1172 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1173
1174 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1175
1176 /*
1177 * Unlink it.
1178 */
1179 pCur = pDevExt->apSessionHashTab[iHash];
1180 if (pCur == pSession)
1181 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1182 else
1183 {
1184 PSUPDRVSESSION pPrev = pCur;
1185 while (pCur && pCur != pSession)
1186 {
1187 pPrev = pCur;
1188 pCur = pCur->pCommonNextHash;
1189 }
1190 if (pCur)
1191 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1192 else
1193 {
1194 Assert(!pSession->fInHashTable);
1195 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1196 return VERR_NOT_FOUND;
1197 }
1198 }
1199
1200 pSession->pCommonNextHash = NULL;
1201 pSession->fInHashTable = false;
1202
1203 ASMAtomicDecS32(&pDevExt->cSessions);
1204
1205 /*
1206 * Clear OS specific session pointer if available and do the OS callback.
1207 */
1208 if (pSession->ppOsSessionPtr)
1209 {
1210 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1211 pSession->ppOsSessionPtr = NULL;
1212 }
1213
1214 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1215
1216 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1217
1218 /*
1219 * Drop the reference the hash table had to the session. This shouldn't
1220 * be the last reference!
1221 */
1222 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1223 Assert(cRefs > 0 && cRefs < _1M);
1224 if (cRefs == 0)
1225 supdrvDestroySession(pDevExt, pSession);
1226
1227 return VINF_SUCCESS;
1228}
1229
1230
1231/**
1232 * Looks up the session for the current process in the global hash table or in
1233 * OS specific pointer.
1234 *
1235 * @returns Pointer to the session with a reference that the caller must
1236 * release. If no valid session was found, NULL is returned.
1237 *
1238 * @param pDevExt The device extension.
1239 * @param Process The process ID.
1240 * @param R0Process The ring-0 process handle.
1241 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1242 * this is used instead of the hash table. For
1243 * additional safety it must then be equal to the
1244 * SUPDRVSESSION::ppOsSessionPtr member.
1245 * This can be NULL even if the OS has a session
1246 * pointer.
1247 */
1248PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1249 PSUPDRVSESSION *ppOsSessionPtr)
1250{
1251 PSUPDRVSESSION pCur;
1252 unsigned iHash;
1253
1254 /*
1255 * Validate input.
1256 */
1257 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1258
1259 /*
1260 * Calculate the hash table index and acquire the spinlock.
1261 */
1262 iHash = SUPDRV_SESSION_HASH(Process);
1263
1264 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1265
1266 /*
1267 * If an OS session pointer is provided, always use it.
1268 */
1269 if (ppOsSessionPtr)
1270 {
1271 pCur = *ppOsSessionPtr;
1272 if ( pCur
1273 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1274 || pCur->Process != Process
1275 || pCur->R0Process != R0Process) )
1276 pCur = NULL;
1277 }
1278 else
1279 {
1280 /*
1281 * Otherwise, do the hash table lookup.
1282 */
1283 pCur = pDevExt->apSessionHashTab[iHash];
1284 while ( pCur
1285 && ( pCur->Process != Process
1286 || pCur->R0Process != R0Process) )
1287 pCur = pCur->pCommonNextHash;
1288 }
1289
1290 /*
1291 * Retain the session.
1292 */
1293 if (pCur)
1294 {
1295 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1296 NOREF(cRefs);
1297 Assert(cRefs > 1 && cRefs < _1M);
1298 }
1299
1300 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1301
1302 return pCur;
1303}
1304
1305
1306/**
1307 * Retain a session to make sure it doesn't go away while it is in use.
1308 *
1309 * @returns New reference count on success, UINT32_MAX on failure.
1310 * @param pSession Session data.
1311 */
1312uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1313{
1314 uint32_t cRefs;
1315 AssertPtrReturn(pSession, UINT32_MAX);
1316 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1317
1318 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1319 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1320 return cRefs;
1321}
1322
1323
1324/**
1325 * Releases a given session.
1326 *
1327 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1328 * @param pSession Session data.
1329 */
1330uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1331{
1332 uint32_t cRefs;
1333 AssertPtrReturn(pSession, UINT32_MAX);
1334 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1335
1336 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1337 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1338 if (cRefs == 0)
1339 supdrvDestroySession(pSession->pDevExt, pSession);
1340 return cRefs;
1341}
1342
1343
1344/**
1345 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1346 *
1347 * @returns IPRT status code, see SUPR0ObjAddRef.
1348 * @param hHandleTable The handle table handle. Ignored.
1349 * @param pvObj The object pointer.
1350 * @param pvCtx Context, the handle type. Ignored.
1351 * @param pvUser Session pointer.
1352 */
1353static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1354{
1355 NOREF(pvCtx);
1356 NOREF(hHandleTable);
1357 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1358}
1359
1360
1361/**
1362 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1363 *
1364 * @param hHandleTable The handle table handle. Ignored.
1365 * @param h The handle value. Ignored.
1366 * @param pvObj The object pointer.
1367 * @param pvCtx Context, the handle type. Ignored.
1368 * @param pvUser Session pointer.
1369 */
1370static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1371{
1372 NOREF(pvCtx);
1373 NOREF(h);
1374 NOREF(hHandleTable);
1375 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1376}
1377
1378
1379/**
1380 * Fast path I/O Control worker.
1381 *
1382 * @returns VBox status code that should be passed down to ring-3 unchanged.
1383 * @param uIOCtl Function number.
1384 * @param idCpu VMCPU id.
1385 * @param pDevExt Device extention.
1386 * @param pSession Session data.
1387 */
1388int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1389{
1390 /*
1391 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1392 */
1393 if (RT_LIKELY( RT_VALID_PTR(pSession)
1394 && pSession->pVM
1395 && pDevExt->pfnVMMR0EntryFast))
1396 {
1397 switch (uIOCtl)
1398 {
1399 case SUP_IOCTL_FAST_DO_RAW_RUN:
1400 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1401 break;
1402 case SUP_IOCTL_FAST_DO_HM_RUN:
1403 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1404 break;
1405 case SUP_IOCTL_FAST_DO_NOP:
1406 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1407 break;
1408 default:
1409 return VERR_INTERNAL_ERROR;
1410 }
1411 return VINF_SUCCESS;
1412 }
1413 return VERR_INTERNAL_ERROR;
1414}
1415
1416
/**
 * Helper for supdrvIOCtl.  Tells whether a string contains at least one
 * character out of a given set.
 *
 * This is a hand-rolled strpbrk replacement; strpbrk itself is not used
 * because it is not on the RedHat kABI white list, see
 * http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr contains any character of pszChars, 0 otherwise
 *          (including when either string is empty).
 * @param   pszStr      String to check.
 * @param   pszChars    Character set.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        const char *pszCandidate;
        for (pszCandidate = pszChars; *pszCandidate != '\0'; pszCandidate++)
            if (*pszCandidate == *pszCur)
                return 1;
    }
    return 0;
}
1440
1441
1442
1443/**
1444 * I/O Control inner worker (tracing reasons).
1445 *
1446 * @returns IPRT status code.
1447 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1448 *
1449 * @param uIOCtl Function number.
1450 * @param pDevExt Device extention.
1451 * @param pSession Session data.
1452 * @param pReqHdr The request header.
1453 */
1454static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1455{
1456 /*
1457 * Validation macros
1458 */
1459#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1460 do { \
1461 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1462 { \
1463 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1464 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1465 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1466 } \
1467 } while (0)
1468
1469#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1470
1471#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1472 do { \
1473 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1474 { \
1475 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1476 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1477 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1478 } \
1479 } while (0)
1480
1481#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1482 do { \
1483 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1484 { \
1485 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1486 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1487 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1488 } \
1489 } while (0)
1490
1491#define REQ_CHECK_EXPR(Name, expr) \
1492 do { \
1493 if (RT_UNLIKELY(!(expr))) \
1494 { \
1495 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1496 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1497 } \
1498 } while (0)
1499
1500#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1501 do { \
1502 if (RT_UNLIKELY(!(expr))) \
1503 { \
1504 OSDBGPRINT( fmt ); \
1505 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1506 } \
1507 } while (0)
1508
1509 /*
1510 * The switch.
1511 */
1512 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1513 {
1514 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1515 {
1516 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1517 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1518 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1519 {
1520 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1521 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1522 return 0;
1523 }
1524
1525#if 0
1526 /*
1527 * Call out to the OS specific code and let it do permission checks on the
1528 * client process.
1529 */
1530 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1531 {
1532 pReq->u.Out.u32Cookie = 0xffffffff;
1533 pReq->u.Out.u32SessionCookie = 0xffffffff;
1534 pReq->u.Out.u32SessionVersion = 0xffffffff;
1535 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1536 pReq->u.Out.pSession = NULL;
1537 pReq->u.Out.cFunctions = 0;
1538 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1539 return 0;
1540 }
1541#endif
1542
1543 /*
1544 * Match the version.
1545 * The current logic is very simple, match the major interface version.
1546 */
1547 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1548 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1549 {
1550 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1551 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1552 pReq->u.Out.u32Cookie = 0xffffffff;
1553 pReq->u.Out.u32SessionCookie = 0xffffffff;
1554 pReq->u.Out.u32SessionVersion = 0xffffffff;
1555 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1556 pReq->u.Out.pSession = NULL;
1557 pReq->u.Out.cFunctions = 0;
1558 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1559 return 0;
1560 }
1561
1562 /*
1563 * Fill in return data and be gone.
1564 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1565 * u32SessionVersion <= u32ReqVersion!
1566 */
1567 /** @todo Somehow validate the client and negotiate a secure cookie... */
1568 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1569 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1570 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1571 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1572 pReq->u.Out.pSession = pSession;
1573 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1574 pReq->Hdr.rc = VINF_SUCCESS;
1575 return 0;
1576 }
1577
1578 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1579 {
1580 /* validate */
1581 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1582 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1583
1584 /* execute */
1585 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1586 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1587 pReq->Hdr.rc = VINF_SUCCESS;
1588 return 0;
1589 }
1590
1591 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1592 {
1593 /* validate */
1594 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1595 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1596 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1597 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1598 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1599
1600 /* execute */
1601 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1602 if (RT_FAILURE(pReq->Hdr.rc))
1603 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1604 return 0;
1605 }
1606
1607 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1608 {
1609 /* validate */
1610 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1611 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1612
1613 /* execute */
1614 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1615 return 0;
1616 }
1617
1618 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1619 {
1620 /* validate */
1621 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1622 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1623
1624 /* execute */
1625 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1626 if (RT_FAILURE(pReq->Hdr.rc))
1627 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1628 return 0;
1629 }
1630
1631 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1632 {
1633 /* validate */
1634 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1635 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1636
1637 /* execute */
1638 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1639 return 0;
1640 }
1641
1642 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1643 {
1644 /* validate */
1645 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1646 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1647 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1648 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1649 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1650 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1651 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1652 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1653 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1654 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1655 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1656
1657 /* execute */
1658 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1659 return 0;
1660 }
1661
1662 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1663 {
1664 /* validate */
1665 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1666 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1667 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1668 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1669 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1670 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1671 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1672 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1673 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1674 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1675 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1676 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1677 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1678 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1679 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1680
1681 if (pReq->u.In.cSymbols)
1682 {
1683 uint32_t i;
1684 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1685 for (i = 0; i < pReq->u.In.cSymbols; i++)
1686 {
1687 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1688 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1689 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1690 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1691 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1692 pReq->u.In.cbStrTab - paSyms[i].offName),
1693 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1694 }
1695 }
1696
1697 /* execute */
1698 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1699 return 0;
1700 }
1701
1702 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1703 {
1704 /* validate */
1705 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1706 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1707
1708 /* execute */
1709 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1710 return 0;
1711 }
1712
1713 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOCK_DOWN):
1714 {
1715 /* validate */
1716 REQ_CHECK_SIZES(SUP_IOCTL_LDR_LOCK_DOWN);
1717
1718 /* execute */
1719 pReqHdr->rc = supdrvIOCtl_LdrLockDown(pDevExt);
1720 return 0;
1721 }
1722
1723 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1724 {
1725 /* validate */
1726 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1727 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1728 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1729
1730 /* execute */
1731 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1732 return 0;
1733 }
1734
1735 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1736 {
1737 /* validate */
1738 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1739 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1740 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1741
1742 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1743 {
1744 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1745
1746 /* execute */
1747 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1748 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1749 else
1750 pReq->Hdr.rc = VERR_WRONG_ORDER;
1751 }
1752 else
1753 {
1754 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1755 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1756 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1757 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1758 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1759
1760 /* execute */
1761 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1762 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1763 else
1764 pReq->Hdr.rc = VERR_WRONG_ORDER;
1765 }
1766
1767 if ( RT_FAILURE(pReq->Hdr.rc)
1768 && pReq->Hdr.rc != VERR_INTERRUPTED
1769 && pReq->Hdr.rc != VERR_TIMEOUT)
1770 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1771 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1772 else
1773 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1774 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1775 return 0;
1776 }
1777
1778 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1779 {
1780 /* validate */
1781 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1782 PSUPVMMR0REQHDR pVMMReq;
1783 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1784 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1785
1786 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1787 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1788 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1789 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1790 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1791
1792 /* execute */
1793 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1794 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1795 else
1796 pReq->Hdr.rc = VERR_WRONG_ORDER;
1797
1798 if ( RT_FAILURE(pReq->Hdr.rc)
1799 && pReq->Hdr.rc != VERR_INTERRUPTED
1800 && pReq->Hdr.rc != VERR_TIMEOUT)
1801 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1802 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1803 else
1804 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1805 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1806 return 0;
1807 }
1808
1809 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1810 {
1811 /* validate */
1812 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1813 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1814
1815 /* execute */
1816 pReq->Hdr.rc = VINF_SUCCESS;
1817 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1818 return 0;
1819 }
1820
1821 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1822 {
1823 /* validate */
1824 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1825 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1826 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1827
1828 /* execute */
1829 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1830 if (RT_FAILURE(pReq->Hdr.rc))
1831 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1832 return 0;
1833 }
1834
1835 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1836 {
1837 /* validate */
1838 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1839 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1840
1841 /* execute */
1842 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1843 return 0;
1844 }
1845
1846 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1847 {
1848 /* validate */
1849 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1850 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1851
1852 /* execute */
1853 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1854 if (RT_SUCCESS(pReq->Hdr.rc))
1855 pReq->u.Out.pGipR0 = pDevExt->pGip;
1856 return 0;
1857 }
1858
1859 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1860 {
1861 /* validate */
1862 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1863 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1864
1865 /* execute */
1866 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1867 return 0;
1868 }
1869
1870 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1871 {
1872 /* validate */
1873 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1874 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1875 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1876 || ( VALID_PTR(pReq->u.In.pVMR0)
1877 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1878 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1879 /* execute */
1880 pSession->pVM = pReq->u.In.pVMR0;
1881 pReq->Hdr.rc = VINF_SUCCESS;
1882 return 0;
1883 }
1884
1885 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1886 {
1887 /* validate */
1888 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1889 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1890 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1891 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1892 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1893 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1894 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1895 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1896 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1897
1898 /* execute */
1899 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1900 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1901 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1902 &pReq->u.Out.aPages[0]);
1903 if (RT_FAILURE(pReq->Hdr.rc))
1904 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1905 return 0;
1906 }
1907
1908 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1909 {
1910 /* validate */
1911 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1912 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1913 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1914 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1915 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1916 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1917
1918 /* execute */
1919 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1920 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1921 if (RT_FAILURE(pReq->Hdr.rc))
1922 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1923 return 0;
1924 }
1925
1926 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1927 {
1928 /* validate */
1929 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1930 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1931 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1932 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1933 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1934 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1935 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1936
1937 /* execute */
1938 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1939 return 0;
1940 }
1941
1942 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1943 {
1944 /* validate */
1945 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1946 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1947
1948 /* execute */
1949 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1950 return 0;
1951 }
1952
1953 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1954 {
1955 /* validate */
1956 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1957 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1958 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1959
1960 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1961 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1962 else
1963 {
1964 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1965 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1966 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1967 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1968 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1969 }
1970 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1971
1972 /* execute */
1973 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1974 return 0;
1975 }
1976
1977 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1978 {
1979 /* validate */
1980 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1981 size_t cbStrTab;
1982 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1983 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1984 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1985 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1986 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1987 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1988 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1989 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1990 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1991 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1992 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1993
1994 /* execute */
1995 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
1996 return 0;
1997 }
1998
1999 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2000 {
2001 /* validate */
2002 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2003 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2004 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2005
2006 /* execute */
2007 switch (pReq->u.In.uType)
2008 {
2009 case SUP_SEM_TYPE_EVENT:
2010 {
2011 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2012 switch (pReq->u.In.uOp)
2013 {
2014 case SUPSEMOP2_WAIT_MS_REL:
2015 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2016 break;
2017 case SUPSEMOP2_WAIT_NS_ABS:
2018 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2019 break;
2020 case SUPSEMOP2_WAIT_NS_REL:
2021 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2022 break;
2023 case SUPSEMOP2_SIGNAL:
2024 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2025 break;
2026 case SUPSEMOP2_CLOSE:
2027 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2028 break;
2029 case SUPSEMOP2_RESET:
2030 default:
2031 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2032 break;
2033 }
2034 break;
2035 }
2036
2037 case SUP_SEM_TYPE_EVENT_MULTI:
2038 {
2039 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2040 switch (pReq->u.In.uOp)
2041 {
2042 case SUPSEMOP2_WAIT_MS_REL:
2043 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2044 break;
2045 case SUPSEMOP2_WAIT_NS_ABS:
2046 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2047 break;
2048 case SUPSEMOP2_WAIT_NS_REL:
2049 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2050 break;
2051 case SUPSEMOP2_SIGNAL:
2052 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2053 break;
2054 case SUPSEMOP2_CLOSE:
2055 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2056 break;
2057 case SUPSEMOP2_RESET:
2058 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2059 break;
2060 default:
2061 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2062 break;
2063 }
2064 break;
2065 }
2066
2067 default:
2068 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2069 break;
2070 }
2071 return 0;
2072 }
2073
2074 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2075 {
2076 /* validate */
2077 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2078 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2079 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2080
2081 /* execute */
2082 switch (pReq->u.In.uType)
2083 {
2084 case SUP_SEM_TYPE_EVENT:
2085 {
2086 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2087 switch (pReq->u.In.uOp)
2088 {
2089 case SUPSEMOP3_CREATE:
2090 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2091 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2092 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2093 break;
2094 case SUPSEMOP3_GET_RESOLUTION:
2095 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2096 pReq->Hdr.rc = VINF_SUCCESS;
2097 pReq->Hdr.cbOut = sizeof(*pReq);
2098 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2099 break;
2100 default:
2101 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2102 break;
2103 }
2104 break;
2105 }
2106
2107 case SUP_SEM_TYPE_EVENT_MULTI:
2108 {
2109 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2110 switch (pReq->u.In.uOp)
2111 {
2112 case SUPSEMOP3_CREATE:
2113 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2114 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2115 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2116 break;
2117 case SUPSEMOP3_GET_RESOLUTION:
2118 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2119 pReq->Hdr.rc = VINF_SUCCESS;
2120 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2121 break;
2122 default:
2123 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2124 break;
2125 }
2126 break;
2127 }
2128
2129 default:
2130 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2131 break;
2132 }
2133 return 0;
2134 }
2135
2136 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2137 {
2138 /* validate */
2139 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2140 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2141
2142 /* execute */
2143 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2144 if (RT_FAILURE(pReq->Hdr.rc))
2145 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2146 return 0;
2147 }
2148
2149 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2150 {
2151 /* validate */
2152 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2153 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2154
2155 /* execute */
2156 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2157 return 0;
2158 }
2159
2160 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2161 {
2162 /* validate */
2163 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2164
2165 /* execute */
2166 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2167 return 0;
2168 }
2169
2170 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2171 {
2172 /* validate */
2173 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2174 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2175
2176 /* execute */
2177 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2178 return 0;
2179 }
2180
2181 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2182 {
2183 /* validate */
2184 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2185 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2186 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2187 return VERR_INVALID_PARAMETER;
2188
2189 /* execute */
2190 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2191 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2192 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2193 pReq->u.In.szName, pReq->u.In.fFlags);
2194 return 0;
2195 }
2196
2197 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2198 {
2199 /* validate */
2200 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2201 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2202
2203 /* execute */
2204 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2205 return 0;
2206 }
2207
2208 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2209 {
2210 /* validate */
2211 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2212 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2213
2214 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2215 pReqHdr->rc = VINF_SUCCESS;
2216 return 0;
2217 }
2218
2219 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2220 {
2221 /* validate */
2222 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2223 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2224 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2225 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2226
2227 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2228 return 0;
2229 }
2230
2231 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2232 {
2233 /* validate */
2234 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2235
2236 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2237 return 0;
2238 }
2239
2240 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2241 {
2242 /* validate */
2243 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2244 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2245
2246 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2247 return 0;
2248 }
2249
2250 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2251 {
2252 /* validate */
2253 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2254 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2255
2256 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2257 return 0;
2258 }
2259
2260 default:
2261 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2262 break;
2263 }
2264 return VERR_GENERAL_FAILURE;
2265}
2266
2267
2268/**
2269 * I/O Control inner worker for the restricted operations.
2270 *
2271 * @returns IPRT status code.
2272 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2273 *
2274 * @param uIOCtl Function number.
2275 * @param pDevExt Device extention.
2276 * @param pSession Session data.
2277 * @param pReqHdr The request header.
2278 */
2279static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2280{
2281 /*
2282 * The switch.
2283 */
2284 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2285 {
2286 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2287 {
2288 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2289 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2290 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2291 {
2292 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2293 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2294 return 0;
2295 }
2296
2297 /*
2298 * Match the version.
2299 * The current logic is very simple, match the major interface version.
2300 */
2301 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2302 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2303 {
2304 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2305 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2306 pReq->u.Out.u32Cookie = 0xffffffff;
2307 pReq->u.Out.u32SessionCookie = 0xffffffff;
2308 pReq->u.Out.u32SessionVersion = 0xffffffff;
2309 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2310 pReq->u.Out.pSession = NULL;
2311 pReq->u.Out.cFunctions = 0;
2312 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2313 return 0;
2314 }
2315
2316 /*
2317 * Fill in return data and be gone.
2318 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2319 * u32SessionVersion <= u32ReqVersion!
2320 */
2321 /** @todo Somehow validate the client and negotiate a secure cookie... */
2322 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2323 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2324 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2325 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2326 pReq->u.Out.pSession = pSession;
2327 pReq->u.Out.cFunctions = 0;
2328 pReq->Hdr.rc = VINF_SUCCESS;
2329 return 0;
2330 }
2331
2332 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2333 {
2334 /* validate */
2335 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2336 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2337
2338 /* execute */
2339 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2340 if (RT_FAILURE(pReq->Hdr.rc))
2341 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2342 return 0;
2343 }
2344
2345 default:
2346 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2347 break;
2348 }
2349 return VERR_GENERAL_FAILURE;
2350}
2351
2352
2353/**
2354 * I/O Control worker.
2355 *
2356 * @returns IPRT status code.
2357 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2358 *
2359 * @param uIOCtl Function number.
2360 * @param pDevExt Device extention.
2361 * @param pSession Session data.
2362 * @param pReqHdr The request header.
2363 */
2364int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2365{
2366 int rc;
2367 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2368
2369 /*
2370 * Validate the request.
2371 */
2372 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2373 {
2374 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2375 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2376 return VERR_INVALID_PARAMETER;
2377 }
2378 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2379 || pReqHdr->cbIn < sizeof(*pReqHdr)
2380 || pReqHdr->cbIn > cbReq
2381 || pReqHdr->cbOut < sizeof(*pReqHdr)
2382 || pReqHdr->cbOut > cbReq))
2383 {
2384 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2385 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2386 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2387 return VERR_INVALID_PARAMETER;
2388 }
2389 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2390 {
2391 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2392 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2393 return VERR_INVALID_PARAMETER;
2394 }
2395 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2396 {
2397 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2398 {
2399 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2400 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2401 return VERR_INVALID_PARAMETER;
2402 }
2403 }
2404 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2405 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2406 {
2407 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2408 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2409 return VERR_INVALID_PARAMETER;
2410 }
2411
2412 /*
2413 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2414 */
2415 if (pSession->fUnrestricted)
2416 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2417 else
2418 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2419
2420 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2421 return rc;
2422}
2423
2424
2425/**
2426 * Inter-Driver Communication (IDC) worker.
2427 *
2428 * @returns VBox status code.
2429 * @retval VINF_SUCCESS on success.
2430 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2431 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2432 *
2433 * @param uReq The request (function) code.
2434 * @param pDevExt Device extention.
2435 * @param pSession Session data.
2436 * @param pReqHdr The request header.
2437 */
2438int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2439{
2440 /*
2441 * The OS specific code has already validated the pSession
2442 * pointer, and the request size being greater or equal to
2443 * size of the header.
2444 *
2445 * So, just check that pSession is a kernel context session.
2446 */
2447 if (RT_UNLIKELY( pSession
2448 && pSession->R0Process != NIL_RTR0PROCESS))
2449 return VERR_INVALID_PARAMETER;
2450
2451/*
2452 * Validation macro.
2453 */
2454#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2455 do { \
2456 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2457 { \
2458 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2459 (long)pReqHdr->cb, (long)(cbExpect))); \
2460 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2461 } \
2462 } while (0)
2463
2464 switch (uReq)
2465 {
2466 case SUPDRV_IDC_REQ_CONNECT:
2467 {
2468 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2469 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2470
2471 /*
2472 * Validate the cookie and other input.
2473 */
2474 if (pReq->Hdr.pSession != NULL)
2475 {
2476 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2477 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2478 }
2479 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2480 {
2481 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2482 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2483 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2484 }
2485 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2486 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2487 {
2488 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2489 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2490 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2491 }
2492 if (pSession != NULL)
2493 {
2494 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2495 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2496 }
2497
2498 /*
2499 * Match the version.
2500 * The current logic is very simple, match the major interface version.
2501 */
2502 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2503 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2504 {
2505 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2506 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2507 pReq->u.Out.pSession = NULL;
2508 pReq->u.Out.uSessionVersion = 0xffffffff;
2509 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2510 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2511 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2512 return VINF_SUCCESS;
2513 }
2514
2515 pReq->u.Out.pSession = NULL;
2516 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2517 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2518 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2519
2520 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2521 if (RT_FAILURE(pReq->Hdr.rc))
2522 {
2523 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2524 return VINF_SUCCESS;
2525 }
2526
2527 pReq->u.Out.pSession = pSession;
2528 pReq->Hdr.pSession = pSession;
2529
2530 return VINF_SUCCESS;
2531 }
2532
2533 case SUPDRV_IDC_REQ_DISCONNECT:
2534 {
2535 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2536
2537 supdrvSessionRelease(pSession);
2538 return pReqHdr->rc = VINF_SUCCESS;
2539 }
2540
2541 case SUPDRV_IDC_REQ_GET_SYMBOL:
2542 {
2543 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2544 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2545
2546 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2547 return VINF_SUCCESS;
2548 }
2549
2550 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2551 {
2552 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2553 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2554
2555 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2556 return VINF_SUCCESS;
2557 }
2558
2559 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2560 {
2561 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2562 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2563
2564 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2565 return VINF_SUCCESS;
2566 }
2567
2568 default:
2569 Log(("Unknown IDC %#lx\n", (long)uReq));
2570 break;
2571 }
2572
2573#undef REQ_CHECK_IDC_SIZE
2574 return VERR_NOT_SUPPORTED;
2575}
2576
2577
2578/**
2579 * Register a object for reference counting.
2580 * The object is registered with one reference in the specified session.
2581 *
2582 * @returns Unique identifier on success (pointer).
2583 * All future reference must use this identifier.
2584 * @returns NULL on failure.
2585 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2586 * @param pvUser1 The first user argument.
2587 * @param pvUser2 The second user argument.
2588 */
2589SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2590{
2591 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2592 PSUPDRVOBJ pObj;
2593 PSUPDRVUSAGE pUsage;
2594
2595 /*
2596 * Validate the input.
2597 */
2598 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2599 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2600 AssertPtrReturn(pfnDestructor, NULL);
2601
2602 /*
2603 * Allocate and initialize the object.
2604 */
2605 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2606 if (!pObj)
2607 return NULL;
2608 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2609 pObj->enmType = enmType;
2610 pObj->pNext = NULL;
2611 pObj->cUsage = 1;
2612 pObj->pfnDestructor = pfnDestructor;
2613 pObj->pvUser1 = pvUser1;
2614 pObj->pvUser2 = pvUser2;
2615 pObj->CreatorUid = pSession->Uid;
2616 pObj->CreatorGid = pSession->Gid;
2617 pObj->CreatorProcess= pSession->Process;
2618 supdrvOSObjInitCreator(pObj, pSession);
2619
2620 /*
2621 * Allocate the usage record.
2622 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2623 */
2624 RTSpinlockAcquire(pDevExt->Spinlock);
2625
2626 pUsage = pDevExt->pUsageFree;
2627 if (pUsage)
2628 pDevExt->pUsageFree = pUsage->pNext;
2629 else
2630 {
2631 RTSpinlockRelease(pDevExt->Spinlock);
2632 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2633 if (!pUsage)
2634 {
2635 RTMemFree(pObj);
2636 return NULL;
2637 }
2638 RTSpinlockAcquire(pDevExt->Spinlock);
2639 }
2640
2641 /*
2642 * Insert the object and create the session usage record.
2643 */
2644 /* The object. */
2645 pObj->pNext = pDevExt->pObjs;
2646 pDevExt->pObjs = pObj;
2647
2648 /* The session record. */
2649 pUsage->cUsage = 1;
2650 pUsage->pObj = pObj;
2651 pUsage->pNext = pSession->pUsage;
2652 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2653 pSession->pUsage = pUsage;
2654
2655 RTSpinlockRelease(pDevExt->Spinlock);
2656
2657 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2658 return pObj;
2659}
2660
2661
2662/**
2663 * Increment the reference counter for the object associating the reference
2664 * with the specified session.
2665 *
2666 * @returns IPRT status code.
2667 * @param pvObj The identifier returned by SUPR0ObjRegister().
2668 * @param pSession The session which is referencing the object.
2669 *
2670 * @remarks The caller should not own any spinlocks and must carefully protect
2671 * itself against potential race with the destructor so freed memory
2672 * isn't accessed here.
2673 */
2674SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2675{
2676 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2677}
2678
2679
2680/**
2681 * Increment the reference counter for the object associating the reference
2682 * with the specified session.
2683 *
2684 * @returns IPRT status code.
2685 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2686 * couldn't be allocated. (If you see this you're not doing the right
2687 * thing and it won't ever work reliably.)
2688 *
2689 * @param pvObj The identifier returned by SUPR0ObjRegister().
2690 * @param pSession The session which is referencing the object.
2691 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2692 * first reference to an object in a session with this
2693 * argument set.
2694 *
2695 * @remarks The caller should not own any spinlocks and must carefully protect
2696 * itself against potential race with the destructor so freed memory
2697 * isn't accessed here.
2698 */
2699SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2700{
2701 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2702 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2703 int rc = VINF_SUCCESS;
2704 PSUPDRVUSAGE pUsagePre;
2705 PSUPDRVUSAGE pUsage;
2706
2707 /*
2708 * Validate the input.
2709 * Be ready for the destruction race (someone might be stuck in the
2710 * destructor waiting a lock we own).
2711 */
2712 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2713 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2714 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2715 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2716 VERR_INVALID_PARAMETER);
2717
2718 RTSpinlockAcquire(pDevExt->Spinlock);
2719
2720 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2721 {
2722 RTSpinlockRelease(pDevExt->Spinlock);
2723
2724 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2725 return VERR_WRONG_ORDER;
2726 }
2727
2728 /*
2729 * Preallocate the usage record if we can.
2730 */
2731 pUsagePre = pDevExt->pUsageFree;
2732 if (pUsagePre)
2733 pDevExt->pUsageFree = pUsagePre->pNext;
2734 else if (!fNoBlocking)
2735 {
2736 RTSpinlockRelease(pDevExt->Spinlock);
2737 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2738 if (!pUsagePre)
2739 return VERR_NO_MEMORY;
2740
2741 RTSpinlockAcquire(pDevExt->Spinlock);
2742 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2743 {
2744 RTSpinlockRelease(pDevExt->Spinlock);
2745
2746 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2747 return VERR_WRONG_ORDER;
2748 }
2749 }
2750
2751 /*
2752 * Reference the object.
2753 */
2754 pObj->cUsage++;
2755
2756 /*
2757 * Look for the session record.
2758 */
2759 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2760 {
2761 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2762 if (pUsage->pObj == pObj)
2763 break;
2764 }
2765 if (pUsage)
2766 pUsage->cUsage++;
2767 else if (pUsagePre)
2768 {
2769 /* create a new session record. */
2770 pUsagePre->cUsage = 1;
2771 pUsagePre->pObj = pObj;
2772 pUsagePre->pNext = pSession->pUsage;
2773 pSession->pUsage = pUsagePre;
2774 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2775
2776 pUsagePre = NULL;
2777 }
2778 else
2779 {
2780 pObj->cUsage--;
2781 rc = VERR_TRY_AGAIN;
2782 }
2783
2784 /*
2785 * Put any unused usage record into the free list..
2786 */
2787 if (pUsagePre)
2788 {
2789 pUsagePre->pNext = pDevExt->pUsageFree;
2790 pDevExt->pUsageFree = pUsagePre;
2791 }
2792
2793 RTSpinlockRelease(pDevExt->Spinlock);
2794
2795 return rc;
2796}
2797
2798
2799/**
2800 * Decrement / destroy a reference counter record for an object.
2801 *
2802 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2803 *
2804 * @returns IPRT status code.
2805 * @retval VINF_SUCCESS if not destroyed.
2806 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2807 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2808 * string builds.
2809 *
2810 * @param pvObj The identifier returned by SUPR0ObjRegister().
2811 * @param pSession The session which is referencing the object.
2812 */
2813SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2814{
2815 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2816 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2817 int rc = VERR_INVALID_PARAMETER;
2818 PSUPDRVUSAGE pUsage;
2819 PSUPDRVUSAGE pUsagePrev;
2820
2821 /*
2822 * Validate the input.
2823 */
2824 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2825 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2826 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2827 VERR_INVALID_PARAMETER);
2828
2829 /*
2830 * Acquire the spinlock and look for the usage record.
2831 */
2832 RTSpinlockAcquire(pDevExt->Spinlock);
2833
2834 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2835 pUsage;
2836 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2837 {
2838 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2839 if (pUsage->pObj == pObj)
2840 {
2841 rc = VINF_SUCCESS;
2842 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2843 if (pUsage->cUsage > 1)
2844 {
2845 pObj->cUsage--;
2846 pUsage->cUsage--;
2847 }
2848 else
2849 {
2850 /*
2851 * Free the session record.
2852 */
2853 if (pUsagePrev)
2854 pUsagePrev->pNext = pUsage->pNext;
2855 else
2856 pSession->pUsage = pUsage->pNext;
2857 pUsage->pNext = pDevExt->pUsageFree;
2858 pDevExt->pUsageFree = pUsage;
2859
2860 /* What about the object? */
2861 if (pObj->cUsage > 1)
2862 pObj->cUsage--;
2863 else
2864 {
2865 /*
2866 * Object is to be destroyed, unlink it.
2867 */
2868 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2869 rc = VINF_OBJECT_DESTROYED;
2870 if (pDevExt->pObjs == pObj)
2871 pDevExt->pObjs = pObj->pNext;
2872 else
2873 {
2874 PSUPDRVOBJ pObjPrev;
2875 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2876 if (pObjPrev->pNext == pObj)
2877 {
2878 pObjPrev->pNext = pObj->pNext;
2879 break;
2880 }
2881 Assert(pObjPrev);
2882 }
2883 }
2884 }
2885 break;
2886 }
2887 }
2888
2889 RTSpinlockRelease(pDevExt->Spinlock);
2890
2891 /*
2892 * Call the destructor and free the object if required.
2893 */
2894 if (rc == VINF_OBJECT_DESTROYED)
2895 {
2896 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2897 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2898 if (pObj->pfnDestructor)
2899 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2900 RTMemFree(pObj);
2901 }
2902
2903 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2904 return rc;
2905}
2906
2907
2908/**
2909 * Verifies that the current process can access the specified object.
2910 *
2911 * @returns The following IPRT status code:
2912 * @retval VINF_SUCCESS if access was granted.
2913 * @retval VERR_PERMISSION_DENIED if denied access.
2914 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2915 *
2916 * @param pvObj The identifier returned by SUPR0ObjRegister().
2917 * @param pSession The session which wishes to access the object.
2918 * @param pszObjName Object string name. This is optional and depends on the object type.
2919 *
2920 * @remark The caller is responsible for making sure the object isn't removed while
2921 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2922 */
2923SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2924{
2925 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2926 int rc;
2927
2928 /*
2929 * Validate the input.
2930 */
2931 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2932 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2933 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2934 VERR_INVALID_PARAMETER);
2935
2936 /*
2937 * Check access. (returns true if a decision has been made.)
2938 */
2939 rc = VERR_INTERNAL_ERROR;
2940 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2941 return rc;
2942
2943 /*
2944 * Default policy is to allow the user to access his own
2945 * stuff but nothing else.
2946 */
2947 if (pObj->CreatorUid == pSession->Uid)
2948 return VINF_SUCCESS;
2949 return VERR_PERMISSION_DENIED;
2950}
2951
2952
2953/**
2954 * Lock pages.
2955 *
2956 * @returns IPRT status code.
2957 * @param pSession Session to which the locked memory should be associated.
2958 * @param pvR3 Start of the memory range to lock.
2959 * This must be page aligned.
2960 * @param cPages Number of pages to lock.
2961 * @param paPages Where to put the physical addresses of locked memory.
2962 */
2963SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2964{
2965 int rc;
2966 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2967 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2968 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2969
2970 /*
2971 * Verify input.
2972 */
2973 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2974 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2975 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2976 || !pvR3)
2977 {
2978 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2979 return VERR_INVALID_PARAMETER;
2980 }
2981
2982 /*
2983 * Let IPRT do the job.
2984 */
2985 Mem.eType = MEMREF_TYPE_LOCKED;
2986 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2987 if (RT_SUCCESS(rc))
2988 {
2989 uint32_t iPage = cPages;
2990 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2991 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2992
2993 while (iPage-- > 0)
2994 {
2995 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2996 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2997 {
2998 AssertMsgFailed(("iPage=%d\n", iPage));
2999 rc = VERR_INTERNAL_ERROR;
3000 break;
3001 }
3002 }
3003 if (RT_SUCCESS(rc))
3004 rc = supdrvMemAdd(&Mem, pSession);
3005 if (RT_FAILURE(rc))
3006 {
3007 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3008 AssertRC(rc2);
3009 }
3010 }
3011
3012 return rc;
3013}
3014
3015
3016/**
3017 * Unlocks the memory pointed to by pv.
3018 *
3019 * @returns IPRT status code.
3020 * @param pSession Session to which the memory was locked.
3021 * @param pvR3 Memory to unlock.
3022 */
3023SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3024{
3025 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3026 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3027 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3028}
3029
3030
3031/**
3032 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3033 * backing.
3034 *
3035 * @returns IPRT status code.
3036 * @param pSession Session data.
3037 * @param cPages Number of pages to allocate.
3038 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3039 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3040 * @param pHCPhys Where to put the physical address of allocated memory.
3041 */
3042SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3043{
3044 int rc;
3045 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3046 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3047
3048 /*
3049 * Validate input.
3050 */
3051 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3052 if (!ppvR3 || !ppvR0 || !pHCPhys)
3053 {
3054 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3055 pSession, ppvR0, ppvR3, pHCPhys));
3056 return VERR_INVALID_PARAMETER;
3057
3058 }
3059 if (cPages < 1 || cPages >= 256)
3060 {
3061 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3062 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3063 }
3064
3065 /*
3066 * Let IPRT do the job.
3067 */
3068 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3069 if (RT_SUCCESS(rc))
3070 {
3071 int rc2;
3072 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3073 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3074 if (RT_SUCCESS(rc))
3075 {
3076 Mem.eType = MEMREF_TYPE_CONT;
3077 rc = supdrvMemAdd(&Mem, pSession);
3078 if (!rc)
3079 {
3080 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3081 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3082 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3083 return 0;
3084 }
3085
3086 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3087 AssertRC(rc2);
3088 }
3089 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3090 AssertRC(rc2);
3091 }
3092
3093 return rc;
3094}
3095
3096
3097/**
3098 * Frees memory allocated using SUPR0ContAlloc().
3099 *
3100 * @returns IPRT status code.
3101 * @param pSession The session to which the memory was allocated.
3102 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3103 */
3104SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3105{
3106 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3107 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3108 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3109}
3110
3111
3112/**
3113 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3114 *
3115 * The memory isn't zeroed.
3116 *
3117 * @returns IPRT status code.
3118 * @param pSession Session data.
3119 * @param cPages Number of pages to allocate.
3120 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3121 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3122 * @param paPages Where to put the physical addresses of allocated memory.
3123 */
3124SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3125{
3126 unsigned iPage;
3127 int rc;
3128 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3129 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3130
3131 /*
3132 * Validate input.
3133 */
3134 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3135 if (!ppvR3 || !ppvR0 || !paPages)
3136 {
3137 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3138 pSession, ppvR3, ppvR0, paPages));
3139 return VERR_INVALID_PARAMETER;
3140
3141 }
3142 if (cPages < 1 || cPages >= 256)
3143 {
3144 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3145 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3146 }
3147
3148 /*
3149 * Let IPRT do the work.
3150 */
3151 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3152 if (RT_SUCCESS(rc))
3153 {
3154 int rc2;
3155 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3156 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3157 if (RT_SUCCESS(rc))
3158 {
3159 Mem.eType = MEMREF_TYPE_LOW;
3160 rc = supdrvMemAdd(&Mem, pSession);
3161 if (!rc)
3162 {
3163 for (iPage = 0; iPage < cPages; iPage++)
3164 {
3165 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3166 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3167 }
3168 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3169 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3170 return 0;
3171 }
3172
3173 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3174 AssertRC(rc2);
3175 }
3176
3177 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3178 AssertRC(rc2);
3179 }
3180
3181 return rc;
3182}
3183
3184
3185/**
3186 * Frees memory allocated using SUPR0LowAlloc().
3187 *
3188 * @returns IPRT status code.
3189 * @param pSession The session to which the memory was allocated.
3190 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3191 */
3192SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3193{
3194 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3195 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3196 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3197}
3198
3199
3200
3201/**
3202 * Allocates a chunk of memory with both R0 and R3 mappings.
3203 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3204 *
3205 * @returns IPRT status code.
3206 * @param pSession The session to associated the allocation with.
3207 * @param cb Number of bytes to allocate.
3208 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3209 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3210 */
3211SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3212{
3213 int rc;
3214 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3215 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3216
3217 /*
3218 * Validate input.
3219 */
3220 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3221 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3222 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3223 if (cb < 1 || cb >= _4M)
3224 {
3225 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3226 return VERR_INVALID_PARAMETER;
3227 }
3228
3229 /*
3230 * Let IPRT do the work.
3231 */
3232 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3233 if (RT_SUCCESS(rc))
3234 {
3235 int rc2;
3236 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3237 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3238 if (RT_SUCCESS(rc))
3239 {
3240 Mem.eType = MEMREF_TYPE_MEM;
3241 rc = supdrvMemAdd(&Mem, pSession);
3242 if (!rc)
3243 {
3244 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3245 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3246 return VINF_SUCCESS;
3247 }
3248
3249 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3250 AssertRC(rc2);
3251 }
3252
3253 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3254 AssertRC(rc2);
3255 }
3256
3257 return rc;
3258}
3259
3260
3261/**
3262 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3263 *
3264 * @returns IPRT status code.
3265 * @param pSession The session to which the memory was allocated.
3266 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3267 * @param paPages Where to store the physical addresses.
3268 */
3269SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3270{
3271 PSUPDRVBUNDLE pBundle;
3272 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3273
3274 /*
3275 * Validate input.
3276 */
3277 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3278 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3279 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3280
3281 /*
3282 * Search for the address.
3283 */
3284 RTSpinlockAcquire(pSession->Spinlock);
3285 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3286 {
3287 if (pBundle->cUsed > 0)
3288 {
3289 unsigned i;
3290 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3291 {
3292 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3293 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3294 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3295 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3296 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3297 )
3298 )
3299 {
3300 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3301 size_t iPage;
3302 for (iPage = 0; iPage < cPages; iPage++)
3303 {
3304 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3305 paPages[iPage].uReserved = 0;
3306 }
3307 RTSpinlockRelease(pSession->Spinlock);
3308 return VINF_SUCCESS;
3309 }
3310 }
3311 }
3312 }
3313 RTSpinlockRelease(pSession->Spinlock);
3314 Log(("Failed to find %p!!!\n", (void *)uPtr));
3315 return VERR_INVALID_PARAMETER;
3316}
3317
3318
3319/**
3320 * Free memory allocated by SUPR0MemAlloc().
3321 *
3322 * @returns IPRT status code.
3323 * @param pSession The session owning the allocation.
3324 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3325 */
3326SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3327{
3328 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3329 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3330 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3331}
3332
3333
3334/**
3335 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3336 *
3337 * The memory is fixed and it's possible to query the physical addresses using
3338 * SUPR0MemGetPhys().
3339 *
3340 * @returns IPRT status code.
3341 * @param pSession The session to associated the allocation with.
3342 * @param cPages The number of pages to allocate.
3343 * @param fFlags Flags, reserved for the future. Must be zero.
3344 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3345 * NULL if no ring-3 mapping.
3346 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3347 * NULL if no ring-0 mapping.
3348 * @param paPages Where to store the addresses of the pages. Optional.
3349 */
3350SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3351{
3352 int rc;
3353 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3354 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3355
3356 /*
3357 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3358 */
3359 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3360 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3361 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3362 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3363 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3364 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3365 {
3366 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3367 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3368 }
3369
3370 /*
3371 * Let IPRT do the work.
3372 */
3373 if (ppvR0)
3374 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3375 else
3376 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3377 if (RT_SUCCESS(rc))
3378 {
3379 int rc2;
3380 if (ppvR3)
3381 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3382 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3383 else
3384 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3385 if (RT_SUCCESS(rc))
3386 {
3387 Mem.eType = MEMREF_TYPE_PAGE;
3388 rc = supdrvMemAdd(&Mem, pSession);
3389 if (!rc)
3390 {
3391 if (ppvR3)
3392 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3393 if (ppvR0)
3394 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3395 if (paPages)
3396 {
3397 uint32_t iPage = cPages;
3398 while (iPage-- > 0)
3399 {
3400 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3401 Assert(paPages[iPage] != NIL_RTHCPHYS);
3402 }
3403 }
3404 return VINF_SUCCESS;
3405 }
3406
3407 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3408 AssertRC(rc2);
3409 }
3410
3411 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3412 AssertRC(rc2);
3413 }
3414 return rc;
3415}
3416
3417
3418/**
3419 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3420 * space.
3421 *
3422 * @returns IPRT status code.
3423 * @param pSession The session to associated the allocation with.
3424 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3425 * @param offSub Where to start mapping. Must be page aligned.
3426 * @param cbSub How much to map. Must be page aligned.
3427 * @param fFlags Flags, MBZ.
3428 * @param ppvR0 Where to return the address of the ring-0 mapping on
3429 * success.
3430 */
3431SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3432 uint32_t fFlags, PRTR0PTR ppvR0)
3433{
3434 int rc;
3435 PSUPDRVBUNDLE pBundle;
3436 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3437 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3438
3439 /*
3440 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3441 */
3442 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3443 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3444 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3445 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3446 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3447 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3448
3449 /*
3450 * Find the memory object.
3451 */
3452 RTSpinlockAcquire(pSession->Spinlock);
3453 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3454 {
3455 if (pBundle->cUsed > 0)
3456 {
3457 unsigned i;
3458 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3459 {
3460 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3461 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3462 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3463 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3464 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3465 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3466 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3467 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3468 {
3469 hMemObj = pBundle->aMem[i].MemObj;
3470 break;
3471 }
3472 }
3473 }
3474 }
3475 RTSpinlockRelease(pSession->Spinlock);
3476
3477 rc = VERR_INVALID_PARAMETER;
3478 if (hMemObj != NIL_RTR0MEMOBJ)
3479 {
3480 /*
3481 * Do some further input validations before calling IPRT.
3482 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3483 */
3484 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3485 if ( offSub < cbMemObj
3486 && cbSub <= cbMemObj
3487 && offSub + cbSub <= cbMemObj)
3488 {
3489 RTR0MEMOBJ hMapObj;
3490 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3491 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3492 if (RT_SUCCESS(rc))
3493 *ppvR0 = RTR0MemObjAddress(hMapObj);
3494 }
3495 else
3496 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3497
3498 }
3499 return rc;
3500}
3501
3502
3503/**
3504 * Changes the page level protection of one or more pages previously allocated
3505 * by SUPR0PageAllocEx.
3506 *
3507 * @returns IPRT status code.
3508 * @param pSession The session to associated the allocation with.
3509 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3510 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3511 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3512 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3513 * @param offSub Where to start changing. Must be page aligned.
3514 * @param cbSub How much to change. Must be page aligned.
3515 * @param fProt The new page level protection, see RTMEM_PROT_*.
3516 */
3517SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3518{
3519 int rc;
3520 PSUPDRVBUNDLE pBundle;
3521 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3522 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3523 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3524
3525 /*
3526 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3527 */
3528 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3529 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3530 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3531 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3532 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3533
3534 /*
3535 * Find the memory object.
3536 */
3537 RTSpinlockAcquire(pSession->Spinlock);
3538 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3539 {
3540 if (pBundle->cUsed > 0)
3541 {
3542 unsigned i;
3543 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3544 {
3545 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3546 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3547 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3548 || pvR3 == NIL_RTR3PTR)
3549 && ( pvR0 == NIL_RTR0PTR
3550 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3551 && ( pvR3 == NIL_RTR3PTR
3552 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3553 {
3554 if (pvR0 != NIL_RTR0PTR)
3555 hMemObjR0 = pBundle->aMem[i].MemObj;
3556 if (pvR3 != NIL_RTR3PTR)
3557 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3558 break;
3559 }
3560 }
3561 }
3562 }
3563 RTSpinlockRelease(pSession->Spinlock);
3564
3565 rc = VERR_INVALID_PARAMETER;
3566 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3567 || hMemObjR3 != NIL_RTR0MEMOBJ)
3568 {
3569 /*
3570 * Do some further input validations before calling IPRT.
3571 */
3572 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3573 if ( offSub < cbMemObj
3574 && cbSub <= cbMemObj
3575 && offSub + cbSub <= cbMemObj)
3576 {
3577 rc = VINF_SUCCESS;
3578 if (hMemObjR3 != NIL_RTR0PTR)
3579 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3580 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3581 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3582 }
3583 else
3584 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3585
3586 }
3587 return rc;
3588
3589}
3590
3591
3592/**
3593 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3594 *
3595 * @returns IPRT status code.
3596 * @param pSession The session owning the allocation.
3597 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3598 * SUPR0PageAllocEx().
3599 */
3600SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3601{
3602 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3603 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3604 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3605}
3606
3607
3608/**
3609 * Gets the paging mode of the current CPU.
3610 *
3611 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3612 */
3613SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3614{
3615 SUPPAGINGMODE enmMode;
3616
3617 RTR0UINTREG cr0 = ASMGetCR0();
3618 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3619 enmMode = SUPPAGINGMODE_INVALID;
3620 else
3621 {
3622 RTR0UINTREG cr4 = ASMGetCR4();
3623 uint32_t fNXEPlusLMA = 0;
3624 if (cr4 & X86_CR4_PAE)
3625 {
3626 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3627 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3628 {
3629 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3630 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3631 fNXEPlusLMA |= RT_BIT(0);
3632 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3633 fNXEPlusLMA |= RT_BIT(1);
3634 }
3635 }
3636
3637 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3638 {
3639 case 0:
3640 enmMode = SUPPAGINGMODE_32_BIT;
3641 break;
3642
3643 case X86_CR4_PGE:
3644 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3645 break;
3646
3647 case X86_CR4_PAE:
3648 enmMode = SUPPAGINGMODE_PAE;
3649 break;
3650
3651 case X86_CR4_PAE | RT_BIT(0):
3652 enmMode = SUPPAGINGMODE_PAE_NX;
3653 break;
3654
3655 case X86_CR4_PAE | X86_CR4_PGE:
3656 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3657 break;
3658
3659 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3660 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3661 break;
3662
3663 case RT_BIT(1) | X86_CR4_PAE:
3664 enmMode = SUPPAGINGMODE_AMD64;
3665 break;
3666
3667 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3668 enmMode = SUPPAGINGMODE_AMD64_NX;
3669 break;
3670
3671 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3672 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3673 break;
3674
3675 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3676 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3677 break;
3678
3679 default:
3680 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3681 enmMode = SUPPAGINGMODE_INVALID;
3682 break;
3683 }
3684 }
3685 return enmMode;
3686}
3687
3688
3689/**
3690 * Enables or disabled hardware virtualization extensions using native OS APIs.
3691 *
3692 * @returns VBox status code.
3693 * @retval VINF_SUCCESS on success.
3694 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3695 *
3696 * @param fEnable Whether to enable or disable.
3697 */
3698SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3699{
3700#ifdef RT_OS_DARWIN
3701 return supdrvOSEnableVTx(fEnable);
3702#else
3703 return VERR_NOT_SUPPORTED;
3704#endif
3705}
3706
3707
3708/**
3709 * Suspends hardware virtualization extensions using the native OS API.
3710 *
3711 * This is called prior to entering raw-mode context.
3712 *
3713 * @returns @c true if suspended, @c false if not.
3714 */
3715SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3716{
3717#ifdef RT_OS_DARWIN
3718 return supdrvOSSuspendVTxOnCpu();
3719#else
3720 return false;
3721#endif
3722}
3723
3724
3725/**
3726 * Resumes hardware virtualization extensions using the native OS API.
3727 *
3728 * This is called after to entering raw-mode context.
3729 *
3730 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3731 */
3732SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3733{
3734#ifdef RT_OS_DARWIN
3735 supdrvOSResumeVTxOnCpu(fSuspended);
3736#else
3737 Assert(!fSuspended);
3738#endif
3739}
3740
3741
3742/**
3743 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3744 *
3745 * @returns VBox status code.
3746 * @retval VERR_VMX_NO_VMX
3747 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3748 * @retval VERR_VMX_MSR_VMXON_DISABLED
3749 * @retval VERR_VMX_MSR_LOCKING_FAILED
3750 * @retval VERR_SVM_NO_SVM
3751 * @retval VERR_SVM_DISABLED
3752 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3753 * (centaur) CPU.
3754 *
3755 * @param pSession The session handle.
3756 * @param pfCaps Where to store the capabilities.
3757 */
3758SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3759{
3760 int rc = VERR_UNSUPPORTED_CPU;
3761 bool fIsSmxModeAmbiguous = false;
3762 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3763
3764 /*
3765 * Input validation.
3766 */
3767 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3768 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3769
3770 *pfCaps = 0;
3771 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3772 RTThreadPreemptDisable(&PreemptState);
3773 if (ASMHasCpuId())
3774 {
3775 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3776 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3777
3778 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3779 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3780
3781 if ( ASMIsValidStdRange(uMaxId)
3782 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3783 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3784 )
3785 {
3786 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3787 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3788 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3789 )
3790 {
3791 /** @todo Unify code with hmR0InitIntelCpu(). */
3792 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3793 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3794 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3795 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3796 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3797
3798 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3799 if (fMsrLocked)
3800 {
3801 if (fVmxAllowed && fSmxVmxAllowed)
3802 rc = VINF_SUCCESS;
3803 else if (!fVmxAllowed && !fSmxVmxAllowed)
3804 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3805 else if (!fMaybeSmxMode)
3806 {
3807 if (fVmxAllowed)
3808 rc = VINF_SUCCESS;
3809 else
3810 rc = VERR_VMX_MSR_VMXON_DISABLED;
3811 }
3812 else
3813 {
3814 /*
3815 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3816 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3817 * See @bugref{6873}.
3818 */
3819 Assert(fMaybeSmxMode == true);
3820 fIsSmxModeAmbiguous = true;
3821 rc = VINF_SUCCESS;
3822 }
3823 }
3824 else
3825 {
3826 /*
3827 * MSR is not yet locked; we can change it ourselves here.
3828 * Once the lock bit is set, this MSR can no longer be modified.
3829 *
3830 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3831 * accurately. See @bugref{6873}.
3832 */
3833 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3834 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3835 | MSR_IA32_FEATURE_CONTROL_VMXON;
3836 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3837
3838 /* Verify. */
3839 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3840 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3841 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3842 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3843 if (fSmxVmxAllowed && fVmxAllowed)
3844 rc = VINF_SUCCESS;
3845 else
3846 rc = VERR_VMX_MSR_LOCKING_FAILED;
3847 }
3848
3849 if (rc == VINF_SUCCESS)
3850 {
3851 VMXCAPABILITY vtCaps;
3852
3853 *pfCaps |= SUPVTCAPS_VT_X;
3854
3855 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3856 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3857 {
3858 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3859 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3860 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3861 }
3862 }
3863 }
3864 else
3865 rc = VERR_VMX_NO_VMX;
3866 }
3867 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3868 && ASMIsValidStdRange(uMaxId))
3869 {
3870 uint32_t fExtFeaturesEcx, uExtMaxId;
3871 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3872 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3873 if ( ASMIsValidExtRange(uExtMaxId)
3874 && uExtMaxId >= 0x8000000a
3875 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3876 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3877 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3878 )
3879 {
3880 /* Check if SVM is disabled */
3881 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3882 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3883 {
3884 uint32_t fSvmFeatures;
3885 *pfCaps |= SUPVTCAPS_AMD_V;
3886
3887 /* Query AMD-V features. */
3888 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3889 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3890 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3891
3892 rc = VINF_SUCCESS;
3893 }
3894 else
3895 rc = VERR_SVM_DISABLED;
3896 }
3897 else
3898 rc = VERR_SVM_NO_SVM;
3899 }
3900 }
3901
3902 RTThreadPreemptRestore(&PreemptState);
3903 if (fIsSmxModeAmbiguous)
3904 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3905 return rc;
3906}
3907
3908
3909/**
3910 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3911 * updating.
3912 *
3913 * @param pGip Pointer to the GIP.
3914 * @param pGipCpu The per CPU structure for this CPU.
3915 * @param u64NanoTS The current time.
3916 */
3917static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3918{
3919 /*
3920 * Here we don't really care about applying the TSC delta. The re-initialization of this
3921 * value is not relevant especially while (re)starting the GIP as the first few ones will
3922 * be ignored anyway, see supdrvGipDoUpdateCpu().
3923 */
3924 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
3925 pGipCpu->u64NanoTS = u64NanoTS;
3926}
3927
3928
3929/**
3930 * Set the current TSC and NanoTS value for the CPU.
3931 *
3932 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3933 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3934 * @param pvUser2 Pointer to the variable holding the current time.
3935 */
3936static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3937{
3938 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3939 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3940
3941 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3942 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3943
3944 NOREF(pvUser2);
3945 NOREF(idCpu);
3946}
3947
3948
3949/**
3950 * Increase the timer freqency on hosts where this is possible (NT).
3951 *
3952 * The idea is that more interrupts is better for us... Also, it's better than
3953 * we increase the timer frequence, because we might end up getting inaccurate
3954 * callbacks if someone else does it.
3955 *
3956 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
3957 */
3958static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3959{
3960 if (pDevExt->u32SystemTimerGranularityGrant == 0)
3961 {
3962 uint32_t u32SystemResolution;
3963 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3964 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3965 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3966 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3967 )
3968 {
3969 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3970 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3971 }
3972 }
3973}
3974
3975
3976/**
3977 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
3978 *
3979 * @param pDevExt Clears u32SystemTimerGranularityGrant.
3980 */
3981static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3982{
3983 if (pDevExt->u32SystemTimerGranularityGrant)
3984 {
3985 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
3986 AssertRC(rc2);
3987 pDevExt->u32SystemTimerGranularityGrant = 0;
3988 }
3989}
3990
3991
3992/**
3993 * Maps the GIP into userspace and/or get the physical address of the GIP.
3994 *
3995 * @returns IPRT status code.
3996 * @param pSession Session to which the GIP mapping should belong.
3997 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3998 * @param pHCPhysGip Where to store the physical address. (optional)
3999 *
4000 * @remark There is no reference counting on the mapping, so one call to this function
4001 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
4002 * and remove the session as a GIP user.
4003 */
4004SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
4005{
4006 int rc;
4007 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4008 RTR3PTR pGipR3 = NIL_RTR3PTR;
4009 RTHCPHYS HCPhys = NIL_RTHCPHYS;
4010 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
4011
4012 /*
4013 * Validate
4014 */
4015 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4016 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4017 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4018
4019#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4020 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4021#else
4022 RTSemFastMutexRequest(pDevExt->mtxGip);
4023#endif
4024 if (pDevExt->pGip)
4025 {
4026 /*
4027 * Map it?
4028 */
4029 rc = VINF_SUCCESS;
4030 if (ppGipR3)
4031 {
4032 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4033 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4034 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4035 if (RT_SUCCESS(rc))
4036 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4037 }
4038
4039 /*
4040 * Get physical address.
4041 */
4042 if (pHCPhysGip && RT_SUCCESS(rc))
4043 HCPhys = pDevExt->HCPhysGip;
4044
4045 /*
4046 * Reference globally.
4047 */
4048 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4049 {
4050 pSession->fGipReferenced = 1;
4051 pDevExt->cGipUsers++;
4052 if (pDevExt->cGipUsers == 1)
4053 {
4054 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4055 uint64_t u64NanoTS;
4056
4057 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4058
4059 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4060
4061 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4062 {
4063 unsigned i;
4064 for (i = 0; i < pGipR0->cCpus; i++)
4065 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4066 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4067 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4068 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4069 }
4070
4071 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4072 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4073 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4074 || RTMpGetOnlineCount() == 1)
4075 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4076 else
4077 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4078
4079#ifndef DO_NOT_START_GIP
4080 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4081#endif
4082 rc = VINF_SUCCESS;
4083 }
4084 }
4085 }
4086 else
4087 {
4088 rc = VERR_GENERAL_FAILURE;
4089 Log(("SUPR0GipMap: GIP is not available!\n"));
4090 }
4091#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4092 RTSemMutexRelease(pDevExt->mtxGip);
4093#else
4094 RTSemFastMutexRelease(pDevExt->mtxGip);
4095#endif
4096
4097 /*
4098 * Write returns.
4099 */
4100 if (pHCPhysGip)
4101 *pHCPhysGip = HCPhys;
4102 if (ppGipR3)
4103 *ppGipR3 = pGipR3;
4104
4105#ifdef DEBUG_DARWIN_GIP
4106 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4107#else
4108 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4109#endif
4110 return rc;
4111}
4112
4113
4114/**
4115 * Unmaps any user mapping of the GIP and terminates all GIP access
4116 * from this session.
4117 *
4118 * @returns IPRT status code.
4119 * @param pSession Session to which the GIP mapping should belong.
4120 */
4121SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4122{
4123 int rc = VINF_SUCCESS;
4124 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4125#ifdef DEBUG_DARWIN_GIP
4126 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4127 pSession,
4128 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4129 pSession->GipMapObjR3));
4130#else
4131 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4132#endif
4133 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4134
4135#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4136 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4137#else
4138 RTSemFastMutexRequest(pDevExt->mtxGip);
4139#endif
4140
4141 /*
4142 * Unmap anything?
4143 */
4144 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4145 {
4146 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4147 AssertRC(rc);
4148 if (RT_SUCCESS(rc))
4149 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4150 }
4151
4152 /*
4153 * Dereference global GIP.
4154 */
4155 if (pSession->fGipReferenced && !rc)
4156 {
4157 pSession->fGipReferenced = 0;
4158 if ( pDevExt->cGipUsers > 0
4159 && !--pDevExt->cGipUsers)
4160 {
4161 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4162#ifndef DO_NOT_START_GIP
4163 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4164#endif
4165 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4166 }
4167 }
4168
4169#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4170 RTSemMutexRelease(pDevExt->mtxGip);
4171#else
4172 RTSemFastMutexRelease(pDevExt->mtxGip);
4173#endif
4174
4175 return rc;
4176}
4177
4178
4179/**
4180 * Gets the GIP pointer.
4181 *
4182 * @returns Pointer to the GIP or NULL.
4183 */
4184SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4185{
4186 return g_pSUPGlobalInfoPage;
4187}
4188
4189
4190/**
4191 * Register a component factory with the support driver.
4192 *
4193 * This is currently restricted to kernel sessions only.
4194 *
4195 * @returns VBox status code.
4196 * @retval VINF_SUCCESS on success.
4197 * @retval VERR_NO_MEMORY if we're out of memory.
4198 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4199 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4200 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4201 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4202 *
4203 * @param pSession The SUPDRV session (must be a ring-0 session).
4204 * @param pFactory Pointer to the component factory registration structure.
4205 *
4206 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4207 */
4208SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4209{
4210 PSUPDRVFACTORYREG pNewReg;
4211 const char *psz;
4212 int rc;
4213
4214 /*
4215 * Validate parameters.
4216 */
4217 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4218 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4219 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4220 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4221 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4222 AssertReturn(psz, VERR_INVALID_PARAMETER);
4223
4224 /*
4225 * Allocate and initialize a new registration structure.
4226 */
4227 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4228 if (pNewReg)
4229 {
4230 pNewReg->pNext = NULL;
4231 pNewReg->pFactory = pFactory;
4232 pNewReg->pSession = pSession;
4233 pNewReg->cchName = psz - &pFactory->szName[0];
4234
4235 /*
4236 * Add it to the tail of the list after checking for prior registration.
4237 */
4238 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4239 if (RT_SUCCESS(rc))
4240 {
4241 PSUPDRVFACTORYREG pPrev = NULL;
4242 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4243 while (pCur && pCur->pFactory != pFactory)
4244 {
4245 pPrev = pCur;
4246 pCur = pCur->pNext;
4247 }
4248 if (!pCur)
4249 {
4250 if (pPrev)
4251 pPrev->pNext = pNewReg;
4252 else
4253 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4254 rc = VINF_SUCCESS;
4255 }
4256 else
4257 rc = VERR_ALREADY_EXISTS;
4258
4259 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4260 }
4261
4262 if (RT_FAILURE(rc))
4263 RTMemFree(pNewReg);
4264 }
4265 else
4266 rc = VERR_NO_MEMORY;
4267 return rc;
4268}
4269
4270
4271/**
4272 * Deregister a component factory.
4273 *
4274 * @returns VBox status code.
4275 * @retval VINF_SUCCESS on success.
4276 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4277 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4278 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4279 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4280 *
4281 * @param pSession The SUPDRV session (must be a ring-0 session).
4282 * @param pFactory Pointer to the component factory registration structure
4283 * previously passed SUPR0ComponentRegisterFactory().
4284 *
4285 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4286 */
4287SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4288{
4289 int rc;
4290
4291 /*
4292 * Validate parameters.
4293 */
4294 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4295 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4296 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4297
4298 /*
4299 * Take the lock and look for the registration record.
4300 */
4301 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4302 if (RT_SUCCESS(rc))
4303 {
4304 PSUPDRVFACTORYREG pPrev = NULL;
4305 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4306 while (pCur && pCur->pFactory != pFactory)
4307 {
4308 pPrev = pCur;
4309 pCur = pCur->pNext;
4310 }
4311 if (pCur)
4312 {
4313 if (!pPrev)
4314 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4315 else
4316 pPrev->pNext = pCur->pNext;
4317
4318 pCur->pNext = NULL;
4319 pCur->pFactory = NULL;
4320 pCur->pSession = NULL;
4321 rc = VINF_SUCCESS;
4322 }
4323 else
4324 rc = VERR_NOT_FOUND;
4325
4326 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4327
4328 RTMemFree(pCur);
4329 }
4330 return rc;
4331}
4332
4333
4334/**
4335 * Queries a component factory.
4336 *
4337 * @returns VBox status code.
4338 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4339 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4340 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4341 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4342 *
4343 * @param pSession The SUPDRV session.
4344 * @param pszName The name of the component factory.
4345 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4346 * @param ppvFactoryIf Where to store the factory interface.
4347 */
4348SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4349{
4350 const char *pszEnd;
4351 size_t cchName;
4352 int rc;
4353
4354 /*
4355 * Validate parameters.
4356 */
4357 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4358
4359 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4360 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4361 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4362 cchName = pszEnd - pszName;
4363
4364 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4365 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4366 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4367
4368 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4369 *ppvFactoryIf = NULL;
4370
4371 /*
4372 * Take the lock and try all factories by this name.
4373 */
4374 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4375 if (RT_SUCCESS(rc))
4376 {
4377 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4378 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4379 while (pCur)
4380 {
4381 if ( pCur->cchName == cchName
4382 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4383 {
4384 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4385 if (pvFactory)
4386 {
4387 *ppvFactoryIf = pvFactory;
4388 rc = VINF_SUCCESS;
4389 break;
4390 }
4391 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4392 }
4393
4394 /* next */
4395 pCur = pCur->pNext;
4396 }
4397
4398 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4399 }
4400 return rc;
4401}
4402
4403
4404/**
4405 * Adds a memory object to the session.
4406 *
4407 * @returns IPRT status code.
4408 * @param pMem Memory tracking structure containing the
4409 * information to track.
4410 * @param pSession The session.
4411 */
4412static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4413{
4414 PSUPDRVBUNDLE pBundle;
4415
4416 /*
4417 * Find free entry and record the allocation.
4418 */
4419 RTSpinlockAcquire(pSession->Spinlock);
4420 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4421 {
4422 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4423 {
4424 unsigned i;
4425 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4426 {
4427 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4428 {
4429 pBundle->cUsed++;
4430 pBundle->aMem[i] = *pMem;
4431 RTSpinlockRelease(pSession->Spinlock);
4432 return VINF_SUCCESS;
4433 }
4434 }
4435 AssertFailed(); /* !!this can't be happening!!! */
4436 }
4437 }
4438 RTSpinlockRelease(pSession->Spinlock);
4439
4440 /*
4441 * Need to allocate a new bundle.
4442 * Insert into the last entry in the bundle.
4443 */
4444 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4445 if (!pBundle)
4446 return VERR_NO_MEMORY;
4447
4448 /* take last entry. */
4449 pBundle->cUsed++;
4450 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4451
4452 /* insert into list. */
4453 RTSpinlockAcquire(pSession->Spinlock);
4454 pBundle->pNext = pSession->Bundle.pNext;
4455 pSession->Bundle.pNext = pBundle;
4456 RTSpinlockRelease(pSession->Spinlock);
4457
4458 return VINF_SUCCESS;
4459}
4460
4461
4462/**
4463 * Releases a memory object referenced by pointer and type.
4464 *
4465 * @returns IPRT status code.
4466 * @param pSession Session data.
4467 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4468 * @param eType Memory type.
4469 */
4470static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4471{
4472 PSUPDRVBUNDLE pBundle;
4473
4474 /*
4475 * Validate input.
4476 */
4477 if (!uPtr)
4478 {
4479 Log(("Illegal address %p\n", (void *)uPtr));
4480 return VERR_INVALID_PARAMETER;
4481 }
4482
4483 /*
4484 * Search for the address.
4485 */
4486 RTSpinlockAcquire(pSession->Spinlock);
4487 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4488 {
4489 if (pBundle->cUsed > 0)
4490 {
4491 unsigned i;
4492 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4493 {
4494 if ( pBundle->aMem[i].eType == eType
4495 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4496 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4497 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4498 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4499 )
4500 {
4501 /* Make a copy of it and release it outside the spinlock. */
4502 SUPDRVMEMREF Mem = pBundle->aMem[i];
4503 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4504 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4505 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4506 RTSpinlockRelease(pSession->Spinlock);
4507
4508 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4509 {
4510 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4511 AssertRC(rc); /** @todo figure out how to handle this. */
4512 }
4513 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4514 {
4515 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4516 AssertRC(rc); /** @todo figure out how to handle this. */
4517 }
4518 return VINF_SUCCESS;
4519 }
4520 }
4521 }
4522 }
4523 RTSpinlockRelease(pSession->Spinlock);
4524 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4525 return VERR_INVALID_PARAMETER;
4526}
4527
4528
4529/**
4530 * Opens an image. If it's the first time it's opened the call must upload
4531 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4532 *
4533 * This is the 1st step of the loading.
4534 *
4535 * @returns IPRT status code.
4536 * @param pDevExt Device globals.
4537 * @param pSession Session data.
4538 * @param pReq The open request.
4539 */
4540static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4541{
4542 int rc;
4543 PSUPDRVLDRIMAGE pImage;
4544 void *pv;
4545 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4546 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4547
4548 /*
4549 * Check if we got an instance of the image already.
4550 */
4551 supdrvLdrLock(pDevExt);
4552 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4553 {
4554 if ( pImage->szName[cchName] == '\0'
4555 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4556 {
4557 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4558 {
4559 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4560 pImage->cUsage++;
4561 pReq->u.Out.pvImageBase = pImage->pvImage;
4562 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4563 pReq->u.Out.fNativeLoader = pImage->fNative;
4564 supdrvLdrAddUsage(pSession, pImage);
4565 supdrvLdrUnlock(pDevExt);
4566 return VINF_SUCCESS;
4567 }
4568 supdrvLdrUnlock(pDevExt);
4569 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4570 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4571 }
4572 }
4573 /* (not found - add it!) */
4574
4575 /* If the loader interface is locked down, make userland fail early */
4576 if (pDevExt->fLdrLockedDown)
4577 {
4578 supdrvLdrUnlock(pDevExt);
4579 Log(("supdrvIOCtl_LdrOpen: Not adding '%s' to image list, loader interface is locked down!\n", pReq->u.In.szName));
4580 return VERR_PERMISSION_DENIED;
4581 }
4582
4583 /*
4584 * Allocate memory.
4585 */
4586 Assert(cchName < sizeof(pImage->szName));
4587 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4588 if (!pv)
4589 {
4590 supdrvLdrUnlock(pDevExt);
4591 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4592 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4593 }
4594
4595 /*
4596 * Setup and link in the LDR stuff.
4597 */
4598 pImage = (PSUPDRVLDRIMAGE)pv;
4599 pImage->pvImage = NULL;
4600 pImage->pvImageAlloc = NULL;
4601 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4602 pImage->cbImageBits = pReq->u.In.cbImageBits;
4603 pImage->cSymbols = 0;
4604 pImage->paSymbols = NULL;
4605 pImage->pachStrTab = NULL;
4606 pImage->cbStrTab = 0;
4607 pImage->pfnModuleInit = NULL;
4608 pImage->pfnModuleTerm = NULL;
4609 pImage->pfnServiceReqHandler = NULL;
4610 pImage->uState = SUP_IOCTL_LDR_OPEN;
4611 pImage->cUsage = 1;
4612 pImage->pDevExt = pDevExt;
4613 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4614
4615 /*
4616 * Try load it using the native loader, if that isn't supported, fall back
4617 * on the older method.
4618 */
4619 pImage->fNative = true;
4620 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4621 if (rc == VERR_NOT_SUPPORTED)
4622 {
4623 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4624 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4625 pImage->fNative = false;
4626 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4627 }
4628 if (RT_FAILURE(rc))
4629 {
4630 supdrvLdrUnlock(pDevExt);
4631 RTMemFree(pImage);
4632 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4633 return rc;
4634 }
4635 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4636
4637 /*
4638 * Link it.
4639 */
4640 pImage->pNext = pDevExt->pLdrImages;
4641 pDevExt->pLdrImages = pImage;
4642
4643 supdrvLdrAddUsage(pSession, pImage);
4644
4645 pReq->u.Out.pvImageBase = pImage->pvImage;
4646 pReq->u.Out.fNeedsLoading = true;
4647 pReq->u.Out.fNativeLoader = pImage->fNative;
4648 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4649
4650 supdrvLdrUnlock(pDevExt);
4651 return VINF_SUCCESS;
4652}
4653
4654
4655/**
4656 * Worker that validates a pointer to an image entrypoint.
4657 *
4658 * @returns IPRT status code.
4659 * @param pDevExt The device globals.
4660 * @param pImage The loader image.
4661 * @param pv The pointer into the image.
4662 * @param fMayBeNull Whether it may be NULL.
4663 * @param pszWhat What is this entrypoint? (for logging)
4664 * @param pbImageBits The image bits prepared by ring-3.
4665 *
4666 * @remarks Will leave the lock on failure.
4667 */
4668static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4669 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4670{
4671 if (!fMayBeNull || pv)
4672 {
4673 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4674 {
4675 supdrvLdrUnlock(pDevExt);
4676 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4677 return VERR_INVALID_PARAMETER;
4678 }
4679
4680 if (pImage->fNative)
4681 {
4682 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4683 if (RT_FAILURE(rc))
4684 {
4685 supdrvLdrUnlock(pDevExt);
4686 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4687 return rc;
4688 }
4689 }
4690 }
4691 return VINF_SUCCESS;
4692}
4693
4694
4695/**
4696 * Loads the image bits.
4697 *
4698 * This is the 2nd step of the loading.
4699 *
4700 * @returns IPRT status code.
4701 * @param pDevExt Device globals.
4702 * @param pSession Session data.
4703 * @param pReq The request.
4704 */
4705static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4706{
4707 PSUPDRVLDRUSAGE pUsage;
4708 PSUPDRVLDRIMAGE pImage;
4709 int rc;
4710 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4711
4712 /*
4713 * Find the ldr image.
4714 */
4715 supdrvLdrLock(pDevExt);
4716 pUsage = pSession->pLdrUsage;
4717 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4718 pUsage = pUsage->pNext;
4719 if (!pUsage)
4720 {
4721 supdrvLdrUnlock(pDevExt);
4722 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4723 return VERR_INVALID_HANDLE;
4724 }
4725 pImage = pUsage->pImage;
4726
4727 /*
4728 * Validate input.
4729 */
4730 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4731 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4732 {
4733 supdrvLdrUnlock(pDevExt);
4734 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4735 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4736 return VERR_INVALID_HANDLE;
4737 }
4738
4739 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4740 {
4741 unsigned uState = pImage->uState;
4742 supdrvLdrUnlock(pDevExt);
4743 if (uState != SUP_IOCTL_LDR_LOAD)
4744 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4745 return VERR_ALREADY_LOADED;
4746 }
4747
4748 /* If the loader interface is locked down, don't load new images */
4749 if (pDevExt->fLdrLockedDown)
4750 {
4751 supdrvLdrUnlock(pDevExt);
4752 Log(("SUP_IOCTL_LDR_LOAD: Not loading '%s' image bits, loader interface is locked down!\n", pImage->szName));
4753 return VERR_PERMISSION_DENIED;
4754 }
4755
4756 switch (pReq->u.In.eEPType)
4757 {
4758 case SUPLDRLOADEP_NOTHING:
4759 break;
4760
4761 case SUPLDRLOADEP_VMMR0:
4762 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4763 if (RT_SUCCESS(rc))
4764 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4765 if (RT_SUCCESS(rc))
4766 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4767 if (RT_SUCCESS(rc))
4768 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4769 if (RT_FAILURE(rc))
4770 return rc;
4771 break;
4772
4773 case SUPLDRLOADEP_SERVICE:
4774 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4775 if (RT_FAILURE(rc))
4776 return rc;
4777 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4778 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4779 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4780 {
4781 supdrvLdrUnlock(pDevExt);
4782 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4783 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4784 pReq->u.In.EP.Service.apvReserved[0],
4785 pReq->u.In.EP.Service.apvReserved[1],
4786 pReq->u.In.EP.Service.apvReserved[2]));
4787 return VERR_INVALID_PARAMETER;
4788 }
4789 break;
4790
4791 default:
4792 supdrvLdrUnlock(pDevExt);
4793 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4794 return VERR_INVALID_PARAMETER;
4795 }
4796
4797 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4798 if (RT_FAILURE(rc))
4799 return rc;
4800 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4801 if (RT_FAILURE(rc))
4802 return rc;
4803
4804 /*
4805 * Allocate and copy the tables.
4806 * (No need to do try/except as this is a buffered request.)
4807 */
4808 pImage->cbStrTab = pReq->u.In.cbStrTab;
4809 if (pImage->cbStrTab)
4810 {
4811 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4812 if (pImage->pachStrTab)
4813 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4814 else
4815 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4816 }
4817
4818 pImage->cSymbols = pReq->u.In.cSymbols;
4819 if (RT_SUCCESS(rc) && pImage->cSymbols)
4820 {
4821 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4822 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4823 if (pImage->paSymbols)
4824 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4825 else
4826 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4827 }
4828
4829 /*
4830 * Copy the bits / complete native loading.
4831 */
4832 if (RT_SUCCESS(rc))
4833 {
4834 pImage->uState = SUP_IOCTL_LDR_LOAD;
4835 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4836 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4837
4838 if (pImage->fNative)
4839 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4840 else
4841 {
4842 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4843 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4844 }
4845 }
4846
4847 /*
4848 * Update any entry points.
4849 */
4850 if (RT_SUCCESS(rc))
4851 {
4852 switch (pReq->u.In.eEPType)
4853 {
4854 default:
4855 case SUPLDRLOADEP_NOTHING:
4856 rc = VINF_SUCCESS;
4857 break;
4858 case SUPLDRLOADEP_VMMR0:
4859 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4860 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4861 break;
4862 case SUPLDRLOADEP_SERVICE:
4863 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4864 rc = VINF_SUCCESS;
4865 break;
4866 }
4867 }
4868
4869 /*
4870 * On success call the module initialization.
4871 */
4872 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4873 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4874 {
4875 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4876 pDevExt->pLdrInitImage = pImage;
4877 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4878 rc = pImage->pfnModuleInit(pImage);
4879 pDevExt->pLdrInitImage = NULL;
4880 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4881 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4882 supdrvLdrUnsetVMMR0EPs(pDevExt);
4883 }
4884 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4885
4886 if (RT_FAILURE(rc))
4887 {
4888 /* Inform the tracing component in case ModuleInit registered TPs. */
4889 supdrvTracerModuleUnloading(pDevExt, pImage);
4890
4891 pImage->uState = SUP_IOCTL_LDR_OPEN;
4892 pImage->pfnModuleInit = NULL;
4893 pImage->pfnModuleTerm = NULL;
4894 pImage->pfnServiceReqHandler= NULL;
4895 pImage->cbStrTab = 0;
4896 RTMemFree(pImage->pachStrTab);
4897 pImage->pachStrTab = NULL;
4898 RTMemFree(pImage->paSymbols);
4899 pImage->paSymbols = NULL;
4900 pImage->cSymbols = 0;
4901 }
4902
4903 supdrvLdrUnlock(pDevExt);
4904 return rc;
4905}
4906
4907
4908/**
4909 * Frees a previously loaded (prep'ed) image.
4910 *
4911 * @returns IPRT status code.
4912 * @param pDevExt Device globals.
4913 * @param pSession Session data.
4914 * @param pReq The request.
4915 */
4916static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4917{
4918 int rc;
4919 PSUPDRVLDRUSAGE pUsagePrev;
4920 PSUPDRVLDRUSAGE pUsage;
4921 PSUPDRVLDRIMAGE pImage;
4922 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4923
4924 /*
4925 * Find the ldr image.
4926 */
4927 supdrvLdrLock(pDevExt);
4928 pUsagePrev = NULL;
4929 pUsage = pSession->pLdrUsage;
4930 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4931 {
4932 pUsagePrev = pUsage;
4933 pUsage = pUsage->pNext;
4934 }
4935 if (!pUsage)
4936 {
4937 supdrvLdrUnlock(pDevExt);
4938 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4939 return VERR_INVALID_HANDLE;
4940 }
4941
4942 /*
4943 * Check if we can remove anything.
4944 */
4945 rc = VINF_SUCCESS;
4946 pImage = pUsage->pImage;
4947 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4948 {
4949 /*
4950 * Check if there are any objects with destructors in the image, if
4951 * so leave it for the session cleanup routine so we get a chance to
4952 * clean things up in the right order and not leave them all dangling.
4953 */
4954 RTSpinlockAcquire(pDevExt->Spinlock);
4955 if (pImage->cUsage <= 1)
4956 {
4957 PSUPDRVOBJ pObj;
4958 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4959 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4960 {
4961 rc = VERR_DANGLING_OBJECTS;
4962 break;
4963 }
4964 }
4965 else
4966 {
4967 PSUPDRVUSAGE pGenUsage;
4968 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4969 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4970 {
4971 rc = VERR_DANGLING_OBJECTS;
4972 break;
4973 }
4974 }
4975 RTSpinlockRelease(pDevExt->Spinlock);
4976 if (rc == VINF_SUCCESS)
4977 {
4978 /* unlink it */
4979 if (pUsagePrev)
4980 pUsagePrev->pNext = pUsage->pNext;
4981 else
4982 pSession->pLdrUsage = pUsage->pNext;
4983
4984 /* free it */
4985 pUsage->pImage = NULL;
4986 pUsage->pNext = NULL;
4987 RTMemFree(pUsage);
4988
4989 /*
4990 * Dereference the image.
4991 */
4992 if (pImage->cUsage <= 1)
4993 supdrvLdrFree(pDevExt, pImage);
4994 else
4995 pImage->cUsage--;
4996 }
4997 else
4998 {
4999 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
5000 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
5001 }
5002 }
5003 else
5004 {
5005 /*
5006 * Dereference both image and usage.
5007 */
5008 pImage->cUsage--;
5009 pUsage->cUsage--;
5010 }
5011
5012 supdrvLdrUnlock(pDevExt);
5013 return rc;
5014}
5015
5016
5017/**
5018 * Lock down the image loader interface.
5019 *
5020 * @returns IPRT status code.
5021 * @param pDevExt Device globals.
5022 */
5023static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt)
5024{
5025 LogFlow(("supdrvIOCtl_LdrLockDown:\n"));
5026
5027 supdrvLdrLock(pDevExt);
5028 if (!pDevExt->fLdrLockedDown)
5029 {
5030 pDevExt->fLdrLockedDown = true;
5031 Log(("supdrvIOCtl_LdrLockDown: Image loader interface locked down\n"));
5032 }
5033 supdrvLdrUnlock(pDevExt);
5034
5035 return VINF_SUCCESS;
5036}
5037
5038
5039/**
5040 * Gets the address of a symbol in an open image.
5041 *
5042 * @returns IPRT status code.
5043 * @param pDevExt Device globals.
5044 * @param pSession Session data.
5045 * @param pReq The request buffer.
5046 */
5047static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
5048{
5049 PSUPDRVLDRIMAGE pImage;
5050 PSUPDRVLDRUSAGE pUsage;
5051 uint32_t i;
5052 PSUPLDRSYM paSyms;
5053 const char *pchStrings;
5054 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5055 void *pvSymbol = NULL;
5056 int rc = VERR_GENERAL_FAILURE;
5057 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5058
5059 /*
5060 * Find the ldr image.
5061 */
5062 supdrvLdrLock(pDevExt);
5063 pUsage = pSession->pLdrUsage;
5064 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5065 pUsage = pUsage->pNext;
5066 if (!pUsage)
5067 {
5068 supdrvLdrUnlock(pDevExt);
5069 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5070 return VERR_INVALID_HANDLE;
5071 }
5072 pImage = pUsage->pImage;
5073 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5074 {
5075 unsigned uState = pImage->uState;
5076 supdrvLdrUnlock(pDevExt);
5077 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5078 return VERR_ALREADY_LOADED;
5079 }
5080
5081 /*
5082 * Search the symbol strings.
5083 *
5084 * Note! The int32_t is for native loading on solaris where the data
5085 * and text segments are in very different places.
5086 */
5087 pchStrings = pImage->pachStrTab;
5088 paSyms = pImage->paSymbols;
5089 for (i = 0; i < pImage->cSymbols; i++)
5090 {
5091 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5092 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5093 {
5094 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5095 rc = VINF_SUCCESS;
5096 break;
5097 }
5098 }
5099 supdrvLdrUnlock(pDevExt);
5100 pReq->u.Out.pvSymbol = pvSymbol;
5101 return rc;
5102}
5103
5104
5105/**
5106 * Gets the address of a symbol in an open image or the support driver.
5107 *
5108 * @returns VINF_SUCCESS on success.
5109 * @returns
5110 * @param pDevExt Device globals.
5111 * @param pSession Session data.
5112 * @param pReq The request buffer.
5113 */
5114static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5115{
5116 int rc = VINF_SUCCESS;
5117 const char *pszSymbol = pReq->u.In.pszSymbol;
5118 const char *pszModule = pReq->u.In.pszModule;
5119 size_t cbSymbol;
5120 char const *pszEnd;
5121 uint32_t i;
5122
5123 /*
5124 * Input validation.
5125 */
5126 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5127 pszEnd = RTStrEnd(pszSymbol, 512);
5128 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5129 cbSymbol = pszEnd - pszSymbol + 1;
5130
5131 if (pszModule)
5132 {
5133 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5134 pszEnd = RTStrEnd(pszModule, 64);
5135 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5136 }
5137 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5138
5139
5140 if ( !pszModule
5141 || !strcmp(pszModule, "SupDrv"))
5142 {
5143 /*
5144 * Search the support driver export table.
5145 */
5146 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5147 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5148 {
5149 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5150 break;
5151 }
5152 }
5153 else
5154 {
5155 /*
5156 * Find the loader image.
5157 */
5158 PSUPDRVLDRIMAGE pImage;
5159
5160 supdrvLdrLock(pDevExt);
5161
5162 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5163 if (!strcmp(pImage->szName, pszModule))
5164 break;
5165 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5166 {
5167 /*
5168 * Search the symbol strings.
5169 */
5170 const char *pchStrings = pImage->pachStrTab;
5171 PCSUPLDRSYM paSyms = pImage->paSymbols;
5172 for (i = 0; i < pImage->cSymbols; i++)
5173 {
5174 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5175 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5176 {
5177 /*
5178 * Found it! Calc the symbol address and add a reference to the module.
5179 */
5180 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5181 rc = supdrvLdrAddUsage(pSession, pImage);
5182 break;
5183 }
5184 }
5185 }
5186 else
5187 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5188
5189 supdrvLdrUnlock(pDevExt);
5190 }
5191 return rc;
5192}
5193
5194
5195/**
5196 * Updates the VMMR0 entry point pointers.
5197 *
5198 * @returns IPRT status code.
5199 * @param pDevExt Device globals.
5200 * @param pSession Session data.
5201 * @param pVMMR0 VMMR0 image handle.
5202 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5203 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5204 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5205 * @remark Caller must own the loader mutex.
5206 */
5207static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5208{
5209 int rc = VINF_SUCCESS;
5210 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5211
5212
5213 /*
5214 * Check if not yet set.
5215 */
5216 if (!pDevExt->pvVMMR0)
5217 {
5218 pDevExt->pvVMMR0 = pvVMMR0;
5219 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5220 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5221 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5222 }
5223 else
5224 {
5225 /*
5226 * Return failure or success depending on whether the values match or not.
5227 */
5228 if ( pDevExt->pvVMMR0 != pvVMMR0
5229 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5230 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5231 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5232 {
5233 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5234 rc = VERR_INVALID_PARAMETER;
5235 }
5236 }
5237 return rc;
5238}
5239
5240
/**
 * Unsets the VMMR0 entry points installed by supdrvLdrSetVMMR0EPs.
 *
 * Clears the VMMR0 image handle and the three entry point pointers in the
 * device extension, the image handle first.
 *
 * @param   pDevExt     Device globals.
 */
static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
{
    pDevExt->pvVMMR0            = NULL;
    pDevExt->pfnVMMR0EntryInt   = NULL;
    pDevExt->pfnVMMR0EntryFast  = NULL;
    pDevExt->pfnVMMR0EntryEx    = NULL;
}
5253
5254
5255/**
5256 * Adds a usage reference in the specified session of an image.
5257 *
5258 * Called while owning the loader semaphore.
5259 *
5260 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5261 * @param pSession Session in question.
5262 * @param pImage Image which the session is using.
5263 */
5264static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5265{
5266 PSUPDRVLDRUSAGE pUsage;
5267 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5268
5269 /*
5270 * Referenced it already?
5271 */
5272 pUsage = pSession->pLdrUsage;
5273 while (pUsage)
5274 {
5275 if (pUsage->pImage == pImage)
5276 {
5277 pUsage->cUsage++;
5278 return VINF_SUCCESS;
5279 }
5280 pUsage = pUsage->pNext;
5281 }
5282
5283 /*
5284 * Allocate new usage record.
5285 */
5286 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5287 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5288 pUsage->cUsage = 1;
5289 pUsage->pImage = pImage;
5290 pUsage->pNext = pSession->pLdrUsage;
5291 pSession->pLdrUsage = pUsage;
5292 return VINF_SUCCESS;
5293}
5294
5295
5296/**
5297 * Frees a load image.
5298 *
5299 * @param pDevExt Pointer to device extension.
5300 * @param pImage Pointer to the image we're gonna free.
5301 * This image must exit!
5302 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5303 */
5304static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5305{
5306 PSUPDRVLDRIMAGE pImagePrev;
5307 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5308
5309 /*
5310 * Warn if we're releasing images while the image loader interface is
5311 * locked down -- we won't be able to reload them!
5312 */
5313 if (pDevExt->fLdrLockedDown)
5314 Log(("supdrvLdrFree: Warning: unloading '%s' image, while loader interface is locked down!\n", pImage->szName));
5315
5316 /* find it - arg. should've used doubly linked list. */
5317 Assert(pDevExt->pLdrImages);
5318 pImagePrev = NULL;
5319 if (pDevExt->pLdrImages != pImage)
5320 {
5321 pImagePrev = pDevExt->pLdrImages;
5322 while (pImagePrev->pNext != pImage)
5323 pImagePrev = pImagePrev->pNext;
5324 Assert(pImagePrev->pNext == pImage);
5325 }
5326
5327 /* unlink */
5328 if (pImagePrev)
5329 pImagePrev->pNext = pImage->pNext;
5330 else
5331 pDevExt->pLdrImages = pImage->pNext;
5332
5333 /* check if this is VMMR0.r0 unset its entry point pointers. */
5334 if (pDevExt->pvVMMR0 == pImage->pvImage)
5335 supdrvLdrUnsetVMMR0EPs(pDevExt);
5336
5337 /* check for objects with destructors in this image. (Shouldn't happen.) */
5338 if (pDevExt->pObjs)
5339 {
5340 unsigned cObjs = 0;
5341 PSUPDRVOBJ pObj;
5342 RTSpinlockAcquire(pDevExt->Spinlock);
5343 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5344 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5345 {
5346 pObj->pfnDestructor = NULL;
5347 cObjs++;
5348 }
5349 RTSpinlockRelease(pDevExt->Spinlock);
5350 if (cObjs)
5351 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5352 }
5353
5354 /* call termination function if fully loaded. */
5355 if ( pImage->pfnModuleTerm
5356 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5357 {
5358 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5359 pImage->pfnModuleTerm(pImage);
5360 }
5361
5362 /* Inform the tracing component. */
5363 supdrvTracerModuleUnloading(pDevExt, pImage);
5364
5365 /* do native unload if appropriate. */
5366 if (pImage->fNative)
5367 supdrvOSLdrUnload(pDevExt, pImage);
5368
5369 /* free the image */
5370 pImage->cUsage = 0;
5371 pImage->pDevExt = NULL;
5372 pImage->pNext = NULL;
5373 pImage->uState = SUP_IOCTL_LDR_FREE;
5374 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5375 pImage->pvImageAlloc = NULL;
5376 RTMemFree(pImage->pachStrTab);
5377 pImage->pachStrTab = NULL;
5378 RTMemFree(pImage->paSymbols);
5379 pImage->paSymbols = NULL;
5380 RTMemFree(pImage);
5381}
5382
5383
5384/**
5385 * Acquires the loader lock.
5386 *
5387 * @returns IPRT status code.
5388 * @param pDevExt The device extension.
5389 */
5390DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5391{
5392#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5393 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5394#else
5395 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5396#endif
5397 AssertRC(rc);
5398 return rc;
5399}
5400
5401
5402/**
5403 * Releases the loader lock.
5404 *
5405 * @returns IPRT status code.
5406 * @param pDevExt The device extension.
5407 */
5408DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5409{
5410#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5411 return RTSemMutexRelease(pDevExt->mtxLdr);
5412#else
5413 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5414#endif
5415}
5416
5417
5418/**
5419 * Implements the service call request.
5420 *
5421 * @returns VBox status code.
5422 * @param pDevExt The device extension.
5423 * @param pSession The calling session.
5424 * @param pReq The request packet, valid.
5425 */
5426static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5427{
5428#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5429 int rc;
5430
5431 /*
5432 * Find the module first in the module referenced by the calling session.
5433 */
5434 rc = supdrvLdrLock(pDevExt);
5435 if (RT_SUCCESS(rc))
5436 {
5437 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5438 PSUPDRVLDRUSAGE pUsage;
5439
5440 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5441 if ( pUsage->pImage->pfnServiceReqHandler
5442 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5443 {
5444 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5445 break;
5446 }
5447 supdrvLdrUnlock(pDevExt);
5448
5449 if (pfnServiceReqHandler)
5450 {
5451 /*
5452 * Call it.
5453 */
5454 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5455 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5456 else
5457 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5458 }
5459 else
5460 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5461 }
5462
5463 /* log it */
5464 if ( RT_FAILURE(rc)
5465 && rc != VERR_INTERRUPTED
5466 && rc != VERR_TIMEOUT)
5467 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5468 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5469 else
5470 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5471 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5472 return rc;
5473#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5474 return VERR_NOT_IMPLEMENTED;
5475#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5476}
5477
5478
5479/**
5480 * Implements the logger settings request.
5481 *
5482 * @returns VBox status code.
5483 * @param pDevExt The device extension.
5484 * @param pSession The caller's session.
5485 * @param pReq The request.
5486 */
5487static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5488{
5489 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5490 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5491 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5492 PRTLOGGER pLogger = NULL;
5493 int rc;
5494
5495 /*
5496 * Some further validation.
5497 */
5498 switch (pReq->u.In.fWhat)
5499 {
5500 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5501 case SUPLOGGERSETTINGS_WHAT_CREATE:
5502 break;
5503
5504 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5505 if (*pszGroup || *pszFlags || *pszDest)
5506 return VERR_INVALID_PARAMETER;
5507 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5508 return VERR_ACCESS_DENIED;
5509 break;
5510
5511 default:
5512 return VERR_INTERNAL_ERROR;
5513 }
5514
5515 /*
5516 * Get the logger.
5517 */
5518 switch (pReq->u.In.fWhich)
5519 {
5520 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5521 pLogger = RTLogGetDefaultInstance();
5522 break;
5523
5524 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5525 pLogger = RTLogRelDefaultInstance();
5526 break;
5527
5528 default:
5529 return VERR_INTERNAL_ERROR;
5530 }
5531
5532 /*
5533 * Do the job.
5534 */
5535 switch (pReq->u.In.fWhat)
5536 {
5537 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5538 if (pLogger)
5539 {
5540 rc = RTLogFlags(pLogger, pszFlags);
5541 if (RT_SUCCESS(rc))
5542 rc = RTLogGroupSettings(pLogger, pszGroup);
5543 NOREF(pszDest);
5544 }
5545 else
5546 rc = VERR_NOT_FOUND;
5547 break;
5548
5549 case SUPLOGGERSETTINGS_WHAT_CREATE:
5550 {
5551 if (pLogger)
5552 rc = VERR_ALREADY_EXISTS;
5553 else
5554 {
5555 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5556
5557 rc = RTLogCreate(&pLogger,
5558 0 /* fFlags */,
5559 pszGroup,
5560 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5561 ? "VBOX_LOG"
5562 : "VBOX_RELEASE_LOG",
5563 RT_ELEMENTS(s_apszGroups),
5564 s_apszGroups,
5565 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5566 NULL);
5567 if (RT_SUCCESS(rc))
5568 {
5569 rc = RTLogFlags(pLogger, pszFlags);
5570 NOREF(pszDest);
5571 if (RT_SUCCESS(rc))
5572 {
5573 switch (pReq->u.In.fWhich)
5574 {
5575 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5576 pLogger = RTLogSetDefaultInstance(pLogger);
5577 break;
5578 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5579 pLogger = RTLogRelSetDefaultInstance(pLogger);
5580 break;
5581 }
5582 }
5583 RTLogDestroy(pLogger);
5584 }
5585 }
5586 break;
5587 }
5588
5589 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5590 switch (pReq->u.In.fWhich)
5591 {
5592 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5593 pLogger = RTLogSetDefaultInstance(NULL);
5594 break;
5595 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5596 pLogger = RTLogRelSetDefaultInstance(NULL);
5597 break;
5598 }
5599 rc = RTLogDestroy(pLogger);
5600 break;
5601
5602 default:
5603 {
5604 rc = VERR_INTERNAL_ERROR;
5605 break;
5606 }
5607 }
5608
5609 return rc;
5610}
5611
5612
5613/**
5614 * Implements the MSR prober operations.
5615 *
5616 * @returns VBox status code.
5617 * @param pDevExt The device extension.
5618 * @param pReq The request.
5619 */
5620static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5621{
5622#ifdef SUPDRV_WITH_MSR_PROBER
5623 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5624 int rc;
5625
5626 switch (pReq->u.In.enmOp)
5627 {
5628 case SUPMSRPROBEROP_READ:
5629 {
5630 uint64_t uValue;
5631 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5632 if (RT_SUCCESS(rc))
5633 {
5634 pReq->u.Out.uResults.Read.uValue = uValue;
5635 pReq->u.Out.uResults.Read.fGp = false;
5636 }
5637 else if (rc == VERR_ACCESS_DENIED)
5638 {
5639 pReq->u.Out.uResults.Read.uValue = 0;
5640 pReq->u.Out.uResults.Read.fGp = true;
5641 rc = VINF_SUCCESS;
5642 }
5643 break;
5644 }
5645
5646 case SUPMSRPROBEROP_WRITE:
5647 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5648 if (RT_SUCCESS(rc))
5649 pReq->u.Out.uResults.Write.fGp = false;
5650 else if (rc == VERR_ACCESS_DENIED)
5651 {
5652 pReq->u.Out.uResults.Write.fGp = true;
5653 rc = VINF_SUCCESS;
5654 }
5655 break;
5656
5657 case SUPMSRPROBEROP_MODIFY:
5658 case SUPMSRPROBEROP_MODIFY_FASTER:
5659 rc = supdrvOSMsrProberModify(idCpu, pReq);
5660 break;
5661
5662 default:
5663 return VERR_INVALID_FUNCTION;
5664 }
5665 return rc;
5666#else
5667 return VERR_NOT_IMPLEMENTED;
5668#endif
5669}
5670
5671
5672/**
5673 * Returns whether the host CPU sports an invariant TSC or not.
5674 *
5675 * @returns true if invariant TSC is supported, false otherwise.
5676 */
5677static bool supdrvIsInvariantTsc(void)
5678{
5679 static bool s_fQueried = false;
5680 static bool s_fIsInvariantTsc = false;
5681 if (!s_fQueried)
5682 {
5683 if (ASMHasCpuId())
5684 {
5685 uint32_t uEax, uEbx, uEcx, uEdx;
5686 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5687 if (uEax >= 0x80000007)
5688 {
5689 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5690 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5691 s_fIsInvariantTsc = true;
5692 }
5693 }
5694 s_fQueried = true;
5695 }
5696
5697 return s_fIsInvariantTsc;
5698}
5699
5700
5701#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5702/**
5703 * Switches the TSC-delta measurement thread into the butchered state.
5704 *
5705 * @returns VBox status code.
5706 * @param pDevExt Pointer to the device instance data.
5707 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5708 * @param pszFailed An error message to log.
5709 * @param rcFailed The error code to exit the thread with.
5710 */
5711static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5712{
5713 if (!fSpinlockHeld)
5714 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5715
5716 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5717 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5718 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5719 return rcFailed;
5720}
5721
5722
5723/**
5724 * The TSC-delta measurement thread.
5725 *
5726 * @returns VBox status code.
5727 * @param hThread The thread handle.
5728 * @param pvUser Opaque pointer to the device instance data.
5729 */
5730static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5731{
5732 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5733 static uint32_t cTimesMeasured = 0;
5734 uint32_t cConsecutiveTimeouts = 0;
5735 int rc = VERR_INTERNAL_ERROR_2;
5736 for (;;)
5737 {
5738 /*
5739 * Switch on the current state.
5740 */
5741 SUPDRVTSCDELTASTATE enmState;
5742 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5743 enmState = pDevExt->enmTscDeltaState;
5744 switch (enmState)
5745 {
5746 case kSupDrvTscDeltaState_Creating:
5747 {
5748 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5749 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5750 if (RT_FAILURE(rc))
5751 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5752 /* fall thru */
5753 }
5754
5755 case kSupDrvTscDeltaState_Listening:
5756 {
5757 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5758
5759 /* Simple adaptive timeout. */
5760 if (cConsecutiveTimeouts++ == 10)
5761 {
5762 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5763 pDevExt->cMsTscDeltaTimeout = 10;
5764 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5765 pDevExt->cMsTscDeltaTimeout = 100;
5766 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5767 pDevExt->cMsTscDeltaTimeout = 500;
5768 cConsecutiveTimeouts = 0;
5769 }
5770 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5771 if ( RT_FAILURE(rc)
5772 && rc != VERR_TIMEOUT)
5773 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5774 break;
5775 }
5776
5777 case kSupDrvTscDeltaState_WaitAndMeasure:
5778 {
5779 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5780 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5781 if (RT_FAILURE(rc))
5782 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5783 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5784 pDevExt->cMsTscDeltaTimeout = 1;
5785 RTThreadSleep(10);
5786 /* fall thru */
5787 }
5788
5789 case kSupDrvTscDeltaState_Measuring:
5790 {
5791 cConsecutiveTimeouts = 0;
5792 if (!cTimesMeasured++)
5793 {
5794 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5795 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5796 }
5797 else
5798 {
5799 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5800 unsigned iCpu;
5801
5802 if (cTimesMeasured == UINT32_MAX)
5803 cTimesMeasured = 1;
5804
5805 /* Measure TSC-deltas only for the CPUs that are in the set. */
5806 rc = VINF_SUCCESS;
5807 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5808 {
5809 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5810 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5811 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5812 {
5813 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5814 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5815 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5816 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5817 }
5818 }
5819 }
5820 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5821 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5822 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5823 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5824 pDevExt->rcTscDelta = rc;
5825 break;
5826 }
5827
5828 case kSupDrvTscDeltaState_Terminating:
5829 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5830 return VINF_SUCCESS;
5831
5832 case kSupDrvTscDeltaState_Butchered:
5833 default:
5834 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5835 }
5836 }
5837
5838 return rc;
5839}
5840
5841
5842/**
5843 * Waits for the TSC-delta measurement thread to respond to a state change.
5844 *
5845 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5846 * other error code on internal error.
5847 *
5848 * @param pThis Pointer to the grant service instance data.
5849 * @param enmCurState The current state.
5850 * @param enmNewState The new state we're waiting for it to enter.
5851 */
5852static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5853{
5854 /*
5855 * Wait a short while for the expected state transition.
5856 */
5857 int rc;
5858 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5859 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5860 if (pDevExt->enmTscDeltaState == enmNewState)
5861 {
5862 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5863 rc = VINF_SUCCESS;
5864 }
5865 else if (pDevExt->enmTscDeltaState == enmCurState)
5866 {
5867 /*
5868 * Wait longer if the state has not yet transitioned to the one we want.
5869 */
5870 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5871 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5872 if ( RT_SUCCESS(rc)
5873 || rc == VERR_TIMEOUT)
5874 {
5875 /*
5876 * Check the state whether we've succeeded.
5877 */
5878 SUPDRVTSCDELTASTATE enmState;
5879 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5880 enmState = pDevExt->enmTscDeltaState;
5881 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5882 if (enmState == enmNewState)
5883 rc = VINF_SUCCESS;
5884 else if (enmState == enmCurState)
5885 {
5886 rc = VERR_TIMEOUT;
5887 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5888 enmNewState));
5889 }
5890 else
5891 {
5892 rc = VERR_INTERNAL_ERROR;
5893 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5894 enmState, enmNewState));
5895 }
5896 }
5897 else
5898 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5899 }
5900 else
5901 {
5902 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5903 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5904 rc = VERR_INTERNAL_ERROR;
5905 }
5906
5907 return rc;
5908}
5909
5910
5911/**
5912 * Terminates the TSC-delta measurement thread.
5913 *
5914 * @param pDevExt Pointer to the device instance data.
5915 */
5916static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5917{
5918 int rc;
5919 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5920 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5921 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5922 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5923 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5924 if (RT_FAILURE(rc))
5925 {
5926 /* Signal a few more times before giving up. */
5927 int cTriesLeft = 5;
5928 while (--cTriesLeft > 0)
5929 {
5930 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5931 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5932 if (rc != VERR_TIMEOUT)
5933 break;
5934 }
5935 }
5936}
5937
5938
5939/**
5940 * Initializes and spawns the TSC-delta measurement thread.
5941 *
5942 * A thread is required for servicing re-measurement requests from events like
5943 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5944 * under all contexts on all OSs.
5945 *
5946 * @returns VBox status code.
5947 * @param pDevExt Pointer to the device instance data.
5948 *
5949 * @remarks Must only be called -after- initializing GIP and setting up MP
5950 * notifications!
5951 */
5952static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5953{
5954 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt->pGip));
5955
5956 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5957 if (RT_SUCCESS(rc))
5958 {
5959 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5960 if (RT_SUCCESS(rc))
5961 {
5962 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5963 pDevExt->cMsTscDeltaTimeout = 1;
5964 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5965 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5966 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5967 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5968 if (RT_SUCCESS(rc))
5969 {
5970 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5971 if (RT_SUCCESS(rc))
5972 {
5973 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5974 return rc;
5975 }
5976
5977 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5978 supdrvTscDeltaThreadTerminate(pDevExt);
5979 }
5980 else
5981 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5982 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5983 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5984 }
5985 else
5986 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5987 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5988 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5989 }
5990 else
5991 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5992
5993 return rc;
5994}
5995
5996
5997/**
5998 * Terminates the TSC-delta measurement thread and cleanup.
5999 *
6000 * @param pDevExt Pointer to the device instance data.
6001 */
6002static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
6003{
6004 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
6005 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6006 {
6007 supdrvTscDeltaThreadTerminate(pDevExt);
6008 }
6009
6010 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
6011 {
6012 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6013 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6014 }
6015
6016 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6017 {
6018 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6019 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6020 }
6021
6022 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
6023}
6024
6025
6026/**
6027 * Waits for TSC-delta measurements to be completed for all online CPUs.
6028 *
6029 * @returns VBox status code.
6030 * @param pDevExt Pointer to the device instance data.
6031 */
6032static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
6033{
6034 int cTriesLeft = 5;
6035 int cMsTotalWait;
6036 int cMsWaited = 0;
6037 int cMsWaitGranularity = 1;
6038
6039 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6040 AssertReturn(pGip, VERR_INVALID_POINTER);
6041
6042 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
6043 while (cTriesLeft-- > 0)
6044 {
6045 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
6046 return VINF_SUCCESS;
6047 RTThreadSleep(cMsWaitGranularity);
6048 cMsWaited += cMsWaitGranularity;
6049 if (cMsWaited >= cMsTotalWait)
6050 break;
6051 }
6052
6053 return VERR_TIMEOUT;
6054}
6055#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6056
6057
6058/**
6059 * Measures the TSC frequency of the system.
6060 *
6061 * Uses a busy-wait method for the async. case as it is intended to help push
6062 * the CPU frequency up, while for the invariant cases using a sleeping method.
6063 *
6064 * The TSC frequency can vary on systems which are not reported as invariant.
6065 * On such systems the object of this function is to find out what the nominal,
6066 * maximum TSC frequency under 'normal' CPU operation.
6067 *
6068 * @returns VBox status code.
6069 * @param pDevExt Pointer to the device instance.
6070 *
6071 * @remarks Must be called only -after- measuring the TSC deltas.
6072 */
6073static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6074{
6075 int cTriesLeft = 4;
6076 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6077
6078 /* Assert order. */
6079 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6080 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6081
6082 while (cTriesLeft-- > 0)
6083 {
6084 RTCCUINTREG uFlags;
6085 uint64_t u64NanoTsBefore;
6086 uint64_t u64NanoTsAfter;
6087 uint64_t u64TscBefore;
6088 uint64_t u64TscAfter;
6089 uint8_t idApicBefore;
6090 uint8_t idApicAfter;
6091
6092 /*
6093 * Synchronize with the host OS clock tick before reading the TSC.
6094 * Especially important on older Windows version where the granularity is terrible.
6095 */
6096 u64NanoTsBefore = RTTimeSystemNanoTS();
6097 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6098 ASMNopPause();
6099
6100 uFlags = ASMIntDisableFlags();
6101 idApicBefore = ASMGetApicId();
6102 u64TscBefore = ASMReadTSC();
6103 u64NanoTsBefore = RTTimeSystemNanoTS();
6104 ASMSetFlags(uFlags);
6105
6106 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6107 {
6108 /*
6109 * Sleep-wait since the TSC frequency is constant, it eases host load.
6110 * Shorter interval produces more variance in the frequency (esp. Windows).
6111 */
6112 RTThreadSleep(200);
6113 u64NanoTsAfter = RTTimeSystemNanoTS();
6114 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6115 ASMNopPause();
6116 u64NanoTsAfter = RTTimeSystemNanoTS();
6117 }
6118 else
6119 {
6120 /* Busy-wait keeping the frequency up and measure. */
6121 for (;;)
6122 {
6123 u64NanoTsAfter = RTTimeSystemNanoTS();
6124 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6125 ASMNopPause();
6126 else
6127 break;
6128 }
6129 }
6130
6131 uFlags = ASMIntDisableFlags();
6132 idApicAfter = ASMGetApicId();
6133 u64TscAfter = ASMReadTSC();
6134 ASMSetFlags(uFlags);
6135
6136 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6137 {
6138 int rc;
6139 bool fAppliedBefore;
6140 bool fAppliedAfter;
6141 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6142 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6143
6144 if ( !fAppliedBefore
6145 || !fAppliedAfter)
6146 {
6147#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6148 /*
6149 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6150 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6151 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6152 * proceed. This should be triggered just once if we're rather unlucky.
6153 */
6154 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6155 if (rc == VERR_TIMEOUT)
6156 {
6157 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6158 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6159 }
6160#else
6161 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6162 idApicBefore, idApicAfter, cTriesLeft);
6163#endif
6164 continue;
6165 }
6166 }
6167
6168 /*
6169 * Update GIP.
6170 */
6171 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6172 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6173 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6174 return VINF_SUCCESS;
6175 }
6176
6177 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6178}
6179
6180
6181/**
6182 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6183 *
6184 * @param pTimer The timer.
6185 * @param pvUser Opaque pointer to the device instance data.
6186 * @param iTick The timer tick.
6187 */
6188static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6189{
6190 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6191 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6192 bool fDeltaApplied = false;
6193 uint8_t idApic;
6194 uint64_t u64DeltaNanoTS;
6195 uint64_t u64DeltaTsc;
6196 uint64_t u64NanoTS;
6197 uint64_t u64Tsc;
6198 RTCCUINTREG uFlags;
6199
6200 /* Paranoia. */
6201 Assert(pGip);
6202 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6203
6204#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6205 u64NanoTS = RTTimeSystemNanoTS();
6206 while (RTTimeSystemNanoTS() == u64NanoTS)
6207 ASMNopPause();
6208#endif
6209 uFlags = ASMIntDisableFlags();
6210 idApic = ASMGetApicId();
6211 u64Tsc = ASMReadTSC();
6212 u64NanoTS = RTTimeSystemNanoTS();
6213 ASMSetFlags(uFlags);
6214 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6215 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6216 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6217 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6218
6219 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6220 && !fDeltaApplied)
6221 {
6222 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6223 GIP_TSC_REFINE_INTERVAL);
6224 return;
6225 }
6226
6227 /* Calculate the TSC frequency. */
6228 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6229 && u64DeltaNanoTS < UINT32_MAX)
6230 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6231 else
6232 {
6233#if 1
6234 RTUINT128U CpuHz, Tmp, Divisor;
6235 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6236 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6237 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6238 pGip->u64CpuHz = CpuHz.s.Lo;
6239#else
6240 /** @todo remove later */
6241 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6242 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6243 && u64DeltaNanoTS / 10 < UINT32_MAX)
6244 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, (uint32_t)(u64DeltaNanoTS / 10));
6245 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6246 && u64DeltaNanoTS / 100 < UINT32_MAX)
6247 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, (uint32_t)(u64DeltaNanoTS / 100));
6248 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6249 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6250 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, (uint32_t)(u64DeltaNanoTS / 1000));
6251 else /* Screw it. */
6252 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6253#endif
6254 }
6255
6256 /* Update rest of GIP. */
6257 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6258 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6259}
6260
6261
6262/**
6263 * Starts the TSC-frequency refinement phase asynchronously.
6264 *
6265 * @param pDevExt Pointer to the device instance data.
6266 */
6267static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6268{
6269 uint64_t u64NanoTS;
6270 RTCCUINTREG uFlags;
6271 uint8_t idApic;
6272 int rc;
6273 bool fDeltaApplied = false;
6274 PSUPGLOBALINFOPAGE pGip;
6275
6276 /* Validate. */
6277 Assert(pDevExt);
6278 Assert(pDevExt->pGip);
6279
6280 pGip = pDevExt->pGip;
6281 u64NanoTS = RTTimeSystemNanoTS();
6282 while (RTTimeSystemNanoTS() == u64NanoTS)
6283 ASMNopPause();
6284 uFlags = ASMIntDisableFlags();
6285 idApic = ASMGetApicId();
6286 pDevExt->u64TscAnchor = ASMReadTSC();
6287 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6288 ASMSetFlags(uFlags);
6289 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6290 SUPTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, &fDeltaApplied);
6291
6292#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6293 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6294 && !fDeltaApplied)
6295 {
6296 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6297 if (rc == VERR_TIMEOUT)
6298 {
6299 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6300 return;
6301 }
6302 }
6303#endif
6304
6305 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6306 if (RT_SUCCESS(rc))
6307 {
6308 /*
6309 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6310 * interval as small as possible while gaining the most consistent and accurate frequency
6311 * (compared to what the host OS might have measured).
6312 *
6313 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6314 * same TSC frequency whenever possible so we need to keep the interval short.
6315 */
6316 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6317 AssertRC(rc);
6318 }
6319 else
6320 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6321}
6322
6323
6324/**
6325 * Creates the GIP.
6326 *
6327 * @returns VBox status code.
6328 * @param pDevExt Instance data. GIP stuff may be updated.
6329 */
6330static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6331{
6332 PSUPGLOBALINFOPAGE pGip;
6333 RTHCPHYS HCPhysGip;
6334 uint32_t u32SystemResolution;
6335 uint32_t u32Interval;
6336 uint32_t u32MinInterval;
6337 uint32_t uMod;
6338 unsigned cCpus;
6339 int rc;
6340
6341 LogFlow(("supdrvGipCreate:\n"));
6342
6343 /* Assert order. */
6344 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6345 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6346 Assert(!pDevExt->pGipTimer);
6347
6348 /*
6349 * Check the CPU count.
6350 */
6351 cCpus = RTMpGetArraySize();
6352 if ( cCpus > RTCPUSET_MAX_CPUS
6353 || cCpus > 256 /* ApicId is used for the mappings */)
6354 {
6355 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6356 return VERR_TOO_MANY_CPUS;
6357 }
6358
6359 /*
6360 * Allocate a contiguous set of pages with a default kernel mapping.
6361 */
6362 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6363 if (RT_FAILURE(rc))
6364 {
6365 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6366 return rc;
6367 }
6368 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6369 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6370
6371 /*
6372 * Allocate the TSC-delta sync struct on a separate cache line.
6373 */
6374 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6375 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6376 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6377
6378 /*
6379 * Find a reasonable update interval and initialize the structure.
6380 */
6381 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6382 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6383 * See @bugref{6710}. */
6384 u32MinInterval = RT_NS_10MS;
6385 u32SystemResolution = RTTimerGetSystemGranularity();
6386 u32Interval = u32MinInterval;
6387 uMod = u32MinInterval % u32SystemResolution;
6388 if (uMod)
6389 u32Interval += u32SystemResolution - uMod;
6390
6391 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6392
6393 if (RT_UNLIKELY( pGip->fOsTscDeltasInSync
6394 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6395 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6396 {
6397 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6398 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6399 return VERR_INTERNAL_ERROR_2;
6400 }
6401
6402#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6403 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6404 {
6405 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6406 rc = supdrvTscDeltaThreadInit(pDevExt);
6407 }
6408#endif
6409 if (RT_SUCCESS(rc))
6410 {
6411 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6412 if (RT_SUCCESS(rc))
6413 {
6414 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6415 if (RT_SUCCESS(rc))
6416 {
6417 uint16_t iCpu;
6418#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6419 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6420 {
6421 /*
6422 * Measure the TSC deltas now that we have MP notifications.
6423 */
6424 int cTries = 5;
6425 do
6426 {
6427 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6428 if (rc != VERR_TRY_AGAIN)
6429 break;
6430 } while (--cTries > 0);
6431 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6432 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6433 }
6434 else
6435 {
6436 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6437 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
6438 }
6439#endif
6440 if (RT_SUCCESS(rc))
6441 {
6442 rc = supdrvGipMeasureTscFreq(pDevExt);
6443 if (RT_SUCCESS(rc))
6444 {
6445 /*
6446 * Create the timer.
6447 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6448 */
6449 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6450 {
6451 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6452 pDevExt);
6453 if (rc == VERR_NOT_SUPPORTED)
6454 {
6455 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6456 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6457 }
6458 }
6459 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6460 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6461 if (RT_SUCCESS(rc))
6462 {
6463 /*
6464 * We're good.
6465 */
6466 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6467 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6468
6469 g_pSUPGlobalInfoPage = pGip;
6470 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6471 supdrvRefineTscFreq(pDevExt);
6472 return VINF_SUCCESS;
6473 }
6474
6475 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6476 Assert(!pDevExt->pGipTimer);
6477 }
6478 else
6479 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6480 }
6481 else
6482 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6483 }
6484 else
6485 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6486 }
6487 else
6488 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6489 }
6490 else
6491 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6492
6493 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6494 return rc;
6495}
6496
6497
6498/**
6499 * Terminates the GIP.
6500 *
6501 * @param pDevExt Instance data. GIP stuff may be updated.
6502 */
6503static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6504{
6505 int rc;
6506#ifdef DEBUG_DARWIN_GIP
6507 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6508 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6509 pDevExt->pGipTimer, pDevExt->GipMemObj));
6510#endif
6511
6512 /*
6513 * Stop receiving MP notifications before tearing anything else down.
6514 */
6515 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6516
6517#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6518 /*
6519 * Terminate the TSC-delta measurement thread and resources.
6520 */
6521 supdrvTscDeltaTerm(pDevExt);
6522#endif
6523
6524 /*
6525 * Destroy the TSC-refinement one-shot timer.
6526 */
6527 if (pDevExt->pTscRefineTimer)
6528 {
6529 RTTimerDestroy(pDevExt->pTscRefineTimer);
6530 pDevExt->pTscRefineTimer = NULL;
6531 }
6532
6533 if (pDevExt->pvTscDeltaSync)
6534 {
6535 RTMemFree(pDevExt->pvTscDeltaSync);
6536 pDevExt->pTscDeltaSync = NULL;
6537 pDevExt->pvTscDeltaSync = NULL;
6538 }
6539
6540 /*
6541 * Invalid the GIP data.
6542 */
6543 if (pDevExt->pGip)
6544 {
6545 supdrvGipTerm(pDevExt->pGip);
6546 pDevExt->pGip = NULL;
6547 }
6548 g_pSUPGlobalInfoPage = NULL;
6549
6550 /*
6551 * Destroy the timer and free the GIP memory object.
6552 */
6553 if (pDevExt->pGipTimer)
6554 {
6555 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6556 pDevExt->pGipTimer = NULL;
6557 }
6558
6559 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6560 {
6561 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6562 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6563 }
6564
6565 /*
6566 * Finally, make sure we've release the system timer resolution request
6567 * if one actually succeeded and is still pending.
6568 */
6569 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6570}
6571
6572
6573/**
6574 * Timer callback function sync GIP mode.
6575 * @param pTimer The timer.
6576 * @param pvUser Opaque pointer to the device extension.
6577 * @param iTick The timer tick.
6578 */
6579static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6580{
6581 RTCCUINTREG uFlags;
6582 uint64_t u64TSC;
6583 uint64_t u64NanoTS;
6584 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6585 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6586
6587 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6588 u64TSC = ASMReadTSC();
6589 u64NanoTS = RTTimeSystemNanoTS();
6590
6591 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6592 {
6593 /*
6594 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6595 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6596 * affected a bit until we get proper TSC deltas than implementing options like
6597 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6598 *
6599 * The likely hood of this happening is really low. On Windows, Linux timers
6600 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6601 */
6602 Assert(!ASMIntAreEnabled());
6603 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6604 }
6605
6606 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6607
6608 ASMSetFlags(uFlags);
6609}
6610
6611
6612/**
6613 * Timer callback function for async GIP mode.
6614 * @param pTimer The timer.
6615 * @param pvUser Opaque pointer to the device extension.
6616 * @param iTick The timer tick.
6617 */
6618static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6619{
6620 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6621 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6622 RTCPUID idCpu = RTMpCpuId();
6623 uint64_t u64TSC = ASMReadTSC();
6624 uint64_t NanoTS = RTTimeSystemNanoTS();
6625
6626 /** @todo reset the transaction number and whatnot when iTick == 1. */
6627 if (pDevExt->idGipMaster == idCpu)
6628 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6629 else
6630 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6631
6632 ASMSetFlags(fOldFlags);
6633}
6634
6635
6636/**
6637 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6638 *
6639 * @returns Index of the CPU in the cache set.
6640 * @param pGip The GIP.
6641 * @param idCpu The CPU ID.
6642 */
6643static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6644{
6645 uint32_t i, cTries;
6646
6647 /*
6648 * ASSUMES that CPU IDs are constant.
6649 */
6650 for (i = 0; i < pGip->cCpus; i++)
6651 if (pGip->aCPUs[i].idCpu == idCpu)
6652 return i;
6653
6654 cTries = 0;
6655 do
6656 {
6657 for (i = 0; i < pGip->cCpus; i++)
6658 {
6659 bool fRc;
6660 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6661 if (fRc)
6662 return i;
6663 }
6664 } while (cTries++ < 32);
6665 AssertReleaseFailed();
6666 return i - 1;
6667}
6668
6669
6670/**
6671 * The calling CPU should be accounted as online, update GIP accordingly.
6672 *
6673 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6674 *
6675 * @param pDevExt The device extension.
6676 * @param idCpu The CPU ID.
6677 */
6678static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6679{
6680 int iCpuSet = 0;
6681 uint16_t idApic = UINT16_MAX;
6682 uint32_t i = 0;
6683 uint64_t u64NanoTS = 0;
6684 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6685
6686 AssertPtrReturnVoid(pGip);
6687 AssertRelease(idCpu == RTMpCpuId());
6688 Assert(pGip->cPossibleCpus == RTMpGetCount());
6689
6690 /*
6691 * Do this behind a spinlock with interrupts disabled as this can fire
6692 * on all CPUs simultaneously, see @bugref{6110}.
6693 */
6694 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6695
6696 /*
6697 * Update the globals.
6698 */
6699 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6700 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6701 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6702 if (iCpuSet >= 0)
6703 {
6704 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6705 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6706 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6707 }
6708
6709 /*
6710 * Update the entry.
6711 */
6712 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6713 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6714 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
6715 idApic = ASMGetApicId();
6716 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6717 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6718 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6719
6720 /*
6721 * Update the APIC ID and CPU set index mappings.
6722 */
6723 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6724 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6725
6726 /* Update the Mp online/offline counter. */
6727 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6728
6729#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6730 /*
6731 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6732 *
6733 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6734 * update the state and it'll get serviced when the thread's listening interval times out.
6735 */
6736 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6737 {
6738 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6739 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6740 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6741 {
6742 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6743 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6744 }
6745 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6746 }
6747#endif
6748
6749 /* commit it */
6750 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6751
6752 RTSpinlockRelease(pDevExt->hGipSpinlock);
6753}
6754
6755
6756/**
6757 * The CPU should be accounted as offline, update the GIP accordingly.
6758 *
6759 * This is used by supdrvGipMpEvent.
6760 *
6761 * @param pDevExt The device extension.
6762 * @param idCpu The CPU ID.
6763 */
6764static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6765{
6766 int iCpuSet;
6767 unsigned i;
6768
6769 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6770
6771 AssertPtrReturnVoid(pGip);
6772 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6773
6774 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6775 AssertReturnVoid(iCpuSet >= 0);
6776
6777 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6778 AssertReturnVoid(i < pGip->cCpus);
6779 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6780
6781 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6782 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6783
6784 /* Update the Mp online/offline counter. */
6785 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6786
6787 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6788 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
6789 {
6790 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6791 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6792 }
6793
6794 /* Reset the TSC delta, we will recalculate it lazily. */
6795 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6796 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6797
6798#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6799 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6800 if (supdrvIsInvariantTsc())
6801 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6802#endif
6803
6804 /* commit it */
6805 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6806
6807 RTSpinlockRelease(pDevExt->hGipSpinlock);
6808}
6809
6810
6811/**
6812 * Multiprocessor event notification callback.
6813 *
6814 * This is used to make sure that the GIP master gets passed on to
6815 * another CPU. It also updates the associated CPU data.
6816 *
6817 * @param enmEvent The event.
6818 * @param idCpu The cpu it applies to.
6819 * @param pvUser Pointer to the device extension.
6820 *
6821 * @remarks This function -must- fire on the newly online'd CPU for the
6822 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6823 * RTMPEVENT_OFFLINE case.
6824 */
6825static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6826{
6827 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6828 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6829
6830 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6831
6832 /*
6833 * Update the GIP CPU data.
6834 */
6835 if (pGip)
6836 {
6837 switch (enmEvent)
6838 {
6839 case RTMPEVENT_ONLINE:
6840 AssertRelease(idCpu == RTMpCpuId());
6841 supdrvGipMpEventOnline(pDevExt, idCpu);
6842 break;
6843 case RTMPEVENT_OFFLINE:
6844 supdrvGipMpEventOffline(pDevExt, idCpu);
6845 break;
6846 }
6847 }
6848
6849 /*
6850 * Make sure there is a master GIP.
6851 */
6852 if (enmEvent == RTMPEVENT_OFFLINE)
6853 {
6854 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6855 if (idGipMaster == idCpu)
6856 {
6857 /*
6858 * Find a new GIP master.
6859 */
6860 bool fIgnored;
6861 unsigned i;
6862 int64_t iTSCDelta;
6863 uint32_t idxNewGipMaster;
6864 RTCPUID idNewGipMaster = NIL_RTCPUID;
6865 RTCPUSET OnlineCpus;
6866 RTMpGetOnlineSet(&OnlineCpus);
6867
6868 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6869 {
6870 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6871 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6872 && idCurCpu != idGipMaster)
6873 {
6874 idNewGipMaster = idCurCpu;
6875 break;
6876 }
6877 }
6878
6879 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6880 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6881 NOREF(fIgnored);
6882
6883 /*
6884 * Adjust all the TSC deltas against the new GIP master.
6885 */
6886 if (pGip)
6887 {
6888 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6889 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6890 Assert(iTSCDelta != INT64_MAX);
6891 for (i = 0; i < pGip->cCpus; i++)
6892 {
6893 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6894 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6895 if (iWorkerDelta != INT64_MAX)
6896 iWorkerDelta -= iTSCDelta;
6897 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6898 }
6899 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6900 }
6901 }
6902 }
6903}
6904
6905
6906/**
6907 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6908 * compute the delta between them.
6909 *
6910 * @param idCpu The CPU we are current scheduled on.
6911 * @param pvUser1 Opaque pointer to the device instance data.
6912 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6913 *
6914 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6915 * read the TSC at exactly the same time on both the master and the worker
6916 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6917 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6918 * try to minimize the measurement error by computing the minimum read time
6919 * of the compare statement in the worker by taking TSC measurements across
6920 * it.
6921 *
6922 * We ignore the first few runs of the loop in order to prime the cache.
6923 * Also, be careful about using 'pause' instruction in critical busy-wait
6924 * loops in this code - it can cause undesired behaviour with
6925 * hyperthreading.
6926 *
6927 * It must be noted that the computed minimum read time is mostly to
6928 * eliminate huge deltas when the worker is too early and doesn't by itself
6929 * help produce more accurate deltas. We allow two times the computed
6930 * minimum as an arbibtrary acceptable threshold. Therefore, it is still
6931 * possible to get negative deltas where there are none when the worker is
6932 * earlier. As long as these occasional negative deltas are lower than the
6933 * time it takes to exit guest-context and the OS to reschedule EMT on a
6934 * different CPU we won't expose a TSC that jumped backwards. It is because
6935 * of the existence of the negative deltas we don't recompute the delta with
6936 * the master and worker interchanged to eliminate the remaining measurement
6937 * error.
6938 */
6939static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6940{
6941 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
6942 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6943 uint32_t *pidWorker = (uint32_t *)pvUser2;
6944 RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
6945 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6946 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6947 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6948 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6949 int cTriesLeft = 12;
6950
6951 if ( idCpu != idMaster
6952 && idCpu != *pidWorker)
6953 return;
6954
6955 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6956 with a timeout to avoid deadlocking the entire system. */
6957 if (!RTMpOnAllIsConcurrentSafe())
6958 {
6959 /** @todo This was introduced for Windows, but since Windows doesn't use this
6960 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6961 * see @bugref{6710} comment 81), eventually phase it out. */
6962 uint64_t uTscNow;
6963 uint64_t uTscStart;
6964 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6965
6966 ASMSerializeInstruction();
6967 uTscStart = ASMReadTSC();
6968 if (idCpu == idMaster)
6969 {
6970 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6971 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6972 {
6973 ASMSerializeInstruction();
6974 uTscNow = ASMReadTSC();
6975 if (uTscNow - uTscStart > cWaitTicks)
6976 {
6977 /* Set the worker delta to indicate failure, not the master. */
6978 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6979 return;
6980 }
6981
6982 ASMNopPause();
6983 }
6984 }
6985 else
6986 {
6987 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6988 {
6989 ASMSerializeInstruction();
6990 uTscNow = ASMReadTSC();
6991 if (uTscNow - uTscStart > cWaitTicks)
6992 {
6993 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6994 return;
6995 }
6996
6997 ASMNopPause();
6998 }
6999 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
7000 }
7001 }
7002
7003 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
7004 while (cTriesLeft-- > 0)
7005 {
7006 unsigned i;
7007 uint64_t uMinCmpReadTime = UINT64_MAX;
7008 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
7009 {
7010 if (idCpu == idMaster)
7011 {
7012 /*
7013 * The master.
7014 */
7015 RTCCUINTREG uFlags;
7016 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7017 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
7018
7019 /* Disable interrupts only in the master for as short a period
7020 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
7021 uFlags = ASMIntDisableFlags();
7022
7023 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
7024 ;
7025
7026 do
7027 {
7028 ASMSerializeInstruction();
7029 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
7030 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7031
7032 ASMSetFlags(uFlags);
7033
7034 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
7035 ;
7036
7037 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7038 {
7039 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
7040 {
7041 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
7042 if (iDelta < pGipCpuWorker->i64TSCDelta)
7043 pGipCpuWorker->i64TSCDelta = iDelta;
7044 }
7045 }
7046
7047 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
7048 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7049 }
7050 else
7051 {
7052 /*
7053 * The worker.
7054 */
7055 uint64_t uTscWorker;
7056 uint64_t uTscWorkerFlushed;
7057 uint64_t uCmpReadTime;
7058
7059 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
7060 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
7061 ;
7062 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7063 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
7064
7065 /*
7066 * Keep reading the TSC until we notice that the master has read his. Reading
7067 * the TSC -after- the master has updated the memory is way too late. We thus
7068 * compensate by trying to measure how long it took for the worker to notice
7069 * the memory flushed from the master.
7070 */
7071 do
7072 {
7073 ASMSerializeInstruction();
7074 uTscWorker = ASMReadTSC();
7075 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7076 ASMSerializeInstruction();
7077 uTscWorkerFlushed = ASMReadTSC();
7078
7079 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7080 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7081 {
7082 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
7083 if (uCmpReadTime < (uMinCmpReadTime << 1))
7084 {
7085 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7086 if (uCmpReadTime < uMinCmpReadTime)
7087 uMinCmpReadTime = uCmpReadTime;
7088 }
7089 else
7090 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7091 }
7092 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7093 {
7094 if (uCmpReadTime < uMinCmpReadTime)
7095 uMinCmpReadTime = uCmpReadTime;
7096 }
7097
7098 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7099 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7100 ASMNopPause();
7101 }
7102 }
7103
7104 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7105 break;
7106 }
7107}
7108
7109
7110/**
7111 * Clears TSC delta related variables.
7112 *
7113 * Clears all TSC samples as well as the delta synchronization variable on the
7114 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
7115 *
7116 * @param pDevExt Pointer to the device instance data.
7117 * @param fClearDeltas Whether the deltas are also to be cleared.
7118 */
7119DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7120{
7121 unsigned iCpu;
7122 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7123 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7124 {
7125 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7126 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7127 if (fClearDeltas)
7128 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7129 }
7130 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7131}
7132
7133
7134/**
7135 * Measures the TSC delta between the master GIP CPU and one specified worker
7136 * CPU.
7137 *
7138 * @returns VBox status code.
7139 * @param pDevExt Pointer to the device instance data.
7140 * @param idxWorker The index of the worker CPU from the GIP's array of
7141 * CPUs.
7142 *
7143 * @remarks This can be called with preemption disabled!
7144 */
7145static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7146{
7147 int rc;
7148 PSUPGLOBALINFOPAGE pGip;
7149 PSUPGIPCPU pGipCpuWorker;
7150 RTCPUID idMaster;
7151
7152 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7153 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7154
7155 pGip = pDevExt->pGip;
7156 idMaster = pDevExt->idGipMaster;
7157 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7158
7159 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7160
7161 if (pGipCpuWorker->idCpu == idMaster)
7162 {
7163 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7164 return VINF_SUCCESS;
7165 }
7166
7167 /* Set the master TSC as the initiator. */
7168 while (ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7169 {
7170 /*
7171 * Sleep here rather than spin as there is a parallel measurement
7172 * being executed and that can take a good while to be done.
7173 */
7174 RTThreadSleep(1);
7175 }
7176
7177 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7178 {
7179 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7180 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7181 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7182 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7183 if (RT_SUCCESS(rc))
7184 {
7185 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7186 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7187 }
7188 }
7189 else
7190 rc = VERR_CPU_OFFLINE;
7191
7192 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7193 return rc;
7194}
7195
7196
7197/**
7198 * Measures the TSC deltas between CPUs.
7199 *
7200 * @param pDevExt Pointer to the device instance data.
7201 * @param pidxMaster Where to store the index of the chosen master TSC if we
7202 * managed to determine the TSC deltas successfully.
7203 * Optional, can be NULL.
7204 *
7205 * @returns VBox status code.
7206 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7207 * idCpu, GIP's online CPU set which are populated in
7208 * supdrvGipInitOnCpu().
7209 */
7210static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7211{
7212 PSUPGIPCPU pGipCpuMaster;
7213 unsigned iCpu;
7214 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7215 uint32_t idxMaster = UINT32_MAX;
7216 int rc = VINF_SUCCESS;
7217 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7218 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7219
7220 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7221
7222 /*
7223 * Pick the first CPU online as the master TSC and make it the new GIP master based
7224 * on the APIC ID.
7225 *
7226 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7227 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7228 * master as this point since the sync/async timer isn't created yet.
7229 */
7230 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7231 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7232 {
7233 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7234 if (idxCpu != UINT16_MAX)
7235 {
7236 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7237 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7238 {
7239 idxMaster = idxCpu;
7240 pGipCpu->i64TSCDelta = 0;
7241 break;
7242 }
7243 }
7244 }
7245 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7246 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7247 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7248
7249 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7250 if (pGip->cOnlineCpus <= 1)
7251 {
7252 if (pidxMaster)
7253 *pidxMaster = idxMaster;
7254 return VINF_SUCCESS;
7255 }
7256
7257 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7258 {
7259 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7260 if ( iCpu != idxMaster
7261 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7262 {
7263 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7264 if (RT_FAILURE(rc))
7265 {
7266 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7267 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7268 break;
7269 }
7270
7271 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7272 {
7273 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7274 rc = VERR_TRY_AGAIN;
7275 break;
7276 }
7277 }
7278 }
7279
7280 if ( RT_SUCCESS(rc)
7281 && !pGipCpuMaster->i64TSCDelta
7282 && pidxMaster)
7283 {
7284 *pidxMaster = idxMaster;
7285 }
7286 return rc;
7287}
7288
7289
7290/**
7291 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7292 *
7293 * @param idCpu Ignored.
7294 * @param pvUser1 Where to put the TSC.
7295 * @param pvUser2 Ignored.
7296 */
7297static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7298{
7299 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7300}
7301
7302
7303/**
7304 * Determine if Async GIP mode is required because of TSC drift.
7305 *
7306 * When using the default/normal timer code it is essential that the time stamp counter
7307 * (TSC) runs never backwards, that is, a read operation to the counter should return
7308 * a bigger value than any previous read operation. This is guaranteed by the latest
7309 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7310 * case we have to choose the asynchronous timer mode.
7311 *
7312 * @param poffMin Pointer to the determined difference between different
7313 * cores (optional, can be NULL).
7314 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7315 */
7316static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7317{
7318 /*
7319 * Just iterate all the cpus 8 times and make sure that the TSC is
7320 * ever increasing. We don't bother taking TSC rollover into account.
7321 */
7322 int iEndCpu = RTMpGetArraySize();
7323 int iCpu;
7324 int cLoops = 8;
7325 bool fAsync = false;
7326 int rc = VINF_SUCCESS;
7327 uint64_t offMax = 0;
7328 uint64_t offMin = ~(uint64_t)0;
7329 uint64_t PrevTsc = ASMReadTSC();
7330
7331 while (cLoops-- > 0)
7332 {
7333 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7334 {
7335 uint64_t CurTsc;
7336 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7337 if (RT_SUCCESS(rc))
7338 {
7339 if (CurTsc <= PrevTsc)
7340 {
7341 fAsync = true;
7342 offMin = offMax = PrevTsc - CurTsc;
7343 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7344 iCpu, cLoops, CurTsc, PrevTsc));
7345 break;
7346 }
7347
7348 /* Gather statistics (except the first time). */
7349 if (iCpu != 0 || cLoops != 7)
7350 {
7351 uint64_t off = CurTsc - PrevTsc;
7352 if (off < offMin)
7353 offMin = off;
7354 if (off > offMax)
7355 offMax = off;
7356 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7357 }
7358
7359 /* Next */
7360 PrevTsc = CurTsc;
7361 }
7362 else if (rc == VERR_NOT_SUPPORTED)
7363 break;
7364 else
7365 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7366 }
7367
7368 /* broke out of the loop. */
7369 if (iCpu < iEndCpu)
7370 break;
7371 }
7372
7373 if (poffMin)
7374 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7375 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7376 fAsync, iEndCpu, rc, offMin, offMax));
7377#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7378 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7379#endif
7380 return fAsync;
7381}
7382
7383
7384/**
7385 * Determine the GIP TSC mode.
7386 *
7387 * @returns The most suitable TSC mode.
7388 * @param pDevExt Pointer to the device instance data.
7389 */
7390static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7391{
7392 /* Trust CPUs that declare their TSC to be invariant. */
7393#if 0 /** @todo this cannot be enabled until Michal's AMD laptop with insane deltas are working. */
7394 if (supdrvIsInvariantTsc())
7395 return SUPGIPMODE_INVARIANT_TSC;
7396#endif
7397
7398 /*
7399 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7400 * (1) There might be a skew between the CPU, so that cpu0
7401 * returns a TSC that is slightly different from cpu1.
7402 * (2) Power management (and other things) may cause the TSC
7403 * to run at a non-constant speed, and cause the speed
7404 * to be different on the cpus. This will result in (1).
7405 *
7406 * So, on SMP systems we'll have to select the ASYNC update method
7407 * if there are symptoms of these problems.
7408 */
7409 if (RTMpGetCount() > 1)
7410 {
7411 uint32_t uEAX, uEBX, uECX, uEDX;
7412 uint64_t u64DiffCoresIgnored;
7413
7414 /* Permit the user and/or the OS specific bits to force async mode. */
7415 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7416 return SUPGIPMODE_ASYNC_TSC;
7417
7418 /* Try check for current differences between the cpus. */
7419 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7420 return SUPGIPMODE_ASYNC_TSC;
7421
7422 /*
7423 * If the CPU supports power management and is an AMD one we
7424 * won't trust it unless it has the TscInvariant bit is set.
7425 */
7426 /** @todo this is now redundant. remove later. */
7427 /* Check for "AuthenticAMD" */
7428 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7429 if ( uEAX >= 1
7430 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7431 {
7432 /* Check for APM support and that TscInvariant is cleared. */
7433 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7434 if (uEAX >= 0x80000007)
7435 {
7436 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7437 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7438 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7439 return SUPGIPMODE_ASYNC_TSC;
7440 }
7441 }
7442 }
7443
7444 /** @todo later remove this when the above todo with AMD laptop is done (i.e.
7445 * TSC deltas handled everywhere). */
7446 if (supdrvIsInvariantTsc())
7447 return SUPGIPMODE_INVARIANT_TSC;
7448 return SUPGIPMODE_SYNC_TSC;
7449}
7450
7451
7452/**
7453 * Initializes per-CPU GIP information.
7454 *
7455 * @param pDevExt Pointer to the device instance data.
7456 * @param pGip Pointer to the GIP.
7457 * @param pCpu Pointer to which GIP CPU to initalize.
7458 * @param u64NanoTS The current nanosecond timestamp.
7459 */
7460static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7461{
7462 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7463 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7464 pCpu->u32TransactionId = 2;
7465 pCpu->u64NanoTS = u64NanoTS;
7466 pCpu->u64TSC = ASMReadTSC();
7467 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7468 pCpu->i64TSCDelta = GIP_ARE_TSC_DELTAS_APPLICABLE(pGip) ? INT64_MAX : 0;
7469
7470 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7471 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7472 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7473 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7474
7475 /*
7476 * We don't know the following values until we've executed updates.
7477 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7478 * the 2nd timer callout.
7479 */
7480 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7481 pCpu->u32UpdateIntervalTSC
7482 = pCpu->au32TSCHistory[0]
7483 = pCpu->au32TSCHistory[1]
7484 = pCpu->au32TSCHistory[2]
7485 = pCpu->au32TSCHistory[3]
7486 = pCpu->au32TSCHistory[4]
7487 = pCpu->au32TSCHistory[5]
7488 = pCpu->au32TSCHistory[6]
7489 = pCpu->au32TSCHistory[7]
7490 = (uint32_t)(_4G / pGip->u32UpdateHz);
7491}
7492
7493
7494/**
7495 * Initializes the GIP data.
7496 *
7497 * @param pDevExt Pointer to the device instance data.
7498 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7499 * @param HCPhys The physical address of the GIP.
7500 * @param u64NanoTS The current nanosecond timestamp.
7501 * @param uUpdateHz The update frequency.
7502 * @param uUpdateIntervalNS The update interval in nanoseconds.
7503 * @param cCpus The CPU count.
7504 */
7505static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7506 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7507{
7508 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7509 unsigned i;
7510#ifdef DEBUG_DARWIN_GIP
7511 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7512#else
7513 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7514#endif
7515
7516 /*
7517 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7518 * We only bother with TSC-deltas on invariant CPUs for now.
7519 */
7520 pGip->fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7521
7522 /*
7523 * Initialize the structure.
7524 */
7525 memset(pGip, 0, cbGip);
7526 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7527 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7528 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7529 pGip->cCpus = (uint16_t)cCpus;
7530 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7531 pGip->u32UpdateHz = uUpdateHz;
7532 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7533 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7534 RTCpuSetEmpty(&pGip->PresentCpuSet);
7535 RTMpGetSet(&pGip->PossibleCpuSet);
7536 pGip->cOnlineCpus = RTMpGetOnlineCount();
7537 pGip->cPresentCpus = RTMpGetPresentCount();
7538 pGip->cPossibleCpus = RTMpGetCount();
7539 pGip->idCpuMax = RTMpGetMaxCpuId();
7540 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7541 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7542 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7543 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7544
7545 for (i = 0; i < cCpus; i++)
7546 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7547
7548 /*
7549 * Link it to the device extension.
7550 */
7551 pDevExt->pGip = pGip;
7552 pDevExt->HCPhysGip = HCPhys;
7553 pDevExt->cGipUsers = 0;
7554}
7555
7556
7557/**
7558 * On CPU initialization callback for RTMpOnAll.
7559 *
7560 * @param idCpu The CPU ID.
7561 * @param pvUser1 The device extension.
7562 * @param pvUser2 The GIP.
7563 */
7564static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7565{
7566 /* This is good enough, even though it will update some of the globals a
7567 bit to much. */
7568 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7569}
7570
7571
7572/**
7573 * Invalidates the GIP data upon termination.
7574 *
7575 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7576 */
7577static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7578{
7579 unsigned i;
7580 pGip->u32Magic = 0;
7581 for (i = 0; i < pGip->cCpus; i++)
7582 {
7583 pGip->aCPUs[i].u64NanoTS = 0;
7584 pGip->aCPUs[i].u64TSC = 0;
7585 pGip->aCPUs[i].iTSCHistoryHead = 0;
7586 pGip->aCPUs[i].u64TSCSample = 0;
7587 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7588 }
7589}
7590
7591
7592/**
7593 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
7594 * updates all the per cpu data except the transaction id.
7595 *
7596 * @param pDevExt The device extension.
7597 * @param pGipCpu Pointer to the per cpu data.
7598 * @param u64NanoTS The current time stamp.
7599 * @param u64TSC The current TSC.
7600 * @param iTick The current timer tick.
7601 *
7602 * @remarks Can be called with interrupts disabled!
7603 */
7604static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7605{
7606 uint64_t u64TSCDelta;
7607 uint32_t u32UpdateIntervalTSC;
7608 uint32_t u32UpdateIntervalTSCSlack;
7609 unsigned iTSCHistoryHead;
7610 uint64_t u64CpuHz;
7611 uint32_t u32TransactionId;
7612
7613 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7614 AssertPtrReturnVoid(pGip);
7615
7616 /* Delta between this and the previous update. */
7617 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7618
7619 /*
7620 * Update the NanoTS.
7621 */
7622 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7623
7624 /*
7625 * Calc TSC delta.
7626 */
7627 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7628 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7629
7630 /* We don't need to keep realculating the frequency when it's invariant. */
7631 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
7632 return;
7633
7634 if (u64TSCDelta >> 32)
7635 {
7636 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7637 pGipCpu->cErrors++;
7638 }
7639
7640 /*
7641 * On the 2nd and 3rd callout, reset the history with the current TSC
7642 * interval since the values entered by supdrvGipInit are totally off.
7643 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7644 * better, while the 3rd should be most reliable.
7645 */
7646 u32TransactionId = pGipCpu->u32TransactionId;
7647 if (RT_UNLIKELY( ( u32TransactionId == 5
7648 || u32TransactionId == 7)
7649 && ( iTick == 2
7650 || iTick == 3) ))
7651 {
7652 unsigned i;
7653 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7654 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7655 }
7656
7657 /*
7658 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
7659 * Wait until we have at least one full history since the above history reset. The
7660 * assumption is that the majority of the previous history values will be tolerable.
7661 * See @bugref{6710} comment #67.
7662 */
7663 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
7664 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7665 {
7666 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
7667 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
7668 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
7669 {
7670 uint32_t u32;
7671 u32 = pGipCpu->au32TSCHistory[0];
7672 u32 += pGipCpu->au32TSCHistory[1];
7673 u32 += pGipCpu->au32TSCHistory[2];
7674 u32 += pGipCpu->au32TSCHistory[3];
7675 u32 >>= 2;
7676 u64TSCDelta = pGipCpu->au32TSCHistory[4];
7677 u64TSCDelta += pGipCpu->au32TSCHistory[5];
7678 u64TSCDelta += pGipCpu->au32TSCHistory[6];
7679 u64TSCDelta += pGipCpu->au32TSCHistory[7];
7680 u64TSCDelta >>= 2;
7681 u64TSCDelta += u32;
7682 u64TSCDelta >>= 1;
7683 }
7684 }
7685
7686
7687 /*
7688 * TSC History.
7689 */
7690 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7691 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7692 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7693 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7694
7695 /*
7696 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7697 *
7698 * On Windows, we have an occasional (but recurring) sour value that messed up
7699 * the history but taking only 1 interval reduces the precision overall.
7700 * However, this problem existed before the invariant mode was introduced.
7701 */
7702 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7703 || pGip->u32UpdateHz >= 1000)
7704 {
7705 uint32_t u32;
7706 u32 = pGipCpu->au32TSCHistory[0];
7707 u32 += pGipCpu->au32TSCHistory[1];
7708 u32 += pGipCpu->au32TSCHistory[2];
7709 u32 += pGipCpu->au32TSCHistory[3];
7710 u32 >>= 2;
7711 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7712 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7713 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7714 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7715 u32UpdateIntervalTSC >>= 2;
7716 u32UpdateIntervalTSC += u32;
7717 u32UpdateIntervalTSC >>= 1;
7718
7719 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7720 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7721 }
7722 else if (pGip->u32UpdateHz >= 90)
7723 {
7724 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7725 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7726 u32UpdateIntervalTSC >>= 1;
7727
7728 /* value chosen on a 2GHz thinkpad running windows */
7729 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7730 }
7731 else
7732 {
7733 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7734
7735 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7736 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7737 }
7738 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7739
7740 /*
7741 * CpuHz.
7742 */
7743 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7744 u64CpuHz /= pGip->u32UpdateIntervalNS;
7745 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7746}
7747
7748
7749/**
7750 * Updates the GIP.
7751 *
7752 * @param pDevExt The device extension.
7753 * @param u64NanoTS The current nanosecond timesamp.
7754 * @param u64TSC The current TSC timesamp.
7755 * @param idCpu The CPU ID.
7756 * @param iTick The current timer tick.
7757 *
7758 * @remarks Can be called with interrupts disabled!
7759 */
7760static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7761{
7762 /*
7763 * Determine the relevant CPU data.
7764 */
7765 PSUPGIPCPU pGipCpu;
7766 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7767 AssertPtrReturnVoid(pGip);
7768
7769 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7770 pGipCpu = &pGip->aCPUs[0];
7771 else
7772 {
7773 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7774 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7775 return;
7776 pGipCpu = &pGip->aCPUs[iCpu];
7777 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7778 return;
7779 }
7780
7781 /*
7782 * Start update transaction.
7783 */
7784 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7785 {
7786 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7787 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7788 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7789 pGipCpu->cErrors++;
7790 return;
7791 }
7792
7793 /*
7794 * Recalc the update frequency every 0x800th time.
7795 */
7796 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7797 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7798 {
7799 if (pGip->u64NanoTSLastUpdateHz)
7800 {
7801#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7802 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7803 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7804 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7805 {
7806 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7807 * calculation on non-invariant hosts if it changes the history decision
7808 * taken in supdrvGipDoUpdateCpu(). */
7809 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7810 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7811 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7812 }
7813#endif
7814 }
7815 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7816 }
7817
7818 /*
7819 * Update the data.
7820 */
7821 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7822
7823 /*
7824 * Complete transaction.
7825 */
7826 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7827}
7828
7829
7830/**
7831 * Updates the per cpu GIP data for the calling cpu.
7832 *
7833 * @param pDevExt The device extension.
7834 * @param u64NanoTS The current nanosecond timesamp.
7835 * @param u64TSC The current TSC timesamp.
7836 * @param idCpu The CPU ID.
7837 * @param idApic The APIC id for the CPU index.
7838 * @param iTick The current timer tick.
7839 *
7840 * @remarks Can be called with interrupts disabled!
7841 */
7842static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7843 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7844{
7845 uint32_t iCpu;
7846 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7847
7848 /*
7849 * Avoid a potential race when a CPU online notification doesn't fire on
7850 * the onlined CPU but the tick creeps in before the event notification is
7851 * run.
7852 */
7853 if (RT_UNLIKELY(iTick == 1))
7854 {
7855 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7856 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7857 supdrvGipMpEventOnline(pDevExt, idCpu);
7858 }
7859
7860 iCpu = pGip->aiCpuFromApicId[idApic];
7861 if (RT_LIKELY(iCpu < pGip->cCpus))
7862 {
7863 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7864 if (pGipCpu->idCpu == idCpu)
7865 {
7866 /*
7867 * Start update transaction.
7868 */
7869 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7870 {
7871 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7872 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7873 pGipCpu->cErrors++;
7874 return;
7875 }
7876
7877 /*
7878 * Update the data.
7879 */
7880 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7881
7882 /*
7883 * Complete transaction.
7884 */
7885 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7886 }
7887 }
7888}
7889
7890
7891/**
7892 * Resume built-in keyboard on MacBook Air and Pro hosts.
7893 * If there is no built-in keyboard device, return success anyway.
7894 *
7895 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7896 */
7897static int supdrvIOCtl_ResumeSuspendedKbds(void)
7898{
7899#if defined(RT_OS_DARWIN)
7900 return supdrvDarwinResumeSuspendedKbds();
7901#else
7902 return VERR_NOT_IMPLEMENTED;
7903#endif
7904}
7905
7906
7907/**
7908 * Service a TSC-delta measurement request.
7909 *
7910 * @returns VBox status code.
7911 * @param pDevExt Pointer to the device instance data.
7912 * @param pReq Pointer to the TSC-delta measurement request.
7913 */
7914static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7915{
7916 PSUPGLOBALINFOPAGE pGip;
7917 RTCPUID idCpuWorker;
7918 int rc = VERR_CPU_NOT_FOUND;
7919 int16_t cTries;
7920 RTMSINTERVAL cMsWaitRetry;
7921 uint16_t iCpu;
7922
7923 /*
7924 * Validate.
7925 */
7926 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7927 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7928 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7929 idCpuWorker = pReq->u.In.idCpu;
7930 if (idCpuWorker == NIL_RTCPUID)
7931 return VERR_INVALID_CPU_ID;
7932
7933 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7934 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7935 pGip = pDevExt->pGip;
7936
7937 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
7938 return VINF_SUCCESS;
7939
7940 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7941 {
7942 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7943 if (pGipCpuWorker->idCpu == idCpuWorker)
7944 {
7945 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7946 && !pReq->u.In.fForce)
7947 return VINF_SUCCESS;
7948
7949#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7950 if (pReq->u.In.fAsync)
7951 {
7952 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7953 * to pass those options to the thread somehow and implement it in the
7954 * thread. Check if anyone uses/needs fAsync before implementing this. */
7955 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7956 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7957 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7958 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7959 {
7960 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7961 }
7962 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7963 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7964 return VINF_SUCCESS;
7965 }
7966#endif
7967
7968 while (cTries-- > 0)
7969 {
7970 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7971 if (RT_SUCCESS(rc))
7972 {
7973 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7974 break;
7975 }
7976
7977 if (cMsWaitRetry)
7978 RTThreadSleep(cMsWaitRetry);
7979 }
7980
7981 break;
7982 }
7983 }
7984 return rc;
7985}
7986
7987
7988/**
7989 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7990 *
7991 * @returns VBox status code.
7992 * @param pDevExt Pointer to the device instance data.
7993 * @param pReq Pointer to the TSC-read request.
7994 */
7995static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7996{
7997 uint64_t uTsc;
7998 uint16_t idApic;
7999 int16_t cTries;
8000 PSUPGLOBALINFOPAGE pGip;
8001 int rc;
8002
8003 /*
8004 * Validate.
8005 */
8006 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
8007 AssertReturn(pReq, VERR_INVALID_PARAMETER);
8008 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
8009
8010 pGip = pDevExt->pGip;
8011 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
8012
8013 cTries = 4;
8014 while (cTries-- > 0)
8015 {
8016 int rc2;
8017 uint16_t iCpu;
8018
8019 rc = SUPGetTsc(&uTsc, &idApic);
8020 if (RT_SUCCESS(rc))
8021 {
8022 pReq->u.Out.u64AdjustedTsc = uTsc;
8023 pReq->u.Out.idApic = idApic;
8024 return VINF_SUCCESS;
8025 }
8026
8027 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
8028 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
8029 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
8030 iCpu = pGip->aiCpuFromApicId[idApic];
8031 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
8032
8033 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
8034 if (RT_SUCCESS(rc2))
8035 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
8036 }
8037
8038 return rc;
8039}
8040
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette