VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53464

Last change on this file since 53464 was 53464, checked in by vboxsync, 10 years ago

HostDrivers/Support: Move certain globals into the device extension, addressing todo in r97183.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 295.7 KB
Line 
1/* $Id: SUPDrv.c 53464 2014-12-05 14:52:21Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
92/** The frequency by which we recalculate the u32UpdateHz and
93 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
94 *
95 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
96 */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/** A reserved TSC value used for synchronization as well as measurement of
100 * TSC deltas. */
101#define GIP_TSC_DELTA_RSVD UINT64_MAX
102/** The number of TSC delta measurement loops in total (includes primer and
103 * read-time loops). */
104#define GIP_TSC_DELTA_LOOPS 96
105/** The number of cache primer loops. */
106#define GIP_TSC_DELTA_PRIMER_LOOPS 4
107/** The number of loops until we keep computing the minimum read time. */
108#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
109/** Stop measurement of TSC delta. */
110#define GIP_TSC_DELTA_SYNC_STOP 0
111/** Start measurement of TSC delta. */
112#define GIP_TSC_DELTA_SYNC_START 1
113/** Worker thread is ready for reading the TSC. */
114#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
115/** Worker thread is done updating TSC delta info. */
116#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
117/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
118 * with a timeout. */
119#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
120/** When IPRT isn't concurrent safe: Worker is ready after waiting for
121 * master with a timeout. */
122#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
123/** The TSC-refinement interval in seconds. */
124#define GIP_TSC_REFINE_INTERVAL 5
125
126AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
128
129/** @def VBOX_SVN_REV
130 * The makefile should define this if it can. */
131#ifndef VBOX_SVN_REV
132# define VBOX_SVN_REV 0
133#endif
134
135#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
136# define DO_NOT_START_GIP
137#endif
138
/** @def GIP_ARE_TSC_DELTAS_APPLICABLE
 * Evaluates to true when TSC deltas have to be applied for the given device
 * extension: the GIP is in invariant TSC mode and fOsTscDeltasInSync is clear.
 *
 * @param   a_pDevExt   The device extension (its pGip member is dereferenced).
 */
#define GIP_ARE_TSC_DELTAS_APPLICABLE(a_pDevExt) \
    (   (a_pDevExt)->pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC \
     && !((a_pDevExt)->fOsTscDeltasInSync) )
141
142
143/*******************************************************************************
144* Internal Functions *
145*******************************************************************************/
146static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
147static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
148static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
149static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
150static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
151static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
152static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
153static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
154static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
155static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
156static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
157static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
158static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
159DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
160DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
161static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
162static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
163static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
164static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
165static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
166static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
167static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
168static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
169static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
170static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
171static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
172 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
173static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
174static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
175static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
176static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
177 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
178static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
179static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
180static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
181static int supdrvIOCtl_ResumeSuspendedKbds(void);
182
183
184/*******************************************************************************
185* Global Variables *
186*******************************************************************************/
187DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
188
189
190/**
191 * Array of the R0 SUP API.
192 */
193static SUPFUNC g_aFunctions[] =
194{
195/* SED: START */
196 /* name function */
197 /* Entries with absolute addresses determined at runtime, fixup
198 code makes ugly ASSUMPTIONS about the order here: */
199 { "SUPR0AbsIs64bit", (void *)0 },
200 { "SUPR0Abs64bitKernelCS", (void *)0 },
201 { "SUPR0Abs64bitKernelSS", (void *)0 },
202 { "SUPR0Abs64bitKernelDS", (void *)0 },
203 { "SUPR0AbsKernelCS", (void *)0 },
204 { "SUPR0AbsKernelSS", (void *)0 },
205 { "SUPR0AbsKernelDS", (void *)0 },
206 { "SUPR0AbsKernelES", (void *)0 },
207 { "SUPR0AbsKernelFS", (void *)0 },
208 { "SUPR0AbsKernelGS", (void *)0 },
209 /* Normal function pointers: */
210 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
211 { "SUPGetGIP", (void *)SUPGetGIP },
212 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
213 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
214 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
215 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
216 { "SUPR0ContFree", (void *)SUPR0ContFree },
217 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
218 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
219 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
220 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
221 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
222 { "SUPR0LockMem", (void *)SUPR0LockMem },
223 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
224 { "SUPR0LowFree", (void *)SUPR0LowFree },
225 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
226 { "SUPR0MemFree", (void *)SUPR0MemFree },
227 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
228 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
229 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
230 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
231 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
232 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
233 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
234 { "SUPR0PageFree", (void *)SUPR0PageFree },
235 { "SUPR0Printf", (void *)SUPR0Printf },
236 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
237 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
238 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
239 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
240 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
241 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
242 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
243 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
244 { "SUPSemEventClose", (void *)SUPSemEventClose },
245 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
246 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
247 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
248 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
249 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
250 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
251 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
252 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
253 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
254 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
255 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
256 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
257 { "SUPSemEventWait", (void *)SUPSemEventWait },
258 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
259 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
260 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
261
262 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
263 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
264 { "RTAssertMsg1", (void *)RTAssertMsg1 },
265 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
266 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
267 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
268 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
269 { "RTCrc32", (void *)RTCrc32 },
270 { "RTCrc32Finish", (void *)RTCrc32Finish },
271 { "RTCrc32Process", (void *)RTCrc32Process },
272 { "RTCrc32Start", (void *)RTCrc32Start },
273 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
274 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
275 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
276 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
277 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
278 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
279 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
280 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
281 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
282 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
283 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
284 { "RTLogPrintfV", (void *)RTLogPrintfV },
285 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
286 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
287 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
288 { "RTMemAllocTag", (void *)RTMemAllocTag },
289 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
290 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
291 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
292 { "RTMemDupExTag", (void *)RTMemDupExTag },
293 { "RTMemDupTag", (void *)RTMemDupTag },
294 { "RTMemFree", (void *)RTMemFree },
295 { "RTMemFreeEx", (void *)RTMemFreeEx },
296 { "RTMemReallocTag", (void *)RTMemReallocTag },
297 { "RTMpCpuId", (void *)RTMpCpuId },
298 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
299 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
300 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
301 { "RTMpGetCount", (void *)RTMpGetCount },
302 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
303 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
304 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
305 { "RTMpGetSet", (void *)RTMpGetSet },
306 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
307 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
308 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
309 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
310 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
311 { "RTMpOnAll", (void *)RTMpOnAll },
312 { "RTMpOnOthers", (void *)RTMpOnOthers },
313 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
314 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
315 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
316 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
317 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
318 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
319 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
320 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
321 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
322 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
323 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
324 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
325 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
326 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
327 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
328 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
329 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
330 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
331 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
332 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
333 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
334 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
335 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
336 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
337 { "RTProcSelf", (void *)RTProcSelf },
338 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
339 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
340 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
341 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
342 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
343 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
344 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
345 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
346 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
347 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
348 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
349 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
350 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
351 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
352 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
353 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
354 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
355 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
356 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
357 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
358 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
359 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
360 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
361 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
362 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
363 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
364 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
365 { "RTSemEventCreate", (void *)RTSemEventCreate },
366 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
367 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
368 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
369 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
370 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
371 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
372 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
373 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
374 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
375 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
376 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
377 { "RTSemEventSignal", (void *)RTSemEventSignal },
378 { "RTSemEventWait", (void *)RTSemEventWait },
379 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
380 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
381 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
382 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
383 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
384 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
385 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
386 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
387 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
388 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
389 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
390 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
391 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
392 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
393 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
394 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
395 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
396 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
397 { "RTStrCopy", (void *)RTStrCopy },
398 { "RTStrDupTag", (void *)RTStrDupTag },
399 { "RTStrFormat", (void *)RTStrFormat },
400 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
401 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
402 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
403 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
404 { "RTStrFormatV", (void *)RTStrFormatV },
405 { "RTStrFree", (void *)RTStrFree },
406 { "RTStrNCmp", (void *)RTStrNCmp },
407 { "RTStrPrintf", (void *)RTStrPrintf },
408 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
409 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
410 { "RTStrPrintfV", (void *)RTStrPrintfV },
411 { "RTThreadCreate", (void *)RTThreadCreate },
412 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
413 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
414 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
415 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
416 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
417 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
418 { "RTThreadGetName", (void *)RTThreadGetName },
419 { "RTThreadGetNative", (void *)RTThreadGetNative },
420 { "RTThreadGetType", (void *)RTThreadGetType },
421 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
422 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
423 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
424 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
425 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
426 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
427 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
428 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
429 { "RTThreadSelf", (void *)RTThreadSelf },
430 { "RTThreadSelfName", (void *)RTThreadSelfName },
431 { "RTThreadSleep", (void *)RTThreadSleep },
432 { "RTThreadUserReset", (void *)RTThreadUserReset },
433 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
434 { "RTThreadUserWait", (void *)RTThreadUserWait },
435 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
436 { "RTThreadWait", (void *)RTThreadWait },
437 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
438 { "RTThreadYield", (void *)RTThreadYield },
439 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
440 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
441 { "RTTimeNow", (void *)RTTimeNow },
442 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
443 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
444 { "RTTimerCreate", (void *)RTTimerCreate },
445 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
446 { "RTTimerDestroy", (void *)RTTimerDestroy },
447 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
448 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
449 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
450 { "RTTimerStart", (void *)RTTimerStart },
451 { "RTTimerStop", (void *)RTTimerStop },
452 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
453 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
454 { "RTUuidCompare", (void *)RTUuidCompare },
455 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
456 { "RTUuidFromStr", (void *)RTUuidFromStr },
457/* SED: END */
458};
459
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
/**
 * References to additional IPRT functions, listed per consuming module.
 *
 * Taking their addresses here drags the rest of IPRT into this module, since
 * it is shared with the other kernel modules on darwin.  The list is NULL
 * terminated.
 */
PFNRT g_apfnVBoxDrvIPRTDeps[] =
{
    /* VBoxNetAdp */
    (PFNRT)RTRandBytes,
    /* VBoxUSB */
    (PFNRT)RTPathStripFilename,
    NULL
};
#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
474
475
476/**
477 * Initializes the device extentsion structure.
478 *
479 * @returns IPRT status code.
480 * @param pDevExt The device extension to initialize.
481 * @param cbSession The size of the session structure. The size of
482 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
483 * defined because we're skipping the OS specific members
484 * then.
485 */
486int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
487{
488 int rc;
489
490#ifdef SUPDRV_WITH_RELEASE_LOGGER
491 /*
492 * Create the release log.
493 */
494 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
495 PRTLOGGER pRelLogger;
496 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
497 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
498 if (RT_SUCCESS(rc))
499 RTLogRelSetDefaultInstance(pRelLogger);
500 /** @todo Add native hook for getting logger config parameters and setting
501 * them. On linux we should use the module parameter stuff... */
502#endif
503
504 /*
505 * Initialize it.
506 */
507 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
508 pDevExt->Spinlock = NIL_RTSPINLOCK;
509 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
510 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
511 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
512 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
513 if (RT_SUCCESS(rc))
514 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
515 if (RT_SUCCESS(rc))
516 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
517
518 if (RT_SUCCESS(rc))
519#ifdef SUPDRV_USE_MUTEX_FOR_LDR
520 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
521#else
522 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
523#endif
524 if (RT_SUCCESS(rc))
525 {
526 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
527 if (RT_SUCCESS(rc))
528 {
529#ifdef SUPDRV_USE_MUTEX_FOR_LDR
530 rc = RTSemMutexCreate(&pDevExt->mtxGip);
531#else
532 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
533#endif
534 if (RT_SUCCESS(rc))
535 {
536 rc = supdrvGipCreate(pDevExt);
537 if (RT_SUCCESS(rc))
538 {
539 rc = supdrvTracerInit(pDevExt);
540 if (RT_SUCCESS(rc))
541 {
542 pDevExt->pLdrInitImage = NULL;
543 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
544 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
545 pDevExt->cbSession = (uint32_t)cbSession;
546
547 /*
548 * Fixup the absolute symbols.
549 *
550 * Because of the table indexing assumptions we'll have a little #ifdef orgy
551 * here rather than distributing this to OS specific files. At least for now.
552 */
553#ifdef RT_OS_DARWIN
554# if ARCH_BITS == 32
555 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
556 {
557 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
558 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
559 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
560 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
561 }
562 else
563 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
564 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
565 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
566 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
567 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
568 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
569 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
570# else /* 64-bit darwin: */
571 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
572 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
573 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
574 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
575 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
576 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
577 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
578 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
579 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
580 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
581
582# endif
583#else /* !RT_OS_DARWIN */
584# if ARCH_BITS == 64
585 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
586 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
587 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
588 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
589# else
590 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
591# endif
592 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
593 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
594 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
595 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
596 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
597 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
598#endif /* !RT_OS_DARWIN */
599 return VINF_SUCCESS;
600 }
601
602 supdrvGipDestroy(pDevExt);
603 }
604
605#ifdef SUPDRV_USE_MUTEX_FOR_GIP
606 RTSemMutexDestroy(pDevExt->mtxGip);
607 pDevExt->mtxGip = NIL_RTSEMMUTEX;
608#else
609 RTSemFastMutexDestroy(pDevExt->mtxGip);
610 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
611#endif
612 }
613 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
614 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
615 }
616#ifdef SUPDRV_USE_MUTEX_FOR_LDR
617 RTSemMutexDestroy(pDevExt->mtxLdr);
618 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
619#else
620 RTSemFastMutexDestroy(pDevExt->mtxLdr);
621 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
622#endif
623 }
624
625 RTSpinlockDestroy(pDevExt->Spinlock);
626 pDevExt->Spinlock = NIL_RTSPINLOCK;
627 RTSpinlockDestroy(pDevExt->hGipSpinlock);
628 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
629 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
630 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
631
632#ifdef SUPDRV_WITH_RELEASE_LOGGER
633 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
634 RTLogDestroy(RTLogSetDefaultInstance(NULL));
635#endif
636
637 return rc;
638}
639
640
641/**
642 * Delete the device extension (e.g. cleanup members).
643 *
644 * @param pDevExt The device extension to delete.
645 */
646void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
647{
648 PSUPDRVOBJ pObj;
649 PSUPDRVUSAGE pUsage;
650
651 /*
652 * Kill mutexes and spinlocks.
653 */
654#ifdef SUPDRV_USE_MUTEX_FOR_GIP
655 RTSemMutexDestroy(pDevExt->mtxGip);
656 pDevExt->mtxGip = NIL_RTSEMMUTEX;
657#else
658 RTSemFastMutexDestroy(pDevExt->mtxGip);
659 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
660#endif
661#ifdef SUPDRV_USE_MUTEX_FOR_LDR
662 RTSemMutexDestroy(pDevExt->mtxLdr);
663 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
664#else
665 RTSemFastMutexDestroy(pDevExt->mtxLdr);
666 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
667#endif
668 RTSpinlockDestroy(pDevExt->Spinlock);
669 pDevExt->Spinlock = NIL_RTSPINLOCK;
670 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
671 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
672 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
673 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
674
675 /*
676 * Free lists.
677 */
678 /* objects. */
679 pObj = pDevExt->pObjs;
680 Assert(!pObj); /* (can trigger on forced unloads) */
681 pDevExt->pObjs = NULL;
682 while (pObj)
683 {
684 void *pvFree = pObj;
685 pObj = pObj->pNext;
686 RTMemFree(pvFree);
687 }
688
689 /* usage records. */
690 pUsage = pDevExt->pUsageFree;
691 pDevExt->pUsageFree = NULL;
692 while (pUsage)
693 {
694 void *pvFree = pUsage;
695 pUsage = pUsage->pNext;
696 RTMemFree(pvFree);
697 }
698
699 /* kill the GIP. */
700 supdrvGipDestroy(pDevExt);
701 RTSpinlockDestroy(pDevExt->hGipSpinlock);
702 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
703
704 supdrvTracerTerm(pDevExt);
705
706#ifdef SUPDRV_WITH_RELEASE_LOGGER
707 /* destroy the loggers. */
708 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
709 RTLogDestroy(RTLogSetDefaultInstance(NULL));
710#endif
711}
712
713
714/**
715 * Create session.
716 *
717 * @returns IPRT status code.
718 * @param pDevExt Device extension.
719 * @param fUser Flag indicating whether this is a user or kernel
720 * session.
721 * @param fUnrestricted Unrestricted access (system) or restricted access
722 * (user)?
723 * @param ppSession Where to store the pointer to the session data.
724 */
725int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
726{
727 int rc;
728 PSUPDRVSESSION pSession;
729
730 if (!SUP_IS_DEVEXT_VALID(pDevExt))
731 return VERR_INVALID_PARAMETER;
732
733 /*
734 * Allocate memory for the session data.
735 */
736 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
737 if (pSession)
738 {
739 /* Initialize session data. */
740 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
741 if (!rc)
742 {
743 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
744 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
745 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
746 if (RT_SUCCESS(rc))
747 {
748 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
749 pSession->pDevExt = pDevExt;
750 pSession->u32Cookie = BIRD_INV;
751 pSession->fUnrestricted = fUnrestricted;
752 /*pSession->fInHashTable = false; */
753 pSession->cRefs = 1;
754 /*pSession->pCommonNextHash = NULL;
755 pSession->ppOsSessionPtr = NULL; */
756 if (fUser)
757 {
758 pSession->Process = RTProcSelf();
759 pSession->R0Process = RTR0ProcHandleSelf();
760 }
761 else
762 {
763 pSession->Process = NIL_RTPROCESS;
764 pSession->R0Process = NIL_RTR0PROCESS;
765 }
766 /*pSession->pLdrUsage = NULL;
767 pSession->pVM = NULL;
768 pSession->pUsage = NULL;
769 pSession->pGip = NULL;
770 pSession->fGipReferenced = false;
771 pSession->Bundle.cUsed = 0; */
772 pSession->Uid = NIL_RTUID;
773 pSession->Gid = NIL_RTGID;
774 /*pSession->uTracerData = 0;*/
775 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
776 RTListInit(&pSession->TpProviders);
777 /*pSession->cTpProviders = 0;*/
778 /*pSession->cTpProbesFiring = 0;*/
779 RTListInit(&pSession->TpUmods);
780 /*RT_ZERO(pSession->apTpLookupTable);*/
781
782 VBOXDRV_SESSION_CREATE(pSession, fUser);
783 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
784 return VINF_SUCCESS;
785 }
786
787 RTSpinlockDestroy(pSession->Spinlock);
788 }
789 RTMemFree(pSession);
790 *ppSession = NULL;
791 Log(("Failed to create spinlock, rc=%d!\n", rc));
792 }
793 else
794 rc = VERR_NO_MEMORY;
795
796 return rc;
797}
798
799
800/**
801 * Cleans up the session in the context of the process to which it belongs, the
802 * caller will free the session and the session spinlock.
803 *
804 * This should normally occur when the session is closed or as the process
805 * exits. Careful reference counting in the OS specfic code makes sure that
806 * there cannot be any races between process/handle cleanup callbacks and
807 * threads doing I/O control calls.
808 *
809 * @param pDevExt The device extension.
810 * @param pSession Session data.
811 */
812static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
813{
814 int rc;
815 PSUPDRVBUNDLE pBundle;
816 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
817
818 Assert(!pSession->fInHashTable);
819 Assert(!pSession->ppOsSessionPtr);
820 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
821 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
822
823 /*
824 * Remove logger instances related to this session.
825 */
826 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
827
828 /*
829 * Destroy the handle table.
830 */
831 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
832 AssertRC(rc);
833 pSession->hHandleTable = NIL_RTHANDLETABLE;
834
835 /*
836 * Release object references made in this session.
837 * In theory there should be noone racing us in this session.
838 */
839 Log2(("release objects - start\n"));
840 if (pSession->pUsage)
841 {
842 PSUPDRVUSAGE pUsage;
843 RTSpinlockAcquire(pDevExt->Spinlock);
844
845 while ((pUsage = pSession->pUsage) != NULL)
846 {
847 PSUPDRVOBJ pObj = pUsage->pObj;
848 pSession->pUsage = pUsage->pNext;
849
850 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
851 if (pUsage->cUsage < pObj->cUsage)
852 {
853 pObj->cUsage -= pUsage->cUsage;
854 RTSpinlockRelease(pDevExt->Spinlock);
855 }
856 else
857 {
858 /* Destroy the object and free the record. */
859 if (pDevExt->pObjs == pObj)
860 pDevExt->pObjs = pObj->pNext;
861 else
862 {
863 PSUPDRVOBJ pObjPrev;
864 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
865 if (pObjPrev->pNext == pObj)
866 {
867 pObjPrev->pNext = pObj->pNext;
868 break;
869 }
870 Assert(pObjPrev);
871 }
872 RTSpinlockRelease(pDevExt->Spinlock);
873
874 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
875 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
876 if (pObj->pfnDestructor)
877 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
878 RTMemFree(pObj);
879 }
880
881 /* free it and continue. */
882 RTMemFree(pUsage);
883
884 RTSpinlockAcquire(pDevExt->Spinlock);
885 }
886
887 RTSpinlockRelease(pDevExt->Spinlock);
888 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
889 }
890 Log2(("release objects - done\n"));
891
892 /*
893 * Do tracer cleanups related to this session.
894 */
895 Log2(("release tracer stuff - start\n"));
896 supdrvTracerCleanupSession(pDevExt, pSession);
897 Log2(("release tracer stuff - end\n"));
898
899 /*
900 * Release memory allocated in the session.
901 *
902 * We do not serialize this as we assume that the application will
903 * not allocated memory while closing the file handle object.
904 */
905 Log2(("freeing memory:\n"));
906 pBundle = &pSession->Bundle;
907 while (pBundle)
908 {
909 PSUPDRVBUNDLE pToFree;
910 unsigned i;
911
912 /*
913 * Check and unlock all entries in the bundle.
914 */
915 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
916 {
917 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
918 {
919 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
920 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
921 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
922 {
923 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
924 AssertRC(rc); /** @todo figure out how to handle this. */
925 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
926 }
927 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
928 AssertRC(rc); /** @todo figure out how to handle this. */
929 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
930 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
931 }
932 }
933
934 /*
935 * Advance and free previous bundle.
936 */
937 pToFree = pBundle;
938 pBundle = pBundle->pNext;
939
940 pToFree->pNext = NULL;
941 pToFree->cUsed = 0;
942 if (pToFree != &pSession->Bundle)
943 RTMemFree(pToFree);
944 }
945 Log2(("freeing memory - done\n"));
946
947 /*
948 * Deregister component factories.
949 */
950 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
951 Log2(("deregistering component factories:\n"));
952 if (pDevExt->pComponentFactoryHead)
953 {
954 PSUPDRVFACTORYREG pPrev = NULL;
955 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
956 while (pCur)
957 {
958 if (pCur->pSession == pSession)
959 {
960 /* unlink it */
961 PSUPDRVFACTORYREG pNext = pCur->pNext;
962 if (pPrev)
963 pPrev->pNext = pNext;
964 else
965 pDevExt->pComponentFactoryHead = pNext;
966
967 /* free it */
968 pCur->pNext = NULL;
969 pCur->pSession = NULL;
970 pCur->pFactory = NULL;
971 RTMemFree(pCur);
972
973 /* next */
974 pCur = pNext;
975 }
976 else
977 {
978 /* next */
979 pPrev = pCur;
980 pCur = pCur->pNext;
981 }
982 }
983 }
984 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
985 Log2(("deregistering component factories - done\n"));
986
987 /*
988 * Loaded images needs to be dereferenced and possibly freed up.
989 */
990 supdrvLdrLock(pDevExt);
991 Log2(("freeing images:\n"));
992 if (pSession->pLdrUsage)
993 {
994 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
995 pSession->pLdrUsage = NULL;
996 while (pUsage)
997 {
998 void *pvFree = pUsage;
999 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1000 if (pImage->cUsage > pUsage->cUsage)
1001 pImage->cUsage -= pUsage->cUsage;
1002 else
1003 supdrvLdrFree(pDevExt, pImage);
1004 pUsage->pImage = NULL;
1005 pUsage = pUsage->pNext;
1006 RTMemFree(pvFree);
1007 }
1008 }
1009 supdrvLdrUnlock(pDevExt);
1010 Log2(("freeing images - done\n"));
1011
1012 /*
1013 * Unmap the GIP.
1014 */
1015 Log2(("umapping GIP:\n"));
1016 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1017 {
1018 SUPR0GipUnmap(pSession);
1019 pSession->fGipReferenced = 0;
1020 }
1021 Log2(("umapping GIP - done\n"));
1022}
1023
1024
1025/**
1026 * Common code for freeing a session when the reference count reaches zero.
1027 *
1028 * @param pDevExt Device extension.
1029 * @param pSession Session data.
1030 * This data will be freed by this routine.
1031 */
1032static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1033{
1034 VBOXDRV_SESSION_CLOSE(pSession);
1035
1036 /*
1037 * Cleanup the session first.
1038 */
1039 supdrvCleanupSession(pDevExt, pSession);
1040 supdrvOSCleanupSession(pDevExt, pSession);
1041
1042 /*
1043 * Free the rest of the session stuff.
1044 */
1045 RTSpinlockDestroy(pSession->Spinlock);
1046 pSession->Spinlock = NIL_RTSPINLOCK;
1047 pSession->pDevExt = NULL;
1048 RTMemFree(pSession);
1049 LogFlow(("supdrvDestroySession: returns\n"));
1050}
1051
1052
1053/**
1054 * Inserts the session into the global hash table.
1055 *
1056 * @retval VINF_SUCCESS on success.
1057 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1058 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1059 * session (asserted).
1060 * @retval VERR_DUPLICATE if there is already a session for that pid.
1061 *
1062 * @param pDevExt The device extension.
1063 * @param pSession The session.
1064 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1065 * available and used. This will set to point to the
1066 * session while under the protection of the session
1067 * hash table spinlock. It will also be kept in
1068 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1069 * cleanup use.
1070 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1071 */
1072int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1073 void *pvUser)
1074{
1075 PSUPDRVSESSION pCur;
1076 unsigned iHash;
1077
1078 /*
1079 * Validate input.
1080 */
1081 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1082 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1083
1084 /*
1085 * Calculate the hash table index and acquire the spinlock.
1086 */
1087 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1088
1089 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1090
1091 /*
1092 * If there are a collisions, we need to carefully check if we got a
1093 * duplicate. There can only be one open session per process.
1094 */
1095 pCur = pDevExt->apSessionHashTab[iHash];
1096 if (pCur)
1097 {
1098 while (pCur && pCur->Process != pSession->Process)
1099 pCur = pCur->pCommonNextHash;
1100
1101 if (pCur)
1102 {
1103 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1104 if (pCur == pSession)
1105 {
1106 Assert(pSession->fInHashTable);
1107 AssertFailed();
1108 return VERR_WRONG_ORDER;
1109 }
1110 Assert(!pSession->fInHashTable);
1111 if (pCur->R0Process == pSession->R0Process)
1112 return VERR_RESOURCE_IN_USE;
1113 return VERR_DUPLICATE;
1114 }
1115 }
1116 Assert(!pSession->fInHashTable);
1117 Assert(!pSession->ppOsSessionPtr);
1118
1119 /*
1120 * Insert it, doing a callout to the OS specific code in case it has
1121 * anything it wishes to do while we're holding the spinlock.
1122 */
1123 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1124 pDevExt->apSessionHashTab[iHash] = pSession;
1125 pSession->fInHashTable = true;
1126 ASMAtomicIncS32(&pDevExt->cSessions);
1127
1128 pSession->ppOsSessionPtr = ppOsSessionPtr;
1129 if (ppOsSessionPtr)
1130 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1131
1132 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1133
1134 /*
1135 * Retain a reference for the pointer in the session table.
1136 */
1137 ASMAtomicIncU32(&pSession->cRefs);
1138
1139 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1140 return VINF_SUCCESS;
1141}
1142
1143
1144/**
1145 * Removes the session from the global hash table.
1146 *
1147 * @retval VINF_SUCCESS on success.
1148 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1149 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1150 * session (asserted).
1151 *
1152 * @param pDevExt The device extension.
1153 * @param pSession The session. The caller is expected to have a reference
1154 * to this so it won't croak on us when we release the hash
1155 * table reference.
1156 * @param pvUser OS specific context value for the
1157 * supdrvOSSessionHashTabInserted callback.
1158 */
1159int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1160{
1161 PSUPDRVSESSION pCur;
1162 unsigned iHash;
1163 int32_t cRefs;
1164
1165 /*
1166 * Validate input.
1167 */
1168 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1169 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1170
1171 /*
1172 * Calculate the hash table index and acquire the spinlock.
1173 */
1174 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1175
1176 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1177
1178 /*
1179 * Unlink it.
1180 */
1181 pCur = pDevExt->apSessionHashTab[iHash];
1182 if (pCur == pSession)
1183 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1184 else
1185 {
1186 PSUPDRVSESSION pPrev = pCur;
1187 while (pCur && pCur != pSession)
1188 {
1189 pPrev = pCur;
1190 pCur = pCur->pCommonNextHash;
1191 }
1192 if (pCur)
1193 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1194 else
1195 {
1196 Assert(!pSession->fInHashTable);
1197 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1198 return VERR_NOT_FOUND;
1199 }
1200 }
1201
1202 pSession->pCommonNextHash = NULL;
1203 pSession->fInHashTable = false;
1204
1205 ASMAtomicDecS32(&pDevExt->cSessions);
1206
1207 /*
1208 * Clear OS specific session pointer if available and do the OS callback.
1209 */
1210 if (pSession->ppOsSessionPtr)
1211 {
1212 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1213 pSession->ppOsSessionPtr = NULL;
1214 }
1215
1216 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1217
1218 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1219
1220 /*
1221 * Drop the reference the hash table had to the session. This shouldn't
1222 * be the last reference!
1223 */
1224 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1225 Assert(cRefs > 0 && cRefs < _1M);
1226 if (cRefs == 0)
1227 supdrvDestroySession(pDevExt, pSession);
1228
1229 return VINF_SUCCESS;
1230}
1231
1232
1233/**
1234 * Looks up the session for the current process in the global hash table or in
1235 * OS specific pointer.
1236 *
1237 * @returns Pointer to the session with a reference that the caller must
1238 * release. If no valid session was found, NULL is returned.
1239 *
1240 * @param pDevExt The device extension.
1241 * @param Process The process ID.
1242 * @param R0Process The ring-0 process handle.
1243 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1244 * this is used instead of the hash table. For
1245 * additional safety it must then be equal to the
1246 * SUPDRVSESSION::ppOsSessionPtr member.
1247 * This can be NULL even if the OS has a session
1248 * pointer.
1249 */
1250PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1251 PSUPDRVSESSION *ppOsSessionPtr)
1252{
1253 PSUPDRVSESSION pCur;
1254 unsigned iHash;
1255
1256 /*
1257 * Validate input.
1258 */
1259 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1260
1261 /*
1262 * Calculate the hash table index and acquire the spinlock.
1263 */
1264 iHash = SUPDRV_SESSION_HASH(Process);
1265
1266 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1267
1268 /*
1269 * If an OS session pointer is provided, always use it.
1270 */
1271 if (ppOsSessionPtr)
1272 {
1273 pCur = *ppOsSessionPtr;
1274 if ( pCur
1275 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1276 || pCur->Process != Process
1277 || pCur->R0Process != R0Process) )
1278 pCur = NULL;
1279 }
1280 else
1281 {
1282 /*
1283 * Otherwise, do the hash table lookup.
1284 */
1285 pCur = pDevExt->apSessionHashTab[iHash];
1286 while ( pCur
1287 && ( pCur->Process != Process
1288 || pCur->R0Process != R0Process) )
1289 pCur = pCur->pCommonNextHash;
1290 }
1291
1292 /*
1293 * Retain the session.
1294 */
1295 if (pCur)
1296 {
1297 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1298 NOREF(cRefs);
1299 Assert(cRefs > 1 && cRefs < _1M);
1300 }
1301
1302 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1303
1304 return pCur;
1305}
1306
1307
1308/**
1309 * Retain a session to make sure it doesn't go away while it is in use.
1310 *
1311 * @returns New reference count on success, UINT32_MAX on failure.
1312 * @param pSession Session data.
1313 */
1314uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1315{
1316 uint32_t cRefs;
1317 AssertPtrReturn(pSession, UINT32_MAX);
1318 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1319
1320 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1321 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1322 return cRefs;
1323}
1324
1325
1326/**
1327 * Releases a given session.
1328 *
1329 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1330 * @param pSession Session data.
1331 */
1332uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1333{
1334 uint32_t cRefs;
1335 AssertPtrReturn(pSession, UINT32_MAX);
1336 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1337
1338 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1339 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1340 if (cRefs == 0)
1341 supdrvDestroySession(pSession->pDevExt, pSession);
1342 return cRefs;
1343}
1344
1345
1346/**
1347 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1348 *
1349 * @returns IPRT status code, see SUPR0ObjAddRef.
1350 * @param hHandleTable The handle table handle. Ignored.
1351 * @param pvObj The object pointer.
1352 * @param pvCtx Context, the handle type. Ignored.
1353 * @param pvUser Session pointer.
1354 */
1355static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1356{
1357 NOREF(pvCtx);
1358 NOREF(hHandleTable);
1359 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1360}
1361
1362
1363/**
1364 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1365 *
1366 * @param hHandleTable The handle table handle. Ignored.
1367 * @param h The handle value. Ignored.
1368 * @param pvObj The object pointer.
1369 * @param pvCtx Context, the handle type. Ignored.
1370 * @param pvUser Session pointer.
1371 */
1372static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1373{
1374 NOREF(pvCtx);
1375 NOREF(h);
1376 NOREF(hHandleTable);
1377 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1378}
1379
1380
1381/**
1382 * Fast path I/O Control worker.
1383 *
1384 * @returns VBox status code that should be passed down to ring-3 unchanged.
1385 * @param uIOCtl Function number.
1386 * @param idCpu VMCPU id.
1387 * @param pDevExt Device extention.
1388 * @param pSession Session data.
1389 */
1390int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1391{
1392 /*
1393 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1394 */
1395 if (RT_LIKELY( RT_VALID_PTR(pSession)
1396 && pSession->pVM
1397 && pDevExt->pfnVMMR0EntryFast))
1398 {
1399 switch (uIOCtl)
1400 {
1401 case SUP_IOCTL_FAST_DO_RAW_RUN:
1402 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1403 break;
1404 case SUP_IOCTL_FAST_DO_HM_RUN:
1405 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1406 break;
1407 case SUP_IOCTL_FAST_DO_NOP:
1408 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1409 break;
1410 default:
1411 return VERR_INTERNAL_ERROR;
1412 }
1413 return VINF_SUCCESS;
1414 }
1415 return VERR_INTERNAL_ERROR;
1416}
1417
1418
/**
 * Helper for supdrvIOCtl.  Tells whether a string contains at least one
 * character out of a given set.
 *
 * This is a hand-rolled stand-in for strpbrk, which would be used if it were
 * on the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;

    /* Compare every character of the string with every member of the set. */
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        const char *pszMember;
        for (pszMember = pszChars; *pszMember != '\0'; pszMember++)
            if (*pszMember == *pszCur)
                return 1;
    }

    return 0;
}
1442
1443
1444
1445/**
1446 * I/O Control inner worker (tracing reasons).
1447 *
1448 * @returns IPRT status code.
1449 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1450 *
1451 * @param uIOCtl Function number.
1452 * @param pDevExt Device extention.
1453 * @param pSession Session data.
1454 * @param pReqHdr The request header.
1455 */
1456static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1457{
1458 /*
1459 * Validation macros
1460 */
1461#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1462 do { \
1463 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1464 { \
1465 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1466 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1467 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1468 } \
1469 } while (0)
1470
1471#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1472
1473#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1474 do { \
1475 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1476 { \
1477 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1478 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1479 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1480 } \
1481 } while (0)
1482
1483#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1484 do { \
1485 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1486 { \
1487 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1488 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1489 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1490 } \
1491 } while (0)
1492
1493#define REQ_CHECK_EXPR(Name, expr) \
1494 do { \
1495 if (RT_UNLIKELY(!(expr))) \
1496 { \
1497 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1498 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1499 } \
1500 } while (0)
1501
1502#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1503 do { \
1504 if (RT_UNLIKELY(!(expr))) \
1505 { \
1506 OSDBGPRINT( fmt ); \
1507 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1508 } \
1509 } while (0)
1510
1511 /*
1512 * The switch.
1513 */
1514 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1515 {
1516 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1517 {
1518 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1519 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1520 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1521 {
1522 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1523 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1524 return 0;
1525 }
1526
1527#if 0
1528 /*
1529 * Call out to the OS specific code and let it do permission checks on the
1530 * client process.
1531 */
1532 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1533 {
1534 pReq->u.Out.u32Cookie = 0xffffffff;
1535 pReq->u.Out.u32SessionCookie = 0xffffffff;
1536 pReq->u.Out.u32SessionVersion = 0xffffffff;
1537 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1538 pReq->u.Out.pSession = NULL;
1539 pReq->u.Out.cFunctions = 0;
1540 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1541 return 0;
1542 }
1543#endif
1544
1545 /*
1546 * Match the version.
1547 * The current logic is very simple, match the major interface version.
1548 */
1549 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1550 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1551 {
1552 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1553 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1554 pReq->u.Out.u32Cookie = 0xffffffff;
1555 pReq->u.Out.u32SessionCookie = 0xffffffff;
1556 pReq->u.Out.u32SessionVersion = 0xffffffff;
1557 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1558 pReq->u.Out.pSession = NULL;
1559 pReq->u.Out.cFunctions = 0;
1560 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1561 return 0;
1562 }
1563
1564 /*
1565 * Fill in return data and be gone.
1566 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1567 * u32SessionVersion <= u32ReqVersion!
1568 */
1569 /** @todo Somehow validate the client and negotiate a secure cookie... */
1570 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1571 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1572 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1573 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1574 pReq->u.Out.pSession = pSession;
1575 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1576 pReq->Hdr.rc = VINF_SUCCESS;
1577 return 0;
1578 }
1579
1580 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1581 {
1582 /* validate */
1583 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1584 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1585
1586 /* execute */
1587 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1588 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1589 pReq->Hdr.rc = VINF_SUCCESS;
1590 return 0;
1591 }
1592
1593 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1594 {
1595 /* validate */
1596 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1597 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1598 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1599 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1600 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1601
1602 /* execute */
1603 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1604 if (RT_FAILURE(pReq->Hdr.rc))
1605 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1606 return 0;
1607 }
1608
1609 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1610 {
1611 /* validate */
1612 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1613 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1614
1615 /* execute */
1616 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1617 return 0;
1618 }
1619
1620 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1621 {
1622 /* validate */
1623 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1624 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1625
1626 /* execute */
1627 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1628 if (RT_FAILURE(pReq->Hdr.rc))
1629 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1630 return 0;
1631 }
1632
1633 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1634 {
1635 /* validate */
1636 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1637 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1638
1639 /* execute */
1640 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1641 return 0;
1642 }
1643
1644 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1645 {
1646 /* validate */
1647 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1648 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1649 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1650 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1651 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1652 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1653 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1654 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1655 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1656 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1657 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1658
1659 /* execute */
1660 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1661 return 0;
1662 }
1663
1664 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1665 {
1666 /* validate */
1667 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1668 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1669 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1670 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1671 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1672 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1673 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1674 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1675 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1676 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1677 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1678 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1679 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1680 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1681 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1682
1683 if (pReq->u.In.cSymbols)
1684 {
1685 uint32_t i;
1686 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1687 for (i = 0; i < pReq->u.In.cSymbols; i++)
1688 {
1689 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1690 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1691 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1692 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1693 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1694 pReq->u.In.cbStrTab - paSyms[i].offName),
1695 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1696 }
1697 }
1698
1699 /* execute */
1700 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1701 return 0;
1702 }
1703
1704 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1705 {
1706 /* validate */
1707 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1708 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1709
1710 /* execute */
1711 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1712 return 0;
1713 }
1714
1715 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1716 {
1717 /* validate */
1718 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1719 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1720 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1721
1722 /* execute */
1723 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1724 return 0;
1725 }
1726
1727 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1728 {
1729 /* validate */
1730 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1731 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1732 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1733
1734 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1735 {
1736 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1737
1738 /* execute */
1739 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1740 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1741 else
1742 pReq->Hdr.rc = VERR_WRONG_ORDER;
1743 }
1744 else
1745 {
1746 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1747 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1748 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1749 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1750 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1751
1752 /* execute */
1753 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1754 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1755 else
1756 pReq->Hdr.rc = VERR_WRONG_ORDER;
1757 }
1758
1759 if ( RT_FAILURE(pReq->Hdr.rc)
1760 && pReq->Hdr.rc != VERR_INTERRUPTED
1761 && pReq->Hdr.rc != VERR_TIMEOUT)
1762 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1763 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1764 else
1765 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1766 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1767 return 0;
1768 }
1769
1770 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1771 {
1772 /* validate */
1773 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1774 PSUPVMMR0REQHDR pVMMReq;
1775 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1776 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1777
1778 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1779 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1780 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1781 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1782 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1783
1784 /* execute */
1785 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1786 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1787 else
1788 pReq->Hdr.rc = VERR_WRONG_ORDER;
1789
1790 if ( RT_FAILURE(pReq->Hdr.rc)
1791 && pReq->Hdr.rc != VERR_INTERRUPTED
1792 && pReq->Hdr.rc != VERR_TIMEOUT)
1793 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1794 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1795 else
1796 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1797 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1798 return 0;
1799 }
1800
1801 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1802 {
1803 /* validate */
1804 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1805 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1806
1807 /* execute */
1808 pReq->Hdr.rc = VINF_SUCCESS;
1809 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1810 return 0;
1811 }
1812
1813 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1814 {
1815 /* validate */
1816 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1817 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1818 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1819
1820 /* execute */
1821 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1822 if (RT_FAILURE(pReq->Hdr.rc))
1823 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1824 return 0;
1825 }
1826
1827 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1828 {
1829 /* validate */
1830 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1831 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1832
1833 /* execute */
1834 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1835 return 0;
1836 }
1837
1838 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1839 {
1840 /* validate */
1841 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1842 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1843
1844 /* execute */
1845 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1846 if (RT_SUCCESS(pReq->Hdr.rc))
1847 pReq->u.Out.pGipR0 = pDevExt->pGip;
1848 return 0;
1849 }
1850
1851 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1852 {
1853 /* validate */
1854 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1855 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1856
1857 /* execute */
1858 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1859 return 0;
1860 }
1861
1862 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1863 {
1864 /* validate */
1865 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1866 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1867 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1868 || ( VALID_PTR(pReq->u.In.pVMR0)
1869 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1870 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1871 /* execute */
1872 pSession->pVM = pReq->u.In.pVMR0;
1873 pReq->Hdr.rc = VINF_SUCCESS;
1874 return 0;
1875 }
1876
1877 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1878 {
1879 /* validate */
1880 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1881 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1882 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1883 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1884 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1885 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1886 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1887 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1888 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1889
1890 /* execute */
1891 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1892 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1893 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1894 &pReq->u.Out.aPages[0]);
1895 if (RT_FAILURE(pReq->Hdr.rc))
1896 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1897 return 0;
1898 }
1899
1900 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1901 {
1902 /* validate */
1903 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1904 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1905 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1906 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1907 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1908 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1909
1910 /* execute */
1911 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1912 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1913 if (RT_FAILURE(pReq->Hdr.rc))
1914 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1915 return 0;
1916 }
1917
1918 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1919 {
1920 /* validate */
1921 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1922 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1923 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1924 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1925 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1926 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1927 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1928
1929 /* execute */
1930 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1931 return 0;
1932 }
1933
1934 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1935 {
1936 /* validate */
1937 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1938 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1939
1940 /* execute */
1941 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1942 return 0;
1943 }
1944
1945 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1946 {
1947 /* validate */
1948 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1949 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1950 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1951
1952 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1953 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1954 else
1955 {
1956 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1957 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1958 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1959 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1960 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1961 }
1962 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1963
1964 /* execute */
1965 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1966 return 0;
1967 }
1968
1969 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1970 {
1971 /* validate */
1972 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1973 size_t cbStrTab;
1974 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1975 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1976 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1977 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1978 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1979 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1980 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1981 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1982 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1983 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1984 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1985
1986 /* execute */
1987 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
1988 return 0;
1989 }
1990
1991 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
1992 {
1993 /* validate */
1994 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
1995 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
1996 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
1997
1998 /* execute */
1999 switch (pReq->u.In.uType)
2000 {
2001 case SUP_SEM_TYPE_EVENT:
2002 {
2003 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2004 switch (pReq->u.In.uOp)
2005 {
2006 case SUPSEMOP2_WAIT_MS_REL:
2007 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2008 break;
2009 case SUPSEMOP2_WAIT_NS_ABS:
2010 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2011 break;
2012 case SUPSEMOP2_WAIT_NS_REL:
2013 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2014 break;
2015 case SUPSEMOP2_SIGNAL:
2016 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2017 break;
2018 case SUPSEMOP2_CLOSE:
2019 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2020 break;
2021 case SUPSEMOP2_RESET:
2022 default:
2023 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2024 break;
2025 }
2026 break;
2027 }
2028
2029 case SUP_SEM_TYPE_EVENT_MULTI:
2030 {
2031 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2032 switch (pReq->u.In.uOp)
2033 {
2034 case SUPSEMOP2_WAIT_MS_REL:
2035 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2036 break;
2037 case SUPSEMOP2_WAIT_NS_ABS:
2038 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2039 break;
2040 case SUPSEMOP2_WAIT_NS_REL:
2041 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2042 break;
2043 case SUPSEMOP2_SIGNAL:
2044 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2045 break;
2046 case SUPSEMOP2_CLOSE:
2047 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2048 break;
2049 case SUPSEMOP2_RESET:
2050 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2051 break;
2052 default:
2053 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2054 break;
2055 }
2056 break;
2057 }
2058
2059 default:
2060 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2061 break;
2062 }
2063 return 0;
2064 }
2065
2066 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2067 {
2068 /* validate */
2069 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2070 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2071 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2072
2073 /* execute */
2074 switch (pReq->u.In.uType)
2075 {
2076 case SUP_SEM_TYPE_EVENT:
2077 {
2078 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2079 switch (pReq->u.In.uOp)
2080 {
2081 case SUPSEMOP3_CREATE:
2082 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2083 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2084 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2085 break;
2086 case SUPSEMOP3_GET_RESOLUTION:
2087 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2088 pReq->Hdr.rc = VINF_SUCCESS;
2089 pReq->Hdr.cbOut = sizeof(*pReq);
2090 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2091 break;
2092 default:
2093 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2094 break;
2095 }
2096 break;
2097 }
2098
2099 case SUP_SEM_TYPE_EVENT_MULTI:
2100 {
2101 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2102 switch (pReq->u.In.uOp)
2103 {
2104 case SUPSEMOP3_CREATE:
2105 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2106 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2107 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2108 break;
2109 case SUPSEMOP3_GET_RESOLUTION:
2110 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2111 pReq->Hdr.rc = VINF_SUCCESS;
2112 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2113 break;
2114 default:
2115 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2116 break;
2117 }
2118 break;
2119 }
2120
2121 default:
2122 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2123 break;
2124 }
2125 return 0;
2126 }
2127
2128 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2129 {
2130 /* validate */
2131 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2132 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2133
2134 /* execute */
2135 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2136 if (RT_FAILURE(pReq->Hdr.rc))
2137 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2138 return 0;
2139 }
2140
2141 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2142 {
2143 /* validate */
2144 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2145 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2146
2147 /* execute */
2148 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2149 return 0;
2150 }
2151
2152 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2153 {
2154 /* validate */
2155 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2156
2157 /* execute */
2158 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2159 return 0;
2160 }
2161
2162 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2163 {
2164 /* validate */
2165 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2166 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2167
2168 /* execute */
2169 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2170 return 0;
2171 }
2172
2173 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2174 {
2175 /* validate */
2176 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2177 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2178 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2179 return VERR_INVALID_PARAMETER;
2180
2181 /* execute */
2182 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2183 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2184 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2185 pReq->u.In.szName, pReq->u.In.fFlags);
2186 return 0;
2187 }
2188
2189 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2190 {
2191 /* validate */
2192 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2193 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2194
2195 /* execute */
2196 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2197 return 0;
2198 }
2199
2200 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2201 {
2202 /* validate */
2203 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2204 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2205
2206 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2207 pReqHdr->rc = VINF_SUCCESS;
2208 return 0;
2209 }
2210
2211 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2212 {
2213 /* validate */
2214 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2215 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2216 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2217 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2218
2219 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2220 return 0;
2221 }
2222
2223 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2224 {
2225 /* validate */
2226 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2227
2228 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2229 return 0;
2230 }
2231
2232 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2233 {
2234 /* validate */
2235 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2236 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2237
2238 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2239 return 0;
2240 }
2241
2242 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2243 {
2244 /* validate */
2245 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2246 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2247
2248 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2249 return 0;
2250 }
2251
2252 default:
2253 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2254 break;
2255 }
2256 return VERR_GENERAL_FAILURE;
2257}
2258
2259
2260/**
2261 * I/O Control inner worker for the restricted operations.
2262 *
2263 * @returns IPRT status code.
2264 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2265 *
2266 * @param uIOCtl Function number.
2267 * @param pDevExt Device extention.
2268 * @param pSession Session data.
2269 * @param pReqHdr The request header.
2270 */
2271static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2272{
2273 /*
2274 * The switch.
2275 */
2276 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2277 {
2278 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2279 {
2280 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2281 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2282 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2283 {
2284 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2285 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2286 return 0;
2287 }
2288
2289 /*
2290 * Match the version.
2291 * The current logic is very simple, match the major interface version.
2292 */
2293 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2294 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2295 {
2296 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2297 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2298 pReq->u.Out.u32Cookie = 0xffffffff;
2299 pReq->u.Out.u32SessionCookie = 0xffffffff;
2300 pReq->u.Out.u32SessionVersion = 0xffffffff;
2301 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2302 pReq->u.Out.pSession = NULL;
2303 pReq->u.Out.cFunctions = 0;
2304 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2305 return 0;
2306 }
2307
2308 /*
2309 * Fill in return data and be gone.
2310 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2311 * u32SessionVersion <= u32ReqVersion!
2312 */
2313 /** @todo Somehow validate the client and negotiate a secure cookie... */
2314 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2315 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2316 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2317 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2318 pReq->u.Out.pSession = pSession;
2319 pReq->u.Out.cFunctions = 0;
2320 pReq->Hdr.rc = VINF_SUCCESS;
2321 return 0;
2322 }
2323
2324 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2325 {
2326 /* validate */
2327 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2328 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2329
2330 /* execute */
2331 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2332 if (RT_FAILURE(pReq->Hdr.rc))
2333 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2334 return 0;
2335 }
2336
2337 default:
2338 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2339 break;
2340 }
2341 return VERR_GENERAL_FAILURE;
2342}
2343
2344
2345/**
2346 * I/O Control worker.
2347 *
2348 * @returns IPRT status code.
2349 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2350 *
2351 * @param uIOCtl Function number.
2352 * @param pDevExt Device extention.
2353 * @param pSession Session data.
2354 * @param pReqHdr The request header.
2355 */
2356int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2357{
2358 int rc;
2359 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2360
2361 /*
2362 * Validate the request.
2363 */
2364 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2365 {
2366 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2367 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2368 return VERR_INVALID_PARAMETER;
2369 }
2370 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2371 || pReqHdr->cbIn < sizeof(*pReqHdr)
2372 || pReqHdr->cbIn > cbReq
2373 || pReqHdr->cbOut < sizeof(*pReqHdr)
2374 || pReqHdr->cbOut > cbReq))
2375 {
2376 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2377 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2378 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2379 return VERR_INVALID_PARAMETER;
2380 }
2381 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2382 {
2383 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2384 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2385 return VERR_INVALID_PARAMETER;
2386 }
2387 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2388 {
2389 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2390 {
2391 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2392 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2393 return VERR_INVALID_PARAMETER;
2394 }
2395 }
2396 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2397 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2398 {
2399 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2400 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2401 return VERR_INVALID_PARAMETER;
2402 }
2403
2404 /*
2405 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2406 */
2407 if (pSession->fUnrestricted)
2408 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2409 else
2410 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2411
2412 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2413 return rc;
2414}
2415
2416
2417/**
2418 * Inter-Driver Communication (IDC) worker.
2419 *
2420 * @returns VBox status code.
2421 * @retval VINF_SUCCESS on success.
2422 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2423 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2424 *
2425 * @param uReq The request (function) code.
2426 * @param pDevExt Device extention.
2427 * @param pSession Session data.
2428 * @param pReqHdr The request header.
2429 */
2430int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2431{
2432 /*
2433 * The OS specific code has already validated the pSession
2434 * pointer, and the request size being greater or equal to
2435 * size of the header.
2436 *
2437 * So, just check that pSession is a kernel context session.
2438 */
2439 if (RT_UNLIKELY( pSession
2440 && pSession->R0Process != NIL_RTR0PROCESS))
2441 return VERR_INVALID_PARAMETER;
2442
2443/*
2444 * Validation macro.
2445 */
2446#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2447 do { \
2448 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2449 { \
2450 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2451 (long)pReqHdr->cb, (long)(cbExpect))); \
2452 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2453 } \
2454 } while (0)
2455
2456 switch (uReq)
2457 {
2458 case SUPDRV_IDC_REQ_CONNECT:
2459 {
2460 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2461 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2462
2463 /*
2464 * Validate the cookie and other input.
2465 */
2466 if (pReq->Hdr.pSession != NULL)
2467 {
2468 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2469 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2470 }
2471 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2472 {
2473 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2474 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2475 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2476 }
2477 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2478 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2479 {
2480 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2481 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2482 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2483 }
2484 if (pSession != NULL)
2485 {
2486 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2487 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2488 }
2489
2490 /*
2491 * Match the version.
2492 * The current logic is very simple, match the major interface version.
2493 */
2494 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2495 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2496 {
2497 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2498 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2499 pReq->u.Out.pSession = NULL;
2500 pReq->u.Out.uSessionVersion = 0xffffffff;
2501 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2502 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2503 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2504 return VINF_SUCCESS;
2505 }
2506
2507 pReq->u.Out.pSession = NULL;
2508 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2509 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2510 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2511
2512 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2513 if (RT_FAILURE(pReq->Hdr.rc))
2514 {
2515 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2516 return VINF_SUCCESS;
2517 }
2518
2519 pReq->u.Out.pSession = pSession;
2520 pReq->Hdr.pSession = pSession;
2521
2522 return VINF_SUCCESS;
2523 }
2524
2525 case SUPDRV_IDC_REQ_DISCONNECT:
2526 {
2527 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2528
2529 supdrvSessionRelease(pSession);
2530 return pReqHdr->rc = VINF_SUCCESS;
2531 }
2532
2533 case SUPDRV_IDC_REQ_GET_SYMBOL:
2534 {
2535 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2536 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2537
2538 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2539 return VINF_SUCCESS;
2540 }
2541
2542 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2543 {
2544 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2545 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2546
2547 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2548 return VINF_SUCCESS;
2549 }
2550
2551 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2552 {
2553 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2554 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2555
2556 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2557 return VINF_SUCCESS;
2558 }
2559
2560 default:
2561 Log(("Unknown IDC %#lx\n", (long)uReq));
2562 break;
2563 }
2564
2565#undef REQ_CHECK_IDC_SIZE
2566 return VERR_NOT_SUPPORTED;
2567}
2568
2569
2570/**
2571 * Register a object for reference counting.
2572 * The object is registered with one reference in the specified session.
2573 *
2574 * @returns Unique identifier on success (pointer).
2575 * All future reference must use this identifier.
2576 * @returns NULL on failure.
2577 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2578 * @param pvUser1 The first user argument.
2579 * @param pvUser2 The second user argument.
2580 */
2581SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2582{
2583 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2584 PSUPDRVOBJ pObj;
2585 PSUPDRVUSAGE pUsage;
2586
2587 /*
2588 * Validate the input.
2589 */
2590 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2591 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2592 AssertPtrReturn(pfnDestructor, NULL);
2593
2594 /*
2595 * Allocate and initialize the object.
2596 */
2597 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2598 if (!pObj)
2599 return NULL;
2600 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2601 pObj->enmType = enmType;
2602 pObj->pNext = NULL;
2603 pObj->cUsage = 1;
2604 pObj->pfnDestructor = pfnDestructor;
2605 pObj->pvUser1 = pvUser1;
2606 pObj->pvUser2 = pvUser2;
2607 pObj->CreatorUid = pSession->Uid;
2608 pObj->CreatorGid = pSession->Gid;
2609 pObj->CreatorProcess= pSession->Process;
2610 supdrvOSObjInitCreator(pObj, pSession);
2611
2612 /*
2613 * Allocate the usage record.
2614 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2615 */
2616 RTSpinlockAcquire(pDevExt->Spinlock);
2617
2618 pUsage = pDevExt->pUsageFree;
2619 if (pUsage)
2620 pDevExt->pUsageFree = pUsage->pNext;
2621 else
2622 {
2623 RTSpinlockRelease(pDevExt->Spinlock);
2624 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2625 if (!pUsage)
2626 {
2627 RTMemFree(pObj);
2628 return NULL;
2629 }
2630 RTSpinlockAcquire(pDevExt->Spinlock);
2631 }
2632
2633 /*
2634 * Insert the object and create the session usage record.
2635 */
2636 /* The object. */
2637 pObj->pNext = pDevExt->pObjs;
2638 pDevExt->pObjs = pObj;
2639
2640 /* The session record. */
2641 pUsage->cUsage = 1;
2642 pUsage->pObj = pObj;
2643 pUsage->pNext = pSession->pUsage;
2644 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2645 pSession->pUsage = pUsage;
2646
2647 RTSpinlockRelease(pDevExt->Spinlock);
2648
2649 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2650 return pObj;
2651}
2652
2653
2654/**
2655 * Increment the reference counter for the object associating the reference
2656 * with the specified session.
2657 *
2658 * @returns IPRT status code.
2659 * @param pvObj The identifier returned by SUPR0ObjRegister().
2660 * @param pSession The session which is referencing the object.
2661 *
2662 * @remarks The caller should not own any spinlocks and must carefully protect
2663 * itself against potential race with the destructor so freed memory
2664 * isn't accessed here.
2665 */
2666SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2667{
2668 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2669}
2670
2671
2672/**
2673 * Increment the reference counter for the object associating the reference
2674 * with the specified session.
2675 *
2676 * @returns IPRT status code.
2677 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2678 * couldn't be allocated. (If you see this you're not doing the right
2679 * thing and it won't ever work reliably.)
2680 *
2681 * @param pvObj The identifier returned by SUPR0ObjRegister().
2682 * @param pSession The session which is referencing the object.
2683 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2684 * first reference to an object in a session with this
2685 * argument set.
2686 *
2687 * @remarks The caller should not own any spinlocks and must carefully protect
2688 * itself against potential race with the destructor so freed memory
2689 * isn't accessed here.
2690 */
2691SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2692{
2693 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2694 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2695 int rc = VINF_SUCCESS;
2696 PSUPDRVUSAGE pUsagePre;
2697 PSUPDRVUSAGE pUsage;
2698
2699 /*
2700 * Validate the input.
2701 * Be ready for the destruction race (someone might be stuck in the
2702 * destructor waiting a lock we own).
2703 */
2704 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2705 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2706 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2707 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2708 VERR_INVALID_PARAMETER);
2709
2710 RTSpinlockAcquire(pDevExt->Spinlock);
2711
2712 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2713 {
2714 RTSpinlockRelease(pDevExt->Spinlock);
2715
2716 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2717 return VERR_WRONG_ORDER;
2718 }
2719
2720 /*
2721 * Preallocate the usage record if we can.
2722 */
2723 pUsagePre = pDevExt->pUsageFree;
2724 if (pUsagePre)
2725 pDevExt->pUsageFree = pUsagePre->pNext;
2726 else if (!fNoBlocking)
2727 {
2728 RTSpinlockRelease(pDevExt->Spinlock);
2729 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2730 if (!pUsagePre)
2731 return VERR_NO_MEMORY;
2732
2733 RTSpinlockAcquire(pDevExt->Spinlock);
2734 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2735 {
2736 RTSpinlockRelease(pDevExt->Spinlock);
2737
2738 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2739 return VERR_WRONG_ORDER;
2740 }
2741 }
2742
2743 /*
2744 * Reference the object.
2745 */
2746 pObj->cUsage++;
2747
2748 /*
2749 * Look for the session record.
2750 */
2751 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2752 {
2753 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2754 if (pUsage->pObj == pObj)
2755 break;
2756 }
2757 if (pUsage)
2758 pUsage->cUsage++;
2759 else if (pUsagePre)
2760 {
2761 /* create a new session record. */
2762 pUsagePre->cUsage = 1;
2763 pUsagePre->pObj = pObj;
2764 pUsagePre->pNext = pSession->pUsage;
2765 pSession->pUsage = pUsagePre;
2766 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2767
2768 pUsagePre = NULL;
2769 }
2770 else
2771 {
2772 pObj->cUsage--;
2773 rc = VERR_TRY_AGAIN;
2774 }
2775
2776 /*
2777 * Put any unused usage record into the free list..
2778 */
2779 if (pUsagePre)
2780 {
2781 pUsagePre->pNext = pDevExt->pUsageFree;
2782 pDevExt->pUsageFree = pUsagePre;
2783 }
2784
2785 RTSpinlockRelease(pDevExt->Spinlock);
2786
2787 return rc;
2788}
2789
2790
2791/**
2792 * Decrement / destroy a reference counter record for an object.
2793 *
2794 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2795 *
2796 * @returns IPRT status code.
2797 * @retval VINF_SUCCESS if not destroyed.
2798 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2799 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2800 * string builds.
2801 *
2802 * @param pvObj The identifier returned by SUPR0ObjRegister().
2803 * @param pSession The session which is referencing the object.
2804 */
2805SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2806{
2807 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2808 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2809 int rc = VERR_INVALID_PARAMETER;
2810 PSUPDRVUSAGE pUsage;
2811 PSUPDRVUSAGE pUsagePrev;
2812
2813 /*
2814 * Validate the input.
2815 */
2816 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2817 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2818 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2819 VERR_INVALID_PARAMETER);
2820
2821 /*
2822 * Acquire the spinlock and look for the usage record.
2823 */
2824 RTSpinlockAcquire(pDevExt->Spinlock);
2825
2826 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2827 pUsage;
2828 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2829 {
2830 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2831 if (pUsage->pObj == pObj)
2832 {
2833 rc = VINF_SUCCESS;
2834 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2835 if (pUsage->cUsage > 1)
2836 {
2837 pObj->cUsage--;
2838 pUsage->cUsage--;
2839 }
2840 else
2841 {
2842 /*
2843 * Free the session record.
2844 */
2845 if (pUsagePrev)
2846 pUsagePrev->pNext = pUsage->pNext;
2847 else
2848 pSession->pUsage = pUsage->pNext;
2849 pUsage->pNext = pDevExt->pUsageFree;
2850 pDevExt->pUsageFree = pUsage;
2851
2852 /* What about the object? */
2853 if (pObj->cUsage > 1)
2854 pObj->cUsage--;
2855 else
2856 {
2857 /*
2858 * Object is to be destroyed, unlink it.
2859 */
2860 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2861 rc = VINF_OBJECT_DESTROYED;
2862 if (pDevExt->pObjs == pObj)
2863 pDevExt->pObjs = pObj->pNext;
2864 else
2865 {
2866 PSUPDRVOBJ pObjPrev;
2867 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2868 if (pObjPrev->pNext == pObj)
2869 {
2870 pObjPrev->pNext = pObj->pNext;
2871 break;
2872 }
2873 Assert(pObjPrev);
2874 }
2875 }
2876 }
2877 break;
2878 }
2879 }
2880
2881 RTSpinlockRelease(pDevExt->Spinlock);
2882
2883 /*
2884 * Call the destructor and free the object if required.
2885 */
2886 if (rc == VINF_OBJECT_DESTROYED)
2887 {
2888 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2889 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2890 if (pObj->pfnDestructor)
2891 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2892 RTMemFree(pObj);
2893 }
2894
2895 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2896 return rc;
2897}
2898
2899
2900/**
2901 * Verifies that the current process can access the specified object.
2902 *
2903 * @returns The following IPRT status code:
2904 * @retval VINF_SUCCESS if access was granted.
2905 * @retval VERR_PERMISSION_DENIED if denied access.
2906 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2907 *
2908 * @param pvObj The identifier returned by SUPR0ObjRegister().
2909 * @param pSession The session which wishes to access the object.
2910 * @param pszObjName Object string name. This is optional and depends on the object type.
2911 *
2912 * @remark The caller is responsible for making sure the object isn't removed while
2913 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2914 */
2915SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2916{
2917 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2918 int rc;
2919
2920 /*
2921 * Validate the input.
2922 */
2923 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2924 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2925 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2926 VERR_INVALID_PARAMETER);
2927
2928 /*
2929 * Check access. (returns true if a decision has been made.)
2930 */
2931 rc = VERR_INTERNAL_ERROR;
2932 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2933 return rc;
2934
2935 /*
2936 * Default policy is to allow the user to access his own
2937 * stuff but nothing else.
2938 */
2939 if (pObj->CreatorUid == pSession->Uid)
2940 return VINF_SUCCESS;
2941 return VERR_PERMISSION_DENIED;
2942}
2943
2944
2945/**
2946 * Lock pages.
2947 *
2948 * @returns IPRT status code.
2949 * @param pSession Session to which the locked memory should be associated.
2950 * @param pvR3 Start of the memory range to lock.
2951 * This must be page aligned.
2952 * @param cPages Number of pages to lock.
2953 * @param paPages Where to put the physical addresses of locked memory.
2954 */
2955SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2956{
2957 int rc;
2958 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2959 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2960 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2961
2962 /*
2963 * Verify input.
2964 */
2965 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2966 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2967 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2968 || !pvR3)
2969 {
2970 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2971 return VERR_INVALID_PARAMETER;
2972 }
2973
2974 /*
2975 * Let IPRT do the job.
2976 */
2977 Mem.eType = MEMREF_TYPE_LOCKED;
2978 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2979 if (RT_SUCCESS(rc))
2980 {
2981 uint32_t iPage = cPages;
2982 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2983 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2984
2985 while (iPage-- > 0)
2986 {
2987 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2988 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2989 {
2990 AssertMsgFailed(("iPage=%d\n", iPage));
2991 rc = VERR_INTERNAL_ERROR;
2992 break;
2993 }
2994 }
2995 if (RT_SUCCESS(rc))
2996 rc = supdrvMemAdd(&Mem, pSession);
2997 if (RT_FAILURE(rc))
2998 {
2999 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3000 AssertRC(rc2);
3001 }
3002 }
3003
3004 return rc;
3005}
3006
3007
3008/**
3009 * Unlocks the memory pointed to by pv.
3010 *
3011 * @returns IPRT status code.
3012 * @param pSession Session to which the memory was locked.
3013 * @param pvR3 Memory to unlock.
3014 */
3015SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3016{
3017 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3018 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3019 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3020}
3021
3022
3023/**
3024 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3025 * backing.
3026 *
3027 * @returns IPRT status code.
3028 * @param pSession Session data.
3029 * @param cPages Number of pages to allocate.
3030 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3031 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3032 * @param pHCPhys Where to put the physical address of allocated memory.
3033 */
3034SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3035{
3036 int rc;
3037 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3038 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3039
3040 /*
3041 * Validate input.
3042 */
3043 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3044 if (!ppvR3 || !ppvR0 || !pHCPhys)
3045 {
3046 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3047 pSession, ppvR0, ppvR3, pHCPhys));
3048 return VERR_INVALID_PARAMETER;
3049
3050 }
3051 if (cPages < 1 || cPages >= 256)
3052 {
3053 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3054 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3055 }
3056
3057 /*
3058 * Let IPRT do the job.
3059 */
3060 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3061 if (RT_SUCCESS(rc))
3062 {
3063 int rc2;
3064 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3065 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3066 if (RT_SUCCESS(rc))
3067 {
3068 Mem.eType = MEMREF_TYPE_CONT;
3069 rc = supdrvMemAdd(&Mem, pSession);
3070 if (!rc)
3071 {
3072 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3073 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3074 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3075 return 0;
3076 }
3077
3078 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3079 AssertRC(rc2);
3080 }
3081 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3082 AssertRC(rc2);
3083 }
3084
3085 return rc;
3086}
3087
3088
3089/**
3090 * Frees memory allocated using SUPR0ContAlloc().
3091 *
3092 * @returns IPRT status code.
3093 * @param pSession The session to which the memory was allocated.
3094 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3095 */
3096SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3097{
3098 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3099 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3100 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3101}
3102
3103
3104/**
3105 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3106 *
3107 * The memory isn't zeroed.
3108 *
3109 * @returns IPRT status code.
3110 * @param pSession Session data.
3111 * @param cPages Number of pages to allocate.
3112 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3113 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3114 * @param paPages Where to put the physical addresses of allocated memory.
3115 */
3116SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3117{
3118 unsigned iPage;
3119 int rc;
3120 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3121 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3122
3123 /*
3124 * Validate input.
3125 */
3126 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3127 if (!ppvR3 || !ppvR0 || !paPages)
3128 {
3129 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3130 pSession, ppvR3, ppvR0, paPages));
3131 return VERR_INVALID_PARAMETER;
3132
3133 }
3134 if (cPages < 1 || cPages >= 256)
3135 {
3136 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3137 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3138 }
3139
3140 /*
3141 * Let IPRT do the work.
3142 */
3143 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3144 if (RT_SUCCESS(rc))
3145 {
3146 int rc2;
3147 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3148 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3149 if (RT_SUCCESS(rc))
3150 {
3151 Mem.eType = MEMREF_TYPE_LOW;
3152 rc = supdrvMemAdd(&Mem, pSession);
3153 if (!rc)
3154 {
3155 for (iPage = 0; iPage < cPages; iPage++)
3156 {
3157 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3158 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3159 }
3160 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3161 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3162 return 0;
3163 }
3164
3165 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3166 AssertRC(rc2);
3167 }
3168
3169 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3170 AssertRC(rc2);
3171 }
3172
3173 return rc;
3174}
3175
3176
3177/**
3178 * Frees memory allocated using SUPR0LowAlloc().
3179 *
3180 * @returns IPRT status code.
3181 * @param pSession The session to which the memory was allocated.
3182 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3183 */
3184SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3185{
3186 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3187 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3188 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3189}
3190
3191
3192
3193/**
3194 * Allocates a chunk of memory with both R0 and R3 mappings.
3195 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3196 *
3197 * @returns IPRT status code.
3198 * @param pSession The session to associated the allocation with.
3199 * @param cb Number of bytes to allocate.
3200 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3201 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3202 */
3203SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3204{
3205 int rc;
3206 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3207 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3208
3209 /*
3210 * Validate input.
3211 */
3212 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3213 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3214 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3215 if (cb < 1 || cb >= _4M)
3216 {
3217 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3218 return VERR_INVALID_PARAMETER;
3219 }
3220
3221 /*
3222 * Let IPRT do the work.
3223 */
3224 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3225 if (RT_SUCCESS(rc))
3226 {
3227 int rc2;
3228 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3229 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3230 if (RT_SUCCESS(rc))
3231 {
3232 Mem.eType = MEMREF_TYPE_MEM;
3233 rc = supdrvMemAdd(&Mem, pSession);
3234 if (!rc)
3235 {
3236 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3237 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3238 return VINF_SUCCESS;
3239 }
3240
3241 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3242 AssertRC(rc2);
3243 }
3244
3245 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3246 AssertRC(rc2);
3247 }
3248
3249 return rc;
3250}
3251
3252
3253/**
3254 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3255 *
3256 * @returns IPRT status code.
3257 * @param pSession The session to which the memory was allocated.
3258 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3259 * @param paPages Where to store the physical addresses.
3260 */
3261SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3262{
3263 PSUPDRVBUNDLE pBundle;
3264 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3265
3266 /*
3267 * Validate input.
3268 */
3269 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3270 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3271 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3272
3273 /*
3274 * Search for the address.
3275 */
3276 RTSpinlockAcquire(pSession->Spinlock);
3277 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3278 {
3279 if (pBundle->cUsed > 0)
3280 {
3281 unsigned i;
3282 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3283 {
3284 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3285 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3286 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3287 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3288 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3289 )
3290 )
3291 {
3292 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3293 size_t iPage;
3294 for (iPage = 0; iPage < cPages; iPage++)
3295 {
3296 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3297 paPages[iPage].uReserved = 0;
3298 }
3299 RTSpinlockRelease(pSession->Spinlock);
3300 return VINF_SUCCESS;
3301 }
3302 }
3303 }
3304 }
3305 RTSpinlockRelease(pSession->Spinlock);
3306 Log(("Failed to find %p!!!\n", (void *)uPtr));
3307 return VERR_INVALID_PARAMETER;
3308}
3309
3310
3311/**
3312 * Free memory allocated by SUPR0MemAlloc().
3313 *
3314 * @returns IPRT status code.
3315 * @param pSession The session owning the allocation.
3316 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3317 */
3318SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3319{
3320 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3321 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3322 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3323}
3324
3325
3326/**
3327 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3328 *
3329 * The memory is fixed and it's possible to query the physical addresses using
3330 * SUPR0MemGetPhys().
3331 *
3332 * @returns IPRT status code.
3333 * @param pSession The session to associated the allocation with.
3334 * @param cPages The number of pages to allocate.
3335 * @param fFlags Flags, reserved for the future. Must be zero.
3336 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3337 * NULL if no ring-3 mapping.
3338 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3339 * NULL if no ring-0 mapping.
3340 * @param paPages Where to store the addresses of the pages. Optional.
3341 */
3342SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3343{
3344 int rc;
3345 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3346 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3347
3348 /*
3349 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3350 */
3351 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3352 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3353 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3354 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3355 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3356 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3357 {
3358 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3359 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3360 }
3361
3362 /*
3363 * Let IPRT do the work.
3364 */
3365 if (ppvR0)
3366 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3367 else
3368 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3369 if (RT_SUCCESS(rc))
3370 {
3371 int rc2;
3372 if (ppvR3)
3373 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3374 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3375 else
3376 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3377 if (RT_SUCCESS(rc))
3378 {
3379 Mem.eType = MEMREF_TYPE_PAGE;
3380 rc = supdrvMemAdd(&Mem, pSession);
3381 if (!rc)
3382 {
3383 if (ppvR3)
3384 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3385 if (ppvR0)
3386 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3387 if (paPages)
3388 {
3389 uint32_t iPage = cPages;
3390 while (iPage-- > 0)
3391 {
3392 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3393 Assert(paPages[iPage] != NIL_RTHCPHYS);
3394 }
3395 }
3396 return VINF_SUCCESS;
3397 }
3398
3399 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3400 AssertRC(rc2);
3401 }
3402
3403 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3404 AssertRC(rc2);
3405 }
3406 return rc;
3407}
3408
3409
3410/**
3411 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3412 * space.
3413 *
3414 * @returns IPRT status code.
3415 * @param pSession The session to associated the allocation with.
3416 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3417 * @param offSub Where to start mapping. Must be page aligned.
3418 * @param cbSub How much to map. Must be page aligned.
3419 * @param fFlags Flags, MBZ.
3420 * @param ppvR0 Where to return the address of the ring-0 mapping on
3421 * success.
3422 */
3423SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3424 uint32_t fFlags, PRTR0PTR ppvR0)
3425{
3426 int rc;
3427 PSUPDRVBUNDLE pBundle;
3428 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3429 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3430
3431 /*
3432 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3433 */
3434 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3435 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3436 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3437 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3438 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3439 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3440
3441 /*
3442 * Find the memory object.
3443 */
3444 RTSpinlockAcquire(pSession->Spinlock);
3445 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3446 {
3447 if (pBundle->cUsed > 0)
3448 {
3449 unsigned i;
3450 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3451 {
3452 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3453 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3454 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3455 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3456 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3457 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3458 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3459 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3460 {
3461 hMemObj = pBundle->aMem[i].MemObj;
3462 break;
3463 }
3464 }
3465 }
3466 }
3467 RTSpinlockRelease(pSession->Spinlock);
3468
3469 rc = VERR_INVALID_PARAMETER;
3470 if (hMemObj != NIL_RTR0MEMOBJ)
3471 {
3472 /*
3473 * Do some further input validations before calling IPRT.
3474 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3475 */
3476 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3477 if ( offSub < cbMemObj
3478 && cbSub <= cbMemObj
3479 && offSub + cbSub <= cbMemObj)
3480 {
3481 RTR0MEMOBJ hMapObj;
3482 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3483 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3484 if (RT_SUCCESS(rc))
3485 *ppvR0 = RTR0MemObjAddress(hMapObj);
3486 }
3487 else
3488 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3489
3490 }
3491 return rc;
3492}
3493
3494
3495/**
3496 * Changes the page level protection of one or more pages previously allocated
3497 * by SUPR0PageAllocEx.
3498 *
3499 * @returns IPRT status code.
3500 * @param pSession The session to associated the allocation with.
3501 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3502 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3503 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3504 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3505 * @param offSub Where to start changing. Must be page aligned.
3506 * @param cbSub How much to change. Must be page aligned.
3507 * @param fProt The new page level protection, see RTMEM_PROT_*.
3508 */
3509SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3510{
3511 int rc;
3512 PSUPDRVBUNDLE pBundle;
3513 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3514 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3515 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3516
3517 /*
3518 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3519 */
3520 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3521 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3522 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3523 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3524 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3525
3526 /*
3527 * Find the memory object.
3528 */
3529 RTSpinlockAcquire(pSession->Spinlock);
3530 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3531 {
3532 if (pBundle->cUsed > 0)
3533 {
3534 unsigned i;
3535 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3536 {
3537 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3538 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3539 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3540 || pvR3 == NIL_RTR3PTR)
3541 && ( pvR0 == NIL_RTR0PTR
3542 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3543 && ( pvR3 == NIL_RTR3PTR
3544 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3545 {
3546 if (pvR0 != NIL_RTR0PTR)
3547 hMemObjR0 = pBundle->aMem[i].MemObj;
3548 if (pvR3 != NIL_RTR3PTR)
3549 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3550 break;
3551 }
3552 }
3553 }
3554 }
3555 RTSpinlockRelease(pSession->Spinlock);
3556
3557 rc = VERR_INVALID_PARAMETER;
3558 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3559 || hMemObjR3 != NIL_RTR0MEMOBJ)
3560 {
3561 /*
3562 * Do some further input validations before calling IPRT.
3563 */
3564 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3565 if ( offSub < cbMemObj
3566 && cbSub <= cbMemObj
3567 && offSub + cbSub <= cbMemObj)
3568 {
3569 rc = VINF_SUCCESS;
3570 if (hMemObjR3 != NIL_RTR0PTR)
3571 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3572 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3573 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3574 }
3575 else
3576 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3577
3578 }
3579 return rc;
3580
3581}
3582
3583
3584/**
3585 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3586 *
3587 * @returns IPRT status code.
3588 * @param pSession The session owning the allocation.
3589 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3590 * SUPR0PageAllocEx().
3591 */
3592SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3593{
3594 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3595 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3596 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3597}
3598
3599
3600/**
3601 * Gets the paging mode of the current CPU.
3602 *
3603 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3604 */
3605SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3606{
3607 SUPPAGINGMODE enmMode;
3608
3609 RTR0UINTREG cr0 = ASMGetCR0();
3610 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3611 enmMode = SUPPAGINGMODE_INVALID;
3612 else
3613 {
3614 RTR0UINTREG cr4 = ASMGetCR4();
3615 uint32_t fNXEPlusLMA = 0;
3616 if (cr4 & X86_CR4_PAE)
3617 {
3618 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3619 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3620 {
3621 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3622 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3623 fNXEPlusLMA |= RT_BIT(0);
3624 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3625 fNXEPlusLMA |= RT_BIT(1);
3626 }
3627 }
3628
3629 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3630 {
3631 case 0:
3632 enmMode = SUPPAGINGMODE_32_BIT;
3633 break;
3634
3635 case X86_CR4_PGE:
3636 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3637 break;
3638
3639 case X86_CR4_PAE:
3640 enmMode = SUPPAGINGMODE_PAE;
3641 break;
3642
3643 case X86_CR4_PAE | RT_BIT(0):
3644 enmMode = SUPPAGINGMODE_PAE_NX;
3645 break;
3646
3647 case X86_CR4_PAE | X86_CR4_PGE:
3648 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3649 break;
3650
3651 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3652 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3653 break;
3654
3655 case RT_BIT(1) | X86_CR4_PAE:
3656 enmMode = SUPPAGINGMODE_AMD64;
3657 break;
3658
3659 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3660 enmMode = SUPPAGINGMODE_AMD64_NX;
3661 break;
3662
3663 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3664 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3665 break;
3666
3667 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3668 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3669 break;
3670
3671 default:
3672 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3673 enmMode = SUPPAGINGMODE_INVALID;
3674 break;
3675 }
3676 }
3677 return enmMode;
3678}
3679
3680
3681/**
3682 * Enables or disabled hardware virtualization extensions using native OS APIs.
3683 *
3684 * @returns VBox status code.
3685 * @retval VINF_SUCCESS on success.
3686 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3687 *
3688 * @param fEnable Whether to enable or disable.
3689 */
3690SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3691{
3692#ifdef RT_OS_DARWIN
3693 return supdrvOSEnableVTx(fEnable);
3694#else
3695 return VERR_NOT_SUPPORTED;
3696#endif
3697}
3698
3699
3700/**
3701 * Suspends hardware virtualization extensions using the native OS API.
3702 *
3703 * This is called prior to entering raw-mode context.
3704 *
3705 * @returns @c true if suspended, @c false if not.
3706 */
3707SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3708{
3709#ifdef RT_OS_DARWIN
3710 return supdrvOSSuspendVTxOnCpu();
3711#else
3712 return false;
3713#endif
3714}
3715
3716
3717/**
3718 * Resumes hardware virtualization extensions using the native OS API.
3719 *
3720 * This is called after to entering raw-mode context.
3721 *
3722 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3723 */
3724SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3725{
3726#ifdef RT_OS_DARWIN
3727 supdrvOSResumeVTxOnCpu(fSuspended);
3728#else
3729 Assert(!fSuspended);
3730#endif
3731}
3732
3733
3734/**
3735 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3736 *
3737 * @returns VBox status code.
3738 * @retval VERR_VMX_NO_VMX
3739 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3740 * @retval VERR_VMX_MSR_VMXON_DISABLED
3741 * @retval VERR_VMX_MSR_LOCKING_FAILED
3742 * @retval VERR_SVM_NO_SVM
3743 * @retval VERR_SVM_DISABLED
3744 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3745 * (centaur) CPU.
3746 *
3747 * @param pSession The session handle.
3748 * @param pfCaps Where to store the capabilities.
3749 */
3750SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3751{
3752 int rc = VERR_UNSUPPORTED_CPU;
3753 bool fIsSmxModeAmbiguous = false;
3754 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3755
3756 /*
3757 * Input validation.
3758 */
3759 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3760 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3761
3762 *pfCaps = 0;
3763 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3764 RTThreadPreemptDisable(&PreemptState);
3765 if (ASMHasCpuId())
3766 {
3767 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3768 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3769
3770 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3771 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3772
3773 if ( ASMIsValidStdRange(uMaxId)
3774 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3775 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3776 )
3777 {
3778 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3779 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3780 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3781 )
3782 {
3783 /** @todo Unify code with hmR0InitIntelCpu(). */
3784 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3785 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3786 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3787 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3788 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3789
3790 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3791 if (fMsrLocked)
3792 {
3793 if (fVmxAllowed && fSmxVmxAllowed)
3794 rc = VINF_SUCCESS;
3795 else if (!fVmxAllowed && !fSmxVmxAllowed)
3796 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3797 else if (!fMaybeSmxMode)
3798 {
3799 if (fVmxAllowed)
3800 rc = VINF_SUCCESS;
3801 else
3802 rc = VERR_VMX_MSR_VMXON_DISABLED;
3803 }
3804 else
3805 {
3806 /*
3807 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3808 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3809 * See @bugref{6873}.
3810 */
3811 Assert(fMaybeSmxMode == true);
3812 fIsSmxModeAmbiguous = true;
3813 rc = VINF_SUCCESS;
3814 }
3815 }
3816 else
3817 {
3818 /*
3819 * MSR is not yet locked; we can change it ourselves here.
3820 * Once the lock bit is set, this MSR can no longer be modified.
3821 *
3822 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3823 * accurately. See @bugref{6873}.
3824 */
3825 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3826 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3827 | MSR_IA32_FEATURE_CONTROL_VMXON;
3828 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3829
3830 /* Verify. */
3831 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3832 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3833 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3834 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3835 if (fSmxVmxAllowed && fVmxAllowed)
3836 rc = VINF_SUCCESS;
3837 else
3838 rc = VERR_VMX_MSR_LOCKING_FAILED;
3839 }
3840
3841 if (rc == VINF_SUCCESS)
3842 {
3843 VMXCAPABILITY vtCaps;
3844
3845 *pfCaps |= SUPVTCAPS_VT_X;
3846
3847 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3848 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3849 {
3850 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3851 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3852 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3853 }
3854 }
3855 }
3856 else
3857 rc = VERR_VMX_NO_VMX;
3858 }
3859 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3860 && ASMIsValidStdRange(uMaxId))
3861 {
3862 uint32_t fExtFeaturesEcx, uExtMaxId;
3863 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3864 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3865 if ( ASMIsValidExtRange(uExtMaxId)
3866 && uExtMaxId >= 0x8000000a
3867 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3868 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3869 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3870 )
3871 {
3872 /* Check if SVM is disabled */
3873 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3874 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3875 {
3876 uint32_t fSvmFeatures;
3877 *pfCaps |= SUPVTCAPS_AMD_V;
3878
3879 /* Query AMD-V features. */
3880 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3881 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3882 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3883
3884 rc = VINF_SUCCESS;
3885 }
3886 else
3887 rc = VERR_SVM_DISABLED;
3888 }
3889 else
3890 rc = VERR_SVM_NO_SVM;
3891 }
3892 }
3893
3894 RTThreadPreemptRestore(&PreemptState);
3895 if (fIsSmxModeAmbiguous)
3896 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3897 return rc;
3898}
3899
3900
3901/**
3902 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3903 * updating.
3904 *
3905 * @param pGip Pointer to the GIP.
3906 * @param pGipCpu The per CPU structure for this CPU.
3907 * @param u64NanoTS The current time.
3908 */
3909static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3910{
3911 pGipCpu->u64TSC = SUPReadTsc() - pGipCpu->u32UpdateIntervalTSC;
3912 pGipCpu->u64NanoTS = u64NanoTS;
3913}
3914
3915
3916/**
3917 * Set the current TSC and NanoTS value for the CPU.
3918 *
3919 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3920 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3921 * @param pvUser2 Pointer to the variable holding the current time.
3922 */
3923static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3924{
3925 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3926 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3927
3928 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3929 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3930
3931 NOREF(pvUser2);
3932 NOREF(idCpu);
3933}
3934
3935
3936/**
3937 * Increase the timer freqency on hosts where this is possible (NT).
3938 *
3939 * The idea is that more interrupts is better for us... Also, it's better than
3940 * we increase the timer frequence, because we might end up getting inaccuract
3941 * callbacks if someone else does it.
3942 *
3943 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
3944 */
3945static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3946{
3947 if (pDevExt->u32SystemTimerGranularityGrant == 0)
3948 {
3949 uint32_t u32SystemResolution;
3950 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3951 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3952 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3953 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3954 )
3955 {
3956 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3957 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3958 }
3959 }
3960}
3961
3962
3963/**
3964 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
3965 *
3966 * @param pDevExt Clears u32SystemTimerGranularityGrant.
3967 */
3968static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3969{
3970 if (pDevExt->u32SystemTimerGranularityGrant)
3971 {
3972 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
3973 AssertRC(rc2);
3974 pDevExt->u32SystemTimerGranularityGrant = 0;
3975 }
3976}
3977
3978
3979/**
3980 * Maps the GIP into userspace and/or get the physical address of the GIP.
3981 *
3982 * @returns IPRT status code.
3983 * @param pSession Session to which the GIP mapping should belong.
3984 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3985 * @param pHCPhysGip Where to store the physical address. (optional)
3986 *
3987 * @remark There is no reference counting on the mapping, so one call to this function
3988 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3989 * and remove the session as a GIP user.
3990 */
3991SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3992{
3993 int rc;
3994 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3995 RTR3PTR pGipR3 = NIL_RTR3PTR;
3996 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3997 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3998
3999 /*
4000 * Validate
4001 */
4002 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4003 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4004 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4005
4006#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4007 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4008#else
4009 RTSemFastMutexRequest(pDevExt->mtxGip);
4010#endif
4011 if (pDevExt->pGip)
4012 {
4013 /*
4014 * Map it?
4015 */
4016 rc = VINF_SUCCESS;
4017 if (ppGipR3)
4018 {
4019 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4020 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4021 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4022 if (RT_SUCCESS(rc))
4023 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4024 }
4025
4026 /*
4027 * Get physical address.
4028 */
4029 if (pHCPhysGip && RT_SUCCESS(rc))
4030 HCPhys = pDevExt->HCPhysGip;
4031
4032 /*
4033 * Reference globally.
4034 */
4035 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4036 {
4037 pSession->fGipReferenced = 1;
4038 pDevExt->cGipUsers++;
4039 if (pDevExt->cGipUsers == 1)
4040 {
4041 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4042 uint64_t u64NanoTS;
4043
4044 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4045
4046 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4047
4048 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4049 {
4050 unsigned i;
4051 for (i = 0; i < pGipR0->cCpus; i++)
4052 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4053 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4054 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4055 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4056 }
4057
4058 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4059 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4060 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4061 || RTMpGetOnlineCount() == 1)
4062 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4063 else
4064 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4065
4066#ifndef DO_NOT_START_GIP
4067 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4068#endif
4069 rc = VINF_SUCCESS;
4070 }
4071 }
4072 }
4073 else
4074 {
4075 rc = VERR_GENERAL_FAILURE;
4076 Log(("SUPR0GipMap: GIP is not available!\n"));
4077 }
4078#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4079 RTSemMutexRelease(pDevExt->mtxGip);
4080#else
4081 RTSemFastMutexRelease(pDevExt->mtxGip);
4082#endif
4083
4084 /*
4085 * Write returns.
4086 */
4087 if (pHCPhysGip)
4088 *pHCPhysGip = HCPhys;
4089 if (ppGipR3)
4090 *ppGipR3 = pGipR3;
4091
4092#ifdef DEBUG_DARWIN_GIP
4093 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4094#else
4095 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4096#endif
4097 return rc;
4098}
4099
4100
4101/**
4102 * Unmaps any user mapping of the GIP and terminates all GIP access
4103 * from this session.
4104 *
4105 * @returns IPRT status code.
4106 * @param pSession Session to which the GIP mapping should belong.
4107 */
4108SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4109{
4110 int rc = VINF_SUCCESS;
4111 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4112#ifdef DEBUG_DARWIN_GIP
4113 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4114 pSession,
4115 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4116 pSession->GipMapObjR3));
4117#else
4118 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4119#endif
4120 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4121
4122#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4123 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4124#else
4125 RTSemFastMutexRequest(pDevExt->mtxGip);
4126#endif
4127
4128 /*
4129 * Unmap anything?
4130 */
4131 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4132 {
4133 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4134 AssertRC(rc);
4135 if (RT_SUCCESS(rc))
4136 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4137 }
4138
4139 /*
4140 * Dereference global GIP.
4141 */
4142 if (pSession->fGipReferenced && !rc)
4143 {
4144 pSession->fGipReferenced = 0;
4145 if ( pDevExt->cGipUsers > 0
4146 && !--pDevExt->cGipUsers)
4147 {
4148 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4149#ifndef DO_NOT_START_GIP
4150 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4151#endif
4152 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4153 }
4154 }
4155
4156#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4157 RTSemMutexRelease(pDevExt->mtxGip);
4158#else
4159 RTSemFastMutexRelease(pDevExt->mtxGip);
4160#endif
4161
4162 return rc;
4163}
4164
4165
4166/**
4167 * Gets the GIP pointer.
4168 *
4169 * @returns Pointer to the GIP or NULL.
4170 */
4171SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4172{
4173 return g_pSUPGlobalInfoPage;
4174}
4175
4176
4177/**
4178 * Register a component factory with the support driver.
4179 *
4180 * This is currently restricted to kernel sessions only.
4181 *
4182 * @returns VBox status code.
4183 * @retval VINF_SUCCESS on success.
4184 * @retval VERR_NO_MEMORY if we're out of memory.
4185 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4186 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4187 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4188 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4189 *
4190 * @param pSession The SUPDRV session (must be a ring-0 session).
4191 * @param pFactory Pointer to the component factory registration structure.
4192 *
4193 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4194 */
4195SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4196{
4197 PSUPDRVFACTORYREG pNewReg;
4198 const char *psz;
4199 int rc;
4200
4201 /*
4202 * Validate parameters.
4203 */
4204 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4205 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4206 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4207 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4208 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4209 AssertReturn(psz, VERR_INVALID_PARAMETER);
4210
4211 /*
4212 * Allocate and initialize a new registration structure.
4213 */
4214 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4215 if (pNewReg)
4216 {
4217 pNewReg->pNext = NULL;
4218 pNewReg->pFactory = pFactory;
4219 pNewReg->pSession = pSession;
4220 pNewReg->cchName = psz - &pFactory->szName[0];
4221
4222 /*
4223 * Add it to the tail of the list after checking for prior registration.
4224 */
4225 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4226 if (RT_SUCCESS(rc))
4227 {
4228 PSUPDRVFACTORYREG pPrev = NULL;
4229 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4230 while (pCur && pCur->pFactory != pFactory)
4231 {
4232 pPrev = pCur;
4233 pCur = pCur->pNext;
4234 }
4235 if (!pCur)
4236 {
4237 if (pPrev)
4238 pPrev->pNext = pNewReg;
4239 else
4240 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4241 rc = VINF_SUCCESS;
4242 }
4243 else
4244 rc = VERR_ALREADY_EXISTS;
4245
4246 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4247 }
4248
4249 if (RT_FAILURE(rc))
4250 RTMemFree(pNewReg);
4251 }
4252 else
4253 rc = VERR_NO_MEMORY;
4254 return rc;
4255}
4256
4257
4258/**
4259 * Deregister a component factory.
4260 *
4261 * @returns VBox status code.
4262 * @retval VINF_SUCCESS on success.
4263 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4264 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4265 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4266 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4267 *
4268 * @param pSession The SUPDRV session (must be a ring-0 session).
4269 * @param pFactory Pointer to the component factory registration structure
4270 * previously passed SUPR0ComponentRegisterFactory().
4271 *
4272 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4273 */
4274SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4275{
4276 int rc;
4277
4278 /*
4279 * Validate parameters.
4280 */
4281 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4282 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4283 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4284
4285 /*
4286 * Take the lock and look for the registration record.
4287 */
4288 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4289 if (RT_SUCCESS(rc))
4290 {
4291 PSUPDRVFACTORYREG pPrev = NULL;
4292 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4293 while (pCur && pCur->pFactory != pFactory)
4294 {
4295 pPrev = pCur;
4296 pCur = pCur->pNext;
4297 }
4298 if (pCur)
4299 {
4300 if (!pPrev)
4301 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4302 else
4303 pPrev->pNext = pCur->pNext;
4304
4305 pCur->pNext = NULL;
4306 pCur->pFactory = NULL;
4307 pCur->pSession = NULL;
4308 rc = VINF_SUCCESS;
4309 }
4310 else
4311 rc = VERR_NOT_FOUND;
4312
4313 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4314
4315 RTMemFree(pCur);
4316 }
4317 return rc;
4318}
4319
4320
4321/**
4322 * Queries a component factory.
4323 *
4324 * @returns VBox status code.
4325 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4326 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4327 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4328 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4329 *
4330 * @param pSession The SUPDRV session.
4331 * @param pszName The name of the component factory.
4332 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4333 * @param ppvFactoryIf Where to store the factory interface.
4334 */
4335SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4336{
4337 const char *pszEnd;
4338 size_t cchName;
4339 int rc;
4340
4341 /*
4342 * Validate parameters.
4343 */
4344 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4345
4346 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4347 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4348 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4349 cchName = pszEnd - pszName;
4350
4351 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4352 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4353 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4354
4355 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4356 *ppvFactoryIf = NULL;
4357
4358 /*
4359 * Take the lock and try all factories by this name.
4360 */
4361 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4362 if (RT_SUCCESS(rc))
4363 {
4364 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4365 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4366 while (pCur)
4367 {
4368 if ( pCur->cchName == cchName
4369 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4370 {
4371 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4372 if (pvFactory)
4373 {
4374 *ppvFactoryIf = pvFactory;
4375 rc = VINF_SUCCESS;
4376 break;
4377 }
4378 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4379 }
4380
4381 /* next */
4382 pCur = pCur->pNext;
4383 }
4384
4385 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4386 }
4387 return rc;
4388}
4389
4390
4391/**
4392 * Adds a memory object to the session.
4393 *
4394 * @returns IPRT status code.
4395 * @param pMem Memory tracking structure containing the
4396 * information to track.
4397 * @param pSession The session.
4398 */
4399static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4400{
4401 PSUPDRVBUNDLE pBundle;
4402
4403 /*
4404 * Find free entry and record the allocation.
4405 */
4406 RTSpinlockAcquire(pSession->Spinlock);
4407 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4408 {
4409 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4410 {
4411 unsigned i;
4412 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4413 {
4414 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4415 {
4416 pBundle->cUsed++;
4417 pBundle->aMem[i] = *pMem;
4418 RTSpinlockRelease(pSession->Spinlock);
4419 return VINF_SUCCESS;
4420 }
4421 }
4422 AssertFailed(); /* !!this can't be happening!!! */
4423 }
4424 }
4425 RTSpinlockRelease(pSession->Spinlock);
4426
4427 /*
4428 * Need to allocate a new bundle.
4429 * Insert into the last entry in the bundle.
4430 */
4431 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4432 if (!pBundle)
4433 return VERR_NO_MEMORY;
4434
4435 /* take last entry. */
4436 pBundle->cUsed++;
4437 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4438
4439 /* insert into list. */
4440 RTSpinlockAcquire(pSession->Spinlock);
4441 pBundle->pNext = pSession->Bundle.pNext;
4442 pSession->Bundle.pNext = pBundle;
4443 RTSpinlockRelease(pSession->Spinlock);
4444
4445 return VINF_SUCCESS;
4446}
4447
4448
4449/**
4450 * Releases a memory object referenced by pointer and type.
4451 *
4452 * @returns IPRT status code.
4453 * @param pSession Session data.
4454 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4455 * @param eType Memory type.
4456 */
4457static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4458{
4459 PSUPDRVBUNDLE pBundle;
4460
4461 /*
4462 * Validate input.
4463 */
4464 if (!uPtr)
4465 {
4466 Log(("Illegal address %p\n", (void *)uPtr));
4467 return VERR_INVALID_PARAMETER;
4468 }
4469
4470 /*
4471 * Search for the address.
4472 */
4473 RTSpinlockAcquire(pSession->Spinlock);
4474 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4475 {
4476 if (pBundle->cUsed > 0)
4477 {
4478 unsigned i;
4479 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4480 {
4481 if ( pBundle->aMem[i].eType == eType
4482 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4483 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4484 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4485 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4486 )
4487 {
4488 /* Make a copy of it and release it outside the spinlock. */
4489 SUPDRVMEMREF Mem = pBundle->aMem[i];
4490 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4491 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4492 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4493 RTSpinlockRelease(pSession->Spinlock);
4494
4495 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4496 {
4497 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4498 AssertRC(rc); /** @todo figure out how to handle this. */
4499 }
4500 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4501 {
4502 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4503 AssertRC(rc); /** @todo figure out how to handle this. */
4504 }
4505 return VINF_SUCCESS;
4506 }
4507 }
4508 }
4509 }
4510 RTSpinlockRelease(pSession->Spinlock);
4511 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4512 return VERR_INVALID_PARAMETER;
4513}
4514
4515
4516/**
4517 * Opens an image. If it's the first time it's opened the call must upload
4518 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4519 *
4520 * This is the 1st step of the loading.
4521 *
4522 * @returns IPRT status code.
4523 * @param pDevExt Device globals.
4524 * @param pSession Session data.
4525 * @param pReq The open request.
4526 */
4527static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4528{
4529 int rc;
4530 PSUPDRVLDRIMAGE pImage;
4531 void *pv;
4532 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4533 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4534
4535 /*
4536 * Check if we got an instance of the image already.
4537 */
4538 supdrvLdrLock(pDevExt);
4539 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4540 {
4541 if ( pImage->szName[cchName] == '\0'
4542 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4543 {
4544 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4545 {
4546 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4547 pImage->cUsage++;
4548 pReq->u.Out.pvImageBase = pImage->pvImage;
4549 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4550 pReq->u.Out.fNativeLoader = pImage->fNative;
4551 supdrvLdrAddUsage(pSession, pImage);
4552 supdrvLdrUnlock(pDevExt);
4553 return VINF_SUCCESS;
4554 }
4555 supdrvLdrUnlock(pDevExt);
4556 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4557 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4558 }
4559 }
4560 /* (not found - add it!) */
4561
4562 /*
4563 * Allocate memory.
4564 */
4565 Assert(cchName < sizeof(pImage->szName));
4566 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4567 if (!pv)
4568 {
4569 supdrvLdrUnlock(pDevExt);
4570 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4571 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4572 }
4573
4574 /*
4575 * Setup and link in the LDR stuff.
4576 */
4577 pImage = (PSUPDRVLDRIMAGE)pv;
4578 pImage->pvImage = NULL;
4579 pImage->pvImageAlloc = NULL;
4580 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4581 pImage->cbImageBits = pReq->u.In.cbImageBits;
4582 pImage->cSymbols = 0;
4583 pImage->paSymbols = NULL;
4584 pImage->pachStrTab = NULL;
4585 pImage->cbStrTab = 0;
4586 pImage->pfnModuleInit = NULL;
4587 pImage->pfnModuleTerm = NULL;
4588 pImage->pfnServiceReqHandler = NULL;
4589 pImage->uState = SUP_IOCTL_LDR_OPEN;
4590 pImage->cUsage = 1;
4591 pImage->pDevExt = pDevExt;
4592 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4593
4594 /*
4595 * Try load it using the native loader, if that isn't supported, fall back
4596 * on the older method.
4597 */
4598 pImage->fNative = true;
4599 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4600 if (rc == VERR_NOT_SUPPORTED)
4601 {
4602 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4603 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4604 pImage->fNative = false;
4605 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4606 }
4607 if (RT_FAILURE(rc))
4608 {
4609 supdrvLdrUnlock(pDevExt);
4610 RTMemFree(pImage);
4611 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4612 return rc;
4613 }
4614 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4615
4616 /*
4617 * Link it.
4618 */
4619 pImage->pNext = pDevExt->pLdrImages;
4620 pDevExt->pLdrImages = pImage;
4621
4622 supdrvLdrAddUsage(pSession, pImage);
4623
4624 pReq->u.Out.pvImageBase = pImage->pvImage;
4625 pReq->u.Out.fNeedsLoading = true;
4626 pReq->u.Out.fNativeLoader = pImage->fNative;
4627 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4628
4629 supdrvLdrUnlock(pDevExt);
4630 return VINF_SUCCESS;
4631}
4632
4633
4634/**
4635 * Worker that validates a pointer to an image entrypoint.
4636 *
4637 * @returns IPRT status code.
4638 * @param pDevExt The device globals.
4639 * @param pImage The loader image.
4640 * @param pv The pointer into the image.
4641 * @param fMayBeNull Whether it may be NULL.
4642 * @param pszWhat What is this entrypoint? (for logging)
4643 * @param pbImageBits The image bits prepared by ring-3.
4644 *
4645 * @remarks Will leave the lock on failure.
4646 */
4647static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4648 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4649{
4650 if (!fMayBeNull || pv)
4651 {
4652 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4653 {
4654 supdrvLdrUnlock(pDevExt);
4655 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4656 return VERR_INVALID_PARAMETER;
4657 }
4658
4659 if (pImage->fNative)
4660 {
4661 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4662 if (RT_FAILURE(rc))
4663 {
4664 supdrvLdrUnlock(pDevExt);
4665 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4666 return rc;
4667 }
4668 }
4669 }
4670 return VINF_SUCCESS;
4671}
4672
4673
4674/**
4675 * Loads the image bits.
4676 *
4677 * This is the 2nd step of the loading.
4678 *
4679 * @returns IPRT status code.
4680 * @param pDevExt Device globals.
4681 * @param pSession Session data.
4682 * @param pReq The request.
4683 */
4684static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4685{
4686 PSUPDRVLDRUSAGE pUsage;
4687 PSUPDRVLDRIMAGE pImage;
4688 int rc;
4689 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4690
4691 /*
4692 * Find the ldr image.
4693 */
4694 supdrvLdrLock(pDevExt);
4695 pUsage = pSession->pLdrUsage;
4696 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4697 pUsage = pUsage->pNext;
4698 if (!pUsage)
4699 {
4700 supdrvLdrUnlock(pDevExt);
4701 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4702 return VERR_INVALID_HANDLE;
4703 }
4704 pImage = pUsage->pImage;
4705
4706 /*
4707 * Validate input.
4708 */
4709 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4710 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4711 {
4712 supdrvLdrUnlock(pDevExt);
4713 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4714 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4715 return VERR_INVALID_HANDLE;
4716 }
4717
4718 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4719 {
4720 unsigned uState = pImage->uState;
4721 supdrvLdrUnlock(pDevExt);
4722 if (uState != SUP_IOCTL_LDR_LOAD)
4723 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4724 return VERR_ALREADY_LOADED;
4725 }
4726
4727 switch (pReq->u.In.eEPType)
4728 {
4729 case SUPLDRLOADEP_NOTHING:
4730 break;
4731
4732 case SUPLDRLOADEP_VMMR0:
4733 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4734 if (RT_SUCCESS(rc))
4735 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4736 if (RT_SUCCESS(rc))
4737 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4738 if (RT_SUCCESS(rc))
4739 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4740 if (RT_FAILURE(rc))
4741 return rc;
4742 break;
4743
4744 case SUPLDRLOADEP_SERVICE:
4745 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4746 if (RT_FAILURE(rc))
4747 return rc;
4748 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4749 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4750 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4751 {
4752 supdrvLdrUnlock(pDevExt);
4753 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4754 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4755 pReq->u.In.EP.Service.apvReserved[0],
4756 pReq->u.In.EP.Service.apvReserved[1],
4757 pReq->u.In.EP.Service.apvReserved[2]));
4758 return VERR_INVALID_PARAMETER;
4759 }
4760 break;
4761
4762 default:
4763 supdrvLdrUnlock(pDevExt);
4764 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4765 return VERR_INVALID_PARAMETER;
4766 }
4767
4768 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4769 if (RT_FAILURE(rc))
4770 return rc;
4771 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4772 if (RT_FAILURE(rc))
4773 return rc;
4774
4775 /*
4776 * Allocate and copy the tables.
4777 * (No need to do try/except as this is a buffered request.)
4778 */
4779 pImage->cbStrTab = pReq->u.In.cbStrTab;
4780 if (pImage->cbStrTab)
4781 {
4782 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4783 if (pImage->pachStrTab)
4784 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4785 else
4786 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4787 }
4788
4789 pImage->cSymbols = pReq->u.In.cSymbols;
4790 if (RT_SUCCESS(rc) && pImage->cSymbols)
4791 {
4792 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4793 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4794 if (pImage->paSymbols)
4795 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4796 else
4797 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4798 }
4799
4800 /*
4801 * Copy the bits / complete native loading.
4802 */
4803 if (RT_SUCCESS(rc))
4804 {
4805 pImage->uState = SUP_IOCTL_LDR_LOAD;
4806 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4807 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4808
4809 if (pImage->fNative)
4810 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4811 else
4812 {
4813 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4814 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4815 }
4816 }
4817
4818 /*
4819 * Update any entry points.
4820 */
4821 if (RT_SUCCESS(rc))
4822 {
4823 switch (pReq->u.In.eEPType)
4824 {
4825 default:
4826 case SUPLDRLOADEP_NOTHING:
4827 rc = VINF_SUCCESS;
4828 break;
4829 case SUPLDRLOADEP_VMMR0:
4830 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4831 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4832 break;
4833 case SUPLDRLOADEP_SERVICE:
4834 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4835 rc = VINF_SUCCESS;
4836 break;
4837 }
4838 }
4839
4840 /*
4841 * On success call the module initialization.
4842 */
4843 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4844 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4845 {
4846 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4847 pDevExt->pLdrInitImage = pImage;
4848 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4849 rc = pImage->pfnModuleInit(pImage);
4850 pDevExt->pLdrInitImage = NULL;
4851 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4852 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4853 supdrvLdrUnsetVMMR0EPs(pDevExt);
4854 }
4855 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4856
4857 if (RT_FAILURE(rc))
4858 {
4859 /* Inform the tracing component in case ModuleInit registered TPs. */
4860 supdrvTracerModuleUnloading(pDevExt, pImage);
4861
4862 pImage->uState = SUP_IOCTL_LDR_OPEN;
4863 pImage->pfnModuleInit = NULL;
4864 pImage->pfnModuleTerm = NULL;
4865 pImage->pfnServiceReqHandler= NULL;
4866 pImage->cbStrTab = 0;
4867 RTMemFree(pImage->pachStrTab);
4868 pImage->pachStrTab = NULL;
4869 RTMemFree(pImage->paSymbols);
4870 pImage->paSymbols = NULL;
4871 pImage->cSymbols = 0;
4872 }
4873
4874 supdrvLdrUnlock(pDevExt);
4875 return rc;
4876}
4877
4878
4879/**
4880 * Frees a previously loaded (prep'ed) image.
4881 *
4882 * @returns IPRT status code.
4883 * @param pDevExt Device globals.
4884 * @param pSession Session data.
4885 * @param pReq The request.
4886 */
4887static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4888{
4889 int rc;
4890 PSUPDRVLDRUSAGE pUsagePrev;
4891 PSUPDRVLDRUSAGE pUsage;
4892 PSUPDRVLDRIMAGE pImage;
4893 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4894
4895 /*
4896 * Find the ldr image.
4897 */
4898 supdrvLdrLock(pDevExt);
4899 pUsagePrev = NULL;
4900 pUsage = pSession->pLdrUsage;
4901 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4902 {
4903 pUsagePrev = pUsage;
4904 pUsage = pUsage->pNext;
4905 }
4906 if (!pUsage)
4907 {
4908 supdrvLdrUnlock(pDevExt);
4909 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4910 return VERR_INVALID_HANDLE;
4911 }
4912
4913 /*
4914 * Check if we can remove anything.
4915 */
4916 rc = VINF_SUCCESS;
4917 pImage = pUsage->pImage;
4918 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4919 {
4920 /*
4921 * Check if there are any objects with destructors in the image, if
4922 * so leave it for the session cleanup routine so we get a chance to
4923 * clean things up in the right order and not leave them all dangling.
4924 */
4925 RTSpinlockAcquire(pDevExt->Spinlock);
4926 if (pImage->cUsage <= 1)
4927 {
4928 PSUPDRVOBJ pObj;
4929 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4930 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4931 {
4932 rc = VERR_DANGLING_OBJECTS;
4933 break;
4934 }
4935 }
4936 else
4937 {
4938 PSUPDRVUSAGE pGenUsage;
4939 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4940 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4941 {
4942 rc = VERR_DANGLING_OBJECTS;
4943 break;
4944 }
4945 }
4946 RTSpinlockRelease(pDevExt->Spinlock);
4947 if (rc == VINF_SUCCESS)
4948 {
4949 /* unlink it */
4950 if (pUsagePrev)
4951 pUsagePrev->pNext = pUsage->pNext;
4952 else
4953 pSession->pLdrUsage = pUsage->pNext;
4954
4955 /* free it */
4956 pUsage->pImage = NULL;
4957 pUsage->pNext = NULL;
4958 RTMemFree(pUsage);
4959
4960 /*
4961 * Dereference the image.
4962 */
4963 if (pImage->cUsage <= 1)
4964 supdrvLdrFree(pDevExt, pImage);
4965 else
4966 pImage->cUsage--;
4967 }
4968 else
4969 {
4970 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4971 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4972 }
4973 }
4974 else
4975 {
4976 /*
4977 * Dereference both image and usage.
4978 */
4979 pImage->cUsage--;
4980 pUsage->cUsage--;
4981 }
4982
4983 supdrvLdrUnlock(pDevExt);
4984 return rc;
4985}
4986
4987
4988/**
4989 * Gets the address of a symbol in an open image.
4990 *
4991 * @returns IPRT status code.
4992 * @param pDevExt Device globals.
4993 * @param pSession Session data.
4994 * @param pReq The request buffer.
4995 */
4996static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4997{
4998 PSUPDRVLDRIMAGE pImage;
4999 PSUPDRVLDRUSAGE pUsage;
5000 uint32_t i;
5001 PSUPLDRSYM paSyms;
5002 const char *pchStrings;
5003 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5004 void *pvSymbol = NULL;
5005 int rc = VERR_GENERAL_FAILURE;
5006 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5007
5008 /*
5009 * Find the ldr image.
5010 */
5011 supdrvLdrLock(pDevExt);
5012 pUsage = pSession->pLdrUsage;
5013 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5014 pUsage = pUsage->pNext;
5015 if (!pUsage)
5016 {
5017 supdrvLdrUnlock(pDevExt);
5018 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5019 return VERR_INVALID_HANDLE;
5020 }
5021 pImage = pUsage->pImage;
5022 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5023 {
5024 unsigned uState = pImage->uState;
5025 supdrvLdrUnlock(pDevExt);
5026 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5027 return VERR_ALREADY_LOADED;
5028 }
5029
5030 /*
5031 * Search the symbol strings.
5032 *
5033 * Note! The int32_t is for native loading on solaris where the data
5034 * and text segments are in very different places.
5035 */
5036 pchStrings = pImage->pachStrTab;
5037 paSyms = pImage->paSymbols;
5038 for (i = 0; i < pImage->cSymbols; i++)
5039 {
5040 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5041 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5042 {
5043 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5044 rc = VINF_SUCCESS;
5045 break;
5046 }
5047 }
5048 supdrvLdrUnlock(pDevExt);
5049 pReq->u.Out.pvSymbol = pvSymbol;
5050 return rc;
5051}
5052
5053
5054/**
5055 * Gets the address of a symbol in an open image or the support driver.
5056 *
5057 * @returns VINF_SUCCESS on success.
5058 * @returns
5059 * @param pDevExt Device globals.
5060 * @param pSession Session data.
5061 * @param pReq The request buffer.
5062 */
5063static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5064{
5065 int rc = VINF_SUCCESS;
5066 const char *pszSymbol = pReq->u.In.pszSymbol;
5067 const char *pszModule = pReq->u.In.pszModule;
5068 size_t cbSymbol;
5069 char const *pszEnd;
5070 uint32_t i;
5071
5072 /*
5073 * Input validation.
5074 */
5075 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5076 pszEnd = RTStrEnd(pszSymbol, 512);
5077 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5078 cbSymbol = pszEnd - pszSymbol + 1;
5079
5080 if (pszModule)
5081 {
5082 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5083 pszEnd = RTStrEnd(pszModule, 64);
5084 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5085 }
5086 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5087
5088
5089 if ( !pszModule
5090 || !strcmp(pszModule, "SupDrv"))
5091 {
5092 /*
5093 * Search the support driver export table.
5094 */
5095 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5096 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5097 {
5098 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5099 break;
5100 }
5101 }
5102 else
5103 {
5104 /*
5105 * Find the loader image.
5106 */
5107 PSUPDRVLDRIMAGE pImage;
5108
5109 supdrvLdrLock(pDevExt);
5110
5111 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5112 if (!strcmp(pImage->szName, pszModule))
5113 break;
5114 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5115 {
5116 /*
5117 * Search the symbol strings.
5118 */
5119 const char *pchStrings = pImage->pachStrTab;
5120 PCSUPLDRSYM paSyms = pImage->paSymbols;
5121 for (i = 0; i < pImage->cSymbols; i++)
5122 {
5123 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5124 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5125 {
5126 /*
5127 * Found it! Calc the symbol address and add a reference to the module.
5128 */
5129 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5130 rc = supdrvLdrAddUsage(pSession, pImage);
5131 break;
5132 }
5133 }
5134 }
5135 else
5136 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5137
5138 supdrvLdrUnlock(pDevExt);
5139 }
5140 return rc;
5141}
5142
5143
5144/**
5145 * Updates the VMMR0 entry point pointers.
5146 *
5147 * @returns IPRT status code.
5148 * @param pDevExt Device globals.
5149 * @param pSession Session data.
5150 * @param pVMMR0 VMMR0 image handle.
5151 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5152 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5153 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5154 * @remark Caller must own the loader mutex.
5155 */
5156static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5157{
5158 int rc = VINF_SUCCESS;
5159 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5160
5161
5162 /*
5163 * Check if not yet set.
5164 */
5165 if (!pDevExt->pvVMMR0)
5166 {
5167 pDevExt->pvVMMR0 = pvVMMR0;
5168 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5169 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5170 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5171 }
5172 else
5173 {
5174 /*
5175 * Return failure or success depending on whether the values match or not.
5176 */
5177 if ( pDevExt->pvVMMR0 != pvVMMR0
5178 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5179 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5180 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5181 {
5182 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5183 rc = VERR_INVALID_PARAMETER;
5184 }
5185 }
5186 return rc;
5187}
5188
5189
5190/**
5191 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5192 *
5193 * @param pDevExt Device globals.
5194 */
5195static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5196{
5197 pDevExt->pvVMMR0 = NULL;
5198 pDevExt->pfnVMMR0EntryInt = NULL;
5199 pDevExt->pfnVMMR0EntryFast = NULL;
5200 pDevExt->pfnVMMR0EntryEx = NULL;
5201}
5202
5203
5204/**
5205 * Adds a usage reference in the specified session of an image.
5206 *
5207 * Called while owning the loader semaphore.
5208 *
5209 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5210 * @param pSession Session in question.
5211 * @param pImage Image which the session is using.
5212 */
5213static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5214{
5215 PSUPDRVLDRUSAGE pUsage;
5216 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5217
5218 /*
5219 * Referenced it already?
5220 */
5221 pUsage = pSession->pLdrUsage;
5222 while (pUsage)
5223 {
5224 if (pUsage->pImage == pImage)
5225 {
5226 pUsage->cUsage++;
5227 return VINF_SUCCESS;
5228 }
5229 pUsage = pUsage->pNext;
5230 }
5231
5232 /*
5233 * Allocate new usage record.
5234 */
5235 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5236 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5237 pUsage->cUsage = 1;
5238 pUsage->pImage = pImage;
5239 pUsage->pNext = pSession->pLdrUsage;
5240 pSession->pLdrUsage = pUsage;
5241 return VINF_SUCCESS;
5242}
5243
5244
5245/**
5246 * Frees a load image.
5247 *
5248 * @param pDevExt Pointer to device extension.
5249 * @param pImage Pointer to the image we're gonna free.
5250 * This image must exit!
5251 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5252 */
5253static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5254{
5255 PSUPDRVLDRIMAGE pImagePrev;
5256 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5257
5258 /* find it - arg. should've used doubly linked list. */
5259 Assert(pDevExt->pLdrImages);
5260 pImagePrev = NULL;
5261 if (pDevExt->pLdrImages != pImage)
5262 {
5263 pImagePrev = pDevExt->pLdrImages;
5264 while (pImagePrev->pNext != pImage)
5265 pImagePrev = pImagePrev->pNext;
5266 Assert(pImagePrev->pNext == pImage);
5267 }
5268
5269 /* unlink */
5270 if (pImagePrev)
5271 pImagePrev->pNext = pImage->pNext;
5272 else
5273 pDevExt->pLdrImages = pImage->pNext;
5274
5275 /* check if this is VMMR0.r0 unset its entry point pointers. */
5276 if (pDevExt->pvVMMR0 == pImage->pvImage)
5277 supdrvLdrUnsetVMMR0EPs(pDevExt);
5278
5279 /* check for objects with destructors in this image. (Shouldn't happen.) */
5280 if (pDevExt->pObjs)
5281 {
5282 unsigned cObjs = 0;
5283 PSUPDRVOBJ pObj;
5284 RTSpinlockAcquire(pDevExt->Spinlock);
5285 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5286 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5287 {
5288 pObj->pfnDestructor = NULL;
5289 cObjs++;
5290 }
5291 RTSpinlockRelease(pDevExt->Spinlock);
5292 if (cObjs)
5293 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5294 }
5295
5296 /* call termination function if fully loaded. */
5297 if ( pImage->pfnModuleTerm
5298 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5299 {
5300 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5301 pImage->pfnModuleTerm(pImage);
5302 }
5303
5304 /* Inform the tracing component. */
5305 supdrvTracerModuleUnloading(pDevExt, pImage);
5306
5307 /* do native unload if appropriate. */
5308 if (pImage->fNative)
5309 supdrvOSLdrUnload(pDevExt, pImage);
5310
5311 /* free the image */
5312 pImage->cUsage = 0;
5313 pImage->pDevExt = NULL;
5314 pImage->pNext = NULL;
5315 pImage->uState = SUP_IOCTL_LDR_FREE;
5316 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5317 pImage->pvImageAlloc = NULL;
5318 RTMemFree(pImage->pachStrTab);
5319 pImage->pachStrTab = NULL;
5320 RTMemFree(pImage->paSymbols);
5321 pImage->paSymbols = NULL;
5322 RTMemFree(pImage);
5323}
5324
5325
5326/**
5327 * Acquires the loader lock.
5328 *
5329 * @returns IPRT status code.
5330 * @param pDevExt The device extension.
5331 */
5332DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5333{
5334#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5335 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5336#else
5337 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5338#endif
5339 AssertRC(rc);
5340 return rc;
5341}
5342
5343
5344/**
5345 * Releases the loader lock.
5346 *
5347 * @returns IPRT status code.
5348 * @param pDevExt The device extension.
5349 */
5350DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5351{
5352#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5353 return RTSemMutexRelease(pDevExt->mtxLdr);
5354#else
5355 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5356#endif
5357}
5358
5359
5360/**
5361 * Implements the service call request.
5362 *
5363 * @returns VBox status code.
5364 * @param pDevExt The device extension.
5365 * @param pSession The calling session.
5366 * @param pReq The request packet, valid.
5367 */
5368static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5369{
5370#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5371 int rc;
5372
5373 /*
5374 * Find the module first in the module referenced by the calling session.
5375 */
5376 rc = supdrvLdrLock(pDevExt);
5377 if (RT_SUCCESS(rc))
5378 {
5379 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5380 PSUPDRVLDRUSAGE pUsage;
5381
5382 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5383 if ( pUsage->pImage->pfnServiceReqHandler
5384 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5385 {
5386 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5387 break;
5388 }
5389 supdrvLdrUnlock(pDevExt);
5390
5391 if (pfnServiceReqHandler)
5392 {
5393 /*
5394 * Call it.
5395 */
5396 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5397 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5398 else
5399 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5400 }
5401 else
5402 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5403 }
5404
5405 /* log it */
5406 if ( RT_FAILURE(rc)
5407 && rc != VERR_INTERRUPTED
5408 && rc != VERR_TIMEOUT)
5409 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5410 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5411 else
5412 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5413 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5414 return rc;
5415#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5416 return VERR_NOT_IMPLEMENTED;
5417#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5418}
5419
5420
5421/**
5422 * Implements the logger settings request.
5423 *
5424 * @returns VBox status code.
5425 * @param pDevExt The device extension.
5426 * @param pSession The caller's session.
5427 * @param pReq The request.
5428 */
5429static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5430{
5431 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5432 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5433 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5434 PRTLOGGER pLogger = NULL;
5435 int rc;
5436
5437 /*
5438 * Some further validation.
5439 */
5440 switch (pReq->u.In.fWhat)
5441 {
5442 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5443 case SUPLOGGERSETTINGS_WHAT_CREATE:
5444 break;
5445
5446 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5447 if (*pszGroup || *pszFlags || *pszDest)
5448 return VERR_INVALID_PARAMETER;
5449 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5450 return VERR_ACCESS_DENIED;
5451 break;
5452
5453 default:
5454 return VERR_INTERNAL_ERROR;
5455 }
5456
5457 /*
5458 * Get the logger.
5459 */
5460 switch (pReq->u.In.fWhich)
5461 {
5462 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5463 pLogger = RTLogGetDefaultInstance();
5464 break;
5465
5466 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5467 pLogger = RTLogRelDefaultInstance();
5468 break;
5469
5470 default:
5471 return VERR_INTERNAL_ERROR;
5472 }
5473
5474 /*
5475 * Do the job.
5476 */
5477 switch (pReq->u.In.fWhat)
5478 {
5479 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5480 if (pLogger)
5481 {
5482 rc = RTLogFlags(pLogger, pszFlags);
5483 if (RT_SUCCESS(rc))
5484 rc = RTLogGroupSettings(pLogger, pszGroup);
5485 NOREF(pszDest);
5486 }
5487 else
5488 rc = VERR_NOT_FOUND;
5489 break;
5490
5491 case SUPLOGGERSETTINGS_WHAT_CREATE:
5492 {
5493 if (pLogger)
5494 rc = VERR_ALREADY_EXISTS;
5495 else
5496 {
5497 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5498
5499 rc = RTLogCreate(&pLogger,
5500 0 /* fFlags */,
5501 pszGroup,
5502 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5503 ? "VBOX_LOG"
5504 : "VBOX_RELEASE_LOG",
5505 RT_ELEMENTS(s_apszGroups),
5506 s_apszGroups,
5507 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5508 NULL);
5509 if (RT_SUCCESS(rc))
5510 {
5511 rc = RTLogFlags(pLogger, pszFlags);
5512 NOREF(pszDest);
5513 if (RT_SUCCESS(rc))
5514 {
5515 switch (pReq->u.In.fWhich)
5516 {
5517 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5518 pLogger = RTLogSetDefaultInstance(pLogger);
5519 break;
5520 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5521 pLogger = RTLogRelSetDefaultInstance(pLogger);
5522 break;
5523 }
5524 }
5525 RTLogDestroy(pLogger);
5526 }
5527 }
5528 break;
5529 }
5530
5531 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5532 switch (pReq->u.In.fWhich)
5533 {
5534 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5535 pLogger = RTLogSetDefaultInstance(NULL);
5536 break;
5537 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5538 pLogger = RTLogRelSetDefaultInstance(NULL);
5539 break;
5540 }
5541 rc = RTLogDestroy(pLogger);
5542 break;
5543
5544 default:
5545 {
5546 rc = VERR_INTERNAL_ERROR;
5547 break;
5548 }
5549 }
5550
5551 return rc;
5552}
5553
5554
5555/**
5556 * Implements the MSR prober operations.
5557 *
5558 * @returns VBox status code.
5559 * @param pDevExt The device extension.
5560 * @param pReq The request.
5561 */
5562static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5563{
5564#ifdef SUPDRV_WITH_MSR_PROBER
5565 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5566 int rc;
5567
5568 switch (pReq->u.In.enmOp)
5569 {
5570 case SUPMSRPROBEROP_READ:
5571 {
5572 uint64_t uValue;
5573 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5574 if (RT_SUCCESS(rc))
5575 {
5576 pReq->u.Out.uResults.Read.uValue = uValue;
5577 pReq->u.Out.uResults.Read.fGp = false;
5578 }
5579 else if (rc == VERR_ACCESS_DENIED)
5580 {
5581 pReq->u.Out.uResults.Read.uValue = 0;
5582 pReq->u.Out.uResults.Read.fGp = true;
5583 rc = VINF_SUCCESS;
5584 }
5585 break;
5586 }
5587
5588 case SUPMSRPROBEROP_WRITE:
5589 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5590 if (RT_SUCCESS(rc))
5591 pReq->u.Out.uResults.Write.fGp = false;
5592 else if (rc == VERR_ACCESS_DENIED)
5593 {
5594 pReq->u.Out.uResults.Write.fGp = true;
5595 rc = VINF_SUCCESS;
5596 }
5597 break;
5598
5599 case SUPMSRPROBEROP_MODIFY:
5600 case SUPMSRPROBEROP_MODIFY_FASTER:
5601 rc = supdrvOSMsrProberModify(idCpu, pReq);
5602 break;
5603
5604 default:
5605 return VERR_INVALID_FUNCTION;
5606 }
5607 return rc;
5608#else
5609 return VERR_NOT_IMPLEMENTED;
5610#endif
5611}
5612
5613
5614/**
5615 * Returns whether the host CPU sports an invariant TSC or not.
5616 *
5617 * @returns true if invariant TSC is supported, false otherwise.
5618 */
5619static bool supdrvIsInvariantTsc(void)
5620{
5621 static bool s_fQueried = false;
5622 static bool s_fIsInvariantTsc = false;
5623 if (!s_fQueried)
5624 {
5625 uint32_t uEax, uEbx, uEcx, uEdx;
5626 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5627 if (uEax >= 0x80000007)
5628 {
5629 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5630 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5631 s_fIsInvariantTsc = true;
5632 }
5633 s_fQueried = true;
5634 }
5635
5636 return s_fIsInvariantTsc;
5637}
5638
5639
5640#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5641/**
5642 * Switches the TSC-delta measurement thread into the butchered state.
5643 *
5644 * @returns VBox status code.
5645 * @param pDevExt Pointer to the device instance data.
5646 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5647 * @param pszFailed An error message to log.
5648 * @param rcFailed The error code to exit the thread with.
5649 */
5650static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5651{
5652 if (!fSpinlockHeld)
5653 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5654
5655 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5656 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5657 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5658 return rcFailed;
5659}
5660
5661
5662/**
5663 * The TSC-delta measurement thread.
5664 *
5665 * @returns VBox status code.
5666 * @param hThread The thread handle.
5667 * @param pvUser Opaque pointer to the device instance data.
5668 */
5669static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5670{
5671 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5672 static uint32_t cTimesMeasured = 0;
5673 uint32_t cConsecutiveTimeouts = 0;
5674 int rc = VERR_INTERNAL_ERROR_2;
5675 for (;;)
5676 {
5677 /*
5678 * Switch on the current state.
5679 */
5680 SUPDRVTSCDELTASTATE enmState;
5681 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5682 enmState = pDevExt->enmTscDeltaState;
5683 switch (enmState)
5684 {
5685 case kSupDrvTscDeltaState_Creating:
5686 {
5687 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5688 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5689 if (RT_FAILURE(rc))
5690 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5691 /* fall thru */
5692 }
5693
5694 case kSupDrvTscDeltaState_Listening:
5695 {
5696 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5697
5698 /* Simple adaptive timeout. */
5699 if (cConsecutiveTimeouts++ == 10)
5700 {
5701 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5702 pDevExt->cMsTscDeltaTimeout = 10;
5703 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5704 pDevExt->cMsTscDeltaTimeout = 100;
5705 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5706 pDevExt->cMsTscDeltaTimeout = 500;
5707 cConsecutiveTimeouts = 0;
5708 }
5709 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5710 if ( RT_FAILURE(rc)
5711 && rc != VERR_TIMEOUT)
5712 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5713 break;
5714 }
5715
5716 case kSupDrvTscDeltaState_WaitAndMeasure:
5717 {
5718 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5719 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5720 if (RT_FAILURE(rc))
5721 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5722 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5723 pDevExt->cMsTscDeltaTimeout = 1;
5724 RTThreadSleep(10);
5725 /* fall thru */
5726 }
5727
5728 case kSupDrvTscDeltaState_Measuring:
5729 {
5730 cConsecutiveTimeouts = 0;
5731 if (!cTimesMeasured++)
5732 {
5733 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5734 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5735 }
5736 else
5737 {
5738 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5739 unsigned iCpu;
5740
5741 if (cTimesMeasured == UINT32_MAX)
5742 cTimesMeasured = 1;
5743
5744 /* Measure TSC-deltas only for the CPUs that are in the set. */
5745 rc = VINF_SUCCESS;
5746 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5747 {
5748 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5749 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5750 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5751 {
5752 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5753 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5754 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5755 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5756 }
5757 }
5758 }
5759 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5760 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5761 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5762 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5763 pDevExt->rcTscDelta = rc;
5764 break;
5765 }
5766
5767 case kSupDrvTscDeltaState_Terminating:
5768 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5769 return VINF_SUCCESS;
5770
5771 case kSupDrvTscDeltaState_Butchered:
5772 default:
5773 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5774 }
5775 }
5776
5777 return rc;
5778}
5779
5780
5781/**
5782 * Waits for the TSC-delta measurement thread to respond to a state change.
5783 *
5784 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5785 * other error code on internal error.
5786 *
5787 * @param pThis Pointer to the grant service instance data.
5788 * @param enmCurState The current state.
5789 * @param enmNewState The new state we're waiting for it to enter.
5790 */
5791static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5792{
5793 /*
5794 * Wait a short while for the expected state transition.
5795 */
5796 int rc;
5797 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5798 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5799 if (pDevExt->enmTscDeltaState == enmNewState)
5800 {
5801 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5802 rc = VINF_SUCCESS;
5803 }
5804 else if (pDevExt->enmTscDeltaState == enmCurState)
5805 {
5806 /*
5807 * Wait longer if the state has not yet transitioned to the one we want.
5808 */
5809 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5810 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5811 if ( RT_SUCCESS(rc)
5812 || rc == VERR_TIMEOUT)
5813 {
5814 /*
5815 * Check the state whether we've succeeded.
5816 */
5817 SUPDRVTSCDELTASTATE enmState;
5818 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5819 enmState = pDevExt->enmTscDeltaState;
5820 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5821 if (enmState == enmNewState)
5822 rc = VINF_SUCCESS;
5823 else if (enmState == enmCurState)
5824 {
5825 rc = VERR_TIMEOUT;
5826 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5827 enmNewState));
5828 }
5829 else
5830 {
5831 rc = VERR_INTERNAL_ERROR;
5832 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5833 enmState, enmNewState));
5834 }
5835 }
5836 else
5837 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5838 }
5839 else
5840 {
5841 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5842 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5843 rc = VERR_INTERNAL_ERROR;
5844 }
5845
5846 return rc;
5847}
5848
5849
5850/**
5851 * Terminates the TSC-delta measurement thread.
5852 *
5853 * @param pDevExt Pointer to the device instance data.
5854 */
5855static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5856{
5857 int rc;
5858 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5859 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5860 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5861 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5862 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5863 if (RT_FAILURE(rc))
5864 {
5865 /* Signal a few more times before giving up. */
5866 int cTriesLeft = 5;
5867 while (--cTriesLeft > 0)
5868 {
5869 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5870 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5871 if (rc != VERR_TIMEOUT)
5872 break;
5873 }
5874 }
5875}
5876
5877
5878/**
5879 * Initializes and spawns the TSC-delta measurement thread.
5880 *
5881 * A thread is required for servicing re-measurement requests from events like
5882 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5883 * under all contexts on all OSs.
5884 *
5885 * @returns VBox status code.
5886 * @param pDevExt Pointer to the device instance data.
5887 *
5888 * @remarks Must only be called -after- initializing GIP and setting up MP
5889 * notifications!
5890 */
5891static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5892{
5893 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
5894
5895 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5896 if (RT_SUCCESS(rc))
5897 {
5898 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5899 if (RT_SUCCESS(rc))
5900 {
5901 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5902 pDevExt->cMsTscDeltaTimeout = 1;
5903 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5904 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5905 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5906 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5907 if (RT_SUCCESS(rc))
5908 {
5909 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5910 if (RT_SUCCESS(rc))
5911 {
5912 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5913 return rc;
5914 }
5915
5916 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5917 supdrvTscDeltaThreadTerminate(pDevExt);
5918 }
5919 else
5920 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5921 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5922 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5923 }
5924 else
5925 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5926 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5927 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5928 }
5929 else
5930 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5931
5932 return rc;
5933}
5934
5935
5936/**
5937 * Terminates the TSC-delta measurement thread and cleanup.
5938 *
5939 * @param pDevExt Pointer to the device instance data.
5940 */
5941static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5942{
5943 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5944 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5945 {
5946 supdrvTscDeltaThreadTerminate(pDevExt);
5947 }
5948
5949 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5950 {
5951 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5952 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5953 }
5954
5955 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5956 {
5957 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5958 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5959 }
5960
5961 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5962}
5963
5964
5965/**
5966 * Waits for TSC-delta measurements to be completed for all online CPUs.
5967 *
5968 * @returns VBox status code.
5969 * @param pDevExt Pointer to the device instance data.
5970 */
5971static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
5972{
5973 int cTriesLeft = 5;
5974 int cMsTotalWait;
5975 int cMsWaited = 0;
5976 int cMsWaitGranularity = 1;
5977
5978 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5979 AssertReturn(pGip, VERR_INVALID_POINTER);
5980
5981 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
5982 while (cTriesLeft-- > 0)
5983 {
5984 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
5985 return VINF_SUCCESS;
5986 RTThreadSleep(cMsWaitGranularity);
5987 cMsWaited += cMsWaitGranularity;
5988 if (cMsWaited >= cMsTotalWait)
5989 break;
5990 }
5991
5992 return VERR_TIMEOUT;
5993}
5994#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5995
5996
5997/**
5998 * Measures the TSC frequency of the system.
5999 *
6000 * Uses a busy-wait method for the async. case as it is intended to help push
6001 * the CPU frequency up, while for the invariant cases using a sleeping method.
6002 *
6003 * The TSC frequency can vary on systems which are not reported as invariant.
6004 * On such systems the object of this function is to find out what the nominal,
6005 * maximum TSC frequency under 'normal' CPU operation.
6006 *
6007 * @returns VBox status code.
6008 * @param pDevExt Pointer to the device instance.
6009 *
6010 * @remarks Must be called only -after- measuring the TSC deltas.
6011 */
6012static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6013{
6014 int cTriesLeft = 4;
6015 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6016
6017 /* Assert order. */
6018 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6019 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6020
6021 while (cTriesLeft-- > 0)
6022 {
6023 RTCCUINTREG uFlags;
6024 uint64_t u64NanoTsBefore;
6025 uint64_t u64NanoTsAfter;
6026 uint64_t u64TscBefore;
6027 uint64_t u64TscAfter;
6028 uint8_t idApicBefore;
6029 uint8_t idApicAfter;
6030
6031 /*
6032 * Synchronize with the host OS clock tick before reading the TSC.
6033 * Especially important on older Windows version where the granularity is terrible.
6034 */
6035 u64NanoTsBefore = RTTimeSystemNanoTS();
6036 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6037 ASMNopPause();
6038
6039 uFlags = ASMIntDisableFlags();
6040 idApicBefore = ASMGetApicId();
6041 u64TscBefore = ASMReadTSC();
6042 u64NanoTsBefore = RTTimeSystemNanoTS();
6043 ASMSetFlags(uFlags);
6044
6045 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6046 {
6047 /*
6048 * Sleep-wait since the TSC frequency is constant, it eases host load.
6049 * Shorter interval produces more variance in the frequency (esp. Windows).
6050 */
6051 RTThreadSleep(200);
6052 u64NanoTsAfter = RTTimeSystemNanoTS();
6053 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6054 ASMNopPause();
6055 u64NanoTsAfter = RTTimeSystemNanoTS();
6056 }
6057 else
6058 {
6059 /* Busy-wait keeping the frequency up and measure. */
6060 for (;;)
6061 {
6062 u64NanoTsAfter = RTTimeSystemNanoTS();
6063 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6064 ASMNopPause();
6065 else
6066 break;
6067 }
6068 }
6069
6070 uFlags = ASMIntDisableFlags();
6071 idApicAfter = ASMGetApicId();
6072 u64TscAfter = ASMReadTSC();
6073 ASMSetFlags(uFlags);
6074
6075 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6076 {
6077 int rc;
6078 bool fAppliedBefore;
6079 bool fAppliedAfter;
6080 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6081 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6082
6083 if ( !fAppliedBefore
6084 || !fAppliedAfter)
6085 {
6086#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6087 /*
6088 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6089 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6090 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6091 * proceed. This should be triggered just once if we're rather unlucky.
6092 */
6093 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6094 if (rc == VERR_TIMEOUT)
6095 {
6096 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6097 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6098 }
6099#else
6100 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6101 idApicBefore, idApicAfter, cTriesLeft);
6102#endif
6103 continue;
6104 }
6105 }
6106
6107 /*
6108 * Update GIP.
6109 */
6110 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6111 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6112 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6113 return VINF_SUCCESS;
6114 }
6115
6116 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6117}
6118
6119
6120/**
6121 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6122 *
6123 * @param pTimer The timer.
6124 * @param pvUser Opaque pointer to the device instance data.
6125 * @param iTick The timer tick.
6126 */
6127static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6128{
6129 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6130 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6131 bool fDeltaApplied = false;
6132 uint8_t idApic;
6133 uint64_t u64DeltaNanoTS;
6134 uint64_t u64DeltaTsc;
6135 uint64_t u64NanoTS;
6136 uint64_t u64Tsc;
6137 RTCCUINTREG uFlags;
6138
6139 /* Paranoia. */
6140 Assert(pGip);
6141 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6142
6143#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6144 u64NanoTS = RTTimeSystemNanoTS();
6145 while (RTTimeSystemNanoTS() == u64NanoTS)
6146 ASMNopPause();
6147#endif
6148 uFlags = ASMIntDisableFlags();
6149 idApic = ASMGetApicId();
6150 u64Tsc = ASMReadTSC();
6151 u64NanoTS = RTTimeSystemNanoTS();
6152 ASMSetFlags(uFlags);
6153 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6154 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6155 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6156
6157 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6158 && !fDeltaApplied)
6159 {
6160 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6161 GIP_TSC_REFINE_INTERVAL);
6162 return;
6163 }
6164
6165 /* Calculate the TSC frequency. */
6166 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6167 && u64DeltaNanoTS < UINT32_MAX)
6168 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, u64DeltaNanoTS);
6169 else
6170 {
6171 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6172 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6173 && u64DeltaNanoTS / 10 < UINT32_MAX)
6174 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, u64DeltaNanoTS / 10);
6175 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6176 && u64DeltaNanoTS / 100 < UINT32_MAX)
6177 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, u64DeltaNanoTS / 100);
6178 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6179 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6180 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, u64DeltaNanoTS / 1000);
6181 else /* Screw it. */
6182 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6183 }
6184
6185 /* Update rest of GIP. */
6186 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6187 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6188}
6189
6190
6191/**
6192 * Starts the TSC-frequency refinement phase asynchronously.
6193 *
6194 * @param pDevExt Pointer to the device instance data.
6195 */
6196static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6197{
6198 uint64_t u64NanoTS;
6199 RTCCUINTREG uFlags;
6200 uint8_t idApic;
6201 int rc;
6202 bool fDeltaApplied = false;
6203 PSUPGLOBALINFOPAGE pGip;
6204
6205 /* Validate. */
6206 Assert(pDevExt);
6207 Assert(pDevExt->pGip);
6208
6209 pGip = pDevExt->pGip;
6210 u64NanoTS = RTTimeSystemNanoTS();
6211 while (RTTimeSystemNanoTS() == u64NanoTS)
6212 ASMNopPause();
6213 uFlags = ASMIntDisableFlags();
6214 idApic = ASMGetApicId();
6215 pDevExt->u64TscAnchor = ASMReadTSC();
6216 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6217 ASMSetFlags(uFlags);
6218 SUPTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, &fDeltaApplied);
6219
6220#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6221 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6222 && !fDeltaApplied)
6223 {
6224 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6225 if (rc == VERR_TIMEOUT)
6226 {
6227 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6228 return;
6229 }
6230 }
6231#endif
6232
6233 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6234 if (RT_SUCCESS(rc))
6235 {
6236 /*
6237 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6238 * interval as small as possible while gaining the most consistent and accurate frequency
6239 * (compared to what the host OS might have measured).
6240 *
6241 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6242 * same TSC frequency whenever possible so we need to keep the interval short.
6243 */
6244 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6245 AssertRC(rc);
6246 }
6247 else
6248 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6249}
6250
6251
6252/**
6253 * Creates the GIP.
6254 *
6255 * @returns VBox status code.
6256 * @param pDevExt Instance data. GIP stuff may be updated.
6257 */
6258static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6259{
6260 PSUPGLOBALINFOPAGE pGip;
6261 RTHCPHYS HCPhysGip;
6262 uint32_t u32SystemResolution;
6263 uint32_t u32Interval;
6264 uint32_t u32MinInterval;
6265 uint32_t uMod;
6266 unsigned cCpus;
6267 int rc;
6268
6269 LogFlow(("supdrvGipCreate:\n"));
6270
6271 /* Assert order. */
6272 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6273 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6274 Assert(!pDevExt->pGipTimer);
6275
6276 /*
6277 * Check the CPU count.
6278 */
6279 cCpus = RTMpGetArraySize();
6280 if ( cCpus > RTCPUSET_MAX_CPUS
6281 || cCpus > 256 /* ApicId is used for the mappings */)
6282 {
6283 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6284 return VERR_TOO_MANY_CPUS;
6285 }
6286
6287 /*
6288 * Allocate a contiguous set of pages with a default kernel mapping.
6289 */
6290 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6291 if (RT_FAILURE(rc))
6292 {
6293 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6294 return rc;
6295 }
6296 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6297 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6298
6299 /*
6300 * Allocate the TSC-delta sync struct on a separate cache line.
6301 */
6302 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6303 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6304 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6305
6306 /*
6307 * Find a reasonable update interval and initialize the structure.
6308 */
6309 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6310 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6311 * See @bugref{6710}. */
6312 u32MinInterval = RT_NS_10MS;
6313 u32SystemResolution = RTTimerGetSystemGranularity();
6314 u32Interval = u32MinInterval;
6315 uMod = u32MinInterval % u32SystemResolution;
6316 if (uMod)
6317 u32Interval += u32SystemResolution - uMod;
6318
6319 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6320
6321 if (RT_UNLIKELY( pDevExt->fOsTscDeltasInSync
6322 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6323 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6324 {
6325 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6326 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6327 return VERR_INTERNAL_ERROR_2;
6328 }
6329
6330#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6331 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6332 {
6333 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6334 rc = supdrvTscDeltaThreadInit(pDevExt);
6335 }
6336#endif
6337 if (RT_SUCCESS(rc))
6338 {
6339 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6340 if (RT_SUCCESS(rc))
6341 {
6342 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6343 if (RT_SUCCESS(rc))
6344 {
6345 uint16_t iCpu;
6346#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6347 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6348 {
6349 /*
6350 * Measure the TSC deltas now that we have MP notifications.
6351 */
6352 int cTries = 5;
6353 do
6354 {
6355 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6356 if (rc != VERR_TRY_AGAIN)
6357 break;
6358 } while (--cTries > 0);
6359 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6360 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6361 }
6362 else
6363 {
6364 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6365 Assert(!pGip->aCPUs[iCpu].i64TSCDelta);
6366 }
6367#endif
6368 if (RT_SUCCESS(rc))
6369 {
6370 rc = supdrvGipMeasureTscFreq(pDevExt);
6371 if (RT_SUCCESS(rc))
6372 {
6373 /*
6374 * Create the timer.
6375 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6376 */
6377 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6378 {
6379 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6380 pDevExt);
6381 if (rc == VERR_NOT_SUPPORTED)
6382 {
6383 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6384 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6385 }
6386 }
6387 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6388 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6389 if (RT_SUCCESS(rc))
6390 {
6391 /*
6392 * We're good.
6393 */
6394 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6395 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6396
6397 g_pSUPGlobalInfoPage = pGip;
6398 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6399 supdrvRefineTscFreq(pDevExt);
6400 return VINF_SUCCESS;
6401 }
6402 else
6403 {
6404 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6405 Assert(!pDevExt->pGipTimer);
6406 }
6407 }
6408 else
6409 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6410 }
6411 else
6412 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6413 }
6414 else
6415 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6416 }
6417 else
6418 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6419 }
6420 else
6421 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6422
6423 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6424 return rc;
6425}
6426
6427
6428/**
6429 * Terminates the GIP.
6430 *
6431 * @param pDevExt Instance data. GIP stuff may be updated.
6432 */
6433static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6434{
6435 int rc;
6436#ifdef DEBUG_DARWIN_GIP
6437 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6438 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6439 pDevExt->pGipTimer, pDevExt->GipMemObj));
6440#endif
6441
6442 /*
6443 * Stop receiving MP notifications before tearing anything else down.
6444 */
6445 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6446
6447#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6448 /*
6449 * Terminate the TSC-delta measurement thread and resources.
6450 */
6451 supdrvTscDeltaTerm(pDevExt);
6452#endif
6453
6454 /*
6455 * Destroy the TSC-refinement one-shot timer.
6456 */
6457 if (pDevExt->pTscRefineTimer)
6458 {
6459 RTTimerDestroy(pDevExt->pTscRefineTimer);
6460 pDevExt->pTscRefineTimer = NULL;
6461 }
6462
6463 if (pDevExt->pvTscDeltaSync)
6464 {
6465 RTMemFree(pDevExt->pvTscDeltaSync);
6466 pDevExt->pTscDeltaSync = NULL;
6467 pDevExt->pvTscDeltaSync = NULL;
6468 }
6469
6470 /*
6471 * Invalid the GIP data.
6472 */
6473 if (pDevExt->pGip)
6474 {
6475 supdrvGipTerm(pDevExt->pGip);
6476 pDevExt->pGip = NULL;
6477 }
6478 g_pSUPGlobalInfoPage = NULL;
6479
6480 /*
6481 * Destroy the timer and free the GIP memory object.
6482 */
6483 if (pDevExt->pGipTimer)
6484 {
6485 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6486 pDevExt->pGipTimer = NULL;
6487 }
6488
6489 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6490 {
6491 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6492 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6493 }
6494
6495 /*
6496 * Finally, make sure we've release the system timer resolution request
6497 * if one actually succeeded and is still pending.
6498 */
6499 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6500}
6501
6502
6503/**
6504 * Timer callback function sync GIP mode.
6505 * @param pTimer The timer.
6506 * @param pvUser Opaque pointer to the device extension.
6507 * @param iTick The timer tick.
6508 */
6509static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6510{
6511 RTCCUINTREG uFlags;
6512 uint64_t u64TSC;
6513 uint64_t u64NanoTS;
6514 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6515 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6516
6517 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6518 u64TSC = ASMReadTSC();
6519 u64NanoTS = RTTimeSystemNanoTS();
6520
6521 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6522 {
6523 /*
6524 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6525 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6526 * affected a bit until we get proper TSC deltas than implementing options like
6527 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6528 *
6529 * The likely hood of this happening is really low. On Windows, Linux timers
6530 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6531 */
6532 Assert(!ASMIntAreEnabled());
6533 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6534 }
6535
6536 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6537
6538 ASMSetFlags(uFlags);
6539}
6540
6541
6542/**
6543 * Timer callback function for async GIP mode.
6544 * @param pTimer The timer.
6545 * @param pvUser Opaque pointer to the device extension.
6546 * @param iTick The timer tick.
6547 */
6548static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6549{
6550 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6551 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6552 RTCPUID idCpu = RTMpCpuId();
6553 uint64_t u64TSC = ASMReadTSC();
6554 uint64_t NanoTS = RTTimeSystemNanoTS();
6555
6556 /** @todo reset the transaction number and whatnot when iTick == 1. */
6557 if (pDevExt->idGipMaster == idCpu)
6558 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6559 else
6560 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6561
6562 ASMSetFlags(fOldFlags);
6563}
6564
6565
6566/**
6567 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6568 *
6569 * @returns Index of the CPU in the cache set.
6570 * @param pGip The GIP.
6571 * @param idCpu The CPU ID.
6572 */
6573static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6574{
6575 uint32_t i, cTries;
6576
6577 /*
6578 * ASSUMES that CPU IDs are constant.
6579 */
6580 for (i = 0; i < pGip->cCpus; i++)
6581 if (pGip->aCPUs[i].idCpu == idCpu)
6582 return i;
6583
6584 cTries = 0;
6585 do
6586 {
6587 for (i = 0; i < pGip->cCpus; i++)
6588 {
6589 bool fRc;
6590 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6591 if (fRc)
6592 return i;
6593 }
6594 } while (cTries++ < 32);
6595 AssertReleaseFailed();
6596 return i - 1;
6597}
6598
6599
6600/**
6601 * The calling CPU should be accounted as online, update GIP accordingly.
6602 *
6603 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6604 *
6605 * @param pDevExt The device extension.
6606 * @param idCpu The CPU ID.
6607 */
6608static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6609{
6610 int iCpuSet = 0;
6611 uint16_t idApic = UINT16_MAX;
6612 uint32_t i = 0;
6613 uint64_t u64NanoTS = 0;
6614 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6615
6616 AssertPtrReturnVoid(pGip);
6617 AssertRelease(idCpu == RTMpCpuId());
6618 Assert(pGip->cPossibleCpus == RTMpGetCount());
6619
6620 /*
6621 * Do this behind a spinlock with interrupts disabled as this can fire
6622 * on all CPUs simultaneously, see @bugref{6110}.
6623 */
6624 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6625
6626 /*
6627 * Update the globals.
6628 */
6629 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6630 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6631 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6632 if (iCpuSet >= 0)
6633 {
6634 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6635 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6636 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6637 }
6638
6639 /*
6640 * Update the entry.
6641 */
6642 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6643 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6644 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
6645 idApic = ASMGetApicId();
6646 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6647 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6648 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6649
6650 /*
6651 * Update the APIC ID and CPU set index mappings.
6652 */
6653 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6654 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6655
6656 /* Update the Mp online/offline counter. */
6657 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6658
6659#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6660 /*
6661 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6662 *
6663 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6664 * update the state and it'll get serviced when the thread's listening interval times out.
6665 */
6666 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6667 {
6668 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6669 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6670 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6671 {
6672 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6673 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6674 }
6675 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6676 }
6677#endif
6678
6679 /* commit it */
6680 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6681
6682 RTSpinlockRelease(pDevExt->hGipSpinlock);
6683}
6684
6685
6686/**
6687 * The CPU should be accounted as offline, update the GIP accordingly.
6688 *
6689 * This is used by supdrvGipMpEvent.
6690 *
6691 * @param pDevExt The device extension.
6692 * @param idCpu The CPU ID.
6693 */
6694static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6695{
6696 int iCpuSet;
6697 unsigned i;
6698
6699 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6700
6701 AssertPtrReturnVoid(pGip);
6702 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6703
6704 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6705 AssertReturnVoid(iCpuSet >= 0);
6706
6707 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6708 AssertReturnVoid(i < pGip->cCpus);
6709 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6710
6711 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6712 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6713
6714 /* Update the Mp online/offline counter. */
6715 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6716
6717 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6718 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
6719 {
6720 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6721 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6722 }
6723
6724 /* Reset the TSC delta, we will recalculate it lazily. */
6725 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6726 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6727
6728#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6729 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6730 if (supdrvIsInvariantTsc())
6731 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6732#endif
6733
6734 /* commit it */
6735 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6736
6737 RTSpinlockRelease(pDevExt->hGipSpinlock);
6738}
6739
6740
6741/**
6742 * Multiprocessor event notification callback.
6743 *
6744 * This is used to make sure that the GIP master gets passed on to
6745 * another CPU. It also updates the associated CPU data.
6746 *
6747 * @param enmEvent The event.
6748 * @param idCpu The cpu it applies to.
6749 * @param pvUser Pointer to the device extension.
6750 *
6751 * @remarks This function -must- fire on the newly online'd CPU for the
6752 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6753 * RTMPEVENT_OFFLINE case.
6754 */
6755static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6756{
6757 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6758 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6759
6760 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6761
6762 /*
6763 * Update the GIP CPU data.
6764 */
6765 if (pGip)
6766 {
6767 switch (enmEvent)
6768 {
6769 case RTMPEVENT_ONLINE:
6770 AssertRelease(idCpu == RTMpCpuId());
6771 supdrvGipMpEventOnline(pDevExt, idCpu);
6772 break;
6773 case RTMPEVENT_OFFLINE:
6774 supdrvGipMpEventOffline(pDevExt, idCpu);
6775 break;
6776 }
6777 }
6778
6779 /*
6780 * Make sure there is a master GIP.
6781 */
6782 if (enmEvent == RTMPEVENT_OFFLINE)
6783 {
6784 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6785 if (idGipMaster == idCpu)
6786 {
6787 /*
6788 * Find a new GIP master.
6789 */
6790 bool fIgnored;
6791 unsigned i;
6792 int64_t iTSCDelta;
6793 uint32_t idxNewGipMaster;
6794 RTCPUID idNewGipMaster = NIL_RTCPUID;
6795 RTCPUSET OnlineCpus;
6796 RTMpGetOnlineSet(&OnlineCpus);
6797
6798 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6799 {
6800 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6801 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6802 && idCurCpu != idGipMaster)
6803 {
6804 idNewGipMaster = idCurCpu;
6805 break;
6806 }
6807 }
6808
6809 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6810 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6811 NOREF(fIgnored);
6812
6813 /*
6814 * Adjust all the TSC deltas against the new GIP master.
6815 */
6816 if (pGip)
6817 {
6818 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6819 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6820 Assert(iTSCDelta != INT64_MAX);
6821 for (i = 0; i < pGip->cCpus; i++)
6822 {
6823 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6824 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6825 if (iWorkerDelta != INT64_MAX)
6826 iWorkerDelta -= iTSCDelta;
6827 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6828 }
6829 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6830 }
6831 }
6832 }
6833}
6834
6835
6836/**
 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
 * compute the delta between them.
 *
 * The callback runs on every CPU, but only the master (the CPU stored in
 * pDevExt->idTscDeltaInitiator) and the worker (*pvUser2) take part; all other
 * CPUs return immediately.  Master and worker then run in lock-step through
 * pDevExt->pTscDeltaSync->u: the master announces SYNC_START, the worker
 * answers with WORKER_READY, both sample their TSC, the worker reports
 * WORKER_DONE and the master finishes the round with SYNC_STOP.  The master
 * keeps the smallest (worker sample - master sample) seen in the worker's
 * i64TSCDelta.  Up to 12 attempts of GIP_TSC_DELTA_LOOPS rounds each are made;
 * the attempts stop once the worker's i64TSCDelta is no longer INT64_MAX.
 *
 * @param   idCpu       The CPU we are currently scheduled on.
 * @param   pvUser1     Opaque pointer to the device instance data.
 * @param   pvUser2     Opaque pointer to the worker Cpu Id.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *          read the TSC at exactly the same time on both the master and the worker
 *          CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
 *          pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
 *          try to minimize the measurement error by computing the minimum read time
 *          of the compare statement in the worker by taking TSC measurements across
 *          it.
 *
 *          We ignore the first few runs of the loop in order to prime the cache.
 *          Also, be careful about using 'pause' instruction in critical busy-wait
 *          loops in this code - it can cause undesired behaviour with
 *          hyperthreading.
 *
 *          It must be noted that the computed minimum read time is mostly to
 *          eliminate huge deltas when the worker is too early and doesn't by itself
 *          help produce more accurate deltas. We allow two times the computed
 *          minimum as an arbitrary acceptable threshold. Therefore, it is still
 *          possible to get negative deltas where there are none when the worker is
 *          earlier. As long as these occasional negative deltas are lower than the
 *          time it takes to exit guest-context and the OS to reschedule EMT on a
 *          different CPU we won't expose a TSC that jumped backwards. It is because
 *          of the existence of the negative deltas we don't recompute the delta with
 *          the master and worker interchanged to eliminate the remaining measurement
 *          error.
 */
6869static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6870{
6871 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
6872 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6873 uint32_t *pidWorker = (uint32_t *)pvUser2;
6874 RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
6875 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6876 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6877 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6878 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6879 int cTriesLeft = 12;
6880
 /* CPUs that are neither the master nor the worker take no part in the measurement. */
6881 if ( idCpu != idMaster
6882 && idCpu != *pidWorker)
6883 return;
6884
6885 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6886 with a timeout to avoid deadlocking the entire system. */
6887 if (!RTMpOnAllIsConcurrentSafe())
6888 {
6889 /** @todo This was introduced for Windows, but since Windows doesn't use this
6890 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6891 * see @bugref{6710} comment 81), eventually phase it out. */
6892 uint64_t uTscNow;
6893 uint64_t uTscStart;
6894 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6895
6896 ASMSerializeInstruction();
6897 uTscStart = ASMReadTSC();
6898 if (idCpu == idMaster)
6899 {
 /* Master: announce PRESTART_MASTER, then wait (bounded by cWaitTicks) for the worker's PRESTART_WORKER reply. */
6900 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6901 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6902 {
6903 ASMSerializeInstruction();
6904 uTscNow = ASMReadTSC();
6905 if (uTscNow - uTscStart > cWaitTicks)
6906 {
6907 /* Set the worker delta to indicate failure, not the master. */
6908 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6909 return;
6910 }
6911
6912 ASMNopPause();
6913 }
6914 }
6915 else
6916 {
 /* Worker: wait (bounded by cWaitTicks) for PRESTART_MASTER, then reply with PRESTART_WORKER. */
6917 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6918 {
6919 ASMSerializeInstruction();
6920 uTscNow = ASMReadTSC();
6921 if (uTscNow - uTscStart > cWaitTicks)
6922 {
6923 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6924 return;
6925 }
6926
6927 ASMNopPause();
6928 }
6929 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6930 }
6931 }
6932
 /* Each attempt runs GIP_TSC_DELTA_LOOPS rounds; uMinCmpReadTime starts afresh for every attempt. */
6933 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6934 while (cTriesLeft-- > 0)
6935 {
6936 unsigned i;
6937 uint64_t uMinCmpReadTime = UINT64_MAX;
6938 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6939 {
6940 if (idCpu == idMaster)
6941 {
6942 /*
6943 * The master.
6944 */
6945 RTCCUINTREG uFlags;
6946 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6947 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6948
6949 /* Disable interrupts only in the master for as short a period
6950 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6951 uFlags = ASMIntDisableFlags();
6952
 /* Spin until the sync variable leaves SYNC_START (the worker sets WORKER_READY). */
6953 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6954 ;
6955
 /* Publish the master's TSC; repeat should the value coincide with the reserved marker. */
6956 do
6957 {
6958 ASMSerializeInstruction();
6959 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6960 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6961
6962 ASMSetFlags(uFlags);
6963
6964 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6965 ;
6966
 /* Past the primer and read-time rounds: keep the smallest delta for which the worker supplied a sample. */
6967 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6968 {
6969 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6970 {
6971 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6972 if (iDelta < pGipCpuWorker->i64TSCDelta)
6973 pGipCpuWorker->i64TSCDelta = iDelta;
6974 }
6975 }
6976
 /* Reset the master sample and end the round. */
6977 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6978 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6979 }
6980 else
6981 {
6982 /*
6983 * The worker.
6984 */
6985 uint64_t uTscWorker;
6986 uint64_t uTscWorkerFlushed;
6987 uint64_t uCmpReadTime;
6988
6989 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
6990 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
6991 ;
6992 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6993 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
6994
6995 /*
6996 * Keep reading the TSC until we notice that the master has read his. Reading
6997 * the TSC -after- the master has updated the memory is way too late. We thus
6998 * compensate by trying to measure how long it took for the worker to notice
6999 * the memory flushed from the master.
7000 */
7001 do
7002 {
7003 ASMSerializeInstruction();
7004 uTscWorker = ASMReadTSC();
7005 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7006 ASMSerializeInstruction();
7007 uTscWorkerFlushed = ASMReadTSC();
7008
 /* Primer rounds are ignored, read-time rounds only train uMinCmpReadTime, later rounds publish a sample
    when the read time is below twice the minimum (otherwise the reserved marker is published). */
7009 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7010 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7011 {
7012 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
7013 if (uCmpReadTime < (uMinCmpReadTime << 1))
7014 {
7015 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7016 if (uCmpReadTime < uMinCmpReadTime)
7017 uMinCmpReadTime = uCmpReadTime;
7018 }
7019 else
7020 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7021 }
7022 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7023 {
7024 if (uCmpReadTime < uMinCmpReadTime)
7025 uMinCmpReadTime = uCmpReadTime;
7026 }
7027
7028 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7029 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7030 ASMNopPause();
7031 }
7032 }
7033
 /* No further attempts are needed once a delta has been recorded for the worker. */
7034 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7035 break;
7036 }
7037}
7038
7039
7040/**
7041 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
7042 * synchronization variable. Optionally also clears the deltas on the per-CPU
7043 * GIP struct. as well.
7044 *
7045 * @param pDevExt Pointer to the device instance data.
7046 * @param fClearDeltas Whether the deltas are also to be cleared.
7047 */
7048DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7049{
7050 unsigned iCpu;
7051 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7052 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7053 {
7054 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7055 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7056 if (fClearDeltas)
7057 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7058 }
7059 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7060}
7061
7062
7063/**
7064 * Measures the TSC delta between the master GIP CPU and one specified worker
7065 * CPU.
7066 *
7067 * @returns VBox status code.
7068 * @param pDevExt Pointer to the device instance data.
7069 * @param idxWorker The index of the worker CPU from the GIP's array of
7070 * CPUs.
7071 *
7072 * @remarks This can be called with preemption disabled!
7073 */
7074static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7075{
7076 int rc;
7077 PSUPGLOBALINFOPAGE pGip;
7078 PSUPGIPCPU pGipCpuWorker;
7079 RTCPUID idMaster;
7080
7081 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7082 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7083
7084 pGip = pDevExt->pGip;
7085 idMaster = pDevExt->idGipMaster;
7086 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7087
7088 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7089
7090 if (pGipCpuWorker->idCpu == idMaster)
7091 {
7092 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7093 return VINF_SUCCESS;
7094 }
7095
7096 /* Set the master TSC as the initiator. */
7097 while (ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7098 {
7099 /*
7100 * Sleep here rather than spin as there is a parallel measurement
7101 * being executed and that can take a good while to be done.
7102 */
7103 RTThreadSleep(1);
7104 }
7105
7106 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7107 {
7108 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7109 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7110 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7111 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7112 if (RT_SUCCESS(rc))
7113 {
7114 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7115 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7116 }
7117 }
7118 else
7119 rc = VERR_CPU_OFFLINE;
7120
7121 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7122 return rc;
7123}
7124
7125
7126/**
7127 * Measures the TSC deltas between CPUs.
7128 *
7129 * @param pDevExt Pointer to the device instance data.
7130 * @param pidxMaster Where to store the index of the chosen master TSC if we
7131 * managed to determine the TSC deltas successfully.
7132 * Optional, can be NULL.
7133 *
7134 * @returns VBox status code.
7135 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7136 * idCpu, GIP's online CPU set which are populated in
7137 * supdrvGipInitOnCpu().
7138 */
7139static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7140{
7141 PSUPGIPCPU pGipCpuMaster;
7142 unsigned iCpu;
7143 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7144 uint32_t idxMaster = UINT32_MAX;
7145 int rc = VINF_SUCCESS;
7146 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7147 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7148
7149 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7150
7151 /*
7152 * Pick the first CPU online as the master TSC and make it the new GIP master based
7153 * on the APIC ID.
7154 *
7155 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7156 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7157 * master as this point since the sync/async timer isn't created yet.
7158 */
7159 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7160 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7161 {
7162 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7163 if (idxCpu != UINT16_MAX)
7164 {
7165 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7166 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7167 {
7168 idxMaster = idxCpu;
7169 pGipCpu->i64TSCDelta = 0;
7170 break;
7171 }
7172 }
7173 }
7174 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7175 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7176 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7177
7178 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7179 if (pGip->cOnlineCpus <= 1)
7180 {
7181 if (pidxMaster)
7182 *pidxMaster = idxMaster;
7183 return VINF_SUCCESS;
7184 }
7185
7186 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7187 {
7188 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7189 if ( iCpu != idxMaster
7190 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7191 {
7192 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7193 if (RT_FAILURE(rc))
7194 {
7195 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7196 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7197 break;
7198 }
7199
7200 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7201 {
7202 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7203 rc = VERR_TRY_AGAIN;
7204 break;
7205 }
7206 }
7207 }
7208
7209 if ( RT_SUCCESS(rc)
7210 && !pGipCpuMaster->i64TSCDelta
7211 && pidxMaster)
7212 {
7213 *pidxMaster = idxMaster;
7214 }
7215 return rc;
7216}
7217
7218
7219/**
7220 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7221 *
7222 * @param idCpu Ignored.
7223 * @param pvUser1 Where to put the TSC.
7224 * @param pvUser2 Ignored.
7225 */
7226static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7227{
7228 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7229}
7230
7231
7232/**
7233 * Determine if Async GIP mode is required because of TSC drift.
7234 *
7235 * When using the default/normal timer code it is essential that the time stamp counter
7236 * (TSC) runs never backwards, that is, a read operation to the counter should return
7237 * a bigger value than any previous read operation. This is guaranteed by the latest
7238 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7239 * case we have to choose the asynchronous timer mode.
7240 *
7241 * @param poffMin Pointer to the determined difference between different
7242 * cores (optional, can be NULL).
7243 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7244 */
7245static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7246{
7247 /*
7248 * Just iterate all the cpus 8 times and make sure that the TSC is
7249 * ever increasing. We don't bother taking TSC rollover into account.
7250 */
7251 int iEndCpu = RTMpGetArraySize();
7252 int iCpu;
7253 int cLoops = 8;
7254 bool fAsync = false;
7255 int rc = VINF_SUCCESS;
7256 uint64_t offMax = 0;
7257 uint64_t offMin = ~(uint64_t)0;
7258 uint64_t PrevTsc = ASMReadTSC();
7259
7260 while (cLoops-- > 0)
7261 {
7262 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7263 {
7264 uint64_t CurTsc;
7265 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7266 if (RT_SUCCESS(rc))
7267 {
7268 if (CurTsc <= PrevTsc)
7269 {
7270 fAsync = true;
7271 offMin = offMax = PrevTsc - CurTsc;
7272 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7273 iCpu, cLoops, CurTsc, PrevTsc));
7274 break;
7275 }
7276
7277 /* Gather statistics (except the first time). */
7278 if (iCpu != 0 || cLoops != 7)
7279 {
7280 uint64_t off = CurTsc - PrevTsc;
7281 if (off < offMin)
7282 offMin = off;
7283 if (off > offMax)
7284 offMax = off;
7285 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7286 }
7287
7288 /* Next */
7289 PrevTsc = CurTsc;
7290 }
7291 else if (rc == VERR_NOT_SUPPORTED)
7292 break;
7293 else
7294 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7295 }
7296
7297 /* broke out of the loop. */
7298 if (iCpu < iEndCpu)
7299 break;
7300 }
7301
7302 if (poffMin)
7303 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7304 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7305 fAsync, iEndCpu, rc, offMin, offMax));
7306#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7307 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7308#endif
7309 return fAsync;
7310}
7311
7312
7313/**
7314 * Determine the GIP TSC mode.
7315 *
7316 * @returns The most suitable TSC mode.
7317 * @param pDevExt Pointer to the device instance data.
7318 */
7319static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7320{
7321 /* Trust CPUs that declare their TSC to be invariant. */
7322 if (supdrvIsInvariantTsc())
7323 return SUPGIPMODE_INVARIANT_TSC;
7324
7325 /*
7326 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7327 * (1) There might be a skew between the CPU, so that cpu0
7328 * returns a TSC that is slightly different from cpu1.
7329 * (2) Power management (and other things) may cause the TSC
7330 * to run at a non-constant speed, and cause the speed
7331 * to be different on the cpus. This will result in (1).
7332 *
7333 * So, on SMP systems we'll have to select the ASYNC update method
7334 * if there are symptoms of these problems.
7335 */
7336 if (RTMpGetCount() > 1)
7337 {
7338 uint32_t uEAX, uEBX, uECX, uEDX;
7339 uint64_t u64DiffCoresIgnored;
7340
7341 /* Permit the user and/or the OS specific bits to force async mode. */
7342 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7343 return SUPGIPMODE_ASYNC_TSC;
7344
7345 /* Try check for current differences between the cpus. */
7346 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7347 return SUPGIPMODE_ASYNC_TSC;
7348
7349 /*
7350 * If the CPU supports power management and is an AMD one we
7351 * won't trust it unless it has the TscInvariant bit is set.
7352 */
7353 /** @todo this is now redundant. remove later. */
7354 /* Check for "AuthenticAMD" */
7355 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7356 if ( uEAX >= 1
7357 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7358 {
7359 /* Check for APM support and that TscInvariant is cleared. */
7360 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7361 if (uEAX >= 0x80000007)
7362 {
7363 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7364 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7365 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7366 return SUPGIPMODE_ASYNC_TSC;
7367 }
7368 }
7369 }
7370 return SUPGIPMODE_SYNC_TSC;
7371}
7372
7373
7374/**
7375 * Initializes per-CPU GIP information.
7376 *
7377 * @param pDevExt Pointer to the device instance data.
7378 * @param pGip Pointer to the GIP.
7379 * @param pCpu Pointer to which GIP CPU to initalize.
7380 * @param u64NanoTS The current nanosecond timestamp.
7381 */
7382static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7383{
7384 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7385 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7386 pCpu->u32TransactionId = 2;
7387 pCpu->u64NanoTS = u64NanoTS;
7388 pCpu->u64TSC = ASMReadTSC();
7389 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7390 pCpu->i64TSCDelta = pDevExt->fOsTscDeltasInSync ? 0 : INT64_MAX;
7391
7392 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7393 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7394 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7395 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7396
7397 /*
7398 * We don't know the following values until we've executed updates.
7399 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7400 * the 2nd timer callout.
7401 */
7402 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7403 pCpu->u32UpdateIntervalTSC
7404 = pCpu->au32TSCHistory[0]
7405 = pCpu->au32TSCHistory[1]
7406 = pCpu->au32TSCHistory[2]
7407 = pCpu->au32TSCHistory[3]
7408 = pCpu->au32TSCHistory[4]
7409 = pCpu->au32TSCHistory[5]
7410 = pCpu->au32TSCHistory[6]
7411 = pCpu->au32TSCHistory[7]
7412 = (uint32_t)(_4G / pGip->u32UpdateHz);
7413}
7414
7415
7416/**
7417 * Initializes the GIP data.
7418 *
7419 * @param pDevExt Pointer to the device instance data.
7420 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7421 * @param HCPhys The physical address of the GIP.
7422 * @param u64NanoTS The current nanosecond timestamp.
7423 * @param uUpdateHz The update frequency.
7424 * @param uUpdateIntervalNS The update interval in nanoseconds.
7425 * @param cCpus The CPU count.
7426 */
7427static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7428 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7429{
7430 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7431 unsigned i;
7432#ifdef DEBUG_DARWIN_GIP
7433 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7434#else
7435 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7436#endif
7437
7438 /*
7439 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7440 * We only bother with TSC-deltas only on invariant CPUs for now.
7441 */
7442 pDevExt->fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7443
7444 /*
7445 * Initialize the structure.
7446 */
7447 memset(pGip, 0, cbGip);
7448 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7449 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7450 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7451 pGip->cCpus = (uint16_t)cCpus;
7452 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7453 pGip->u32UpdateHz = uUpdateHz;
7454 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7455 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7456 RTCpuSetEmpty(&pGip->PresentCpuSet);
7457 RTMpGetSet(&pGip->PossibleCpuSet);
7458 pGip->cOnlineCpus = RTMpGetOnlineCount();
7459 pGip->cPresentCpus = RTMpGetPresentCount();
7460 pGip->cPossibleCpus = RTMpGetCount();
7461 pGip->idCpuMax = RTMpGetMaxCpuId();
7462 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7463 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7464 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7465 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7466
7467 for (i = 0; i < cCpus; i++)
7468 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7469
7470 /*
7471 * Link it to the device extension.
7472 */
7473 pDevExt->pGip = pGip;
7474 pDevExt->HCPhysGip = HCPhys;
7475 pDevExt->cGipUsers = 0;
7476}
7477
7478
7479/**
7480 * On CPU initialization callback for RTMpOnAll.
7481 *
7482 * @param idCpu The CPU ID.
7483 * @param pvUser1 The device extension.
7484 * @param pvUser2 The GIP.
7485 */
7486static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7487{
7488 /* This is good enough, even though it will update some of the globals a
7489 bit to much. */
7490 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7491}
7492
7493
7494/**
7495 * Invalidates the GIP data upon termination.
7496 *
7497 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7498 */
7499static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7500{
7501 unsigned i;
7502 pGip->u32Magic = 0;
7503 for (i = 0; i < pGip->cCpus; i++)
7504 {
7505 pGip->aCPUs[i].u64NanoTS = 0;
7506 pGip->aCPUs[i].u64TSC = 0;
7507 pGip->aCPUs[i].iTSCHistoryHead = 0;
7508 pGip->aCPUs[i].u64TSCSample = 0;
7509 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7510 }
7511}
7512
7513
7514/**
7515 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7516 * updates all the per cpu data except the transaction id.
7517 *
7518 * @param pDevExt The device extension.
7519 * @param pGipCpu Pointer to the per cpu data.
7520 * @param u64NanoTS The current time stamp.
7521 * @param u64TSC The current TSC.
7522 * @param iTick The current timer tick.
7523 *
7524 * @remarks Can be called with interrupts disabled!
7525 */
7526static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7527{
7528 uint64_t u64TSCDelta;
7529 uint32_t u32UpdateIntervalTSC;
7530 uint32_t u32UpdateIntervalTSCSlack;
7531 unsigned iTSCHistoryHead;
7532 uint64_t u64CpuHz;
7533 uint32_t u32TransactionId;
7534
7535 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7536 AssertPtrReturnVoid(pGip);
7537
7538 /* Delta between this and the previous update. */
7539 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7540
7541 /*
7542 * Update the NanoTS.
7543 */
7544 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7545
7546 /*
7547 * Calc TSC delta.
7548 */
7549 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7550 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7551
7552 /* We don't need to keep realculating the frequency when it's invariant. */
7553 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
7554 return;
7555
7556 if (u64TSCDelta >> 32)
7557 {
7558 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7559 pGipCpu->cErrors++;
7560 }
7561
7562 /*
7563 * On the 2nd and 3rd callout, reset the history with the current TSC
7564 * interval since the values entered by supdrvGipInit are totally off.
7565 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7566 * better, while the 3rd should be most reliable.
7567 */
7568 u32TransactionId = pGipCpu->u32TransactionId;
7569 if (RT_UNLIKELY( ( u32TransactionId == 5
7570 || u32TransactionId == 7)
7571 && ( iTick == 2
7572 || iTick == 3) ))
7573 {
7574 unsigned i;
7575 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7576 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7577 }
7578
7579 /*
7580 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
7581 * Wait until we have at least one full history since the above history reset. The
7582 * assumption is that the majority of the previous history values will be tolerable.
7583 * See @bugref{6710} comment #67.
7584 */
7585 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
7586 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7587 {
7588 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
7589 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
7590 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
7591 {
7592 uint32_t u32;
7593 u32 = pGipCpu->au32TSCHistory[0];
7594 u32 += pGipCpu->au32TSCHistory[1];
7595 u32 += pGipCpu->au32TSCHistory[2];
7596 u32 += pGipCpu->au32TSCHistory[3];
7597 u32 >>= 2;
7598 u64TSCDelta = pGipCpu->au32TSCHistory[4];
7599 u64TSCDelta += pGipCpu->au32TSCHistory[5];
7600 u64TSCDelta += pGipCpu->au32TSCHistory[6];
7601 u64TSCDelta += pGipCpu->au32TSCHistory[7];
7602 u64TSCDelta >>= 2;
7603 u64TSCDelta += u32;
7604 u64TSCDelta >>= 1;
7605 }
7606 }
7607
7608
7609 /*
7610 * TSC History.
7611 */
7612 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7613 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7614 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7615 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7616
7617 /*
7618 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7619 *
7620 * On Windows, we have an occasional (but recurring) sour value that messed up
7621 * the history but taking only 1 interval reduces the precision overall.
7622 * However, this problem existed before the invariant mode was introduced.
7623 */
7624 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7625 || pGip->u32UpdateHz >= 1000)
7626 {
7627 uint32_t u32;
7628 u32 = pGipCpu->au32TSCHistory[0];
7629 u32 += pGipCpu->au32TSCHistory[1];
7630 u32 += pGipCpu->au32TSCHistory[2];
7631 u32 += pGipCpu->au32TSCHistory[3];
7632 u32 >>= 2;
7633 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7634 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7635 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7636 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7637 u32UpdateIntervalTSC >>= 2;
7638 u32UpdateIntervalTSC += u32;
7639 u32UpdateIntervalTSC >>= 1;
7640
7641 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7642 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7643 }
7644 else if (pGip->u32UpdateHz >= 90)
7645 {
7646 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7647 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7648 u32UpdateIntervalTSC >>= 1;
7649
7650 /* value chosen on a 2GHz thinkpad running windows */
7651 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7652 }
7653 else
7654 {
7655 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7656
7657 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7658 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7659 }
7660 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7661
7662 /*
7663 * CpuHz.
7664 */
7665 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7666 u64CpuHz /= pGip->u32UpdateIntervalNS;
7667 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7668}
7669
7670
7671/**
7672 * Updates the GIP.
7673 *
7674 * @param pDevExt The device extension.
7675 * @param u64NanoTS The current nanosecond timesamp.
7676 * @param u64TSC The current TSC timesamp.
7677 * @param idCpu The CPU ID.
7678 * @param iTick The current timer tick.
7679 *
7680 * @remarks Can be called with interrupts disabled!
7681 */
7682static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7683{
7684 /*
7685 * Determine the relevant CPU data.
7686 */
7687 PSUPGIPCPU pGipCpu;
7688 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7689 AssertPtrReturnVoid(pGip);
7690
7691 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7692 pGipCpu = &pGip->aCPUs[0];
7693 else
7694 {
7695 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7696 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7697 return;
7698 pGipCpu = &pGip->aCPUs[iCpu];
7699 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7700 return;
7701 }
7702
7703 /*
7704 * Start update transaction.
7705 */
7706 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7707 {
7708 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7709 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7710 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7711 pGipCpu->cErrors++;
7712 return;
7713 }
7714
7715 /*
7716 * Recalc the update frequency every 0x800th time.
7717 */
7718 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7719 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7720 {
7721 if (pGip->u64NanoTSLastUpdateHz)
7722 {
7723#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7724 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7725 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7726 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7727 {
7728 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7729 * calculation on non-invariant hosts if it changes the history decision
7730 * taken in supdrvGipDoUpdateCpu(). */
7731 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7732 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7733 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7734 }
7735#endif
7736 }
7737 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7738 }
7739
7740 /*
7741 * Update the data.
7742 */
7743 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7744
7745 /*
7746 * Complete transaction.
7747 */
7748 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7749}
7750
7751
7752/**
7753 * Updates the per cpu GIP data for the calling cpu.
7754 *
7755 * @param pDevExt The device extension.
7756 * @param u64NanoTS The current nanosecond timesamp.
7757 * @param u64TSC The current TSC timesamp.
7758 * @param idCpu The CPU ID.
7759 * @param idApic The APIC id for the CPU index.
7760 * @param iTick The current timer tick.
7761 *
7762 * @remarks Can be called with interrupts disabled!
7763 */
7764static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7765 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7766{
7767 uint32_t iCpu;
7768 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7769
7770 /*
7771 * Avoid a potential race when a CPU online notification doesn't fire on
7772 * the onlined CPU but the tick creeps in before the event notification is
7773 * run.
7774 */
7775 if (RT_UNLIKELY(iTick == 1))
7776 {
7777 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7778 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7779 supdrvGipMpEventOnline(pDevExt, idCpu);
7780 }
7781
7782 iCpu = pGip->aiCpuFromApicId[idApic];
7783 if (RT_LIKELY(iCpu < pGip->cCpus))
7784 {
7785 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7786 if (pGipCpu->idCpu == idCpu)
7787 {
7788 /*
7789 * Start update transaction.
7790 */
7791 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7792 {
7793 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7794 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7795 pGipCpu->cErrors++;
7796 return;
7797 }
7798
7799 /*
7800 * Update the data.
7801 */
7802 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7803
7804 /*
7805 * Complete transaction.
7806 */
7807 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7808 }
7809 }
7810}
7811
7812
7813/**
7814 * Resume built-in keyboard on MacBook Air and Pro hosts.
7815 * If there is no built-in keyboard device, return success anyway.
7816 *
7817 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7818 */
7819static int supdrvIOCtl_ResumeSuspendedKbds(void)
7820{
7821#if defined(RT_OS_DARWIN)
7822 return supdrvDarwinResumeSuspendedKbds();
7823#else
7824 return VERR_NOT_IMPLEMENTED;
7825#endif
7826}
7827
7828
7829/**
7830 * Service a TSC-delta measurement request.
7831 *
7832 * @returns VBox status code.
7833 * @param pDevExt Pointer to the device instance data.
7834 * @param pReq Pointer to the TSC-delta measurement request.
7835 */
7836static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7837{
7838 PSUPGLOBALINFOPAGE pGip;
7839 RTCPUID idCpuWorker;
7840 int rc = VERR_CPU_NOT_FOUND;
7841 int16_t cTries;
7842 RTMSINTERVAL cMsWaitRetry;
7843 uint16_t iCpu;
7844
7845 /*
7846 * Validate.
7847 */
7848 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7849 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7850 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7851 idCpuWorker = pReq->u.In.idCpu;
7852 if (idCpuWorker == NIL_RTCPUID)
7853 return VERR_INVALID_CPU_ID;
7854
7855 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7856 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7857 pGip = pDevExt->pGip;
7858
7859 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
7860 return VINF_SUCCESS;
7861
7862 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7863 {
7864 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7865 if (pGipCpuWorker->idCpu == idCpuWorker)
7866 {
7867 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7868 && !pReq->u.In.fForce)
7869 return VINF_SUCCESS;
7870
7871#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7872 if (pReq->u.In.fAsync)
7873 {
7874 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7875 * to pass those options to the thread somehow and implement it in the
7876 * thread. Check if anyone uses/needs fAsync before implementing this. */
7877 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7878 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7879 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7880 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7881 {
7882 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7883 }
7884 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7885 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7886 return VINF_SUCCESS;
7887 }
7888#endif
7889
7890 while (cTries-- > 0)
7891 {
7892 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7893 if (RT_SUCCESS(rc))
7894 {
7895 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7896 break;
7897 }
7898
7899 if (cMsWaitRetry)
7900 RTThreadSleep(cMsWaitRetry);
7901 }
7902
7903 break;
7904 }
7905 }
7906 return rc;
7907}
7908
7909
7910/**
7911 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7912 *
7913 * @returns VBox status code.
7914 * @param pDevExt Pointer to the device instance data.
7915 * @param pReq Pointer to the TSC-read request.
7916 */
7917static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7918{
7919 uint64_t uTsc;
7920 uint16_t idApic;
7921 int16_t cTries;
7922 PSUPGLOBALINFOPAGE pGip;
7923 int rc;
7924
7925 /*
7926 * Validate.
7927 */
7928 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7929 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7930 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7931 pGip = pDevExt->pGip;
7932
7933 cTries = 4;
7934 while (cTries-- > 0)
7935 {
7936 rc = SUPGetTsc(&uTsc, &idApic);
7937 if (RT_SUCCESS(rc))
7938 {
7939 pReq->u.Out.u64AdjustedTsc = uTsc;
7940 pReq->u.Out.idApic = idApic;
7941 return VINF_SUCCESS;
7942 }
7943 else
7944 {
7945 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
7946 int rc2;
7947 uint16_t iCpu;
7948 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7949 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7950 iCpu = pGip->aiCpuFromApicId[idApic];
7951 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7952
7953 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7954 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7955 if (RT_SUCCESS(rc2))
7956 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7957 }
7958 }
7959
7960 return rc;
7961}
7962
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette