VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53473

Last change on this file since 53473 was 53473, checked in by vboxsync, 10 years ago

build fix and warnings.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 295.6 KB
Line 
1/* $Id: SUPDrv.c 53473 2014-12-06 04:01:02Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
92/** The frequency by which we recalculate the u32UpdateHz and
93 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
94 *
95 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
96 */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/** A reserved TSC value used for synchronization as well as measurement of
100 * TSC deltas. */
101#define GIP_TSC_DELTA_RSVD UINT64_MAX
102/** The number of TSC delta measurement loops in total (includes primer and
103 * read-time loops). */
104#define GIP_TSC_DELTA_LOOPS 96
105/** The number of cache primer loops. */
106#define GIP_TSC_DELTA_PRIMER_LOOPS 4
107/** The number of loops until we keep computing the minimum read time. */
108#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
109/** Stop measurement of TSC delta. */
110#define GIP_TSC_DELTA_SYNC_STOP 0
111/** Start measurement of TSC delta. */
112#define GIP_TSC_DELTA_SYNC_START 1
113/** Worker thread is ready for reading the TSC. */
114#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
115/** Worker thread is done updating TSC delta info. */
116#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
117/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
118 * with a timeout. */
119#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
120/** When IPRT isn't concurrent safe: Worker is ready after waiting for
121 * master with a timeout. */
122#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
123/** The TSC-refinement interval in seconds. */
124#define GIP_TSC_REFINE_INTERVAL 5
125
/* Compile-time sanity checks on the loop counts above: there must be fewer
 * primer loops than read-time loops, and primer plus read-time loops must be
 * fewer than the total number of measurement loops. */
126AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
128
129/** @def VBOX_SVN_REV
130 * The makefile should define this if it can. Falls back to 0 when it does
 * not. */
131#ifndef VBOX_SVN_REV
132# define VBOX_SVN_REV 0
133#endif
134
135#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
136# define DO_NOT_START_GIP
137#endif
138
139/** Whether the application of TSC-deltas is required.
 *
 * Evaluates to true when the GIP of the given device extension is in
 * SUPGIPMODE_INVARIANT_TSC mode and the device extension's
 * fOsTscDeltasInSync flag is clear.  The argument is evaluated twice. */
140#define GIP_ARE_TSC_DELTAS_APPLICABLE(a_pDevExt) \
141 ((a_pDevExt)->pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC && !((a_pDevExt)->fOsTscDeltasInSync))
142
143
144/*******************************************************************************
145* Internal Functions *
146*******************************************************************************/
147static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
148static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
149static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
150static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
151static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
152static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
153static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
154static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
155static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
156static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
157static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
158static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
159static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
160DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
161DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
162static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
163static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
164static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
165static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
166static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
167static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
168static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
169static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
170static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
171static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
172static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
173 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
174static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
175static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
176static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
177static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
178 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
179static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
180static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
181static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
182static int supdrvIOCtl_ResumeSuspendedKbds(void);
183
184
185/*******************************************************************************
186* Global Variables *
187*******************************************************************************/
/** Exported pointer to the global info page; starts out as NULL.  Its address
 * is also published by name through the g_aFunctions table below. */
188DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
189
190
191/**
192 * Array of the R0 SUP API.
 *
 * Each entry pairs a symbol name with the address published under that name.
 *
 * The first ten entries (indexes 0 thru 9, the SUPR0Abs* symbols) are
 * initialized to zero here and are assigned by numeric index in
 * supdrvInitDevExt(), so they must stay at the head of the table in exactly
 * this order.
 *
 * NOTE(review): the SED: START / SED: DATA / SED: END comments look like
 * markers for an external script processing this table - confirm before
 * touching them or adding other text between them.
193 */
194static SUPFUNC g_aFunctions[] =
195{
196/* SED: START */
197 /* name function */
198 /* Entries with absolute addresses determined at runtime, fixup
199 code makes ugly ASSUMPTIONS about the order here: */
200 { "SUPR0AbsIs64bit", (void *)0 },
201 { "SUPR0Abs64bitKernelCS", (void *)0 },
202 { "SUPR0Abs64bitKernelSS", (void *)0 },
203 { "SUPR0Abs64bitKernelDS", (void *)0 },
204 { "SUPR0AbsKernelCS", (void *)0 },
205 { "SUPR0AbsKernelSS", (void *)0 },
206 { "SUPR0AbsKernelDS", (void *)0 },
207 { "SUPR0AbsKernelES", (void *)0 },
208 { "SUPR0AbsKernelFS", (void *)0 },
209 { "SUPR0AbsKernelGS", (void *)0 },
210 /* Normal function pointers: */
211 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
212 { "SUPGetGIP", (void *)SUPGetGIP },
213 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
214 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
215 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
216 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
217 { "SUPR0ContFree", (void *)SUPR0ContFree },
218 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
219 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
220 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
221 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
222 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
223 { "SUPR0LockMem", (void *)SUPR0LockMem },
224 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
225 { "SUPR0LowFree", (void *)SUPR0LowFree },
226 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
227 { "SUPR0MemFree", (void *)SUPR0MemFree },
228 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
229 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
230 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
231 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
232 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
233 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
234 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
235 { "SUPR0PageFree", (void *)SUPR0PageFree },
236 { "SUPR0Printf", (void *)SUPR0Printf },
237 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
238 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
239 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
240 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
241 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
242 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
243 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
244 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
245 { "SUPSemEventClose", (void *)SUPSemEventClose },
246 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
247 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
248 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
249 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
250 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
251 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
252 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
253 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
254 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
255 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
256 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
257 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
258 { "SUPSemEventWait", (void *)SUPSemEventWait },
259 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
260 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
261 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
262
263 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
264 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
265 { "RTAssertMsg1", (void *)RTAssertMsg1 },
266 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
267 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
268 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
269 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
270 { "RTCrc32", (void *)RTCrc32 },
271 { "RTCrc32Finish", (void *)RTCrc32Finish },
272 { "RTCrc32Process", (void *)RTCrc32Process },
273 { "RTCrc32Start", (void *)RTCrc32Start },
274 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
275 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
276 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
277 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
278 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
279 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
280 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
281 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
282 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
283 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
284 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
285 { "RTLogPrintfV", (void *)RTLogPrintfV },
286 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
287 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
288 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
289 { "RTMemAllocTag", (void *)RTMemAllocTag },
290 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
291 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
292 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
293 { "RTMemDupExTag", (void *)RTMemDupExTag },
294 { "RTMemDupTag", (void *)RTMemDupTag },
295 { "RTMemFree", (void *)RTMemFree },
296 { "RTMemFreeEx", (void *)RTMemFreeEx },
297 { "RTMemReallocTag", (void *)RTMemReallocTag },
298 { "RTMpCpuId", (void *)RTMpCpuId },
299 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
300 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
301 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
302 { "RTMpGetCount", (void *)RTMpGetCount },
303 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
304 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
305 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
306 { "RTMpGetSet", (void *)RTMpGetSet },
307 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
308 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
309 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
310 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
311 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
312 { "RTMpOnAll", (void *)RTMpOnAll },
313 { "RTMpOnOthers", (void *)RTMpOnOthers },
314 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
315 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
316 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
317 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
318 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
319 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
320 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
321 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
322 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
323 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
324 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
325 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
326 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
327 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
328 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
329 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
330 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
331 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
332 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
333 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
334 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
335 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
336 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
337 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
338 { "RTProcSelf", (void *)RTProcSelf },
339 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
340 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
341 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
342 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
343 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
344 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
345 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
346 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
347 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
348 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
349 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
350 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
351 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
352 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
353 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
354 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
355 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
356 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
357 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
358 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
359 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
360 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
361 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
362 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
363 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
364 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
365 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
366 { "RTSemEventCreate", (void *)RTSemEventCreate },
367 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
368 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
369 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
370 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
371 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
372 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
373 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
374 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
375 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
376 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
377 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
378 { "RTSemEventSignal", (void *)RTSemEventSignal },
379 { "RTSemEventWait", (void *)RTSemEventWait },
380 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
381 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
382 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
383 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
384 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
385 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
386 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
387 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
388 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
389 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
390 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
391 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
392 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
393 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
394 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
395 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
396 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
397 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
398 { "RTStrCopy", (void *)RTStrCopy },
399 { "RTStrDupTag", (void *)RTStrDupTag },
400 { "RTStrFormat", (void *)RTStrFormat },
401 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
402 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
403 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
404 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
405 { "RTStrFormatV", (void *)RTStrFormatV },
406 { "RTStrFree", (void *)RTStrFree },
407 { "RTStrNCmp", (void *)RTStrNCmp },
408 { "RTStrPrintf", (void *)RTStrPrintf },
409 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
410 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
411 { "RTStrPrintfV", (void *)RTStrPrintfV },
412 { "RTThreadCreate", (void *)RTThreadCreate },
413 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
414 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
415 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
416 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
417 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
418 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
419 { "RTThreadGetName", (void *)RTThreadGetName },
420 { "RTThreadGetNative", (void *)RTThreadGetNative },
421 { "RTThreadGetType", (void *)RTThreadGetType },
422 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
423 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
424 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
425 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
426 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
427 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
428 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
429 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
430 { "RTThreadSelf", (void *)RTThreadSelf },
431 { "RTThreadSelfName", (void *)RTThreadSelfName },
432 { "RTThreadSleep", (void *)RTThreadSleep },
433 { "RTThreadUserReset", (void *)RTThreadUserReset },
434 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
435 { "RTThreadUserWait", (void *)RTThreadUserWait },
436 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
437 { "RTThreadWait", (void *)RTThreadWait },
438 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
439 { "RTThreadYield", (void *)RTThreadYield },
440 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
441 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
442 { "RTTimeNow", (void *)RTTimeNow },
443 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
444 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
445 { "RTTimerCreate", (void *)RTTimerCreate },
446 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
447 { "RTTimerDestroy", (void *)RTTimerDestroy },
448 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
449 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
450 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
451 { "RTTimerStart", (void *)RTTimerStart },
452 { "RTTimerStop", (void *)RTTimerStop },
453 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
454 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
455 { "RTUuidCompare", (void *)RTUuidCompare },
456 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
457 { "RTUuidFromStr", (void *)RTUuidFromStr },
458/* SED: END */
459};
460
461#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
462/**
463 * Drag in the rest of IPRT since we share it with the
464 * rest of the kernel modules on darwin.
 *
 * NULL-terminated array of references to IPRT functions; the inline comments
 * name the module each reference is kept for.
 *
 * NOTE(review): the text above mentions only darwin, but the surrounding
 * guard also enables this array on Solaris and FreeBSD.
465 */
466PFNRT g_apfnVBoxDrvIPRTDeps[] =
467{
468 /* VBoxNetAdp */
469 (PFNRT)RTRandBytes,
470 /* VBoxUSB */
471 (PFNRT)RTPathStripFilename,
472 NULL
473};
474#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
475
476
477/**
478 * Initializes the device extentsion structure.
479 *
480 * @returns IPRT status code.
481 * @param pDevExt The device extension to initialize.
482 * @param cbSession The size of the session structure. The size of
483 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
484 * defined because we're skipping the OS specific members
485 * then.
486 */
487int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
488{
489 int rc;
490
491#ifdef SUPDRV_WITH_RELEASE_LOGGER
492 /*
493 * Create the release log.
494 */
495 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
496 PRTLOGGER pRelLogger;
497 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
498 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
499 if (RT_SUCCESS(rc))
500 RTLogRelSetDefaultInstance(pRelLogger);
501 /** @todo Add native hook for getting logger config parameters and setting
502 * them. On linux we should use the module parameter stuff... */
503#endif
504
505 /*
506 * Initialize it.
507 */
508 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
509 pDevExt->Spinlock = NIL_RTSPINLOCK;
510 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
511 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
512 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
513 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
514 if (RT_SUCCESS(rc))
515 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
516 if (RT_SUCCESS(rc))
517 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
518
519 if (RT_SUCCESS(rc))
520#ifdef SUPDRV_USE_MUTEX_FOR_LDR
521 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
522#else
523 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
524#endif
525 if (RT_SUCCESS(rc))
526 {
527 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
528 if (RT_SUCCESS(rc))
529 {
530#ifdef SUPDRV_USE_MUTEX_FOR_LDR
531 rc = RTSemMutexCreate(&pDevExt->mtxGip);
532#else
533 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
534#endif
535 if (RT_SUCCESS(rc))
536 {
537 rc = supdrvGipCreate(pDevExt);
538 if (RT_SUCCESS(rc))
539 {
540 rc = supdrvTracerInit(pDevExt);
541 if (RT_SUCCESS(rc))
542 {
543 pDevExt->pLdrInitImage = NULL;
544 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
545 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
546 pDevExt->cbSession = (uint32_t)cbSession;
547
548 /*
549 * Fixup the absolute symbols.
550 *
551 * Because of the table indexing assumptions we'll have a little #ifdef orgy
552 * here rather than distributing this to OS specific files. At least for now.
553 */
554#ifdef RT_OS_DARWIN
555# if ARCH_BITS == 32
556 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
557 {
558 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
559 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
560 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
561 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
562 }
563 else
564 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
565 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
566 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
567 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
568 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
569 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
570 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
571# else /* 64-bit darwin: */
572 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
573 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
574 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
575 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
576 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
577 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
578 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
579 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
580 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
581 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
582
583# endif
584#else /* !RT_OS_DARWIN */
585# if ARCH_BITS == 64
586 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
587 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
588 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
589 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
590# else
591 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
592# endif
593 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
594 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
595 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
596 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
597 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
598 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
599#endif /* !RT_OS_DARWIN */
600 return VINF_SUCCESS;
601 }
602
603 supdrvGipDestroy(pDevExt);
604 }
605
606#ifdef SUPDRV_USE_MUTEX_FOR_GIP
607 RTSemMutexDestroy(pDevExt->mtxGip);
608 pDevExt->mtxGip = NIL_RTSEMMUTEX;
609#else
610 RTSemFastMutexDestroy(pDevExt->mtxGip);
611 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
612#endif
613 }
614 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
615 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
616 }
617#ifdef SUPDRV_USE_MUTEX_FOR_LDR
618 RTSemMutexDestroy(pDevExt->mtxLdr);
619 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
620#else
621 RTSemFastMutexDestroy(pDevExt->mtxLdr);
622 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
623#endif
624 }
625
626 RTSpinlockDestroy(pDevExt->Spinlock);
627 pDevExt->Spinlock = NIL_RTSPINLOCK;
628 RTSpinlockDestroy(pDevExt->hGipSpinlock);
629 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
630 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
631 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
632
633#ifdef SUPDRV_WITH_RELEASE_LOGGER
634 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
635 RTLogDestroy(RTLogSetDefaultInstance(NULL));
636#endif
637
638 return rc;
639}
640
641
642/**
643 * Delete the device extension (e.g. cleanup members).
644 *
645 * @param pDevExt The device extension to delete.
646 */
647void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
648{
649 PSUPDRVOBJ pObj;
650 PSUPDRVUSAGE pUsage;
651
652 /*
653 * Kill mutexes and spinlocks.
654 */
655#ifdef SUPDRV_USE_MUTEX_FOR_GIP
656 RTSemMutexDestroy(pDevExt->mtxGip);
657 pDevExt->mtxGip = NIL_RTSEMMUTEX;
658#else
659 RTSemFastMutexDestroy(pDevExt->mtxGip);
660 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
661#endif
662#ifdef SUPDRV_USE_MUTEX_FOR_LDR
663 RTSemMutexDestroy(pDevExt->mtxLdr);
664 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
665#else
666 RTSemFastMutexDestroy(pDevExt->mtxLdr);
667 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
668#endif
669 RTSpinlockDestroy(pDevExt->Spinlock);
670 pDevExt->Spinlock = NIL_RTSPINLOCK;
671 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
672 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
673 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
674 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
675
676 /*
677 * Free lists.
678 */
679 /* objects. */
680 pObj = pDevExt->pObjs;
681 Assert(!pObj); /* (can trigger on forced unloads) */
682 pDevExt->pObjs = NULL;
683 while (pObj)
684 {
685 void *pvFree = pObj;
686 pObj = pObj->pNext;
687 RTMemFree(pvFree);
688 }
689
690 /* usage records. */
691 pUsage = pDevExt->pUsageFree;
692 pDevExt->pUsageFree = NULL;
693 while (pUsage)
694 {
695 void *pvFree = pUsage;
696 pUsage = pUsage->pNext;
697 RTMemFree(pvFree);
698 }
699
700 /* kill the GIP. */
701 supdrvGipDestroy(pDevExt);
702 RTSpinlockDestroy(pDevExt->hGipSpinlock);
703 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
704
705 supdrvTracerTerm(pDevExt);
706
707#ifdef SUPDRV_WITH_RELEASE_LOGGER
708 /* destroy the loggers. */
709 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
710 RTLogDestroy(RTLogSetDefaultInstance(NULL));
711#endif
712}
713
714
715/**
716 * Create session.
717 *
718 * @returns IPRT status code.
719 * @param pDevExt Device extension.
720 * @param fUser Flag indicating whether this is a user or kernel
721 * session.
722 * @param fUnrestricted Unrestricted access (system) or restricted access
723 * (user)?
724 * @param ppSession Where to store the pointer to the session data.
725 */
726int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
727{
728 int rc;
729 PSUPDRVSESSION pSession;
730
731 if (!SUP_IS_DEVEXT_VALID(pDevExt))
732 return VERR_INVALID_PARAMETER;
733
734 /*
735 * Allocate memory for the session data.
736 */
737 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
738 if (pSession)
739 {
740 /* Initialize session data. */
741 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
742 if (!rc)
743 {
744 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
745 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
746 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
747 if (RT_SUCCESS(rc))
748 {
749 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
750 pSession->pDevExt = pDevExt;
751 pSession->u32Cookie = BIRD_INV;
752 pSession->fUnrestricted = fUnrestricted;
753 /*pSession->fInHashTable = false; */
754 pSession->cRefs = 1;
755 /*pSession->pCommonNextHash = NULL;
756 pSession->ppOsSessionPtr = NULL; */
757 if (fUser)
758 {
759 pSession->Process = RTProcSelf();
760 pSession->R0Process = RTR0ProcHandleSelf();
761 }
762 else
763 {
764 pSession->Process = NIL_RTPROCESS;
765 pSession->R0Process = NIL_RTR0PROCESS;
766 }
767 /*pSession->pLdrUsage = NULL;
768 pSession->pVM = NULL;
769 pSession->pUsage = NULL;
770 pSession->pGip = NULL;
771 pSession->fGipReferenced = false;
772 pSession->Bundle.cUsed = 0; */
773 pSession->Uid = NIL_RTUID;
774 pSession->Gid = NIL_RTGID;
775 /*pSession->uTracerData = 0;*/
776 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
777 RTListInit(&pSession->TpProviders);
778 /*pSession->cTpProviders = 0;*/
779 /*pSession->cTpProbesFiring = 0;*/
780 RTListInit(&pSession->TpUmods);
781 /*RT_ZERO(pSession->apTpLookupTable);*/
782
783 VBOXDRV_SESSION_CREATE(pSession, fUser);
784 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
785 return VINF_SUCCESS;
786 }
787
788 RTSpinlockDestroy(pSession->Spinlock);
789 }
790 RTMemFree(pSession);
791 *ppSession = NULL;
792 Log(("Failed to create spinlock, rc=%d!\n", rc));
793 }
794 else
795 rc = VERR_NO_MEMORY;
796
797 return rc;
798}
799
800
801/**
802 * Cleans up the session in the context of the process to which it belongs, the
803 * caller will free the session and the session spinlock.
804 *
805 * This should normally occur when the session is closed or as the process
806 * exits. Careful reference counting in the OS specfic code makes sure that
807 * there cannot be any races between process/handle cleanup callbacks and
808 * threads doing I/O control calls.
809 *
810 * @param pDevExt The device extension.
811 * @param pSession Session data.
812 */
813static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
814{
815 int rc;
816 PSUPDRVBUNDLE pBundle;
817 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
818
819 Assert(!pSession->fInHashTable);
820 Assert(!pSession->ppOsSessionPtr);
821 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
822 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
823
824 /*
825 * Remove logger instances related to this session.
826 */
827 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
828
829 /*
830 * Destroy the handle table.
831 */
832 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
833 AssertRC(rc);
834 pSession->hHandleTable = NIL_RTHANDLETABLE;
835
836 /*
837 * Release object references made in this session.
838 * In theory there should be noone racing us in this session.
839 */
840 Log2(("release objects - start\n"));
841 if (pSession->pUsage)
842 {
843 PSUPDRVUSAGE pUsage;
844 RTSpinlockAcquire(pDevExt->Spinlock);
845
846 while ((pUsage = pSession->pUsage) != NULL)
847 {
848 PSUPDRVOBJ pObj = pUsage->pObj;
849 pSession->pUsage = pUsage->pNext;
850
851 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
852 if (pUsage->cUsage < pObj->cUsage)
853 {
854 pObj->cUsage -= pUsage->cUsage;
855 RTSpinlockRelease(pDevExt->Spinlock);
856 }
857 else
858 {
859 /* Destroy the object and free the record. */
860 if (pDevExt->pObjs == pObj)
861 pDevExt->pObjs = pObj->pNext;
862 else
863 {
864 PSUPDRVOBJ pObjPrev;
865 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
866 if (pObjPrev->pNext == pObj)
867 {
868 pObjPrev->pNext = pObj->pNext;
869 break;
870 }
871 Assert(pObjPrev);
872 }
873 RTSpinlockRelease(pDevExt->Spinlock);
874
875 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
876 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
877 if (pObj->pfnDestructor)
878 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
879 RTMemFree(pObj);
880 }
881
882 /* free it and continue. */
883 RTMemFree(pUsage);
884
885 RTSpinlockAcquire(pDevExt->Spinlock);
886 }
887
888 RTSpinlockRelease(pDevExt->Spinlock);
889 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
890 }
891 Log2(("release objects - done\n"));
892
893 /*
894 * Do tracer cleanups related to this session.
895 */
896 Log2(("release tracer stuff - start\n"));
897 supdrvTracerCleanupSession(pDevExt, pSession);
898 Log2(("release tracer stuff - end\n"));
899
900 /*
901 * Release memory allocated in the session.
902 *
903 * We do not serialize this as we assume that the application will
904 * not allocated memory while closing the file handle object.
905 */
906 Log2(("freeing memory:\n"));
907 pBundle = &pSession->Bundle;
908 while (pBundle)
909 {
910 PSUPDRVBUNDLE pToFree;
911 unsigned i;
912
913 /*
914 * Check and unlock all entries in the bundle.
915 */
916 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
917 {
918 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
919 {
920 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
921 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
922 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
923 {
924 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
925 AssertRC(rc); /** @todo figure out how to handle this. */
926 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
927 }
928 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
929 AssertRC(rc); /** @todo figure out how to handle this. */
930 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
931 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
932 }
933 }
934
935 /*
936 * Advance and free previous bundle.
937 */
938 pToFree = pBundle;
939 pBundle = pBundle->pNext;
940
941 pToFree->pNext = NULL;
942 pToFree->cUsed = 0;
943 if (pToFree != &pSession->Bundle)
944 RTMemFree(pToFree);
945 }
946 Log2(("freeing memory - done\n"));
947
948 /*
949 * Deregister component factories.
950 */
951 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
952 Log2(("deregistering component factories:\n"));
953 if (pDevExt->pComponentFactoryHead)
954 {
955 PSUPDRVFACTORYREG pPrev = NULL;
956 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
957 while (pCur)
958 {
959 if (pCur->pSession == pSession)
960 {
961 /* unlink it */
962 PSUPDRVFACTORYREG pNext = pCur->pNext;
963 if (pPrev)
964 pPrev->pNext = pNext;
965 else
966 pDevExt->pComponentFactoryHead = pNext;
967
968 /* free it */
969 pCur->pNext = NULL;
970 pCur->pSession = NULL;
971 pCur->pFactory = NULL;
972 RTMemFree(pCur);
973
974 /* next */
975 pCur = pNext;
976 }
977 else
978 {
979 /* next */
980 pPrev = pCur;
981 pCur = pCur->pNext;
982 }
983 }
984 }
985 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
986 Log2(("deregistering component factories - done\n"));
987
988 /*
989 * Loaded images needs to be dereferenced and possibly freed up.
990 */
991 supdrvLdrLock(pDevExt);
992 Log2(("freeing images:\n"));
993 if (pSession->pLdrUsage)
994 {
995 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
996 pSession->pLdrUsage = NULL;
997 while (pUsage)
998 {
999 void *pvFree = pUsage;
1000 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1001 if (pImage->cUsage > pUsage->cUsage)
1002 pImage->cUsage -= pUsage->cUsage;
1003 else
1004 supdrvLdrFree(pDevExt, pImage);
1005 pUsage->pImage = NULL;
1006 pUsage = pUsage->pNext;
1007 RTMemFree(pvFree);
1008 }
1009 }
1010 supdrvLdrUnlock(pDevExt);
1011 Log2(("freeing images - done\n"));
1012
1013 /*
1014 * Unmap the GIP.
1015 */
1016 Log2(("umapping GIP:\n"));
1017 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1018 {
1019 SUPR0GipUnmap(pSession);
1020 pSession->fGipReferenced = 0;
1021 }
1022 Log2(("umapping GIP - done\n"));
1023}
1024
1025
1026/**
1027 * Common code for freeing a session when the reference count reaches zero.
1028 *
1029 * @param pDevExt Device extension.
1030 * @param pSession Session data.
1031 * This data will be freed by this routine.
1032 */
1033static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1034{
1035 VBOXDRV_SESSION_CLOSE(pSession);
1036
1037 /*
1038 * Cleanup the session first.
1039 */
1040 supdrvCleanupSession(pDevExt, pSession);
1041 supdrvOSCleanupSession(pDevExt, pSession);
1042
1043 /*
1044 * Free the rest of the session stuff.
1045 */
1046 RTSpinlockDestroy(pSession->Spinlock);
1047 pSession->Spinlock = NIL_RTSPINLOCK;
1048 pSession->pDevExt = NULL;
1049 RTMemFree(pSession);
1050 LogFlow(("supdrvDestroySession: returns\n"));
1051}
1052
1053
1054/**
1055 * Inserts the session into the global hash table.
1056 *
1057 * @retval VINF_SUCCESS on success.
1058 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1059 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1060 * session (asserted).
1061 * @retval VERR_DUPLICATE if there is already a session for that pid.
1062 *
1063 * @param pDevExt The device extension.
1064 * @param pSession The session.
1065 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1066 * available and used. This will set to point to the
1067 * session while under the protection of the session
1068 * hash table spinlock. It will also be kept in
1069 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1070 * cleanup use.
1071 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1072 */
1073int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1074 void *pvUser)
1075{
1076 PSUPDRVSESSION pCur;
1077 unsigned iHash;
1078
1079 /*
1080 * Validate input.
1081 */
1082 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1083 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1084
1085 /*
1086 * Calculate the hash table index and acquire the spinlock.
1087 */
1088 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1089
1090 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1091
1092 /*
1093 * If there are a collisions, we need to carefully check if we got a
1094 * duplicate. There can only be one open session per process.
1095 */
1096 pCur = pDevExt->apSessionHashTab[iHash];
1097 if (pCur)
1098 {
1099 while (pCur && pCur->Process != pSession->Process)
1100 pCur = pCur->pCommonNextHash;
1101
1102 if (pCur)
1103 {
1104 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1105 if (pCur == pSession)
1106 {
1107 Assert(pSession->fInHashTable);
1108 AssertFailed();
1109 return VERR_WRONG_ORDER;
1110 }
1111 Assert(!pSession->fInHashTable);
1112 if (pCur->R0Process == pSession->R0Process)
1113 return VERR_RESOURCE_IN_USE;
1114 return VERR_DUPLICATE;
1115 }
1116 }
1117 Assert(!pSession->fInHashTable);
1118 Assert(!pSession->ppOsSessionPtr);
1119
1120 /*
1121 * Insert it, doing a callout to the OS specific code in case it has
1122 * anything it wishes to do while we're holding the spinlock.
1123 */
1124 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1125 pDevExt->apSessionHashTab[iHash] = pSession;
1126 pSession->fInHashTable = true;
1127 ASMAtomicIncS32(&pDevExt->cSessions);
1128
1129 pSession->ppOsSessionPtr = ppOsSessionPtr;
1130 if (ppOsSessionPtr)
1131 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1132
1133 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1134
1135 /*
1136 * Retain a reference for the pointer in the session table.
1137 */
1138 ASMAtomicIncU32(&pSession->cRefs);
1139
1140 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1141 return VINF_SUCCESS;
1142}
1143
1144
1145/**
1146 * Removes the session from the global hash table.
1147 *
1148 * @retval VINF_SUCCESS on success.
1149 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1150 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1151 * session (asserted).
1152 *
1153 * @param pDevExt The device extension.
1154 * @param pSession The session. The caller is expected to have a reference
1155 * to this so it won't croak on us when we release the hash
1156 * table reference.
1157 * @param pvUser OS specific context value for the
1158 * supdrvOSSessionHashTabInserted callback.
1159 */
1160int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1161{
1162 PSUPDRVSESSION pCur;
1163 unsigned iHash;
1164 int32_t cRefs;
1165
1166 /*
1167 * Validate input.
1168 */
1169 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1170 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1171
1172 /*
1173 * Calculate the hash table index and acquire the spinlock.
1174 */
1175 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1176
1177 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1178
1179 /*
1180 * Unlink it.
1181 */
1182 pCur = pDevExt->apSessionHashTab[iHash];
1183 if (pCur == pSession)
1184 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1185 else
1186 {
1187 PSUPDRVSESSION pPrev = pCur;
1188 while (pCur && pCur != pSession)
1189 {
1190 pPrev = pCur;
1191 pCur = pCur->pCommonNextHash;
1192 }
1193 if (pCur)
1194 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1195 else
1196 {
1197 Assert(!pSession->fInHashTable);
1198 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1199 return VERR_NOT_FOUND;
1200 }
1201 }
1202
1203 pSession->pCommonNextHash = NULL;
1204 pSession->fInHashTable = false;
1205
1206 ASMAtomicDecS32(&pDevExt->cSessions);
1207
1208 /*
1209 * Clear OS specific session pointer if available and do the OS callback.
1210 */
1211 if (pSession->ppOsSessionPtr)
1212 {
1213 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1214 pSession->ppOsSessionPtr = NULL;
1215 }
1216
1217 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1218
1219 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1220
1221 /*
1222 * Drop the reference the hash table had to the session. This shouldn't
1223 * be the last reference!
1224 */
1225 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1226 Assert(cRefs > 0 && cRefs < _1M);
1227 if (cRefs == 0)
1228 supdrvDestroySession(pDevExt, pSession);
1229
1230 return VINF_SUCCESS;
1231}
1232
1233
1234/**
1235 * Looks up the session for the current process in the global hash table or in
1236 * OS specific pointer.
1237 *
1238 * @returns Pointer to the session with a reference that the caller must
1239 * release. If no valid session was found, NULL is returned.
1240 *
1241 * @param pDevExt The device extension.
1242 * @param Process The process ID.
1243 * @param R0Process The ring-0 process handle.
1244 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1245 * this is used instead of the hash table. For
1246 * additional safety it must then be equal to the
1247 * SUPDRVSESSION::ppOsSessionPtr member.
1248 * This can be NULL even if the OS has a session
1249 * pointer.
1250 */
1251PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1252 PSUPDRVSESSION *ppOsSessionPtr)
1253{
1254 PSUPDRVSESSION pCur;
1255 unsigned iHash;
1256
1257 /*
1258 * Validate input.
1259 */
1260 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1261
1262 /*
1263 * Calculate the hash table index and acquire the spinlock.
1264 */
1265 iHash = SUPDRV_SESSION_HASH(Process);
1266
1267 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1268
1269 /*
1270 * If an OS session pointer is provided, always use it.
1271 */
1272 if (ppOsSessionPtr)
1273 {
1274 pCur = *ppOsSessionPtr;
1275 if ( pCur
1276 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1277 || pCur->Process != Process
1278 || pCur->R0Process != R0Process) )
1279 pCur = NULL;
1280 }
1281 else
1282 {
1283 /*
1284 * Otherwise, do the hash table lookup.
1285 */
1286 pCur = pDevExt->apSessionHashTab[iHash];
1287 while ( pCur
1288 && ( pCur->Process != Process
1289 || pCur->R0Process != R0Process) )
1290 pCur = pCur->pCommonNextHash;
1291 }
1292
1293 /*
1294 * Retain the session.
1295 */
1296 if (pCur)
1297 {
1298 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1299 NOREF(cRefs);
1300 Assert(cRefs > 1 && cRefs < _1M);
1301 }
1302
1303 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1304
1305 return pCur;
1306}
1307
1308
1309/**
1310 * Retain a session to make sure it doesn't go away while it is in use.
1311 *
1312 * @returns New reference count on success, UINT32_MAX on failure.
1313 * @param pSession Session data.
1314 */
1315uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1316{
1317 uint32_t cRefs;
1318 AssertPtrReturn(pSession, UINT32_MAX);
1319 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1320
1321 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1322 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1323 return cRefs;
1324}
1325
1326
1327/**
1328 * Releases a given session.
1329 *
1330 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1331 * @param pSession Session data.
1332 */
1333uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1334{
1335 uint32_t cRefs;
1336 AssertPtrReturn(pSession, UINT32_MAX);
1337 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1338
1339 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1340 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1341 if (cRefs == 0)
1342 supdrvDestroySession(pSession->pDevExt, pSession);
1343 return cRefs;
1344}
1345
1346
1347/**
1348 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1349 *
1350 * @returns IPRT status code, see SUPR0ObjAddRef.
1351 * @param hHandleTable The handle table handle. Ignored.
1352 * @param pvObj The object pointer.
1353 * @param pvCtx Context, the handle type. Ignored.
1354 * @param pvUser Session pointer.
1355 */
1356static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1357{
1358 NOREF(pvCtx);
1359 NOREF(hHandleTable);
1360 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1361}
1362
1363
1364/**
1365 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1366 *
1367 * @param hHandleTable The handle table handle. Ignored.
1368 * @param h The handle value. Ignored.
1369 * @param pvObj The object pointer.
1370 * @param pvCtx Context, the handle type. Ignored.
1371 * @param pvUser Session pointer.
1372 */
1373static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1374{
1375 NOREF(pvCtx);
1376 NOREF(h);
1377 NOREF(hHandleTable);
1378 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1379}
1380
1381
1382/**
1383 * Fast path I/O Control worker.
1384 *
1385 * @returns VBox status code that should be passed down to ring-3 unchanged.
1386 * @param uIOCtl Function number.
1387 * @param idCpu VMCPU id.
1388 * @param pDevExt Device extention.
1389 * @param pSession Session data.
1390 */
1391int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1392{
1393 /*
1394 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1395 */
1396 if (RT_LIKELY( RT_VALID_PTR(pSession)
1397 && pSession->pVM
1398 && pDevExt->pfnVMMR0EntryFast))
1399 {
1400 switch (uIOCtl)
1401 {
1402 case SUP_IOCTL_FAST_DO_RAW_RUN:
1403 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1404 break;
1405 case SUP_IOCTL_FAST_DO_HM_RUN:
1406 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1407 break;
1408 case SUP_IOCTL_FAST_DO_NOP:
1409 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1410 break;
1411 default:
1412 return VERR_INTERNAL_ERROR;
1413 }
1414 return VINF_SUCCESS;
1415 }
1416 return VERR_INTERNAL_ERROR;
1417}
1418
1419
/**
 * Helper for supdrvIOCtl.  Tells whether a string contains at least one
 * character from a given set.
 *
 * This is a hand-rolled stand-in for strpbrk, which would be used here if it
 * were on the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr contains any character of pszChars, 0 otherwise.
 * @param   pszStr      The zero terminated string to scan.
 * @param   pszChars    The zero terminated set of characters to look for.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;

    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        const char *pszCandidate;
        for (pszCandidate = pszChars; *pszCandidate != '\0'; pszCandidate++)
            if (*pszCandidate == *pszCur)
                return 1;
    }

    return 0;
}
1443
1444
1445
1446/**
1447 * I/O Control inner worker (tracing reasons).
1448 *
1449 * @returns IPRT status code.
1450 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1451 *
1452 * @param uIOCtl Function number.
1453 * @param pDevExt Device extention.
1454 * @param pSession Session data.
1455 * @param pReqHdr The request header.
1456 */
1457static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1458{
1459 /*
1460 * Validation macros
1461 */
1462#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1463 do { \
1464 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1465 { \
1466 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1467 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1468 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1469 } \
1470 } while (0)
1471
1472#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1473
1474#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1475 do { \
1476 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1477 { \
1478 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1479 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1480 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1481 } \
1482 } while (0)
1483
1484#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1485 do { \
1486 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1487 { \
1488 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1489 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1490 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1491 } \
1492 } while (0)
1493
1494#define REQ_CHECK_EXPR(Name, expr) \
1495 do { \
1496 if (RT_UNLIKELY(!(expr))) \
1497 { \
1498 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1499 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1500 } \
1501 } while (0)
1502
1503#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1504 do { \
1505 if (RT_UNLIKELY(!(expr))) \
1506 { \
1507 OSDBGPRINT( fmt ); \
1508 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1509 } \
1510 } while (0)
1511
1512 /*
1513 * The switch.
1514 */
1515 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1516 {
1517 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1518 {
1519 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1520 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1521 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1522 {
1523 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1524 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1525 return 0;
1526 }
1527
1528#if 0
1529 /*
1530 * Call out to the OS specific code and let it do permission checks on the
1531 * client process.
1532 */
1533 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1534 {
1535 pReq->u.Out.u32Cookie = 0xffffffff;
1536 pReq->u.Out.u32SessionCookie = 0xffffffff;
1537 pReq->u.Out.u32SessionVersion = 0xffffffff;
1538 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1539 pReq->u.Out.pSession = NULL;
1540 pReq->u.Out.cFunctions = 0;
1541 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1542 return 0;
1543 }
1544#endif
1545
1546 /*
1547 * Match the version.
1548 * The current logic is very simple, match the major interface version.
1549 */
1550 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1551 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1552 {
1553 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1554 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1555 pReq->u.Out.u32Cookie = 0xffffffff;
1556 pReq->u.Out.u32SessionCookie = 0xffffffff;
1557 pReq->u.Out.u32SessionVersion = 0xffffffff;
1558 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1559 pReq->u.Out.pSession = NULL;
1560 pReq->u.Out.cFunctions = 0;
1561 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1562 return 0;
1563 }
1564
1565 /*
1566 * Fill in return data and be gone.
1567 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1568 * u32SessionVersion <= u32ReqVersion!
1569 */
1570 /** @todo Somehow validate the client and negotiate a secure cookie... */
1571 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1572 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1573 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1574 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1575 pReq->u.Out.pSession = pSession;
1576 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1577 pReq->Hdr.rc = VINF_SUCCESS;
1578 return 0;
1579 }
1580
1581 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1582 {
1583 /* validate */
1584 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1585 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1586
1587 /* execute */
1588 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1589 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1590 pReq->Hdr.rc = VINF_SUCCESS;
1591 return 0;
1592 }
1593
1594 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1595 {
1596 /* validate */
1597 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1598 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1599 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1600 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1601 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1602
1603 /* execute */
1604 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1605 if (RT_FAILURE(pReq->Hdr.rc))
1606 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1607 return 0;
1608 }
1609
1610 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1611 {
1612 /* validate */
1613 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1614 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1615
1616 /* execute */
1617 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1618 return 0;
1619 }
1620
1621 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1622 {
1623 /* validate */
1624 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1625 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1626
1627 /* execute */
1628 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1629 if (RT_FAILURE(pReq->Hdr.rc))
1630 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1631 return 0;
1632 }
1633
1634 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1635 {
1636 /* validate */
1637 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1638 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1639
1640 /* execute */
1641 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1642 return 0;
1643 }
1644
1645 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1646 {
1647 /* validate */
1648 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1649 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1650 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1651 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1652 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1653 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1654 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1655 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1656 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1657 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1658 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1659
1660 /* execute */
1661 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1662 return 0;
1663 }
1664
1665 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1666 {
1667 /* validate */
1668 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1669 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1670 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1671 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1672 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1673 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1674 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1675 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1676 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1677 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1678 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1679 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1680 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1681 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1682 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1683
1684 if (pReq->u.In.cSymbols)
1685 {
1686 uint32_t i;
1687 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1688 for (i = 0; i < pReq->u.In.cSymbols; i++)
1689 {
1690 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1691 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1692 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1693 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1694 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1695 pReq->u.In.cbStrTab - paSyms[i].offName),
1696 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1697 }
1698 }
1699
1700 /* execute */
1701 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1702 return 0;
1703 }
1704
1705 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1706 {
1707 /* validate */
1708 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1709 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1710
1711 /* execute */
1712 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1713 return 0;
1714 }
1715
1716 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1717 {
1718 /* validate */
1719 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1720 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1721 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1722
1723 /* execute */
1724 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1725 return 0;
1726 }
1727
1728 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1729 {
1730 /* validate */
1731 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1732 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1733 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1734
1735 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1736 {
1737 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1738
1739 /* execute */
1740 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1741 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1742 else
1743 pReq->Hdr.rc = VERR_WRONG_ORDER;
1744 }
1745 else
1746 {
1747 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1748 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1749 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1750 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1751 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1752
1753 /* execute */
1754 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1755 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1756 else
1757 pReq->Hdr.rc = VERR_WRONG_ORDER;
1758 }
1759
1760 if ( RT_FAILURE(pReq->Hdr.rc)
1761 && pReq->Hdr.rc != VERR_INTERRUPTED
1762 && pReq->Hdr.rc != VERR_TIMEOUT)
1763 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1764 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1765 else
1766 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1767 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1768 return 0;
1769 }
1770
1771 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1772 {
1773 /* validate */
1774 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1775 PSUPVMMR0REQHDR pVMMReq;
1776 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1777 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1778
1779 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1780 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1781 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1782 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1783 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1784
1785 /* execute */
1786 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1787 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1788 else
1789 pReq->Hdr.rc = VERR_WRONG_ORDER;
1790
1791 if ( RT_FAILURE(pReq->Hdr.rc)
1792 && pReq->Hdr.rc != VERR_INTERRUPTED
1793 && pReq->Hdr.rc != VERR_TIMEOUT)
1794 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1795 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1796 else
1797 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1798 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1799 return 0;
1800 }
1801
1802 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1803 {
1804 /* validate */
1805 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1806 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1807
1808 /* execute */
1809 pReq->Hdr.rc = VINF_SUCCESS;
1810 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1811 return 0;
1812 }
1813
1814 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1815 {
1816 /* validate */
1817 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1818 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1819 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1820
1821 /* execute */
1822 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1823 if (RT_FAILURE(pReq->Hdr.rc))
1824 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1825 return 0;
1826 }
1827
1828 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1829 {
1830 /* validate */
1831 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1832 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1833
1834 /* execute */
1835 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1836 return 0;
1837 }
1838
1839 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1840 {
1841 /* validate */
1842 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1843 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1844
1845 /* execute */
1846 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1847 if (RT_SUCCESS(pReq->Hdr.rc))
1848 pReq->u.Out.pGipR0 = pDevExt->pGip;
1849 return 0;
1850 }
1851
1852 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1853 {
1854 /* validate */
1855 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1856 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1857
1858 /* execute */
1859 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1860 return 0;
1861 }
1862
1863 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1864 {
1865 /* validate */
1866 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1867 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1868 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1869 || ( VALID_PTR(pReq->u.In.pVMR0)
1870 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1871 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1872 /* execute */
1873 pSession->pVM = pReq->u.In.pVMR0;
1874 pReq->Hdr.rc = VINF_SUCCESS;
1875 return 0;
1876 }
1877
1878 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1879 {
1880 /* validate */
1881 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1882 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1883 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1884 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1885 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1886 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1887 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1888 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1889 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1890
1891 /* execute */
1892 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1893 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1894 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1895 &pReq->u.Out.aPages[0]);
1896 if (RT_FAILURE(pReq->Hdr.rc))
1897 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1898 return 0;
1899 }
1900
1901 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1902 {
1903 /* validate */
1904 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1905 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1906 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1907 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1908 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1909 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1910
1911 /* execute */
1912 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1913 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1914 if (RT_FAILURE(pReq->Hdr.rc))
1915 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1916 return 0;
1917 }
1918
1919 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1920 {
1921 /* validate */
1922 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1923 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1924 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1925 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1926 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1927 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1928 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1929
1930 /* execute */
1931 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1932 return 0;
1933 }
1934
1935 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1936 {
1937 /* validate */
1938 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1939 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1940
1941 /* execute */
1942 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1943 return 0;
1944 }
1945
1946 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1947 {
1948 /* validate */
1949 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1950 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1951 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1952
1953 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1954 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1955 else
1956 {
1957 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1958 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1959 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1960 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1961 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1962 }
1963 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1964
1965 /* execute */
1966 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1967 return 0;
1968 }
1969
1970 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1971 {
1972 /* validate */
1973 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1974 size_t cbStrTab;
1975 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1976 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1977 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1978 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1979 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1980 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1981 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1982 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1983 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1984 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1985 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1986
1987 /* execute */
1988 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
1989 return 0;
1990 }
1991
1992 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
1993 {
1994 /* validate */
1995 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
1996 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
1997 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
1998
1999 /* execute */
2000 switch (pReq->u.In.uType)
2001 {
2002 case SUP_SEM_TYPE_EVENT:
2003 {
2004 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2005 switch (pReq->u.In.uOp)
2006 {
2007 case SUPSEMOP2_WAIT_MS_REL:
2008 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2009 break;
2010 case SUPSEMOP2_WAIT_NS_ABS:
2011 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2012 break;
2013 case SUPSEMOP2_WAIT_NS_REL:
2014 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2015 break;
2016 case SUPSEMOP2_SIGNAL:
2017 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2018 break;
2019 case SUPSEMOP2_CLOSE:
2020 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2021 break;
2022 case SUPSEMOP2_RESET:
2023 default:
2024 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2025 break;
2026 }
2027 break;
2028 }
2029
2030 case SUP_SEM_TYPE_EVENT_MULTI:
2031 {
2032 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2033 switch (pReq->u.In.uOp)
2034 {
2035 case SUPSEMOP2_WAIT_MS_REL:
2036 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2037 break;
2038 case SUPSEMOP2_WAIT_NS_ABS:
2039 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2040 break;
2041 case SUPSEMOP2_WAIT_NS_REL:
2042 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2043 break;
2044 case SUPSEMOP2_SIGNAL:
2045 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2046 break;
2047 case SUPSEMOP2_CLOSE:
2048 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2049 break;
2050 case SUPSEMOP2_RESET:
2051 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2052 break;
2053 default:
2054 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2055 break;
2056 }
2057 break;
2058 }
2059
2060 default:
2061 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2062 break;
2063 }
2064 return 0;
2065 }
2066
2067 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2068 {
2069 /* validate */
2070 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2071 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2072 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2073
2074 /* execute */
2075 switch (pReq->u.In.uType)
2076 {
2077 case SUP_SEM_TYPE_EVENT:
2078 {
2079 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2080 switch (pReq->u.In.uOp)
2081 {
2082 case SUPSEMOP3_CREATE:
2083 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2084 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2085 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2086 break;
2087 case SUPSEMOP3_GET_RESOLUTION:
2088 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2089 pReq->Hdr.rc = VINF_SUCCESS;
2090 pReq->Hdr.cbOut = sizeof(*pReq);
2091 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2092 break;
2093 default:
2094 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2095 break;
2096 }
2097 break;
2098 }
2099
2100 case SUP_SEM_TYPE_EVENT_MULTI:
2101 {
2102 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2103 switch (pReq->u.In.uOp)
2104 {
2105 case SUPSEMOP3_CREATE:
2106 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2107 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2108 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2109 break;
2110 case SUPSEMOP3_GET_RESOLUTION:
2111 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2112 pReq->Hdr.rc = VINF_SUCCESS;
2113 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2114 break;
2115 default:
2116 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2117 break;
2118 }
2119 break;
2120 }
2121
2122 default:
2123 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2124 break;
2125 }
2126 return 0;
2127 }
2128
2129 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2130 {
2131 /* validate */
2132 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2133 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2134
2135 /* execute */
2136 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2137 if (RT_FAILURE(pReq->Hdr.rc))
2138 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2139 return 0;
2140 }
2141
2142 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2143 {
2144 /* validate */
2145 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2146 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2147
2148 /* execute */
2149 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2150 return 0;
2151 }
2152
2153 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2154 {
2155 /* validate */
2156 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2157
2158 /* execute */
2159 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2160 return 0;
2161 }
2162
2163 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2164 {
2165 /* validate */
2166 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2167 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2168
2169 /* execute */
2170 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2171 return 0;
2172 }
2173
2174 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2175 {
2176 /* validate */
2177 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2178 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2179 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2180 return VERR_INVALID_PARAMETER;
2181
2182 /* execute */
2183 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2184 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2185 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2186 pReq->u.In.szName, pReq->u.In.fFlags);
2187 return 0;
2188 }
2189
2190 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2191 {
2192 /* validate */
2193 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2194 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2195
2196 /* execute */
2197 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2198 return 0;
2199 }
2200
2201 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2202 {
2203 /* validate */
2204 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2205 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2206
2207 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2208 pReqHdr->rc = VINF_SUCCESS;
2209 return 0;
2210 }
2211
2212 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2213 {
2214 /* validate */
2215 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2216 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2217 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2218 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2219
2220 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2221 return 0;
2222 }
2223
2224 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2225 {
2226 /* validate */
2227 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2228
2229 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2230 return 0;
2231 }
2232
2233 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2234 {
2235 /* validate */
2236 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2237 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2238
2239 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2240 return 0;
2241 }
2242
2243 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2244 {
2245 /* validate */
2246 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2247 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2248
2249 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2250 return 0;
2251 }
2252
2253 default:
2254 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2255 break;
2256 }
2257 return VERR_GENERAL_FAILURE;
2258}
2259
2260
2261/**
2262 * I/O Control inner worker for the restricted operations.
2263 *
2264 * @returns IPRT status code.
2265 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2266 *
2267 * @param uIOCtl Function number.
2268 * @param pDevExt Device extention.
2269 * @param pSession Session data.
2270 * @param pReqHdr The request header.
2271 */
2272static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2273{
2274 /*
2275 * The switch.
2276 */
2277 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2278 {
2279 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2280 {
2281 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2282 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2283 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2284 {
2285 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2286 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2287 return 0;
2288 }
2289
2290 /*
2291 * Match the version.
2292 * The current logic is very simple, match the major interface version.
2293 */
2294 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2295 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2296 {
2297 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2298 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2299 pReq->u.Out.u32Cookie = 0xffffffff;
2300 pReq->u.Out.u32SessionCookie = 0xffffffff;
2301 pReq->u.Out.u32SessionVersion = 0xffffffff;
2302 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2303 pReq->u.Out.pSession = NULL;
2304 pReq->u.Out.cFunctions = 0;
2305 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2306 return 0;
2307 }
2308
2309 /*
2310 * Fill in return data and be gone.
2311 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2312 * u32SessionVersion <= u32ReqVersion!
2313 */
2314 /** @todo Somehow validate the client and negotiate a secure cookie... */
2315 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2316 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2317 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2318 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2319 pReq->u.Out.pSession = pSession;
2320 pReq->u.Out.cFunctions = 0;
2321 pReq->Hdr.rc = VINF_SUCCESS;
2322 return 0;
2323 }
2324
2325 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2326 {
2327 /* validate */
2328 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2329 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2330
2331 /* execute */
2332 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2333 if (RT_FAILURE(pReq->Hdr.rc))
2334 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2335 return 0;
2336 }
2337
2338 default:
2339 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2340 break;
2341 }
2342 return VERR_GENERAL_FAILURE;
2343}
2344
2345
2346/**
2347 * I/O Control worker.
2348 *
2349 * @returns IPRT status code.
2350 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2351 *
2352 * @param uIOCtl Function number.
2353 * @param pDevExt Device extention.
2354 * @param pSession Session data.
2355 * @param pReqHdr The request header.
2356 */
2357int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2358{
2359 int rc;
2360 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2361
2362 /*
2363 * Validate the request.
2364 */
2365 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2366 {
2367 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2368 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2369 return VERR_INVALID_PARAMETER;
2370 }
2371 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2372 || pReqHdr->cbIn < sizeof(*pReqHdr)
2373 || pReqHdr->cbIn > cbReq
2374 || pReqHdr->cbOut < sizeof(*pReqHdr)
2375 || pReqHdr->cbOut > cbReq))
2376 {
2377 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2378 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2379 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2380 return VERR_INVALID_PARAMETER;
2381 }
2382 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2383 {
2384 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2385 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2386 return VERR_INVALID_PARAMETER;
2387 }
2388 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2389 {
2390 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2391 {
2392 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2393 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2394 return VERR_INVALID_PARAMETER;
2395 }
2396 }
2397 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2398 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2399 {
2400 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2401 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2402 return VERR_INVALID_PARAMETER;
2403 }
2404
2405 /*
2406 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2407 */
2408 if (pSession->fUnrestricted)
2409 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2410 else
2411 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2412
2413 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2414 return rc;
2415}
2416
2417
2418/**
2419 * Inter-Driver Communication (IDC) worker.
2420 *
2421 * @returns VBox status code.
2422 * @retval VINF_SUCCESS on success.
2423 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2424 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2425 *
2426 * @param uReq The request (function) code.
2427 * @param pDevExt Device extention.
2428 * @param pSession Session data.
2429 * @param pReqHdr The request header.
2430 */
2431int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2432{
2433 /*
2434 * The OS specific code has already validated the pSession
2435 * pointer, and the request size being greater or equal to
2436 * size of the header.
2437 *
2438 * So, just check that pSession is a kernel context session.
2439 */
2440 if (RT_UNLIKELY( pSession
2441 && pSession->R0Process != NIL_RTR0PROCESS))
2442 return VERR_INVALID_PARAMETER;
2443
2444/*
2445 * Validation macro.
2446 */
2447#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2448 do { \
2449 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2450 { \
2451 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2452 (long)pReqHdr->cb, (long)(cbExpect))); \
2453 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2454 } \
2455 } while (0)
2456
2457 switch (uReq)
2458 {
2459 case SUPDRV_IDC_REQ_CONNECT:
2460 {
2461 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2462 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2463
2464 /*
2465 * Validate the cookie and other input.
2466 */
2467 if (pReq->Hdr.pSession != NULL)
2468 {
2469 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2470 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2471 }
2472 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2473 {
2474 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2475 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2476 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2477 }
2478 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2479 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2480 {
2481 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2482 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2483 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2484 }
2485 if (pSession != NULL)
2486 {
2487 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2488 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2489 }
2490
2491 /*
2492 * Match the version.
2493 * The current logic is very simple, match the major interface version.
2494 */
2495 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2496 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2497 {
2498 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2499 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2500 pReq->u.Out.pSession = NULL;
2501 pReq->u.Out.uSessionVersion = 0xffffffff;
2502 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2503 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2504 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2505 return VINF_SUCCESS;
2506 }
2507
2508 pReq->u.Out.pSession = NULL;
2509 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2510 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2511 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2512
2513 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2514 if (RT_FAILURE(pReq->Hdr.rc))
2515 {
2516 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2517 return VINF_SUCCESS;
2518 }
2519
2520 pReq->u.Out.pSession = pSession;
2521 pReq->Hdr.pSession = pSession;
2522
2523 return VINF_SUCCESS;
2524 }
2525
2526 case SUPDRV_IDC_REQ_DISCONNECT:
2527 {
2528 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2529
2530 supdrvSessionRelease(pSession);
2531 return pReqHdr->rc = VINF_SUCCESS;
2532 }
2533
2534 case SUPDRV_IDC_REQ_GET_SYMBOL:
2535 {
2536 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2537 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2538
2539 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2540 return VINF_SUCCESS;
2541 }
2542
2543 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2544 {
2545 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2546 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2547
2548 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2549 return VINF_SUCCESS;
2550 }
2551
2552 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2553 {
2554 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2555 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2556
2557 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2558 return VINF_SUCCESS;
2559 }
2560
2561 default:
2562 Log(("Unknown IDC %#lx\n", (long)uReq));
2563 break;
2564 }
2565
2566#undef REQ_CHECK_IDC_SIZE
2567 return VERR_NOT_SUPPORTED;
2568}
2569
2570
2571/**
2572 * Register a object for reference counting.
2573 * The object is registered with one reference in the specified session.
2574 *
2575 * @returns Unique identifier on success (pointer).
2576 * All future reference must use this identifier.
2577 * @returns NULL on failure.
2578 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2579 * @param pvUser1 The first user argument.
2580 * @param pvUser2 The second user argument.
2581 */
2582SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2583{
2584 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2585 PSUPDRVOBJ pObj;
2586 PSUPDRVUSAGE pUsage;
2587
2588 /*
2589 * Validate the input.
2590 */
2591 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2592 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2593 AssertPtrReturn(pfnDestructor, NULL);
2594
2595 /*
2596 * Allocate and initialize the object.
2597 */
2598 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2599 if (!pObj)
2600 return NULL;
2601 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2602 pObj->enmType = enmType;
2603 pObj->pNext = NULL;
2604 pObj->cUsage = 1;
2605 pObj->pfnDestructor = pfnDestructor;
2606 pObj->pvUser1 = pvUser1;
2607 pObj->pvUser2 = pvUser2;
2608 pObj->CreatorUid = pSession->Uid;
2609 pObj->CreatorGid = pSession->Gid;
2610 pObj->CreatorProcess= pSession->Process;
2611 supdrvOSObjInitCreator(pObj, pSession);
2612
2613 /*
2614 * Allocate the usage record.
2615 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2616 */
2617 RTSpinlockAcquire(pDevExt->Spinlock);
2618
2619 pUsage = pDevExt->pUsageFree;
2620 if (pUsage)
2621 pDevExt->pUsageFree = pUsage->pNext;
2622 else
2623 {
2624 RTSpinlockRelease(pDevExt->Spinlock);
2625 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2626 if (!pUsage)
2627 {
2628 RTMemFree(pObj);
2629 return NULL;
2630 }
2631 RTSpinlockAcquire(pDevExt->Spinlock);
2632 }
2633
2634 /*
2635 * Insert the object and create the session usage record.
2636 */
2637 /* The object. */
2638 pObj->pNext = pDevExt->pObjs;
2639 pDevExt->pObjs = pObj;
2640
2641 /* The session record. */
2642 pUsage->cUsage = 1;
2643 pUsage->pObj = pObj;
2644 pUsage->pNext = pSession->pUsage;
2645 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2646 pSession->pUsage = pUsage;
2647
2648 RTSpinlockRelease(pDevExt->Spinlock);
2649
2650 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2651 return pObj;
2652}
2653
2654
2655/**
2656 * Increment the reference counter for the object associating the reference
2657 * with the specified session.
2658 *
2659 * @returns IPRT status code.
2660 * @param pvObj The identifier returned by SUPR0ObjRegister().
2661 * @param pSession The session which is referencing the object.
2662 *
2663 * @remarks The caller should not own any spinlocks and must carefully protect
2664 * itself against potential race with the destructor so freed memory
2665 * isn't accessed here.
2666 */
2667SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2668{
2669 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2670}
2671
2672
2673/**
2674 * Increment the reference counter for the object associating the reference
2675 * with the specified session.
2676 *
2677 * @returns IPRT status code.
2678 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2679 * couldn't be allocated. (If you see this you're not doing the right
2680 * thing and it won't ever work reliably.)
2681 *
2682 * @param pvObj The identifier returned by SUPR0ObjRegister().
2683 * @param pSession The session which is referencing the object.
2684 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2685 * first reference to an object in a session with this
2686 * argument set.
2687 *
2688 * @remarks The caller should not own any spinlocks and must carefully protect
2689 * itself against potential race with the destructor so freed memory
2690 * isn't accessed here.
2691 */
2692SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2693{
2694 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2695 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2696 int rc = VINF_SUCCESS;
2697 PSUPDRVUSAGE pUsagePre;
2698 PSUPDRVUSAGE pUsage;
2699
2700 /*
2701 * Validate the input.
2702 * Be ready for the destruction race (someone might be stuck in the
2703 * destructor waiting a lock we own).
2704 */
2705 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2706 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2707 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2708 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2709 VERR_INVALID_PARAMETER);
2710
2711 RTSpinlockAcquire(pDevExt->Spinlock);
2712
2713 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2714 {
2715 RTSpinlockRelease(pDevExt->Spinlock);
2716
2717 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2718 return VERR_WRONG_ORDER;
2719 }
2720
2721 /*
2722 * Preallocate the usage record if we can.
2723 */
2724 pUsagePre = pDevExt->pUsageFree;
2725 if (pUsagePre)
2726 pDevExt->pUsageFree = pUsagePre->pNext;
2727 else if (!fNoBlocking)
2728 {
2729 RTSpinlockRelease(pDevExt->Spinlock);
2730 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2731 if (!pUsagePre)
2732 return VERR_NO_MEMORY;
2733
2734 RTSpinlockAcquire(pDevExt->Spinlock);
2735 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2736 {
2737 RTSpinlockRelease(pDevExt->Spinlock);
2738
2739 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2740 return VERR_WRONG_ORDER;
2741 }
2742 }
2743
2744 /*
2745 * Reference the object.
2746 */
2747 pObj->cUsage++;
2748
2749 /*
2750 * Look for the session record.
2751 */
2752 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2753 {
2754 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2755 if (pUsage->pObj == pObj)
2756 break;
2757 }
2758 if (pUsage)
2759 pUsage->cUsage++;
2760 else if (pUsagePre)
2761 {
2762 /* create a new session record. */
2763 pUsagePre->cUsage = 1;
2764 pUsagePre->pObj = pObj;
2765 pUsagePre->pNext = pSession->pUsage;
2766 pSession->pUsage = pUsagePre;
2767 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2768
2769 pUsagePre = NULL;
2770 }
2771 else
2772 {
2773 pObj->cUsage--;
2774 rc = VERR_TRY_AGAIN;
2775 }
2776
2777 /*
2778 * Put any unused usage record into the free list..
2779 */
2780 if (pUsagePre)
2781 {
2782 pUsagePre->pNext = pDevExt->pUsageFree;
2783 pDevExt->pUsageFree = pUsagePre;
2784 }
2785
2786 RTSpinlockRelease(pDevExt->Spinlock);
2787
2788 return rc;
2789}
2790
2791
2792/**
2793 * Decrement / destroy a reference counter record for an object.
2794 *
2795 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2796 *
2797 * @returns IPRT status code.
2798 * @retval VINF_SUCCESS if not destroyed.
2799 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2800 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2801 * string builds.
2802 *
2803 * @param pvObj The identifier returned by SUPR0ObjRegister().
2804 * @param pSession The session which is referencing the object.
2805 */
2806SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2807{
2808 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2809 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2810 int rc = VERR_INVALID_PARAMETER;
2811 PSUPDRVUSAGE pUsage;
2812 PSUPDRVUSAGE pUsagePrev;
2813
2814 /*
2815 * Validate the input.
2816 */
2817 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2818 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2819 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2820 VERR_INVALID_PARAMETER);
2821
2822 /*
2823 * Acquire the spinlock and look for the usage record.
2824 */
2825 RTSpinlockAcquire(pDevExt->Spinlock);
2826
2827 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2828 pUsage;
2829 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2830 {
2831 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2832 if (pUsage->pObj == pObj)
2833 {
2834 rc = VINF_SUCCESS;
2835 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2836 if (pUsage->cUsage > 1)
2837 {
2838 pObj->cUsage--;
2839 pUsage->cUsage--;
2840 }
2841 else
2842 {
2843 /*
2844 * Free the session record.
2845 */
2846 if (pUsagePrev)
2847 pUsagePrev->pNext = pUsage->pNext;
2848 else
2849 pSession->pUsage = pUsage->pNext;
2850 pUsage->pNext = pDevExt->pUsageFree;
2851 pDevExt->pUsageFree = pUsage;
2852
2853 /* What about the object? */
2854 if (pObj->cUsage > 1)
2855 pObj->cUsage--;
2856 else
2857 {
2858 /*
2859 * Object is to be destroyed, unlink it.
2860 */
2861 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2862 rc = VINF_OBJECT_DESTROYED;
2863 if (pDevExt->pObjs == pObj)
2864 pDevExt->pObjs = pObj->pNext;
2865 else
2866 {
2867 PSUPDRVOBJ pObjPrev;
2868 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2869 if (pObjPrev->pNext == pObj)
2870 {
2871 pObjPrev->pNext = pObj->pNext;
2872 break;
2873 }
2874 Assert(pObjPrev);
2875 }
2876 }
2877 }
2878 break;
2879 }
2880 }
2881
2882 RTSpinlockRelease(pDevExt->Spinlock);
2883
2884 /*
2885 * Call the destructor and free the object if required.
2886 */
2887 if (rc == VINF_OBJECT_DESTROYED)
2888 {
2889 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2890 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2891 if (pObj->pfnDestructor)
2892 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2893 RTMemFree(pObj);
2894 }
2895
2896 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2897 return rc;
2898}
2899
2900
2901/**
2902 * Verifies that the current process can access the specified object.
2903 *
2904 * @returns The following IPRT status code:
2905 * @retval VINF_SUCCESS if access was granted.
2906 * @retval VERR_PERMISSION_DENIED if denied access.
2907 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2908 *
2909 * @param pvObj The identifier returned by SUPR0ObjRegister().
2910 * @param pSession The session which wishes to access the object.
2911 * @param pszObjName Object string name. This is optional and depends on the object type.
2912 *
2913 * @remark The caller is responsible for making sure the object isn't removed while
2914 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2915 */
2916SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2917{
2918 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2919 int rc;
2920
2921 /*
2922 * Validate the input.
2923 */
2924 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2925 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2926 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2927 VERR_INVALID_PARAMETER);
2928
2929 /*
2930 * Check access. (returns true if a decision has been made.)
2931 */
2932 rc = VERR_INTERNAL_ERROR;
2933 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2934 return rc;
2935
2936 /*
2937 * Default policy is to allow the user to access his own
2938 * stuff but nothing else.
2939 */
2940 if (pObj->CreatorUid == pSession->Uid)
2941 return VINF_SUCCESS;
2942 return VERR_PERMISSION_DENIED;
2943}
2944
2945
2946/**
2947 * Lock pages.
2948 *
2949 * @returns IPRT status code.
2950 * @param pSession Session to which the locked memory should be associated.
2951 * @param pvR3 Start of the memory range to lock.
2952 * This must be page aligned.
2953 * @param cPages Number of pages to lock.
2954 * @param paPages Where to put the physical addresses of locked memory.
2955 */
2956SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2957{
2958 int rc;
2959 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2960 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2961 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2962
2963 /*
2964 * Verify input.
2965 */
2966 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2967 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2968 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2969 || !pvR3)
2970 {
2971 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2972 return VERR_INVALID_PARAMETER;
2973 }
2974
2975 /*
2976 * Let IPRT do the job.
2977 */
2978 Mem.eType = MEMREF_TYPE_LOCKED;
2979 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2980 if (RT_SUCCESS(rc))
2981 {
2982 uint32_t iPage = cPages;
2983 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2984 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2985
2986 while (iPage-- > 0)
2987 {
2988 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2989 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2990 {
2991 AssertMsgFailed(("iPage=%d\n", iPage));
2992 rc = VERR_INTERNAL_ERROR;
2993 break;
2994 }
2995 }
2996 if (RT_SUCCESS(rc))
2997 rc = supdrvMemAdd(&Mem, pSession);
2998 if (RT_FAILURE(rc))
2999 {
3000 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3001 AssertRC(rc2);
3002 }
3003 }
3004
3005 return rc;
3006}
3007
3008
3009/**
3010 * Unlocks the memory pointed to by pv.
3011 *
3012 * @returns IPRT status code.
3013 * @param pSession Session to which the memory was locked.
3014 * @param pvR3 Memory to unlock.
3015 */
3016SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3017{
3018 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3019 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3020 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3021}
3022
3023
3024/**
3025 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3026 * backing.
3027 *
3028 * @returns IPRT status code.
3029 * @param pSession Session data.
3030 * @param cPages Number of pages to allocate.
3031 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3032 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3033 * @param pHCPhys Where to put the physical address of allocated memory.
3034 */
3035SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3036{
3037 int rc;
3038 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3039 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3040
3041 /*
3042 * Validate input.
3043 */
3044 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3045 if (!ppvR3 || !ppvR0 || !pHCPhys)
3046 {
3047 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3048 pSession, ppvR0, ppvR3, pHCPhys));
3049 return VERR_INVALID_PARAMETER;
3050
3051 }
3052 if (cPages < 1 || cPages >= 256)
3053 {
3054 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3055 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3056 }
3057
3058 /*
3059 * Let IPRT do the job.
3060 */
3061 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3062 if (RT_SUCCESS(rc))
3063 {
3064 int rc2;
3065 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3066 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3067 if (RT_SUCCESS(rc))
3068 {
3069 Mem.eType = MEMREF_TYPE_CONT;
3070 rc = supdrvMemAdd(&Mem, pSession);
3071 if (!rc)
3072 {
3073 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3074 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3075 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3076 return 0;
3077 }
3078
3079 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3080 AssertRC(rc2);
3081 }
3082 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3083 AssertRC(rc2);
3084 }
3085
3086 return rc;
3087}
3088
3089
3090/**
3091 * Frees memory allocated using SUPR0ContAlloc().
3092 *
3093 * @returns IPRT status code.
3094 * @param pSession The session to which the memory was allocated.
3095 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3096 */
3097SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3098{
3099 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3100 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3101 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3102}
3103
3104
3105/**
3106 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3107 *
3108 * The memory isn't zeroed.
3109 *
3110 * @returns IPRT status code.
3111 * @param pSession Session data.
3112 * @param cPages Number of pages to allocate.
3113 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3114 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3115 * @param paPages Where to put the physical addresses of allocated memory.
3116 */
3117SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3118{
3119 unsigned iPage;
3120 int rc;
3121 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3122 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3123
3124 /*
3125 * Validate input.
3126 */
3127 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3128 if (!ppvR3 || !ppvR0 || !paPages)
3129 {
3130 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3131 pSession, ppvR3, ppvR0, paPages));
3132 return VERR_INVALID_PARAMETER;
3133
3134 }
3135 if (cPages < 1 || cPages >= 256)
3136 {
3137 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3138 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3139 }
3140
3141 /*
3142 * Let IPRT do the work.
3143 */
3144 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3145 if (RT_SUCCESS(rc))
3146 {
3147 int rc2;
3148 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3149 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3150 if (RT_SUCCESS(rc))
3151 {
3152 Mem.eType = MEMREF_TYPE_LOW;
3153 rc = supdrvMemAdd(&Mem, pSession);
3154 if (!rc)
3155 {
3156 for (iPage = 0; iPage < cPages; iPage++)
3157 {
3158 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3159 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3160 }
3161 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3162 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3163 return 0;
3164 }
3165
3166 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3167 AssertRC(rc2);
3168 }
3169
3170 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3171 AssertRC(rc2);
3172 }
3173
3174 return rc;
3175}
3176
3177
3178/**
3179 * Frees memory allocated using SUPR0LowAlloc().
3180 *
3181 * @returns IPRT status code.
3182 * @param pSession The session to which the memory was allocated.
3183 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3184 */
3185SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3186{
3187 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3188 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3189 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3190}
3191
3192
3193
3194/**
3195 * Allocates a chunk of memory with both R0 and R3 mappings.
3196 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3197 *
3198 * @returns IPRT status code.
3199 * @param pSession The session to associated the allocation with.
3200 * @param cb Number of bytes to allocate.
3201 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3202 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3203 */
3204SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3205{
3206 int rc;
3207 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3208 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3209
3210 /*
3211 * Validate input.
3212 */
3213 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3214 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3215 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3216 if (cb < 1 || cb >= _4M)
3217 {
3218 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3219 return VERR_INVALID_PARAMETER;
3220 }
3221
3222 /*
3223 * Let IPRT do the work.
3224 */
3225 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3226 if (RT_SUCCESS(rc))
3227 {
3228 int rc2;
3229 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3230 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3231 if (RT_SUCCESS(rc))
3232 {
3233 Mem.eType = MEMREF_TYPE_MEM;
3234 rc = supdrvMemAdd(&Mem, pSession);
3235 if (!rc)
3236 {
3237 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3238 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3239 return VINF_SUCCESS;
3240 }
3241
3242 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3243 AssertRC(rc2);
3244 }
3245
3246 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3247 AssertRC(rc2);
3248 }
3249
3250 return rc;
3251}
3252
3253
3254/**
3255 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3256 *
3257 * @returns IPRT status code.
3258 * @param pSession The session to which the memory was allocated.
3259 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3260 * @param paPages Where to store the physical addresses.
3261 */
3262SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3263{
3264 PSUPDRVBUNDLE pBundle;
3265 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3266
3267 /*
3268 * Validate input.
3269 */
3270 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3271 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3272 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3273
3274 /*
3275 * Search for the address.
3276 */
3277 RTSpinlockAcquire(pSession->Spinlock);
3278 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3279 {
3280 if (pBundle->cUsed > 0)
3281 {
3282 unsigned i;
3283 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3284 {
3285 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3286 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3287 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3288 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3289 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3290 )
3291 )
3292 {
3293 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3294 size_t iPage;
3295 for (iPage = 0; iPage < cPages; iPage++)
3296 {
3297 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3298 paPages[iPage].uReserved = 0;
3299 }
3300 RTSpinlockRelease(pSession->Spinlock);
3301 return VINF_SUCCESS;
3302 }
3303 }
3304 }
3305 }
3306 RTSpinlockRelease(pSession->Spinlock);
3307 Log(("Failed to find %p!!!\n", (void *)uPtr));
3308 return VERR_INVALID_PARAMETER;
3309}
3310
3311
3312/**
3313 * Free memory allocated by SUPR0MemAlloc().
3314 *
3315 * @returns IPRT status code.
3316 * @param pSession The session owning the allocation.
3317 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3318 */
3319SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3320{
3321 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3322 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3323 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3324}
3325
3326
3327/**
3328 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3329 *
3330 * The memory is fixed and it's possible to query the physical addresses using
3331 * SUPR0MemGetPhys().
3332 *
3333 * @returns IPRT status code.
3334 * @param pSession The session to associated the allocation with.
3335 * @param cPages The number of pages to allocate.
3336 * @param fFlags Flags, reserved for the future. Must be zero.
3337 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3338 * NULL if no ring-3 mapping.
3339 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3340 * NULL if no ring-0 mapping.
3341 * @param paPages Where to store the addresses of the pages. Optional.
3342 */
3343SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3344{
3345 int rc;
3346 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3347 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3348
3349 /*
3350 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3351 */
3352 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3353 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3354 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3355 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3356 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3357 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3358 {
3359 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3360 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3361 }
3362
3363 /*
3364 * Let IPRT do the work.
3365 */
3366 if (ppvR0)
3367 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3368 else
3369 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3370 if (RT_SUCCESS(rc))
3371 {
3372 int rc2;
3373 if (ppvR3)
3374 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3375 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3376 else
3377 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3378 if (RT_SUCCESS(rc))
3379 {
3380 Mem.eType = MEMREF_TYPE_PAGE;
3381 rc = supdrvMemAdd(&Mem, pSession);
3382 if (!rc)
3383 {
3384 if (ppvR3)
3385 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3386 if (ppvR0)
3387 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3388 if (paPages)
3389 {
3390 uint32_t iPage = cPages;
3391 while (iPage-- > 0)
3392 {
3393 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3394 Assert(paPages[iPage] != NIL_RTHCPHYS);
3395 }
3396 }
3397 return VINF_SUCCESS;
3398 }
3399
3400 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3401 AssertRC(rc2);
3402 }
3403
3404 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3405 AssertRC(rc2);
3406 }
3407 return rc;
3408}
3409
3410
3411/**
3412 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3413 * space.
3414 *
3415 * @returns IPRT status code.
3416 * @param pSession The session to associated the allocation with.
3417 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3418 * @param offSub Where to start mapping. Must be page aligned.
3419 * @param cbSub How much to map. Must be page aligned.
3420 * @param fFlags Flags, MBZ.
3421 * @param ppvR0 Where to return the address of the ring-0 mapping on
3422 * success.
3423 */
3424SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3425 uint32_t fFlags, PRTR0PTR ppvR0)
3426{
3427 int rc;
3428 PSUPDRVBUNDLE pBundle;
3429 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3430 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3431
3432 /*
3433 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3434 */
3435 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3436 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3437 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3438 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3439 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3440 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3441
3442 /*
3443 * Find the memory object.
3444 */
3445 RTSpinlockAcquire(pSession->Spinlock);
3446 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3447 {
3448 if (pBundle->cUsed > 0)
3449 {
3450 unsigned i;
3451 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3452 {
3453 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3454 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3455 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3456 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3457 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3458 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3459 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3460 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3461 {
3462 hMemObj = pBundle->aMem[i].MemObj;
3463 break;
3464 }
3465 }
3466 }
3467 }
3468 RTSpinlockRelease(pSession->Spinlock);
3469
3470 rc = VERR_INVALID_PARAMETER;
3471 if (hMemObj != NIL_RTR0MEMOBJ)
3472 {
3473 /*
3474 * Do some further input validations before calling IPRT.
3475 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3476 */
3477 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3478 if ( offSub < cbMemObj
3479 && cbSub <= cbMemObj
3480 && offSub + cbSub <= cbMemObj)
3481 {
3482 RTR0MEMOBJ hMapObj;
3483 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3484 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3485 if (RT_SUCCESS(rc))
3486 *ppvR0 = RTR0MemObjAddress(hMapObj);
3487 }
3488 else
3489 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3490
3491 }
3492 return rc;
3493}
3494
3495
3496/**
3497 * Changes the page level protection of one or more pages previously allocated
3498 * by SUPR0PageAllocEx.
3499 *
3500 * @returns IPRT status code.
3501 * @param pSession The session to associated the allocation with.
3502 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3503 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3504 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3505 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3506 * @param offSub Where to start changing. Must be page aligned.
3507 * @param cbSub How much to change. Must be page aligned.
3508 * @param fProt The new page level protection, see RTMEM_PROT_*.
3509 */
3510SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3511{
3512 int rc;
3513 PSUPDRVBUNDLE pBundle;
3514 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3515 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3516 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3517
3518 /*
3519 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3520 */
3521 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3522 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3523 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3524 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3525 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3526
3527 /*
3528 * Find the memory object.
3529 */
3530 RTSpinlockAcquire(pSession->Spinlock);
3531 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3532 {
3533 if (pBundle->cUsed > 0)
3534 {
3535 unsigned i;
3536 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3537 {
3538 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3539 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3540 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3541 || pvR3 == NIL_RTR3PTR)
3542 && ( pvR0 == NIL_RTR0PTR
3543 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3544 && ( pvR3 == NIL_RTR3PTR
3545 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3546 {
3547 if (pvR0 != NIL_RTR0PTR)
3548 hMemObjR0 = pBundle->aMem[i].MemObj;
3549 if (pvR3 != NIL_RTR3PTR)
3550 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3551 break;
3552 }
3553 }
3554 }
3555 }
3556 RTSpinlockRelease(pSession->Spinlock);
3557
3558 rc = VERR_INVALID_PARAMETER;
3559 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3560 || hMemObjR3 != NIL_RTR0MEMOBJ)
3561 {
3562 /*
3563 * Do some further input validations before calling IPRT.
3564 */
3565 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3566 if ( offSub < cbMemObj
3567 && cbSub <= cbMemObj
3568 && offSub + cbSub <= cbMemObj)
3569 {
3570 rc = VINF_SUCCESS;
3571 if (hMemObjR3 != NIL_RTR0PTR)
3572 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3573 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3574 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3575 }
3576 else
3577 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3578
3579 }
3580 return rc;
3581
3582}
3583
3584
3585/**
3586 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3587 *
3588 * @returns IPRT status code.
3589 * @param pSession The session owning the allocation.
3590 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3591 * SUPR0PageAllocEx().
3592 */
3593SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3594{
3595 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3596 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3597 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3598}
3599
3600
3601/**
3602 * Gets the paging mode of the current CPU.
3603 *
3604 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3605 */
3606SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3607{
3608 SUPPAGINGMODE enmMode;
3609
3610 RTR0UINTREG cr0 = ASMGetCR0();
3611 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3612 enmMode = SUPPAGINGMODE_INVALID;
3613 else
3614 {
3615 RTR0UINTREG cr4 = ASMGetCR4();
3616 uint32_t fNXEPlusLMA = 0;
3617 if (cr4 & X86_CR4_PAE)
3618 {
3619 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3620 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3621 {
3622 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3623 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3624 fNXEPlusLMA |= RT_BIT(0);
3625 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3626 fNXEPlusLMA |= RT_BIT(1);
3627 }
3628 }
3629
3630 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3631 {
3632 case 0:
3633 enmMode = SUPPAGINGMODE_32_BIT;
3634 break;
3635
3636 case X86_CR4_PGE:
3637 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3638 break;
3639
3640 case X86_CR4_PAE:
3641 enmMode = SUPPAGINGMODE_PAE;
3642 break;
3643
3644 case X86_CR4_PAE | RT_BIT(0):
3645 enmMode = SUPPAGINGMODE_PAE_NX;
3646 break;
3647
3648 case X86_CR4_PAE | X86_CR4_PGE:
3649 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3650 break;
3651
3652 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3653 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3654 break;
3655
3656 case RT_BIT(1) | X86_CR4_PAE:
3657 enmMode = SUPPAGINGMODE_AMD64;
3658 break;
3659
3660 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3661 enmMode = SUPPAGINGMODE_AMD64_NX;
3662 break;
3663
3664 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3665 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3666 break;
3667
3668 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3669 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3670 break;
3671
3672 default:
3673 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3674 enmMode = SUPPAGINGMODE_INVALID;
3675 break;
3676 }
3677 }
3678 return enmMode;
3679}
3680
3681
3682/**
3683 * Enables or disabled hardware virtualization extensions using native OS APIs.
3684 *
3685 * @returns VBox status code.
3686 * @retval VINF_SUCCESS on success.
3687 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3688 *
3689 * @param fEnable Whether to enable or disable.
3690 */
3691SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3692{
3693#ifdef RT_OS_DARWIN
3694 return supdrvOSEnableVTx(fEnable);
3695#else
3696 return VERR_NOT_SUPPORTED;
3697#endif
3698}
3699
3700
3701/**
3702 * Suspends hardware virtualization extensions using the native OS API.
3703 *
3704 * This is called prior to entering raw-mode context.
3705 *
3706 * @returns @c true if suspended, @c false if not.
3707 */
3708SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3709{
3710#ifdef RT_OS_DARWIN
3711 return supdrvOSSuspendVTxOnCpu();
3712#else
3713 return false;
3714#endif
3715}
3716
3717
3718/**
3719 * Resumes hardware virtualization extensions using the native OS API.
3720 *
3721 * This is called after to entering raw-mode context.
3722 *
3723 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3724 */
3725SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3726{
3727#ifdef RT_OS_DARWIN
3728 supdrvOSResumeVTxOnCpu(fSuspended);
3729#else
3730 Assert(!fSuspended);
3731#endif
3732}
3733
3734
3735/**
3736 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3737 *
3738 * @returns VBox status code.
3739 * @retval VERR_VMX_NO_VMX
3740 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3741 * @retval VERR_VMX_MSR_VMXON_DISABLED
3742 * @retval VERR_VMX_MSR_LOCKING_FAILED
3743 * @retval VERR_SVM_NO_SVM
3744 * @retval VERR_SVM_DISABLED
3745 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3746 * (centaur) CPU.
3747 *
3748 * @param pSession The session handle.
3749 * @param pfCaps Where to store the capabilities.
3750 */
3751SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3752{
3753 int rc = VERR_UNSUPPORTED_CPU;
3754 bool fIsSmxModeAmbiguous = false;
3755 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3756
3757 /*
3758 * Input validation.
3759 */
3760 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3761 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3762
3763 *pfCaps = 0;
3764 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3765 RTThreadPreemptDisable(&PreemptState);
3766 if (ASMHasCpuId())
3767 {
3768 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3769 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3770
3771 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3772 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3773
3774 if ( ASMIsValidStdRange(uMaxId)
3775 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3776 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3777 )
3778 {
3779 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3780 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3781 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3782 )
3783 {
3784 /** @todo Unify code with hmR0InitIntelCpu(). */
3785 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3786 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3787 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3788 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3789 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3790
3791 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3792 if (fMsrLocked)
3793 {
3794 if (fVmxAllowed && fSmxVmxAllowed)
3795 rc = VINF_SUCCESS;
3796 else if (!fVmxAllowed && !fSmxVmxAllowed)
3797 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3798 else if (!fMaybeSmxMode)
3799 {
3800 if (fVmxAllowed)
3801 rc = VINF_SUCCESS;
3802 else
3803 rc = VERR_VMX_MSR_VMXON_DISABLED;
3804 }
3805 else
3806 {
3807 /*
3808 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3809 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3810 * See @bugref{6873}.
3811 */
3812 Assert(fMaybeSmxMode == true);
3813 fIsSmxModeAmbiguous = true;
3814 rc = VINF_SUCCESS;
3815 }
3816 }
3817 else
3818 {
3819 /*
3820 * MSR is not yet locked; we can change it ourselves here.
3821 * Once the lock bit is set, this MSR can no longer be modified.
3822 *
3823 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3824 * accurately. See @bugref{6873}.
3825 */
3826 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3827 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3828 | MSR_IA32_FEATURE_CONTROL_VMXON;
3829 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3830
3831 /* Verify. */
3832 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3833 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3834 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3835 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3836 if (fSmxVmxAllowed && fVmxAllowed)
3837 rc = VINF_SUCCESS;
3838 else
3839 rc = VERR_VMX_MSR_LOCKING_FAILED;
3840 }
3841
3842 if (rc == VINF_SUCCESS)
3843 {
3844 VMXCAPABILITY vtCaps;
3845
3846 *pfCaps |= SUPVTCAPS_VT_X;
3847
3848 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3849 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3850 {
3851 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3852 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3853 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3854 }
3855 }
3856 }
3857 else
3858 rc = VERR_VMX_NO_VMX;
3859 }
3860 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3861 && ASMIsValidStdRange(uMaxId))
3862 {
3863 uint32_t fExtFeaturesEcx, uExtMaxId;
3864 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3865 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3866 if ( ASMIsValidExtRange(uExtMaxId)
3867 && uExtMaxId >= 0x8000000a
3868 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3869 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3870 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3871 )
3872 {
3873 /* Check if SVM is disabled */
3874 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3875 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3876 {
3877 uint32_t fSvmFeatures;
3878 *pfCaps |= SUPVTCAPS_AMD_V;
3879
3880 /* Query AMD-V features. */
3881 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3882 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3883 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3884
3885 rc = VINF_SUCCESS;
3886 }
3887 else
3888 rc = VERR_SVM_DISABLED;
3889 }
3890 else
3891 rc = VERR_SVM_NO_SVM;
3892 }
3893 }
3894
3895 RTThreadPreemptRestore(&PreemptState);
3896 if (fIsSmxModeAmbiguous)
3897 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3898 return rc;
3899}
3900
3901
3902/**
3903 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3904 * updating.
3905 *
3906 * @param pGip Pointer to the GIP.
3907 * @param pGipCpu The per CPU structure for this CPU.
3908 * @param u64NanoTS The current time.
3909 */
3910static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3911{
3912 pGipCpu->u64TSC = SUPReadTsc() - pGipCpu->u32UpdateIntervalTSC;
3913 pGipCpu->u64NanoTS = u64NanoTS;
3914}
3915
3916
3917/**
3918 * Set the current TSC and NanoTS value for the CPU.
3919 *
3920 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3921 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3922 * @param pvUser2 Pointer to the variable holding the current time.
3923 */
3924static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3925{
3926 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3927 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3928
3929 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3930 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3931
3932 NOREF(pvUser2);
3933 NOREF(idCpu);
3934}
3935
3936
3937/**
3938 * Increase the timer freqency on hosts where this is possible (NT).
3939 *
3940 * The idea is that more interrupts is better for us... Also, it's better than
3941 * we increase the timer frequence, because we might end up getting inaccuract
3942 * callbacks if someone else does it.
3943 *
3944 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
3945 */
3946static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3947{
3948 if (pDevExt->u32SystemTimerGranularityGrant == 0)
3949 {
3950 uint32_t u32SystemResolution;
3951 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3952 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3953 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3954 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3955 )
3956 {
3957 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3958 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3959 }
3960 }
3961}
3962
3963
3964/**
3965 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
3966 *
3967 * @param pDevExt Clears u32SystemTimerGranularityGrant.
3968 */
3969static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3970{
3971 if (pDevExt->u32SystemTimerGranularityGrant)
3972 {
3973 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
3974 AssertRC(rc2);
3975 pDevExt->u32SystemTimerGranularityGrant = 0;
3976 }
3977}
3978
3979
3980/**
3981 * Maps the GIP into userspace and/or get the physical address of the GIP.
3982 *
3983 * @returns IPRT status code.
3984 * @param pSession Session to which the GIP mapping should belong.
3985 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3986 * @param pHCPhysGip Where to store the physical address. (optional)
3987 *
3988 * @remark There is no reference counting on the mapping, so one call to this function
3989 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3990 * and remove the session as a GIP user.
3991 */
3992SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3993{
3994 int rc;
3995 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3996 RTR3PTR pGipR3 = NIL_RTR3PTR;
3997 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3998 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3999
4000 /*
4001 * Validate
4002 */
4003 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4004 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4005 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4006
4007#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4008 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4009#else
4010 RTSemFastMutexRequest(pDevExt->mtxGip);
4011#endif
4012 if (pDevExt->pGip)
4013 {
4014 /*
4015 * Map it?
4016 */
4017 rc = VINF_SUCCESS;
4018 if (ppGipR3)
4019 {
4020 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4021 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4022 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4023 if (RT_SUCCESS(rc))
4024 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4025 }
4026
4027 /*
4028 * Get physical address.
4029 */
4030 if (pHCPhysGip && RT_SUCCESS(rc))
4031 HCPhys = pDevExt->HCPhysGip;
4032
4033 /*
4034 * Reference globally.
4035 */
4036 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4037 {
4038 pSession->fGipReferenced = 1;
4039 pDevExt->cGipUsers++;
4040 if (pDevExt->cGipUsers == 1)
4041 {
4042 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4043 uint64_t u64NanoTS;
4044
4045 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4046
4047 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4048
4049 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4050 {
4051 unsigned i;
4052 for (i = 0; i < pGipR0->cCpus; i++)
4053 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4054 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4055 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4056 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4057 }
4058
4059 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4060 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4061 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4062 || RTMpGetOnlineCount() == 1)
4063 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4064 else
4065 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4066
4067#ifndef DO_NOT_START_GIP
4068 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4069#endif
4070 rc = VINF_SUCCESS;
4071 }
4072 }
4073 }
4074 else
4075 {
4076 rc = VERR_GENERAL_FAILURE;
4077 Log(("SUPR0GipMap: GIP is not available!\n"));
4078 }
4079#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4080 RTSemMutexRelease(pDevExt->mtxGip);
4081#else
4082 RTSemFastMutexRelease(pDevExt->mtxGip);
4083#endif
4084
4085 /*
4086 * Write returns.
4087 */
4088 if (pHCPhysGip)
4089 *pHCPhysGip = HCPhys;
4090 if (ppGipR3)
4091 *ppGipR3 = pGipR3;
4092
4093#ifdef DEBUG_DARWIN_GIP
4094 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4095#else
4096 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4097#endif
4098 return rc;
4099}
4100
4101
4102/**
4103 * Unmaps any user mapping of the GIP and terminates all GIP access
4104 * from this session.
4105 *
4106 * @returns IPRT status code.
4107 * @param pSession Session to which the GIP mapping should belong.
4108 */
4109SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4110{
4111 int rc = VINF_SUCCESS;
4112 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4113#ifdef DEBUG_DARWIN_GIP
4114 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4115 pSession,
4116 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4117 pSession->GipMapObjR3));
4118#else
4119 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4120#endif
4121 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4122
4123#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4124 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4125#else
4126 RTSemFastMutexRequest(pDevExt->mtxGip);
4127#endif
4128
4129 /*
4130 * Unmap anything?
4131 */
4132 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4133 {
4134 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4135 AssertRC(rc);
4136 if (RT_SUCCESS(rc))
4137 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4138 }
4139
4140 /*
4141 * Dereference global GIP.
4142 */
4143 if (pSession->fGipReferenced && !rc)
4144 {
4145 pSession->fGipReferenced = 0;
4146 if ( pDevExt->cGipUsers > 0
4147 && !--pDevExt->cGipUsers)
4148 {
4149 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4150#ifndef DO_NOT_START_GIP
4151 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4152#endif
4153 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4154 }
4155 }
4156
4157#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4158 RTSemMutexRelease(pDevExt->mtxGip);
4159#else
4160 RTSemFastMutexRelease(pDevExt->mtxGip);
4161#endif
4162
4163 return rc;
4164}
4165
4166
4167/**
4168 * Gets the GIP pointer.
4169 *
4170 * @returns Pointer to the GIP or NULL.
4171 */
4172SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4173{
4174 return g_pSUPGlobalInfoPage;
4175}
4176
4177
4178/**
4179 * Register a component factory with the support driver.
4180 *
4181 * This is currently restricted to kernel sessions only.
4182 *
4183 * @returns VBox status code.
4184 * @retval VINF_SUCCESS on success.
4185 * @retval VERR_NO_MEMORY if we're out of memory.
4186 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4187 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4188 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4189 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4190 *
4191 * @param pSession The SUPDRV session (must be a ring-0 session).
4192 * @param pFactory Pointer to the component factory registration structure.
4193 *
4194 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4195 */
4196SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4197{
4198 PSUPDRVFACTORYREG pNewReg;
4199 const char *psz;
4200 int rc;
4201
4202 /*
4203 * Validate parameters.
4204 */
4205 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4206 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4207 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4208 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4209 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4210 AssertReturn(psz, VERR_INVALID_PARAMETER);
4211
4212 /*
4213 * Allocate and initialize a new registration structure.
4214 */
4215 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4216 if (pNewReg)
4217 {
4218 pNewReg->pNext = NULL;
4219 pNewReg->pFactory = pFactory;
4220 pNewReg->pSession = pSession;
4221 pNewReg->cchName = psz - &pFactory->szName[0];
4222
4223 /*
4224 * Add it to the tail of the list after checking for prior registration.
4225 */
4226 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4227 if (RT_SUCCESS(rc))
4228 {
4229 PSUPDRVFACTORYREG pPrev = NULL;
4230 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4231 while (pCur && pCur->pFactory != pFactory)
4232 {
4233 pPrev = pCur;
4234 pCur = pCur->pNext;
4235 }
4236 if (!pCur)
4237 {
4238 if (pPrev)
4239 pPrev->pNext = pNewReg;
4240 else
4241 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4242 rc = VINF_SUCCESS;
4243 }
4244 else
4245 rc = VERR_ALREADY_EXISTS;
4246
4247 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4248 }
4249
4250 if (RT_FAILURE(rc))
4251 RTMemFree(pNewReg);
4252 }
4253 else
4254 rc = VERR_NO_MEMORY;
4255 return rc;
4256}
4257
4258
4259/**
4260 * Deregister a component factory.
4261 *
4262 * @returns VBox status code.
4263 * @retval VINF_SUCCESS on success.
4264 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4265 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4266 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4267 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4268 *
4269 * @param pSession The SUPDRV session (must be a ring-0 session).
4270 * @param pFactory Pointer to the component factory registration structure
4271 * previously passed SUPR0ComponentRegisterFactory().
4272 *
4273 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4274 */
4275SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4276{
4277 int rc;
4278
4279 /*
4280 * Validate parameters.
4281 */
4282 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4283 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4284 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4285
4286 /*
4287 * Take the lock and look for the registration record.
4288 */
4289 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4290 if (RT_SUCCESS(rc))
4291 {
4292 PSUPDRVFACTORYREG pPrev = NULL;
4293 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4294 while (pCur && pCur->pFactory != pFactory)
4295 {
4296 pPrev = pCur;
4297 pCur = pCur->pNext;
4298 }
4299 if (pCur)
4300 {
4301 if (!pPrev)
4302 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4303 else
4304 pPrev->pNext = pCur->pNext;
4305
4306 pCur->pNext = NULL;
4307 pCur->pFactory = NULL;
4308 pCur->pSession = NULL;
4309 rc = VINF_SUCCESS;
4310 }
4311 else
4312 rc = VERR_NOT_FOUND;
4313
4314 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4315
4316 RTMemFree(pCur);
4317 }
4318 return rc;
4319}
4320
4321
4322/**
4323 * Queries a component factory.
4324 *
4325 * @returns VBox status code.
4326 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4327 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4328 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4329 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4330 *
4331 * @param pSession The SUPDRV session.
4332 * @param pszName The name of the component factory.
4333 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4334 * @param ppvFactoryIf Where to store the factory interface.
4335 */
4336SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4337{
4338 const char *pszEnd;
4339 size_t cchName;
4340 int rc;
4341
4342 /*
4343 * Validate parameters.
4344 */
4345 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4346
4347 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4348 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4349 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4350 cchName = pszEnd - pszName;
4351
4352 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4353 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4354 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4355
4356 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4357 *ppvFactoryIf = NULL;
4358
4359 /*
4360 * Take the lock and try all factories by this name.
4361 */
4362 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4363 if (RT_SUCCESS(rc))
4364 {
4365 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4366 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4367 while (pCur)
4368 {
4369 if ( pCur->cchName == cchName
4370 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4371 {
4372 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4373 if (pvFactory)
4374 {
4375 *ppvFactoryIf = pvFactory;
4376 rc = VINF_SUCCESS;
4377 break;
4378 }
4379 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4380 }
4381
4382 /* next */
4383 pCur = pCur->pNext;
4384 }
4385
4386 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4387 }
4388 return rc;
4389}
4390
4391
4392/**
4393 * Adds a memory object to the session.
4394 *
4395 * @returns IPRT status code.
4396 * @param pMem Memory tracking structure containing the
4397 * information to track.
4398 * @param pSession The session.
4399 */
4400static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4401{
4402 PSUPDRVBUNDLE pBundle;
4403
4404 /*
4405 * Find free entry and record the allocation.
4406 */
4407 RTSpinlockAcquire(pSession->Spinlock);
4408 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4409 {
4410 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4411 {
4412 unsigned i;
4413 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4414 {
4415 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4416 {
4417 pBundle->cUsed++;
4418 pBundle->aMem[i] = *pMem;
4419 RTSpinlockRelease(pSession->Spinlock);
4420 return VINF_SUCCESS;
4421 }
4422 }
4423 AssertFailed(); /* !!this can't be happening!!! */
4424 }
4425 }
4426 RTSpinlockRelease(pSession->Spinlock);
4427
4428 /*
4429 * Need to allocate a new bundle.
4430 * Insert into the last entry in the bundle.
4431 */
4432 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4433 if (!pBundle)
4434 return VERR_NO_MEMORY;
4435
4436 /* take last entry. */
4437 pBundle->cUsed++;
4438 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4439
4440 /* insert into list. */
4441 RTSpinlockAcquire(pSession->Spinlock);
4442 pBundle->pNext = pSession->Bundle.pNext;
4443 pSession->Bundle.pNext = pBundle;
4444 RTSpinlockRelease(pSession->Spinlock);
4445
4446 return VINF_SUCCESS;
4447}
4448
4449
4450/**
4451 * Releases a memory object referenced by pointer and type.
4452 *
4453 * @returns IPRT status code.
4454 * @param pSession Session data.
4455 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4456 * @param eType Memory type.
4457 */
4458static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4459{
4460 PSUPDRVBUNDLE pBundle;
4461
4462 /*
4463 * Validate input.
4464 */
4465 if (!uPtr)
4466 {
4467 Log(("Illegal address %p\n", (void *)uPtr));
4468 return VERR_INVALID_PARAMETER;
4469 }
4470
4471 /*
4472 * Search for the address.
4473 */
4474 RTSpinlockAcquire(pSession->Spinlock);
4475 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4476 {
4477 if (pBundle->cUsed > 0)
4478 {
4479 unsigned i;
4480 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4481 {
4482 if ( pBundle->aMem[i].eType == eType
4483 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4484 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4485 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4486 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4487 )
4488 {
4489 /* Make a copy of it and release it outside the spinlock. */
4490 SUPDRVMEMREF Mem = pBundle->aMem[i];
4491 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4492 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4493 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4494 RTSpinlockRelease(pSession->Spinlock);
4495
4496 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4497 {
4498 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4499 AssertRC(rc); /** @todo figure out how to handle this. */
4500 }
4501 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4502 {
4503 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4504 AssertRC(rc); /** @todo figure out how to handle this. */
4505 }
4506 return VINF_SUCCESS;
4507 }
4508 }
4509 }
4510 }
4511 RTSpinlockRelease(pSession->Spinlock);
4512 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4513 return VERR_INVALID_PARAMETER;
4514}
4515
4516
4517/**
4518 * Opens an image. If it's the first time it's opened the call must upload
4519 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4520 *
4521 * This is the 1st step of the loading.
4522 *
4523 * @returns IPRT status code.
4524 * @param pDevExt Device globals.
4525 * @param pSession Session data.
4526 * @param pReq The open request.
4527 */
4528static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4529{
4530 int rc;
4531 PSUPDRVLDRIMAGE pImage;
4532 void *pv;
4533 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4534 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4535
4536 /*
4537 * Check if we got an instance of the image already.
4538 */
4539 supdrvLdrLock(pDevExt);
4540 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4541 {
4542 if ( pImage->szName[cchName] == '\0'
4543 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4544 {
4545 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4546 {
4547 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4548 pImage->cUsage++;
4549 pReq->u.Out.pvImageBase = pImage->pvImage;
4550 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4551 pReq->u.Out.fNativeLoader = pImage->fNative;
4552 supdrvLdrAddUsage(pSession, pImage);
4553 supdrvLdrUnlock(pDevExt);
4554 return VINF_SUCCESS;
4555 }
4556 supdrvLdrUnlock(pDevExt);
4557 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4558 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4559 }
4560 }
4561 /* (not found - add it!) */
4562
4563 /*
4564 * Allocate memory.
4565 */
4566 Assert(cchName < sizeof(pImage->szName));
4567 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4568 if (!pv)
4569 {
4570 supdrvLdrUnlock(pDevExt);
4571 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4572 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4573 }
4574
4575 /*
4576 * Setup and link in the LDR stuff.
4577 */
4578 pImage = (PSUPDRVLDRIMAGE)pv;
4579 pImage->pvImage = NULL;
4580 pImage->pvImageAlloc = NULL;
4581 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4582 pImage->cbImageBits = pReq->u.In.cbImageBits;
4583 pImage->cSymbols = 0;
4584 pImage->paSymbols = NULL;
4585 pImage->pachStrTab = NULL;
4586 pImage->cbStrTab = 0;
4587 pImage->pfnModuleInit = NULL;
4588 pImage->pfnModuleTerm = NULL;
4589 pImage->pfnServiceReqHandler = NULL;
4590 pImage->uState = SUP_IOCTL_LDR_OPEN;
4591 pImage->cUsage = 1;
4592 pImage->pDevExt = pDevExt;
4593 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4594
4595 /*
4596 * Try load it using the native loader, if that isn't supported, fall back
4597 * on the older method.
4598 */
4599 pImage->fNative = true;
4600 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4601 if (rc == VERR_NOT_SUPPORTED)
4602 {
4603 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4604 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4605 pImage->fNative = false;
4606 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4607 }
4608 if (RT_FAILURE(rc))
4609 {
4610 supdrvLdrUnlock(pDevExt);
4611 RTMemFree(pImage);
4612 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4613 return rc;
4614 }
4615 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4616
4617 /*
4618 * Link it.
4619 */
4620 pImage->pNext = pDevExt->pLdrImages;
4621 pDevExt->pLdrImages = pImage;
4622
4623 supdrvLdrAddUsage(pSession, pImage);
4624
4625 pReq->u.Out.pvImageBase = pImage->pvImage;
4626 pReq->u.Out.fNeedsLoading = true;
4627 pReq->u.Out.fNativeLoader = pImage->fNative;
4628 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4629
4630 supdrvLdrUnlock(pDevExt);
4631 return VINF_SUCCESS;
4632}
4633
4634
4635/**
4636 * Worker that validates a pointer to an image entrypoint.
4637 *
4638 * @returns IPRT status code.
4639 * @param pDevExt The device globals.
4640 * @param pImage The loader image.
4641 * @param pv The pointer into the image.
4642 * @param fMayBeNull Whether it may be NULL.
4643 * @param pszWhat What is this entrypoint? (for logging)
4644 * @param pbImageBits The image bits prepared by ring-3.
4645 *
4646 * @remarks Will leave the lock on failure.
4647 */
4648static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4649 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4650{
4651 if (!fMayBeNull || pv)
4652 {
4653 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4654 {
4655 supdrvLdrUnlock(pDevExt);
4656 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4657 return VERR_INVALID_PARAMETER;
4658 }
4659
4660 if (pImage->fNative)
4661 {
4662 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4663 if (RT_FAILURE(rc))
4664 {
4665 supdrvLdrUnlock(pDevExt);
4666 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4667 return rc;
4668 }
4669 }
4670 }
4671 return VINF_SUCCESS;
4672}
4673
4674
4675/**
4676 * Loads the image bits.
4677 *
4678 * This is the 2nd step of the loading.
4679 *
4680 * @returns IPRT status code.
4681 * @param pDevExt Device globals.
4682 * @param pSession Session data.
4683 * @param pReq The request.
4684 */
4685static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4686{
4687 PSUPDRVLDRUSAGE pUsage;
4688 PSUPDRVLDRIMAGE pImage;
4689 int rc;
4690 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4691
4692 /*
4693 * Find the ldr image.
4694 */
4695 supdrvLdrLock(pDevExt);
4696 pUsage = pSession->pLdrUsage;
4697 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4698 pUsage = pUsage->pNext;
4699 if (!pUsage)
4700 {
4701 supdrvLdrUnlock(pDevExt);
4702 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4703 return VERR_INVALID_HANDLE;
4704 }
4705 pImage = pUsage->pImage;
4706
4707 /*
4708 * Validate input.
4709 */
4710 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4711 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4712 {
4713 supdrvLdrUnlock(pDevExt);
4714 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4715 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4716 return VERR_INVALID_HANDLE;
4717 }
4718
4719 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4720 {
4721 unsigned uState = pImage->uState;
4722 supdrvLdrUnlock(pDevExt);
4723 if (uState != SUP_IOCTL_LDR_LOAD)
4724 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4725 return VERR_ALREADY_LOADED;
4726 }
4727
4728 switch (pReq->u.In.eEPType)
4729 {
4730 case SUPLDRLOADEP_NOTHING:
4731 break;
4732
4733 case SUPLDRLOADEP_VMMR0:
4734 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4735 if (RT_SUCCESS(rc))
4736 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4737 if (RT_SUCCESS(rc))
4738 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4739 if (RT_SUCCESS(rc))
4740 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4741 if (RT_FAILURE(rc))
4742 return rc;
4743 break;
4744
4745 case SUPLDRLOADEP_SERVICE:
4746 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4747 if (RT_FAILURE(rc))
4748 return rc;
4749 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4750 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4751 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4752 {
4753 supdrvLdrUnlock(pDevExt);
4754 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4755 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4756 pReq->u.In.EP.Service.apvReserved[0],
4757 pReq->u.In.EP.Service.apvReserved[1],
4758 pReq->u.In.EP.Service.apvReserved[2]));
4759 return VERR_INVALID_PARAMETER;
4760 }
4761 break;
4762
4763 default:
4764 supdrvLdrUnlock(pDevExt);
4765 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4766 return VERR_INVALID_PARAMETER;
4767 }
4768
4769 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4770 if (RT_FAILURE(rc))
4771 return rc;
4772 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4773 if (RT_FAILURE(rc))
4774 return rc;
4775
4776 /*
4777 * Allocate and copy the tables.
4778 * (No need to do try/except as this is a buffered request.)
4779 */
4780 pImage->cbStrTab = pReq->u.In.cbStrTab;
4781 if (pImage->cbStrTab)
4782 {
4783 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4784 if (pImage->pachStrTab)
4785 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4786 else
4787 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4788 }
4789
4790 pImage->cSymbols = pReq->u.In.cSymbols;
4791 if (RT_SUCCESS(rc) && pImage->cSymbols)
4792 {
4793 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4794 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4795 if (pImage->paSymbols)
4796 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4797 else
4798 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4799 }
4800
4801 /*
4802 * Copy the bits / complete native loading.
4803 */
4804 if (RT_SUCCESS(rc))
4805 {
4806 pImage->uState = SUP_IOCTL_LDR_LOAD;
4807 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4808 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4809
4810 if (pImage->fNative)
4811 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4812 else
4813 {
4814 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4815 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4816 }
4817 }
4818
4819 /*
4820 * Update any entry points.
4821 */
4822 if (RT_SUCCESS(rc))
4823 {
4824 switch (pReq->u.In.eEPType)
4825 {
4826 default:
4827 case SUPLDRLOADEP_NOTHING:
4828 rc = VINF_SUCCESS;
4829 break;
4830 case SUPLDRLOADEP_VMMR0:
4831 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4832 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4833 break;
4834 case SUPLDRLOADEP_SERVICE:
4835 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4836 rc = VINF_SUCCESS;
4837 break;
4838 }
4839 }
4840
4841 /*
4842 * On success call the module initialization.
4843 */
4844 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4845 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4846 {
4847 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4848 pDevExt->pLdrInitImage = pImage;
4849 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4850 rc = pImage->pfnModuleInit(pImage);
4851 pDevExt->pLdrInitImage = NULL;
4852 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4853 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4854 supdrvLdrUnsetVMMR0EPs(pDevExt);
4855 }
4856 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4857
4858 if (RT_FAILURE(rc))
4859 {
4860 /* Inform the tracing component in case ModuleInit registered TPs. */
4861 supdrvTracerModuleUnloading(pDevExt, pImage);
4862
4863 pImage->uState = SUP_IOCTL_LDR_OPEN;
4864 pImage->pfnModuleInit = NULL;
4865 pImage->pfnModuleTerm = NULL;
4866 pImage->pfnServiceReqHandler= NULL;
4867 pImage->cbStrTab = 0;
4868 RTMemFree(pImage->pachStrTab);
4869 pImage->pachStrTab = NULL;
4870 RTMemFree(pImage->paSymbols);
4871 pImage->paSymbols = NULL;
4872 pImage->cSymbols = 0;
4873 }
4874
4875 supdrvLdrUnlock(pDevExt);
4876 return rc;
4877}
4878
4879
4880/**
4881 * Frees a previously loaded (prep'ed) image.
4882 *
4883 * @returns IPRT status code.
4884 * @param pDevExt Device globals.
4885 * @param pSession Session data.
4886 * @param pReq The request.
4887 */
4888static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4889{
4890 int rc;
4891 PSUPDRVLDRUSAGE pUsagePrev;
4892 PSUPDRVLDRUSAGE pUsage;
4893 PSUPDRVLDRIMAGE pImage;
4894 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4895
4896 /*
4897 * Find the ldr image.
4898 */
4899 supdrvLdrLock(pDevExt);
4900 pUsagePrev = NULL;
4901 pUsage = pSession->pLdrUsage;
4902 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4903 {
4904 pUsagePrev = pUsage;
4905 pUsage = pUsage->pNext;
4906 }
4907 if (!pUsage)
4908 {
4909 supdrvLdrUnlock(pDevExt);
4910 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4911 return VERR_INVALID_HANDLE;
4912 }
4913
4914 /*
4915 * Check if we can remove anything.
4916 */
4917 rc = VINF_SUCCESS;
4918 pImage = pUsage->pImage;
4919 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4920 {
4921 /*
4922 * Check if there are any objects with destructors in the image, if
4923 * so leave it for the session cleanup routine so we get a chance to
4924 * clean things up in the right order and not leave them all dangling.
4925 */
4926 RTSpinlockAcquire(pDevExt->Spinlock);
4927 if (pImage->cUsage <= 1)
4928 {
4929 PSUPDRVOBJ pObj;
4930 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4931 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4932 {
4933 rc = VERR_DANGLING_OBJECTS;
4934 break;
4935 }
4936 }
4937 else
4938 {
4939 PSUPDRVUSAGE pGenUsage;
4940 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4941 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4942 {
4943 rc = VERR_DANGLING_OBJECTS;
4944 break;
4945 }
4946 }
4947 RTSpinlockRelease(pDevExt->Spinlock);
4948 if (rc == VINF_SUCCESS)
4949 {
4950 /* unlink it */
4951 if (pUsagePrev)
4952 pUsagePrev->pNext = pUsage->pNext;
4953 else
4954 pSession->pLdrUsage = pUsage->pNext;
4955
4956 /* free it */
4957 pUsage->pImage = NULL;
4958 pUsage->pNext = NULL;
4959 RTMemFree(pUsage);
4960
4961 /*
4962 * Dereference the image.
4963 */
4964 if (pImage->cUsage <= 1)
4965 supdrvLdrFree(pDevExt, pImage);
4966 else
4967 pImage->cUsage--;
4968 }
4969 else
4970 {
4971 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4972 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4973 }
4974 }
4975 else
4976 {
4977 /*
4978 * Dereference both image and usage.
4979 */
4980 pImage->cUsage--;
4981 pUsage->cUsage--;
4982 }
4983
4984 supdrvLdrUnlock(pDevExt);
4985 return rc;
4986}
4987
4988
4989/**
4990 * Gets the address of a symbol in an open image.
4991 *
4992 * @returns IPRT status code.
4993 * @param pDevExt Device globals.
4994 * @param pSession Session data.
4995 * @param pReq The request buffer.
4996 */
4997static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4998{
4999 PSUPDRVLDRIMAGE pImage;
5000 PSUPDRVLDRUSAGE pUsage;
5001 uint32_t i;
5002 PSUPLDRSYM paSyms;
5003 const char *pchStrings;
5004 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5005 void *pvSymbol = NULL;
5006 int rc = VERR_GENERAL_FAILURE;
5007 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5008
5009 /*
5010 * Find the ldr image.
5011 */
5012 supdrvLdrLock(pDevExt);
5013 pUsage = pSession->pLdrUsage;
5014 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5015 pUsage = pUsage->pNext;
5016 if (!pUsage)
5017 {
5018 supdrvLdrUnlock(pDevExt);
5019 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5020 return VERR_INVALID_HANDLE;
5021 }
5022 pImage = pUsage->pImage;
5023 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5024 {
5025 unsigned uState = pImage->uState;
5026 supdrvLdrUnlock(pDevExt);
5027 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5028 return VERR_ALREADY_LOADED;
5029 }
5030
5031 /*
5032 * Search the symbol strings.
5033 *
5034 * Note! The int32_t is for native loading on solaris where the data
5035 * and text segments are in very different places.
5036 */
5037 pchStrings = pImage->pachStrTab;
5038 paSyms = pImage->paSymbols;
5039 for (i = 0; i < pImage->cSymbols; i++)
5040 {
5041 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5042 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5043 {
5044 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5045 rc = VINF_SUCCESS;
5046 break;
5047 }
5048 }
5049 supdrvLdrUnlock(pDevExt);
5050 pReq->u.Out.pvSymbol = pvSymbol;
5051 return rc;
5052}
5053
5054
5055/**
5056 * Gets the address of a symbol in an open image or the support driver.
5057 *
5058 * @returns VINF_SUCCESS on success.
5059 * @returns
5060 * @param pDevExt Device globals.
5061 * @param pSession Session data.
5062 * @param pReq The request buffer.
5063 */
5064static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5065{
5066 int rc = VINF_SUCCESS;
5067 const char *pszSymbol = pReq->u.In.pszSymbol;
5068 const char *pszModule = pReq->u.In.pszModule;
5069 size_t cbSymbol;
5070 char const *pszEnd;
5071 uint32_t i;
5072
5073 /*
5074 * Input validation.
5075 */
5076 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5077 pszEnd = RTStrEnd(pszSymbol, 512);
5078 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5079 cbSymbol = pszEnd - pszSymbol + 1;
5080
5081 if (pszModule)
5082 {
5083 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5084 pszEnd = RTStrEnd(pszModule, 64);
5085 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5086 }
5087 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5088
5089
5090 if ( !pszModule
5091 || !strcmp(pszModule, "SupDrv"))
5092 {
5093 /*
5094 * Search the support driver export table.
5095 */
5096 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5097 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5098 {
5099 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5100 break;
5101 }
5102 }
5103 else
5104 {
5105 /*
5106 * Find the loader image.
5107 */
5108 PSUPDRVLDRIMAGE pImage;
5109
5110 supdrvLdrLock(pDevExt);
5111
5112 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5113 if (!strcmp(pImage->szName, pszModule))
5114 break;
5115 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5116 {
5117 /*
5118 * Search the symbol strings.
5119 */
5120 const char *pchStrings = pImage->pachStrTab;
5121 PCSUPLDRSYM paSyms = pImage->paSymbols;
5122 for (i = 0; i < pImage->cSymbols; i++)
5123 {
5124 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5125 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5126 {
5127 /*
5128 * Found it! Calc the symbol address and add a reference to the module.
5129 */
5130 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5131 rc = supdrvLdrAddUsage(pSession, pImage);
5132 break;
5133 }
5134 }
5135 }
5136 else
5137 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5138
5139 supdrvLdrUnlock(pDevExt);
5140 }
5141 return rc;
5142}
5143
5144
5145/**
5146 * Updates the VMMR0 entry point pointers.
5147 *
5148 * @returns IPRT status code.
5149 * @param pDevExt Device globals.
5150 * @param pSession Session data.
5151 * @param pVMMR0 VMMR0 image handle.
5152 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5153 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5154 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5155 * @remark Caller must own the loader mutex.
5156 */
5157static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5158{
5159 int rc = VINF_SUCCESS;
5160 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5161
5162
5163 /*
5164 * Check if not yet set.
5165 */
5166 if (!pDevExt->pvVMMR0)
5167 {
5168 pDevExt->pvVMMR0 = pvVMMR0;
5169 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5170 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5171 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5172 }
5173 else
5174 {
5175 /*
5176 * Return failure or success depending on whether the values match or not.
5177 */
5178 if ( pDevExt->pvVMMR0 != pvVMMR0
5179 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5180 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5181 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5182 {
5183 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5184 rc = VERR_INVALID_PARAMETER;
5185 }
5186 }
5187 return rc;
5188}
5189
5190
5191/**
5192 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5193 *
5194 * @param pDevExt Device globals.
5195 */
5196static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5197{
5198 pDevExt->pvVMMR0 = NULL;
5199 pDevExt->pfnVMMR0EntryInt = NULL;
5200 pDevExt->pfnVMMR0EntryFast = NULL;
5201 pDevExt->pfnVMMR0EntryEx = NULL;
5202}
5203
5204
5205/**
5206 * Adds a usage reference in the specified session of an image.
5207 *
5208 * Called while owning the loader semaphore.
5209 *
5210 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5211 * @param pSession Session in question.
5212 * @param pImage Image which the session is using.
5213 */
5214static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5215{
5216 PSUPDRVLDRUSAGE pUsage;
5217 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5218
5219 /*
5220 * Referenced it already?
5221 */
5222 pUsage = pSession->pLdrUsage;
5223 while (pUsage)
5224 {
5225 if (pUsage->pImage == pImage)
5226 {
5227 pUsage->cUsage++;
5228 return VINF_SUCCESS;
5229 }
5230 pUsage = pUsage->pNext;
5231 }
5232
5233 /*
5234 * Allocate new usage record.
5235 */
5236 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5237 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5238 pUsage->cUsage = 1;
5239 pUsage->pImage = pImage;
5240 pUsage->pNext = pSession->pLdrUsage;
5241 pSession->pLdrUsage = pUsage;
5242 return VINF_SUCCESS;
5243}
5244
5245
5246/**
5247 * Frees a load image.
5248 *
5249 * @param pDevExt Pointer to device extension.
5250 * @param pImage Pointer to the image we're gonna free.
5251 * This image must exit!
5252 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5253 */
5254static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5255{
5256 PSUPDRVLDRIMAGE pImagePrev;
5257 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5258
5259 /* find it - arg. should've used doubly linked list. */
5260 Assert(pDevExt->pLdrImages);
5261 pImagePrev = NULL;
5262 if (pDevExt->pLdrImages != pImage)
5263 {
5264 pImagePrev = pDevExt->pLdrImages;
5265 while (pImagePrev->pNext != pImage)
5266 pImagePrev = pImagePrev->pNext;
5267 Assert(pImagePrev->pNext == pImage);
5268 }
5269
5270 /* unlink */
5271 if (pImagePrev)
5272 pImagePrev->pNext = pImage->pNext;
5273 else
5274 pDevExt->pLdrImages = pImage->pNext;
5275
5276 /* check if this is VMMR0.r0 unset its entry point pointers. */
5277 if (pDevExt->pvVMMR0 == pImage->pvImage)
5278 supdrvLdrUnsetVMMR0EPs(pDevExt);
5279
5280 /* check for objects with destructors in this image. (Shouldn't happen.) */
5281 if (pDevExt->pObjs)
5282 {
5283 unsigned cObjs = 0;
5284 PSUPDRVOBJ pObj;
5285 RTSpinlockAcquire(pDevExt->Spinlock);
5286 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5287 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5288 {
5289 pObj->pfnDestructor = NULL;
5290 cObjs++;
5291 }
5292 RTSpinlockRelease(pDevExt->Spinlock);
5293 if (cObjs)
5294 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5295 }
5296
5297 /* call termination function if fully loaded. */
5298 if ( pImage->pfnModuleTerm
5299 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5300 {
5301 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5302 pImage->pfnModuleTerm(pImage);
5303 }
5304
5305 /* Inform the tracing component. */
5306 supdrvTracerModuleUnloading(pDevExt, pImage);
5307
5308 /* do native unload if appropriate. */
5309 if (pImage->fNative)
5310 supdrvOSLdrUnload(pDevExt, pImage);
5311
5312 /* free the image */
5313 pImage->cUsage = 0;
5314 pImage->pDevExt = NULL;
5315 pImage->pNext = NULL;
5316 pImage->uState = SUP_IOCTL_LDR_FREE;
5317 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5318 pImage->pvImageAlloc = NULL;
5319 RTMemFree(pImage->pachStrTab);
5320 pImage->pachStrTab = NULL;
5321 RTMemFree(pImage->paSymbols);
5322 pImage->paSymbols = NULL;
5323 RTMemFree(pImage);
5324}
5325
5326
5327/**
5328 * Acquires the loader lock.
5329 *
5330 * @returns IPRT status code.
5331 * @param pDevExt The device extension.
5332 */
5333DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5334{
5335#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5336 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5337#else
5338 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5339#endif
5340 AssertRC(rc);
5341 return rc;
5342}
5343
5344
5345/**
5346 * Releases the loader lock.
5347 *
5348 * @returns IPRT status code.
5349 * @param pDevExt The device extension.
5350 */
5351DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5352{
5353#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5354 return RTSemMutexRelease(pDevExt->mtxLdr);
5355#else
5356 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5357#endif
5358}
5359
5360
5361/**
5362 * Implements the service call request.
5363 *
5364 * @returns VBox status code.
5365 * @param pDevExt The device extension.
5366 * @param pSession The calling session.
5367 * @param pReq The request packet, valid.
5368 */
5369static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5370{
5371#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5372 int rc;
5373
5374 /*
5375 * Find the module first in the module referenced by the calling session.
5376 */
5377 rc = supdrvLdrLock(pDevExt);
5378 if (RT_SUCCESS(rc))
5379 {
5380 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5381 PSUPDRVLDRUSAGE pUsage;
5382
5383 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5384 if ( pUsage->pImage->pfnServiceReqHandler
5385 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5386 {
5387 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5388 break;
5389 }
5390 supdrvLdrUnlock(pDevExt);
5391
5392 if (pfnServiceReqHandler)
5393 {
5394 /*
5395 * Call it.
5396 */
5397 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5398 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5399 else
5400 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5401 }
5402 else
5403 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5404 }
5405
5406 /* log it */
5407 if ( RT_FAILURE(rc)
5408 && rc != VERR_INTERRUPTED
5409 && rc != VERR_TIMEOUT)
5410 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5411 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5412 else
5413 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5414 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5415 return rc;
5416#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5417 return VERR_NOT_IMPLEMENTED;
5418#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5419}
5420
5421
5422/**
5423 * Implements the logger settings request.
5424 *
5425 * @returns VBox status code.
5426 * @param pDevExt The device extension.
5427 * @param pSession The caller's session.
5428 * @param pReq The request.
5429 */
5430static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5431{
5432 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5433 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5434 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5435 PRTLOGGER pLogger = NULL;
5436 int rc;
5437
5438 /*
5439 * Some further validation.
5440 */
5441 switch (pReq->u.In.fWhat)
5442 {
5443 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5444 case SUPLOGGERSETTINGS_WHAT_CREATE:
5445 break;
5446
5447 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5448 if (*pszGroup || *pszFlags || *pszDest)
5449 return VERR_INVALID_PARAMETER;
5450 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5451 return VERR_ACCESS_DENIED;
5452 break;
5453
5454 default:
5455 return VERR_INTERNAL_ERROR;
5456 }
5457
5458 /*
5459 * Get the logger.
5460 */
5461 switch (pReq->u.In.fWhich)
5462 {
5463 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5464 pLogger = RTLogGetDefaultInstance();
5465 break;
5466
5467 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5468 pLogger = RTLogRelDefaultInstance();
5469 break;
5470
5471 default:
5472 return VERR_INTERNAL_ERROR;
5473 }
5474
5475 /*
5476 * Do the job.
5477 */
5478 switch (pReq->u.In.fWhat)
5479 {
5480 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5481 if (pLogger)
5482 {
5483 rc = RTLogFlags(pLogger, pszFlags);
5484 if (RT_SUCCESS(rc))
5485 rc = RTLogGroupSettings(pLogger, pszGroup);
5486 NOREF(pszDest);
5487 }
5488 else
5489 rc = VERR_NOT_FOUND;
5490 break;
5491
5492 case SUPLOGGERSETTINGS_WHAT_CREATE:
5493 {
5494 if (pLogger)
5495 rc = VERR_ALREADY_EXISTS;
5496 else
5497 {
5498 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5499
5500 rc = RTLogCreate(&pLogger,
5501 0 /* fFlags */,
5502 pszGroup,
5503 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5504 ? "VBOX_LOG"
5505 : "VBOX_RELEASE_LOG",
5506 RT_ELEMENTS(s_apszGroups),
5507 s_apszGroups,
5508 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5509 NULL);
5510 if (RT_SUCCESS(rc))
5511 {
5512 rc = RTLogFlags(pLogger, pszFlags);
5513 NOREF(pszDest);
5514 if (RT_SUCCESS(rc))
5515 {
5516 switch (pReq->u.In.fWhich)
5517 {
5518 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5519 pLogger = RTLogSetDefaultInstance(pLogger);
5520 break;
5521 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5522 pLogger = RTLogRelSetDefaultInstance(pLogger);
5523 break;
5524 }
5525 }
5526 RTLogDestroy(pLogger);
5527 }
5528 }
5529 break;
5530 }
5531
5532 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5533 switch (pReq->u.In.fWhich)
5534 {
5535 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5536 pLogger = RTLogSetDefaultInstance(NULL);
5537 break;
5538 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5539 pLogger = RTLogRelSetDefaultInstance(NULL);
5540 break;
5541 }
5542 rc = RTLogDestroy(pLogger);
5543 break;
5544
5545 default:
5546 {
5547 rc = VERR_INTERNAL_ERROR;
5548 break;
5549 }
5550 }
5551
5552 return rc;
5553}
5554
5555
5556/**
5557 * Implements the MSR prober operations.
5558 *
5559 * @returns VBox status code.
5560 * @param pDevExt The device extension.
5561 * @param pReq The request.
5562 */
5563static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5564{
5565#ifdef SUPDRV_WITH_MSR_PROBER
5566 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5567 int rc;
5568
5569 switch (pReq->u.In.enmOp)
5570 {
5571 case SUPMSRPROBEROP_READ:
5572 {
5573 uint64_t uValue;
5574 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5575 if (RT_SUCCESS(rc))
5576 {
5577 pReq->u.Out.uResults.Read.uValue = uValue;
5578 pReq->u.Out.uResults.Read.fGp = false;
5579 }
5580 else if (rc == VERR_ACCESS_DENIED)
5581 {
5582 pReq->u.Out.uResults.Read.uValue = 0;
5583 pReq->u.Out.uResults.Read.fGp = true;
5584 rc = VINF_SUCCESS;
5585 }
5586 break;
5587 }
5588
5589 case SUPMSRPROBEROP_WRITE:
5590 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5591 if (RT_SUCCESS(rc))
5592 pReq->u.Out.uResults.Write.fGp = false;
5593 else if (rc == VERR_ACCESS_DENIED)
5594 {
5595 pReq->u.Out.uResults.Write.fGp = true;
5596 rc = VINF_SUCCESS;
5597 }
5598 break;
5599
5600 case SUPMSRPROBEROP_MODIFY:
5601 case SUPMSRPROBEROP_MODIFY_FASTER:
5602 rc = supdrvOSMsrProberModify(idCpu, pReq);
5603 break;
5604
5605 default:
5606 return VERR_INVALID_FUNCTION;
5607 }
5608 return rc;
5609#else
5610 return VERR_NOT_IMPLEMENTED;
5611#endif
5612}
5613
5614
5615/**
5616 * Returns whether the host CPU sports an invariant TSC or not.
5617 *
5618 * @returns true if invariant TSC is supported, false otherwise.
5619 */
5620static bool supdrvIsInvariantTsc(void)
5621{
5622 static bool s_fQueried = false;
5623 static bool s_fIsInvariantTsc = false;
5624 if (!s_fQueried)
5625 {
5626 uint32_t uEax, uEbx, uEcx, uEdx;
5627 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5628 if (uEax >= 0x80000007)
5629 {
5630 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5631 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5632 s_fIsInvariantTsc = true;
5633 }
5634 s_fQueried = true;
5635 }
5636
5637 return s_fIsInvariantTsc;
5638}
5639
5640
5641#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5642/**
5643 * Switches the TSC-delta measurement thread into the butchered state.
5644 *
5645 * @returns VBox status code.
5646 * @param pDevExt Pointer to the device instance data.
5647 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5648 * @param pszFailed An error message to log.
5649 * @param rcFailed The error code to exit the thread with.
5650 */
5651static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5652{
5653 if (!fSpinlockHeld)
5654 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5655
5656 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5657 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5658 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5659 return rcFailed;
5660}
5661
5662
5663/**
5664 * The TSC-delta measurement thread.
5665 *
5666 * @returns VBox status code.
5667 * @param hThread The thread handle.
5668 * @param pvUser Opaque pointer to the device instance data.
5669 */
5670static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5671{
5672 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5673 static uint32_t cTimesMeasured = 0;
5674 uint32_t cConsecutiveTimeouts = 0;
5675 int rc = VERR_INTERNAL_ERROR_2;
5676 for (;;)
5677 {
5678 /*
5679 * Switch on the current state.
5680 */
5681 SUPDRVTSCDELTASTATE enmState;
5682 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5683 enmState = pDevExt->enmTscDeltaState;
5684 switch (enmState)
5685 {
5686 case kSupDrvTscDeltaState_Creating:
5687 {
5688 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5689 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5690 if (RT_FAILURE(rc))
5691 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5692 /* fall thru */
5693 }
5694
5695 case kSupDrvTscDeltaState_Listening:
5696 {
5697 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5698
5699 /* Simple adaptive timeout. */
5700 if (cConsecutiveTimeouts++ == 10)
5701 {
5702 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5703 pDevExt->cMsTscDeltaTimeout = 10;
5704 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5705 pDevExt->cMsTscDeltaTimeout = 100;
5706 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5707 pDevExt->cMsTscDeltaTimeout = 500;
5708 cConsecutiveTimeouts = 0;
5709 }
5710 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5711 if ( RT_FAILURE(rc)
5712 && rc != VERR_TIMEOUT)
5713 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5714 break;
5715 }
5716
5717 case kSupDrvTscDeltaState_WaitAndMeasure:
5718 {
5719 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5720 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5721 if (RT_FAILURE(rc))
5722 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5723 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5724 pDevExt->cMsTscDeltaTimeout = 1;
5725 RTThreadSleep(10);
5726 /* fall thru */
5727 }
5728
5729 case kSupDrvTscDeltaState_Measuring:
5730 {
5731 cConsecutiveTimeouts = 0;
5732 if (!cTimesMeasured++)
5733 {
5734 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5735 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5736 }
5737 else
5738 {
5739 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5740 unsigned iCpu;
5741
5742 if (cTimesMeasured == UINT32_MAX)
5743 cTimesMeasured = 1;
5744
5745 /* Measure TSC-deltas only for the CPUs that are in the set. */
5746 rc = VINF_SUCCESS;
5747 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5748 {
5749 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5750 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5751 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5752 {
5753 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5754 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5755 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5756 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5757 }
5758 }
5759 }
5760 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5761 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5762 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5763 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5764 pDevExt->rcTscDelta = rc;
5765 break;
5766 }
5767
5768 case kSupDrvTscDeltaState_Terminating:
5769 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5770 return VINF_SUCCESS;
5771
5772 case kSupDrvTscDeltaState_Butchered:
5773 default:
5774 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5775 }
5776 }
5777
5778 return rc;
5779}
5780
5781
5782/**
5783 * Waits for the TSC-delta measurement thread to respond to a state change.
5784 *
5785 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5786 * other error code on internal error.
5787 *
5788 * @param pThis Pointer to the grant service instance data.
5789 * @param enmCurState The current state.
5790 * @param enmNewState The new state we're waiting for it to enter.
5791 */
5792static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5793{
5794 /*
5795 * Wait a short while for the expected state transition.
5796 */
5797 int rc;
5798 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5799 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5800 if (pDevExt->enmTscDeltaState == enmNewState)
5801 {
5802 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5803 rc = VINF_SUCCESS;
5804 }
5805 else if (pDevExt->enmTscDeltaState == enmCurState)
5806 {
5807 /*
5808 * Wait longer if the state has not yet transitioned to the one we want.
5809 */
5810 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5811 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5812 if ( RT_SUCCESS(rc)
5813 || rc == VERR_TIMEOUT)
5814 {
5815 /*
5816 * Check the state whether we've succeeded.
5817 */
5818 SUPDRVTSCDELTASTATE enmState;
5819 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5820 enmState = pDevExt->enmTscDeltaState;
5821 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5822 if (enmState == enmNewState)
5823 rc = VINF_SUCCESS;
5824 else if (enmState == enmCurState)
5825 {
5826 rc = VERR_TIMEOUT;
5827 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5828 enmNewState));
5829 }
5830 else
5831 {
5832 rc = VERR_INTERNAL_ERROR;
5833 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5834 enmState, enmNewState));
5835 }
5836 }
5837 else
5838 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5839 }
5840 else
5841 {
5842 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5843 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5844 rc = VERR_INTERNAL_ERROR;
5845 }
5846
5847 return rc;
5848}
5849
5850
5851/**
5852 * Terminates the TSC-delta measurement thread.
5853 *
5854 * @param pDevExt Pointer to the device instance data.
5855 */
5856static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5857{
5858 int rc;
5859 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5860 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5861 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5862 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5863 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5864 if (RT_FAILURE(rc))
5865 {
5866 /* Signal a few more times before giving up. */
5867 int cTriesLeft = 5;
5868 while (--cTriesLeft > 0)
5869 {
5870 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5871 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5872 if (rc != VERR_TIMEOUT)
5873 break;
5874 }
5875 }
5876}
5877
5878
5879/**
5880 * Initializes and spawns the TSC-delta measurement thread.
5881 *
5882 * A thread is required for servicing re-measurement requests from events like
5883 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5884 * under all contexts on all OSs.
5885 *
5886 * @returns VBox status code.
5887 * @param pDevExt Pointer to the device instance data.
5888 *
5889 * @remarks Must only be called -after- initializing GIP and setting up MP
5890 * notifications!
5891 */
5892static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5893{
5894 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
5895
5896 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5897 if (RT_SUCCESS(rc))
5898 {
5899 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5900 if (RT_SUCCESS(rc))
5901 {
5902 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5903 pDevExt->cMsTscDeltaTimeout = 1;
5904 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5905 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5906 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5907 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5908 if (RT_SUCCESS(rc))
5909 {
5910 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5911 if (RT_SUCCESS(rc))
5912 {
5913 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5914 return rc;
5915 }
5916
5917 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5918 supdrvTscDeltaThreadTerminate(pDevExt);
5919 }
5920 else
5921 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5922 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5923 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5924 }
5925 else
5926 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5927 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5928 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5929 }
5930 else
5931 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5932
5933 return rc;
5934}
5935
5936
5937/**
5938 * Terminates the TSC-delta measurement thread and cleanup.
5939 *
5940 * @param pDevExt Pointer to the device instance data.
5941 */
5942static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5943{
5944 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5945 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5946 {
5947 supdrvTscDeltaThreadTerminate(pDevExt);
5948 }
5949
5950 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5951 {
5952 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5953 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5954 }
5955
5956 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5957 {
5958 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5959 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5960 }
5961
5962 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5963}
5964
5965
5966/**
5967 * Waits for TSC-delta measurements to be completed for all online CPUs.
5968 *
5969 * @returns VBox status code.
5970 * @param pDevExt Pointer to the device instance data.
5971 */
5972static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
5973{
5974 int cTriesLeft = 5;
5975 int cMsTotalWait;
5976 int cMsWaited = 0;
5977 int cMsWaitGranularity = 1;
5978
5979 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5980 AssertReturn(pGip, VERR_INVALID_POINTER);
5981
5982 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
5983 while (cTriesLeft-- > 0)
5984 {
5985 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
5986 return VINF_SUCCESS;
5987 RTThreadSleep(cMsWaitGranularity);
5988 cMsWaited += cMsWaitGranularity;
5989 if (cMsWaited >= cMsTotalWait)
5990 break;
5991 }
5992
5993 return VERR_TIMEOUT;
5994}
5995#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5996
5997
5998/**
5999 * Measures the TSC frequency of the system.
6000 *
6001 * Uses a busy-wait method for the async. case as it is intended to help push
6002 * the CPU frequency up, while for the invariant cases using a sleeping method.
6003 *
6004 * The TSC frequency can vary on systems which are not reported as invariant.
6005 * On such systems the object of this function is to find out what the nominal,
6006 * maximum TSC frequency under 'normal' CPU operation.
6007 *
6008 * @returns VBox status code.
6009 * @param pDevExt Pointer to the device instance.
6010 *
6011 * @remarks Must be called only -after- measuring the TSC deltas.
6012 */
6013static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6014{
6015 int cTriesLeft = 4;
6016 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6017
6018 /* Assert order. */
6019 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6020 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6021
6022 while (cTriesLeft-- > 0)
6023 {
6024 RTCCUINTREG uFlags;
6025 uint64_t u64NanoTsBefore;
6026 uint64_t u64NanoTsAfter;
6027 uint64_t u64TscBefore;
6028 uint64_t u64TscAfter;
6029 uint8_t idApicBefore;
6030 uint8_t idApicAfter;
6031
6032 /*
6033 * Synchronize with the host OS clock tick before reading the TSC.
6034 * Especially important on older Windows version where the granularity is terrible.
6035 */
6036 u64NanoTsBefore = RTTimeSystemNanoTS();
6037 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6038 ASMNopPause();
6039
6040 uFlags = ASMIntDisableFlags();
6041 idApicBefore = ASMGetApicId();
6042 u64TscBefore = ASMReadTSC();
6043 u64NanoTsBefore = RTTimeSystemNanoTS();
6044 ASMSetFlags(uFlags);
6045
6046 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6047 {
6048 /*
6049 * Sleep-wait since the TSC frequency is constant, it eases host load.
6050 * Shorter interval produces more variance in the frequency (esp. Windows).
6051 */
6052 RTThreadSleep(200);
6053 u64NanoTsAfter = RTTimeSystemNanoTS();
6054 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6055 ASMNopPause();
6056 u64NanoTsAfter = RTTimeSystemNanoTS();
6057 }
6058 else
6059 {
6060 /* Busy-wait keeping the frequency up and measure. */
6061 for (;;)
6062 {
6063 u64NanoTsAfter = RTTimeSystemNanoTS();
6064 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6065 ASMNopPause();
6066 else
6067 break;
6068 }
6069 }
6070
6071 uFlags = ASMIntDisableFlags();
6072 idApicAfter = ASMGetApicId();
6073 u64TscAfter = ASMReadTSC();
6074 ASMSetFlags(uFlags);
6075
6076 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6077 {
6078 int rc;
6079 bool fAppliedBefore;
6080 bool fAppliedAfter;
6081 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6082 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6083
6084 if ( !fAppliedBefore
6085 || !fAppliedAfter)
6086 {
6087#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6088 /*
6089 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6090 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6091 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6092 * proceed. This should be triggered just once if we're rather unlucky.
6093 */
6094 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6095 if (rc == VERR_TIMEOUT)
6096 {
6097 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6098 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6099 }
6100#else
6101 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6102 idApicBefore, idApicAfter, cTriesLeft);
6103#endif
6104 continue;
6105 }
6106 }
6107
6108 /*
6109 * Update GIP.
6110 */
6111 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6112 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6113 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6114 return VINF_SUCCESS;
6115 }
6116
6117 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6118}
6119
6120
6121/**
6122 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6123 *
6124 * @param pTimer The timer.
6125 * @param pvUser Opaque pointer to the device instance data.
6126 * @param iTick The timer tick.
6127 */
6128static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6129{
6130 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6131 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6132 bool fDeltaApplied = false;
6133 uint8_t idApic;
6134 uint64_t u64DeltaNanoTS;
6135 uint64_t u64DeltaTsc;
6136 uint64_t u64NanoTS;
6137 uint64_t u64Tsc;
6138 RTCCUINTREG uFlags;
6139
6140 /* Paranoia. */
6141 Assert(pGip);
6142 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6143
6144#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6145 u64NanoTS = RTTimeSystemNanoTS();
6146 while (RTTimeSystemNanoTS() == u64NanoTS)
6147 ASMNopPause();
6148#endif
6149 uFlags = ASMIntDisableFlags();
6150 idApic = ASMGetApicId();
6151 u64Tsc = ASMReadTSC();
6152 u64NanoTS = RTTimeSystemNanoTS();
6153 ASMSetFlags(uFlags);
6154 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6155 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6156 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6157
6158 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6159 && !fDeltaApplied)
6160 {
6161 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6162 GIP_TSC_REFINE_INTERVAL);
6163 return;
6164 }
6165
6166 /* Calculate the TSC frequency. */
6167 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6168 && u64DeltaNanoTS < UINT32_MAX)
6169 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6170 else
6171 {
6172 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6173 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6174 && u64DeltaNanoTS / 10 < UINT32_MAX)
6175 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, (uint32_t)(u64DeltaNanoTS / 10));
6176 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6177 && u64DeltaNanoTS / 100 < UINT32_MAX)
6178 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, (uint32_t)(u64DeltaNanoTS / 100));
6179 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6180 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6181 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, (uint32_t)(u64DeltaNanoTS / 1000));
6182 else /* Screw it. */
6183 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6184 }
6185
6186 /* Update rest of GIP. */
6187 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6188 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6189}
6190
6191
6192/**
6193 * Starts the TSC-frequency refinement phase asynchronously.
6194 *
6195 * @param pDevExt Pointer to the device instance data.
6196 */
6197static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6198{
6199 uint64_t u64NanoTS;
6200 RTCCUINTREG uFlags;
6201 uint8_t idApic;
6202 int rc;
6203 bool fDeltaApplied = false;
6204 PSUPGLOBALINFOPAGE pGip;
6205
6206 /* Validate. */
6207 Assert(pDevExt);
6208 Assert(pDevExt->pGip);
6209
6210 pGip = pDevExt->pGip;
6211 u64NanoTS = RTTimeSystemNanoTS();
6212 while (RTTimeSystemNanoTS() == u64NanoTS)
6213 ASMNopPause();
6214 uFlags = ASMIntDisableFlags();
6215 idApic = ASMGetApicId();
6216 pDevExt->u64TscAnchor = ASMReadTSC();
6217 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6218 ASMSetFlags(uFlags);
6219 SUPTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, &fDeltaApplied);
6220
6221#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6222 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6223 && !fDeltaApplied)
6224 {
6225 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6226 if (rc == VERR_TIMEOUT)
6227 {
6228 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6229 return;
6230 }
6231 }
6232#endif
6233
6234 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6235 if (RT_SUCCESS(rc))
6236 {
6237 /*
6238 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6239 * interval as small as possible while gaining the most consistent and accurate frequency
6240 * (compared to what the host OS might have measured).
6241 *
6242 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6243 * same TSC frequency whenever possible so we need to keep the interval short.
6244 */
6245 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6246 AssertRC(rc);
6247 }
6248 else
6249 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6250}
6251
6252
6253/**
6254 * Creates the GIP.
6255 *
6256 * @returns VBox status code.
6257 * @param pDevExt Instance data. GIP stuff may be updated.
6258 */
6259static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6260{
6261 PSUPGLOBALINFOPAGE pGip;
6262 RTHCPHYS HCPhysGip;
6263 uint32_t u32SystemResolution;
6264 uint32_t u32Interval;
6265 uint32_t u32MinInterval;
6266 uint32_t uMod;
6267 unsigned cCpus;
6268 int rc;
6269
6270 LogFlow(("supdrvGipCreate:\n"));
6271
6272 /* Assert order. */
6273 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6274 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6275 Assert(!pDevExt->pGipTimer);
6276
6277 /*
6278 * Check the CPU count.
6279 */
6280 cCpus = RTMpGetArraySize();
6281 if ( cCpus > RTCPUSET_MAX_CPUS
6282 || cCpus > 256 /* ApicId is used for the mappings */)
6283 {
6284 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6285 return VERR_TOO_MANY_CPUS;
6286 }
6287
6288 /*
6289 * Allocate a contiguous set of pages with a default kernel mapping.
6290 */
6291 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6292 if (RT_FAILURE(rc))
6293 {
6294 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6295 return rc;
6296 }
6297 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6298 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6299
6300 /*
6301 * Allocate the TSC-delta sync struct on a separate cache line.
6302 */
6303 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6304 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6305 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6306
6307 /*
6308 * Find a reasonable update interval and initialize the structure.
6309 */
6310 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6311 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6312 * See @bugref{6710}. */
6313 u32MinInterval = RT_NS_10MS;
6314 u32SystemResolution = RTTimerGetSystemGranularity();
6315 u32Interval = u32MinInterval;
6316 uMod = u32MinInterval % u32SystemResolution;
6317 if (uMod)
6318 u32Interval += u32SystemResolution - uMod;
6319
6320 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6321
6322 if (RT_UNLIKELY( pDevExt->fOsTscDeltasInSync
6323 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6324 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6325 {
6326 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6327 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6328 return VERR_INTERNAL_ERROR_2;
6329 }
6330
6331#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6332 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6333 {
6334 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6335 rc = supdrvTscDeltaThreadInit(pDevExt);
6336 }
6337#endif
6338 if (RT_SUCCESS(rc))
6339 {
6340 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6341 if (RT_SUCCESS(rc))
6342 {
6343 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6344 if (RT_SUCCESS(rc))
6345 {
6346 uint16_t iCpu;
6347#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6348 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6349 {
6350 /*
6351 * Measure the TSC deltas now that we have MP notifications.
6352 */
6353 int cTries = 5;
6354 do
6355 {
6356 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6357 if (rc != VERR_TRY_AGAIN)
6358 break;
6359 } while (--cTries > 0);
6360 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6361 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6362 }
6363 else
6364 {
6365 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6366 Assert(!pGip->aCPUs[iCpu].i64TSCDelta);
6367 }
6368#endif
6369 if (RT_SUCCESS(rc))
6370 {
6371 rc = supdrvGipMeasureTscFreq(pDevExt);
6372 if (RT_SUCCESS(rc))
6373 {
6374 /*
6375 * Create the timer.
6376 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6377 */
6378 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6379 {
6380 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6381 pDevExt);
6382 if (rc == VERR_NOT_SUPPORTED)
6383 {
6384 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6385 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6386 }
6387 }
6388 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6389 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6390 if (RT_SUCCESS(rc))
6391 {
6392 /*
6393 * We're good.
6394 */
6395 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6396 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6397
6398 g_pSUPGlobalInfoPage = pGip;
6399 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6400 supdrvRefineTscFreq(pDevExt);
6401 return VINF_SUCCESS;
6402 }
6403
6404 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6405 Assert(!pDevExt->pGipTimer);
6406 }
6407 else
6408 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6409 }
6410 else
6411 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6412 }
6413 else
6414 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6415 }
6416 else
6417 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6418 }
6419 else
6420 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6421
6422 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6423 return rc;
6424}
6425
6426
6427/**
6428 * Terminates the GIP.
6429 *
6430 * @param pDevExt Instance data. GIP stuff may be updated.
6431 */
6432static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6433{
6434 int rc;
6435#ifdef DEBUG_DARWIN_GIP
6436 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6437 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6438 pDevExt->pGipTimer, pDevExt->GipMemObj));
6439#endif
6440
6441 /*
6442 * Stop receiving MP notifications before tearing anything else down.
6443 */
6444 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6445
6446#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6447 /*
6448 * Terminate the TSC-delta measurement thread and resources.
6449 */
6450 supdrvTscDeltaTerm(pDevExt);
6451#endif
6452
6453 /*
6454 * Destroy the TSC-refinement one-shot timer.
6455 */
6456 if (pDevExt->pTscRefineTimer)
6457 {
6458 RTTimerDestroy(pDevExt->pTscRefineTimer);
6459 pDevExt->pTscRefineTimer = NULL;
6460 }
6461
6462 if (pDevExt->pvTscDeltaSync)
6463 {
6464 RTMemFree(pDevExt->pvTscDeltaSync);
6465 pDevExt->pTscDeltaSync = NULL;
6466 pDevExt->pvTscDeltaSync = NULL;
6467 }
6468
6469 /*
6470 * Invalid the GIP data.
6471 */
6472 if (pDevExt->pGip)
6473 {
6474 supdrvGipTerm(pDevExt->pGip);
6475 pDevExt->pGip = NULL;
6476 }
6477 g_pSUPGlobalInfoPage = NULL;
6478
6479 /*
6480 * Destroy the timer and free the GIP memory object.
6481 */
6482 if (pDevExt->pGipTimer)
6483 {
6484 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6485 pDevExt->pGipTimer = NULL;
6486 }
6487
6488 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6489 {
6490 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6491 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6492 }
6493
6494 /*
6495 * Finally, make sure we've release the system timer resolution request
6496 * if one actually succeeded and is still pending.
6497 */
6498 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6499}
6500
6501
6502/**
6503 * Timer callback function sync GIP mode.
6504 * @param pTimer The timer.
6505 * @param pvUser Opaque pointer to the device extension.
6506 * @param iTick The timer tick.
6507 */
6508static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6509{
6510 RTCCUINTREG uFlags;
6511 uint64_t u64TSC;
6512 uint64_t u64NanoTS;
6513 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6514 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6515
6516 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6517 u64TSC = ASMReadTSC();
6518 u64NanoTS = RTTimeSystemNanoTS();
6519
6520 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6521 {
6522 /*
6523 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6524 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6525 * affected a bit until we get proper TSC deltas than implementing options like
6526 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6527 *
6528 * The likely hood of this happening is really low. On Windows, Linux timers
6529 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6530 */
6531 Assert(!ASMIntAreEnabled());
6532 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6533 }
6534
6535 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6536
6537 ASMSetFlags(uFlags);
6538}
6539
6540
6541/**
6542 * Timer callback function for async GIP mode.
6543 * @param pTimer The timer.
6544 * @param pvUser Opaque pointer to the device extension.
6545 * @param iTick The timer tick.
6546 */
6547static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6548{
6549 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6550 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6551 RTCPUID idCpu = RTMpCpuId();
6552 uint64_t u64TSC = ASMReadTSC();
6553 uint64_t NanoTS = RTTimeSystemNanoTS();
6554
6555 /** @todo reset the transaction number and whatnot when iTick == 1. */
6556 if (pDevExt->idGipMaster == idCpu)
6557 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6558 else
6559 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6560
6561 ASMSetFlags(fOldFlags);
6562}
6563
6564
6565/**
6566 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6567 *
6568 * @returns Index of the CPU in the cache set.
6569 * @param pGip The GIP.
6570 * @param idCpu The CPU ID.
6571 */
6572static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6573{
6574 uint32_t i, cTries;
6575
6576 /*
6577 * ASSUMES that CPU IDs are constant.
6578 */
6579 for (i = 0; i < pGip->cCpus; i++)
6580 if (pGip->aCPUs[i].idCpu == idCpu)
6581 return i;
6582
6583 cTries = 0;
6584 do
6585 {
6586 for (i = 0; i < pGip->cCpus; i++)
6587 {
6588 bool fRc;
6589 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6590 if (fRc)
6591 return i;
6592 }
6593 } while (cTries++ < 32);
6594 AssertReleaseFailed();
6595 return i - 1;
6596}
6597
6598
6599/**
6600 * The calling CPU should be accounted as online, update GIP accordingly.
6601 *
6602 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6603 *
6604 * @param pDevExt The device extension.
6605 * @param idCpu The CPU ID.
6606 */
6607static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6608{
6609 int iCpuSet = 0;
6610 uint16_t idApic = UINT16_MAX;
6611 uint32_t i = 0;
6612 uint64_t u64NanoTS = 0;
6613 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6614
6615 AssertPtrReturnVoid(pGip);
6616 AssertRelease(idCpu == RTMpCpuId());
6617 Assert(pGip->cPossibleCpus == RTMpGetCount());
6618
6619 /*
6620 * Do this behind a spinlock with interrupts disabled as this can fire
6621 * on all CPUs simultaneously, see @bugref{6110}.
6622 */
6623 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6624
6625 /*
6626 * Update the globals.
6627 */
6628 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6629 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6630 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6631 if (iCpuSet >= 0)
6632 {
6633 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6634 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6635 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6636 }
6637
6638 /*
6639 * Update the entry.
6640 */
6641 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6642 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6643 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
6644 idApic = ASMGetApicId();
6645 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6646 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6647 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6648
6649 /*
6650 * Update the APIC ID and CPU set index mappings.
6651 */
6652 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6653 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6654
6655 /* Update the Mp online/offline counter. */
6656 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6657
6658#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6659 /*
6660 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6661 *
6662 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6663 * update the state and it'll get serviced when the thread's listening interval times out.
6664 */
6665 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6666 {
6667 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6668 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6669 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6670 {
6671 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6672 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6673 }
6674 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6675 }
6676#endif
6677
6678 /* commit it */
6679 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6680
6681 RTSpinlockRelease(pDevExt->hGipSpinlock);
6682}
6683
6684
6685/**
6686 * The CPU should be accounted as offline, update the GIP accordingly.
6687 *
6688 * This is used by supdrvGipMpEvent.
6689 *
6690 * @param pDevExt The device extension.
6691 * @param idCpu The CPU ID.
6692 */
6693static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6694{
6695 int iCpuSet;
6696 unsigned i;
6697
6698 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6699
6700 AssertPtrReturnVoid(pGip);
6701 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6702
6703 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6704 AssertReturnVoid(iCpuSet >= 0);
6705
6706 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6707 AssertReturnVoid(i < pGip->cCpus);
6708 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6709
6710 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6711 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6712
6713 /* Update the Mp online/offline counter. */
6714 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6715
6716 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6717 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
6718 {
6719 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6720 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6721 }
6722
6723 /* Reset the TSC delta, we will recalculate it lazily. */
6724 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
6725 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6726
6727#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6728 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6729 if (supdrvIsInvariantTsc())
6730 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6731#endif
6732
6733 /* commit it */
6734 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6735
6736 RTSpinlockRelease(pDevExt->hGipSpinlock);
6737}
6738
6739
6740/**
6741 * Multiprocessor event notification callback.
6742 *
6743 * This is used to make sure that the GIP master gets passed on to
6744 * another CPU. It also updates the associated CPU data.
6745 *
6746 * @param enmEvent The event.
6747 * @param idCpu The cpu it applies to.
6748 * @param pvUser Pointer to the device extension.
6749 *
6750 * @remarks This function -must- fire on the newly online'd CPU for the
6751 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6752 * RTMPEVENT_OFFLINE case.
6753 */
6754static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6755{
6756 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6757 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6758
6759 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6760
6761 /*
6762 * Update the GIP CPU data.
6763 */
6764 if (pGip)
6765 {
6766 switch (enmEvent)
6767 {
6768 case RTMPEVENT_ONLINE:
6769 AssertRelease(idCpu == RTMpCpuId());
6770 supdrvGipMpEventOnline(pDevExt, idCpu);
6771 break;
6772 case RTMPEVENT_OFFLINE:
6773 supdrvGipMpEventOffline(pDevExt, idCpu);
6774 break;
6775 }
6776 }
6777
6778 /*
6779 * Make sure there is a master GIP.
6780 */
6781 if (enmEvent == RTMPEVENT_OFFLINE)
6782 {
6783 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6784 if (idGipMaster == idCpu)
6785 {
6786 /*
6787 * Find a new GIP master.
6788 */
6789 bool fIgnored;
6790 unsigned i;
6791 int64_t iTSCDelta;
6792 uint32_t idxNewGipMaster;
6793 RTCPUID idNewGipMaster = NIL_RTCPUID;
6794 RTCPUSET OnlineCpus;
6795 RTMpGetOnlineSet(&OnlineCpus);
6796
6797 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6798 {
6799 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6800 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6801 && idCurCpu != idGipMaster)
6802 {
6803 idNewGipMaster = idCurCpu;
6804 break;
6805 }
6806 }
6807
6808 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6809 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6810 NOREF(fIgnored);
6811
6812 /*
6813 * Adjust all the TSC deltas against the new GIP master.
6814 */
6815 if (pGip)
6816 {
6817 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6818 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6819 Assert(iTSCDelta != INT64_MAX);
6820 for (i = 0; i < pGip->cCpus; i++)
6821 {
6822 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6823 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6824 if (iWorkerDelta != INT64_MAX)
6825 iWorkerDelta -= iTSCDelta;
6826 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6827 }
6828 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6829 }
6830 }
6831 }
6832}
6833
6834
6835/**
6836 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6837 * compute the delta between them.
6838 *
6839 * @param idCpu The CPU we are current scheduled on.
6840 * @param pvUser1 Opaque pointer to the device instance data.
6841 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6842 *
6843 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6844 * read the TSC at exactly the same time on both the master and the worker
6845 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6846 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6847 * try to minimize the measurement error by computing the minimum read time
6848 * of the compare statement in the worker by taking TSC measurements across
6849 * it.
6850 *
6851 * We ignore the first few runs of the loop in order to prime the cache.
6852 * Also, be careful about using 'pause' instruction in critical busy-wait
6853 * loops in this code - it can cause undesired behaviour with
6854 * hyperthreading.
6855 *
6856 * It must be noted that the computed minimum read time is mostly to
6857 * eliminate huge deltas when the worker is too early and doesn't by itself
6858 * help produce more accurate deltas. We allow two times the computed
6859 * minimum as an arbibtrary acceptable threshold. Therefore, it is still
6860 * possible to get negative deltas where there are none when the worker is
6861 * earlier. As long as these occasional negative deltas are lower than the
6862 * time it takes to exit guest-context and the OS to reschedule EMT on a
6863 * different CPU we won't expose a TSC that jumped backwards. It is because
6864 * of the existence of the negative deltas we don't recompute the delta with
6865 * the master and worker interchanged to eliminate the remaining measurement
6866 * error.
6867 */
6868static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6869{
6870 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
6871 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6872 uint32_t *pidWorker = (uint32_t *)pvUser2;
6873 RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
6874 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6875 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6876 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6877 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6878 int cTriesLeft = 12;
6879
6880 if ( idCpu != idMaster
6881 && idCpu != *pidWorker)
6882 return;
6883
6884 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6885 with a timeout to avoid deadlocking the entire system. */
6886 if (!RTMpOnAllIsConcurrentSafe())
6887 {
6888 /** @todo This was introduced for Windows, but since Windows doesn't use this
6889 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6890 * see @bugref{6710} comment 81), eventually phase it out. */
6891 uint64_t uTscNow;
6892 uint64_t uTscStart;
6893 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6894
6895 ASMSerializeInstruction();
6896 uTscStart = ASMReadTSC();
6897 if (idCpu == idMaster)
6898 {
6899 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6900 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6901 {
6902 ASMSerializeInstruction();
6903 uTscNow = ASMReadTSC();
6904 if (uTscNow - uTscStart > cWaitTicks)
6905 {
6906 /* Set the worker delta to indicate failure, not the master. */
6907 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6908 return;
6909 }
6910
6911 ASMNopPause();
6912 }
6913 }
6914 else
6915 {
6916 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6917 {
6918 ASMSerializeInstruction();
6919 uTscNow = ASMReadTSC();
6920 if (uTscNow - uTscStart > cWaitTicks)
6921 {
6922 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6923 return;
6924 }
6925
6926 ASMNopPause();
6927 }
6928 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6929 }
6930 }
6931
6932 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6933 while (cTriesLeft-- > 0)
6934 {
6935 unsigned i;
6936 uint64_t uMinCmpReadTime = UINT64_MAX;
6937 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6938 {
6939 if (idCpu == idMaster)
6940 {
6941 /*
6942 * The master.
6943 */
6944 RTCCUINTREG uFlags;
6945 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6946 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6947
6948 /* Disable interrupts only in the master for as short a period
6949 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6950 uFlags = ASMIntDisableFlags();
6951
6952 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6953 ;
6954
6955 do
6956 {
6957 ASMSerializeInstruction();
6958 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6959 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6960
6961 ASMSetFlags(uFlags);
6962
6963 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6964 ;
6965
6966 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6967 {
6968 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6969 {
6970 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6971 if (iDelta < pGipCpuWorker->i64TSCDelta)
6972 pGipCpuWorker->i64TSCDelta = iDelta;
6973 }
6974 }
6975
6976 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6977 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6978 }
6979 else
6980 {
6981 /*
6982 * The worker.
6983 */
6984 uint64_t uTscWorker;
6985 uint64_t uTscWorkerFlushed;
6986 uint64_t uCmpReadTime;
6987
6988 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
6989 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
6990 ;
6991 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6992 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
6993
6994 /*
6995 * Keep reading the TSC until we notice that the master has read his. Reading
6996 * the TSC -after- the master has updated the memory is way too late. We thus
6997 * compensate by trying to measure how long it took for the worker to notice
6998 * the memory flushed from the master.
6999 */
7000 do
7001 {
7002 ASMSerializeInstruction();
7003 uTscWorker = ASMReadTSC();
7004 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7005 ASMSerializeInstruction();
7006 uTscWorkerFlushed = ASMReadTSC();
7007
7008 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7009 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7010 {
7011 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
7012 if (uCmpReadTime < (uMinCmpReadTime << 1))
7013 {
7014 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7015 if (uCmpReadTime < uMinCmpReadTime)
7016 uMinCmpReadTime = uCmpReadTime;
7017 }
7018 else
7019 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7020 }
7021 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7022 {
7023 if (uCmpReadTime < uMinCmpReadTime)
7024 uMinCmpReadTime = uCmpReadTime;
7025 }
7026
7027 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7028 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7029 ASMNopPause();
7030 }
7031 }
7032
7033 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7034 break;
7035 }
7036}
7037
7038
7039/**
7040 * Clears TSC delta related variables.
7041 *
7042 * Clears all TSC samples as well as the delta synchronization variable on the
7043 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
7044 *
7045 * @param pDevExt Pointer to the device instance data.
7046 * @param fClearDeltas Whether the deltas are also to be cleared.
7047 */
7048DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7049{
7050 unsigned iCpu;
7051 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7052 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7053 {
7054 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7055 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7056 if (fClearDeltas)
7057 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7058 }
7059 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7060}
7061
7062
7063/**
7064 * Measures the TSC delta between the master GIP CPU and one specified worker
7065 * CPU.
7066 *
7067 * @returns VBox status code.
7068 * @param pDevExt Pointer to the device instance data.
7069 * @param idxWorker The index of the worker CPU from the GIP's array of
7070 * CPUs.
7071 *
7072 * @remarks This can be called with preemption disabled!
7073 */
7074static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7075{
7076 int rc;
7077 PSUPGLOBALINFOPAGE pGip;
7078 PSUPGIPCPU pGipCpuWorker;
7079 RTCPUID idMaster;
7080
7081 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7082 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7083
7084 pGip = pDevExt->pGip;
7085 idMaster = pDevExt->idGipMaster;
7086 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7087
7088 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7089
7090 if (pGipCpuWorker->idCpu == idMaster)
7091 {
7092 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7093 return VINF_SUCCESS;
7094 }
7095
7096 /* Set the master TSC as the initiator. */
7097 while (ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7098 {
7099 /*
7100 * Sleep here rather than spin as there is a parallel measurement
7101 * being executed and that can take a good while to be done.
7102 */
7103 RTThreadSleep(1);
7104 }
7105
7106 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7107 {
7108 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7109 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7110 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7111 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7112 if (RT_SUCCESS(rc))
7113 {
7114 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7115 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7116 }
7117 }
7118 else
7119 rc = VERR_CPU_OFFLINE;
7120
7121 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7122 return rc;
7123}
7124
7125
7126/**
7127 * Measures the TSC deltas between CPUs.
7128 *
7129 * @param pDevExt Pointer to the device instance data.
7130 * @param pidxMaster Where to store the index of the chosen master TSC if we
7131 * managed to determine the TSC deltas successfully.
7132 * Optional, can be NULL.
7133 *
7134 * @returns VBox status code.
7135 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7136 * idCpu, GIP's online CPU set which are populated in
7137 * supdrvGipInitOnCpu().
7138 */
7139static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7140{
7141 PSUPGIPCPU pGipCpuMaster;
7142 unsigned iCpu;
7143 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7144 uint32_t idxMaster = UINT32_MAX;
7145 int rc = VINF_SUCCESS;
7146 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7147 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7148
7149 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7150
7151 /*
7152 * Pick the first CPU online as the master TSC and make it the new GIP master based
7153 * on the APIC ID.
7154 *
7155 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7156 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7157 * master as this point since the sync/async timer isn't created yet.
7158 */
7159 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7160 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7161 {
7162 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7163 if (idxCpu != UINT16_MAX)
7164 {
7165 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7166 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7167 {
7168 idxMaster = idxCpu;
7169 pGipCpu->i64TSCDelta = 0;
7170 break;
7171 }
7172 }
7173 }
7174 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7175 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7176 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7177
7178 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7179 if (pGip->cOnlineCpus <= 1)
7180 {
7181 if (pidxMaster)
7182 *pidxMaster = idxMaster;
7183 return VINF_SUCCESS;
7184 }
7185
7186 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7187 {
7188 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7189 if ( iCpu != idxMaster
7190 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7191 {
7192 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7193 if (RT_FAILURE(rc))
7194 {
7195 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7196 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7197 break;
7198 }
7199
7200 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7201 {
7202 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7203 rc = VERR_TRY_AGAIN;
7204 break;
7205 }
7206 }
7207 }
7208
7209 if ( RT_SUCCESS(rc)
7210 && !pGipCpuMaster->i64TSCDelta
7211 && pidxMaster)
7212 {
7213 *pidxMaster = idxMaster;
7214 }
7215 return rc;
7216}
7217
7218
7219/**
7220 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7221 *
7222 * @param idCpu Ignored.
7223 * @param pvUser1 Where to put the TSC.
7224 * @param pvUser2 Ignored.
7225 */
7226static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7227{
7228 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7229}
7230
7231
7232/**
7233 * Determine if Async GIP mode is required because of TSC drift.
7234 *
7235 * When using the default/normal timer code it is essential that the time stamp counter
7236 * (TSC) runs never backwards, that is, a read operation to the counter should return
7237 * a bigger value than any previous read operation. This is guaranteed by the latest
7238 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7239 * case we have to choose the asynchronous timer mode.
7240 *
7241 * @param poffMin Pointer to the determined difference between different
7242 * cores (optional, can be NULL).
7243 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7244 */
7245static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7246{
7247 /*
7248 * Just iterate all the cpus 8 times and make sure that the TSC is
7249 * ever increasing. We don't bother taking TSC rollover into account.
7250 */
7251 int iEndCpu = RTMpGetArraySize();
7252 int iCpu;
7253 int cLoops = 8;
7254 bool fAsync = false;
7255 int rc = VINF_SUCCESS;
7256 uint64_t offMax = 0;
7257 uint64_t offMin = ~(uint64_t)0;
7258 uint64_t PrevTsc = ASMReadTSC();
7259
7260 while (cLoops-- > 0)
7261 {
7262 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7263 {
7264 uint64_t CurTsc;
7265 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7266 if (RT_SUCCESS(rc))
7267 {
7268 if (CurTsc <= PrevTsc)
7269 {
7270 fAsync = true;
7271 offMin = offMax = PrevTsc - CurTsc;
7272 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7273 iCpu, cLoops, CurTsc, PrevTsc));
7274 break;
7275 }
7276
7277 /* Gather statistics (except the first time). */
7278 if (iCpu != 0 || cLoops != 7)
7279 {
7280 uint64_t off = CurTsc - PrevTsc;
7281 if (off < offMin)
7282 offMin = off;
7283 if (off > offMax)
7284 offMax = off;
7285 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7286 }
7287
7288 /* Next */
7289 PrevTsc = CurTsc;
7290 }
7291 else if (rc == VERR_NOT_SUPPORTED)
7292 break;
7293 else
7294 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7295 }
7296
7297 /* broke out of the loop. */
7298 if (iCpu < iEndCpu)
7299 break;
7300 }
7301
7302 if (poffMin)
7303 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7304 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7305 fAsync, iEndCpu, rc, offMin, offMax));
7306#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7307 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7308#endif
7309 return fAsync;
7310}
7311
7312
7313/**
7314 * Determine the GIP TSC mode.
7315 *
7316 * @returns The most suitable TSC mode.
7317 * @param pDevExt Pointer to the device instance data.
7318 */
7319static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7320{
7321 /* Trust CPUs that declare their TSC to be invariant. */
7322 if (supdrvIsInvariantTsc())
7323 return SUPGIPMODE_INVARIANT_TSC;
7324
7325 /*
7326 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7327 * (1) There might be a skew between the CPU, so that cpu0
7328 * returns a TSC that is slightly different from cpu1.
7329 * (2) Power management (and other things) may cause the TSC
7330 * to run at a non-constant speed, and cause the speed
7331 * to be different on the cpus. This will result in (1).
7332 *
7333 * So, on SMP systems we'll have to select the ASYNC update method
7334 * if there are symptoms of these problems.
7335 */
7336 if (RTMpGetCount() > 1)
7337 {
7338 uint32_t uEAX, uEBX, uECX, uEDX;
7339 uint64_t u64DiffCoresIgnored;
7340
7341 /* Permit the user and/or the OS specific bits to force async mode. */
7342 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7343 return SUPGIPMODE_ASYNC_TSC;
7344
7345 /* Try check for current differences between the cpus. */
7346 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7347 return SUPGIPMODE_ASYNC_TSC;
7348
7349 /*
7350 * If the CPU supports power management and is an AMD one we
7351 * won't trust it unless it has the TscInvariant bit is set.
7352 */
7353 /** @todo this is now redundant. remove later. */
7354 /* Check for "AuthenticAMD" */
7355 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7356 if ( uEAX >= 1
7357 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7358 {
7359 /* Check for APM support and that TscInvariant is cleared. */
7360 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7361 if (uEAX >= 0x80000007)
7362 {
7363 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7364 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7365 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7366 return SUPGIPMODE_ASYNC_TSC;
7367 }
7368 }
7369 }
7370 return SUPGIPMODE_SYNC_TSC;
7371}
7372
7373
7374/**
7375 * Initializes per-CPU GIP information.
7376 *
7377 * @param pDevExt Pointer to the device instance data.
7378 * @param pGip Pointer to the GIP.
7379 * @param pCpu Pointer to which GIP CPU to initalize.
7380 * @param u64NanoTS The current nanosecond timestamp.
7381 */
7382static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7383{
7384 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7385 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7386 pCpu->u32TransactionId = 2;
7387 pCpu->u64NanoTS = u64NanoTS;
7388 pCpu->u64TSC = ASMReadTSC();
7389 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7390 pCpu->i64TSCDelta = pDevExt->fOsTscDeltasInSync ? 0 : INT64_MAX;
7391
7392 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7393 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7394 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7395 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7396
7397 /*
7398 * We don't know the following values until we've executed updates.
7399 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7400 * the 2nd timer callout.
7401 */
7402 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7403 pCpu->u32UpdateIntervalTSC
7404 = pCpu->au32TSCHistory[0]
7405 = pCpu->au32TSCHistory[1]
7406 = pCpu->au32TSCHistory[2]
7407 = pCpu->au32TSCHistory[3]
7408 = pCpu->au32TSCHistory[4]
7409 = pCpu->au32TSCHistory[5]
7410 = pCpu->au32TSCHistory[6]
7411 = pCpu->au32TSCHistory[7]
7412 = (uint32_t)(_4G / pGip->u32UpdateHz);
7413}
7414
7415
7416/**
7417 * Initializes the GIP data.
7418 *
7419 * @param pDevExt Pointer to the device instance data.
7420 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7421 * @param HCPhys The physical address of the GIP.
7422 * @param u64NanoTS The current nanosecond timestamp.
7423 * @param uUpdateHz The update frequency.
7424 * @param uUpdateIntervalNS The update interval in nanoseconds.
7425 * @param cCpus The CPU count.
7426 */
7427static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7428 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7429{
7430 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7431 unsigned i;
7432#ifdef DEBUG_DARWIN_GIP
7433 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7434#else
7435 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7436#endif
7437
7438 /*
7439 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7440 * We only bother with TSC-deltas on invariant CPUs for now.
7441 */
7442 pDevExt->fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7443
7444 /*
7445 * Initialize the structure.
7446 */
7447 memset(pGip, 0, cbGip);
7448 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7449 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7450 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7451 pGip->cCpus = (uint16_t)cCpus;
7452 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7453 pGip->u32UpdateHz = uUpdateHz;
7454 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7455 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7456 RTCpuSetEmpty(&pGip->PresentCpuSet);
7457 RTMpGetSet(&pGip->PossibleCpuSet);
7458 pGip->cOnlineCpus = RTMpGetOnlineCount();
7459 pGip->cPresentCpus = RTMpGetPresentCount();
7460 pGip->cPossibleCpus = RTMpGetCount();
7461 pGip->idCpuMax = RTMpGetMaxCpuId();
7462 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7463 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7464 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7465 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7466
7467 for (i = 0; i < cCpus; i++)
7468 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7469
7470 /*
7471 * Link it to the device extension.
7472 */
7473 pDevExt->pGip = pGip;
7474 pDevExt->HCPhysGip = HCPhys;
7475 pDevExt->cGipUsers = 0;
7476}
7477
7478
7479/**
7480 * On CPU initialization callback for RTMpOnAll.
7481 *
7482 * @param idCpu The CPU ID.
7483 * @param pvUser1 The device extension.
7484 * @param pvUser2 The GIP.
7485 */
7486static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7487{
7488 /* This is good enough, even though it will update some of the globals a
7489 bit to much. */
7490 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7491}
7492
7493
7494/**
7495 * Invalidates the GIP data upon termination.
7496 *
7497 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7498 */
7499static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7500{
7501 unsigned i;
7502 pGip->u32Magic = 0;
7503 for (i = 0; i < pGip->cCpus; i++)
7504 {
7505 pGip->aCPUs[i].u64NanoTS = 0;
7506 pGip->aCPUs[i].u64TSC = 0;
7507 pGip->aCPUs[i].iTSCHistoryHead = 0;
7508 pGip->aCPUs[i].u64TSCSample = 0;
7509 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7510 }
7511}
7512
7513
7514/**
7515 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7516 * updates all the per cpu data except the transaction id.
7517 *
7518 * @param pDevExt The device extension.
7519 * @param pGipCpu Pointer to the per cpu data.
7520 * @param u64NanoTS The current time stamp.
7521 * @param u64TSC The current TSC.
7522 * @param iTick The current timer tick.
7523 *
7524 * @remarks Can be called with interrupts disabled!
7525 */
7526static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7527{
7528 uint64_t u64TSCDelta;
7529 uint32_t u32UpdateIntervalTSC;
7530 uint32_t u32UpdateIntervalTSCSlack;
7531 unsigned iTSCHistoryHead;
7532 uint64_t u64CpuHz;
7533 uint32_t u32TransactionId;
7534
7535 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7536 AssertPtrReturnVoid(pGip);
7537
7538 /* Delta between this and the previous update. */
7539 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7540
7541 /*
7542 * Update the NanoTS.
7543 */
7544 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7545
7546 /*
7547 * Calc TSC delta.
7548 */
7549 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7550 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7551
7552 /* We don't need to keep realculating the frequency when it's invariant. */
7553 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
7554 return;
7555
7556 if (u64TSCDelta >> 32)
7557 {
7558 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7559 pGipCpu->cErrors++;
7560 }
7561
7562 /*
7563 * On the 2nd and 3rd callout, reset the history with the current TSC
7564 * interval since the values entered by supdrvGipInit are totally off.
7565 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7566 * better, while the 3rd should be most reliable.
7567 */
7568 u32TransactionId = pGipCpu->u32TransactionId;
7569 if (RT_UNLIKELY( ( u32TransactionId == 5
7570 || u32TransactionId == 7)
7571 && ( iTick == 2
7572 || iTick == 3) ))
7573 {
7574 unsigned i;
7575 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7576 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7577 }
7578
7579 /*
7580 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
7581 * Wait until we have at least one full history since the above history reset. The
7582 * assumption is that the majority of the previous history values will be tolerable.
7583 * See @bugref{6710} comment #67.
7584 */
7585 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
7586 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7587 {
7588 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
7589 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
7590 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
7591 {
7592 uint32_t u32;
7593 u32 = pGipCpu->au32TSCHistory[0];
7594 u32 += pGipCpu->au32TSCHistory[1];
7595 u32 += pGipCpu->au32TSCHistory[2];
7596 u32 += pGipCpu->au32TSCHistory[3];
7597 u32 >>= 2;
7598 u64TSCDelta = pGipCpu->au32TSCHistory[4];
7599 u64TSCDelta += pGipCpu->au32TSCHistory[5];
7600 u64TSCDelta += pGipCpu->au32TSCHistory[6];
7601 u64TSCDelta += pGipCpu->au32TSCHistory[7];
7602 u64TSCDelta >>= 2;
7603 u64TSCDelta += u32;
7604 u64TSCDelta >>= 1;
7605 }
7606 }
7607
7608
7609 /*
7610 * TSC History.
7611 */
7612 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7613 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7614 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7615 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7616
7617 /*
7618 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7619 *
7620 * On Windows, we have an occasional (but recurring) sour value that messed up
7621 * the history but taking only 1 interval reduces the precision overall.
7622 * However, this problem existed before the invariant mode was introduced.
7623 */
7624 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7625 || pGip->u32UpdateHz >= 1000)
7626 {
7627 uint32_t u32;
7628 u32 = pGipCpu->au32TSCHistory[0];
7629 u32 += pGipCpu->au32TSCHistory[1];
7630 u32 += pGipCpu->au32TSCHistory[2];
7631 u32 += pGipCpu->au32TSCHistory[3];
7632 u32 >>= 2;
7633 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7634 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7635 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7636 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7637 u32UpdateIntervalTSC >>= 2;
7638 u32UpdateIntervalTSC += u32;
7639 u32UpdateIntervalTSC >>= 1;
7640
7641 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7642 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7643 }
7644 else if (pGip->u32UpdateHz >= 90)
7645 {
7646 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7647 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7648 u32UpdateIntervalTSC >>= 1;
7649
7650 /* value chosen on a 2GHz thinkpad running windows */
7651 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7652 }
7653 else
7654 {
7655 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7656
7657 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7658 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7659 }
7660 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7661
7662 /*
7663 * CpuHz.
7664 */
7665 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7666 u64CpuHz /= pGip->u32UpdateIntervalNS;
7667 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7668}
7669
7670
7671/**
7672 * Updates the GIP.
7673 *
7674 * @param pDevExt The device extension.
7675 * @param u64NanoTS The current nanosecond timesamp.
7676 * @param u64TSC The current TSC timesamp.
7677 * @param idCpu The CPU ID.
7678 * @param iTick The current timer tick.
7679 *
7680 * @remarks Can be called with interrupts disabled!
7681 */
7682static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7683{
7684 /*
7685 * Determine the relevant CPU data.
7686 */
7687 PSUPGIPCPU pGipCpu;
7688 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7689 AssertPtrReturnVoid(pGip);
7690
7691 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7692 pGipCpu = &pGip->aCPUs[0];
7693 else
7694 {
7695 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7696 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7697 return;
7698 pGipCpu = &pGip->aCPUs[iCpu];
7699 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7700 return;
7701 }
7702
7703 /*
7704 * Start update transaction.
7705 */
7706 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7707 {
7708 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7709 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7710 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7711 pGipCpu->cErrors++;
7712 return;
7713 }
7714
7715 /*
7716 * Recalc the update frequency every 0x800th time.
7717 */
7718 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7719 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7720 {
7721 if (pGip->u64NanoTSLastUpdateHz)
7722 {
7723#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7724 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7725 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7726 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7727 {
7728 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7729 * calculation on non-invariant hosts if it changes the history decision
7730 * taken in supdrvGipDoUpdateCpu(). */
7731 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7732 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7733 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7734 }
7735#endif
7736 }
7737 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7738 }
7739
7740 /*
7741 * Update the data.
7742 */
7743 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7744
7745 /*
7746 * Complete transaction.
7747 */
7748 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7749}
7750
7751
7752/**
7753 * Updates the per cpu GIP data for the calling cpu.
7754 *
7755 * @param pDevExt The device extension.
7756 * @param u64NanoTS The current nanosecond timesamp.
7757 * @param u64TSC The current TSC timesamp.
7758 * @param idCpu The CPU ID.
7759 * @param idApic The APIC id for the CPU index.
7760 * @param iTick The current timer tick.
7761 *
7762 * @remarks Can be called with interrupts disabled!
7763 */
7764static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7765 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7766{
7767 uint32_t iCpu;
7768 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7769
7770 /*
7771 * Avoid a potential race when a CPU online notification doesn't fire on
7772 * the onlined CPU but the tick creeps in before the event notification is
7773 * run.
7774 */
7775 if (RT_UNLIKELY(iTick == 1))
7776 {
7777 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7778 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7779 supdrvGipMpEventOnline(pDevExt, idCpu);
7780 }
7781
7782 iCpu = pGip->aiCpuFromApicId[idApic];
7783 if (RT_LIKELY(iCpu < pGip->cCpus))
7784 {
7785 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7786 if (pGipCpu->idCpu == idCpu)
7787 {
7788 /*
7789 * Start update transaction.
7790 */
7791 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7792 {
7793 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7794 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7795 pGipCpu->cErrors++;
7796 return;
7797 }
7798
7799 /*
7800 * Update the data.
7801 */
7802 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7803
7804 /*
7805 * Complete transaction.
7806 */
7807 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7808 }
7809 }
7810}
7811
7812
7813/**
7814 * Resume built-in keyboard on MacBook Air and Pro hosts.
7815 * If there is no built-in keyboard device, return success anyway.
7816 *
7817 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7818 */
7819static int supdrvIOCtl_ResumeSuspendedKbds(void)
7820{
7821#if defined(RT_OS_DARWIN)
7822 return supdrvDarwinResumeSuspendedKbds();
7823#else
7824 return VERR_NOT_IMPLEMENTED;
7825#endif
7826}
7827
7828
7829/**
7830 * Service a TSC-delta measurement request.
7831 *
7832 * @returns VBox status code.
7833 * @param pDevExt Pointer to the device instance data.
7834 * @param pReq Pointer to the TSC-delta measurement request.
7835 */
7836static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7837{
7838 PSUPGLOBALINFOPAGE pGip;
7839 RTCPUID idCpuWorker;
7840 int rc = VERR_CPU_NOT_FOUND;
7841 int16_t cTries;
7842 RTMSINTERVAL cMsWaitRetry;
7843 uint16_t iCpu;
7844
7845 /*
7846 * Validate.
7847 */
7848 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7849 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7850 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7851 idCpuWorker = pReq->u.In.idCpu;
7852 if (idCpuWorker == NIL_RTCPUID)
7853 return VERR_INVALID_CPU_ID;
7854
7855 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7856 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7857 pGip = pDevExt->pGip;
7858
7859 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt))
7860 return VINF_SUCCESS;
7861
7862 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7863 {
7864 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7865 if (pGipCpuWorker->idCpu == idCpuWorker)
7866 {
7867 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7868 && !pReq->u.In.fForce)
7869 return VINF_SUCCESS;
7870
7871#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7872 if (pReq->u.In.fAsync)
7873 {
7874 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7875 * to pass those options to the thread somehow and implement it in the
7876 * thread. Check if anyone uses/needs fAsync before implementing this. */
7877 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7878 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7879 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7880 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7881 {
7882 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7883 }
7884 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7885 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7886 return VINF_SUCCESS;
7887 }
7888#endif
7889
7890 while (cTries-- > 0)
7891 {
7892 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7893 if (RT_SUCCESS(rc))
7894 {
7895 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7896 break;
7897 }
7898
7899 if (cMsWaitRetry)
7900 RTThreadSleep(cMsWaitRetry);
7901 }
7902
7903 break;
7904 }
7905 }
7906 return rc;
7907}
7908
7909
7910/**
7911 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7912 *
7913 * @returns VBox status code.
7914 * @param pDevExt Pointer to the device instance data.
7915 * @param pReq Pointer to the TSC-read request.
7916 */
7917static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7918{
7919 uint64_t uTsc;
7920 uint16_t idApic;
7921 int16_t cTries;
7922 PSUPGLOBALINFOPAGE pGip;
7923 int rc;
7924
7925 /*
7926 * Validate.
7927 */
7928 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7929 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7930 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7931 pGip = pDevExt->pGip;
7932
7933 cTries = 4;
7934 while (cTries-- > 0)
7935 {
7936 int rc2;
7937 uint16_t iCpu;
7938
7939 rc = SUPGetTsc(&uTsc, &idApic);
7940 if (RT_SUCCESS(rc))
7941 {
7942 pReq->u.Out.u64AdjustedTsc = uTsc;
7943 pReq->u.Out.idApic = idApic;
7944 return VINF_SUCCESS;
7945 }
7946
7947 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
7948 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7949 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7950 iCpu = pGip->aiCpuFromApicId[idApic];
7951 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7952
7953 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt));
7954 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7955 if (RT_SUCCESS(rc2))
7956 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7957 }
7958
7959 return rc;
7960}
7961
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette