VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 54298

Last change on this file since 54298 was 54298, checked in by vboxsync, 10 years ago

SUPDrv.c: Should not check for the invariant tsc cpuid bit in supdrvGipMpEventOffline. Restructured, clarified and fixed the code and comments in supdrvGipInitDetermineTscMode.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 315.1 KB
1/* $Id: SUPDrv.c 54298 2015-02-19 15:19:34Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
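/*
 * Illustrative sketch (not part of the original file) of how the level
 * assignments above look in practice, using calls that appear later in
 * this file:
 *
 *     Log(("Failed to create spinlock, rc=%d!\n", rc));           - failures
 *     LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession)); - program flow
 *     Log2(("freeing memory - done\n"));                          - cleanup
 */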
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency at which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops during which we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrency safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrency safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
127#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
128/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
129#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
130
131AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
132AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
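/*
 * Illustrative sketch (simplified, not part of the original file) of the
 * handshake the GIP_TSC_DELTA_SYNC_* states above drive; the real code lives
 * in supdrvMeasureTscDeltaOne and uses atomic updates of a shared sync word:
 *
 *   Master                                   Worker
 *   ------                                   ------
 *   write GIP_TSC_DELTA_SYNC_START   ----->  spin until START is seen
 *   spin until WORKER_READY          <-----  write GIP_TSC_DELTA_SYNC_WORKER_READY
 *   run GIP_TSC_DELTA_LOOPS rounds of lock-step RDTSC, using
 *   GIP_TSC_DELTA_RSVD as the "not yet written" sentinel value
 *   spin until WORKER_DONE           <-----  write GIP_TSC_DELTA_SYNC_WORKER_DONE
 *   write GIP_TSC_DELTA_SYNC_STOP    ----->  exit
 */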
133
134/** @def VBOX_SVN_REV
135 * The makefile should define this if it can. */
136#ifndef VBOX_SVN_REV
137# define VBOX_SVN_REV 0
138#endif
139
140#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
141# define DO_NOT_START_GIP
142#endif
143
144/*******************************************************************************
145* Internal Functions *
146*******************************************************************************/
147static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
148static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
149static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
150static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
151static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
152static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
153static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
154static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt);
155static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
156static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
157static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
158static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
159static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
160static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
161DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
162DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
163static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
164static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
165static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
166static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
167static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
168static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
169static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
170static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
171static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
172static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
173static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
174 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
175static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
176static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
177static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
178static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
179 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
180static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
181static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
182static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
183static int supdrvIOCtl_ResumeSuspendedKbds(void);
184
185
186/*******************************************************************************
187* Global Variables *
188*******************************************************************************/
189DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
190
191
192/**
193 * Array of the R0 SUP API.
194 *
195 * While making changes to these exports, make sure to update the IOC
196 * minor version (SUPDRV_IOC_VERSION).
197 */
198static SUPFUNC g_aFunctions[] =
199{
200/* SED: START */
201 /* name function */
202 /* Entries with absolute addresses determined at runtime, fixup
203 code makes ugly ASSUMPTIONS about the order here: */
204 { "SUPR0AbsIs64bit", (void *)0 },
205 { "SUPR0Abs64bitKernelCS", (void *)0 },
206 { "SUPR0Abs64bitKernelSS", (void *)0 },
207 { "SUPR0Abs64bitKernelDS", (void *)0 },
208 { "SUPR0AbsKernelCS", (void *)0 },
209 { "SUPR0AbsKernelSS", (void *)0 },
210 { "SUPR0AbsKernelDS", (void *)0 },
211 { "SUPR0AbsKernelES", (void *)0 },
212 { "SUPR0AbsKernelFS", (void *)0 },
213 { "SUPR0AbsKernelGS", (void *)0 },
214 /* Normal function pointers: */
215 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
216 { "SUPGetGIP", (void *)SUPGetGIP },
217 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
218 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
219 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
220 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
221 { "SUPR0ContFree", (void *)SUPR0ContFree },
222 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
223 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
224 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
225 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
226 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
227 { "SUPR0GetSvmUsability", (void *)SUPR0GetSvmUsability },
228 { "SUPR0GetVmxUsability", (void *)SUPR0GetVmxUsability },
229 { "SUPR0LockMem", (void *)SUPR0LockMem },
230 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
231 { "SUPR0LowFree", (void *)SUPR0LowFree },
232 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
233 { "SUPR0MemFree", (void *)SUPR0MemFree },
234 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
235 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
236 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
237 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
238 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
239 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
240 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
241 { "SUPR0PageFree", (void *)SUPR0PageFree },
242 { "SUPR0Printf", (void *)SUPR0Printf },
243 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
244 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
245 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
246 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
247 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
248 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
249 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
250 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
251 { "SUPSemEventClose", (void *)SUPSemEventClose },
252 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
253 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
254 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
255 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
256 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
257 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
258 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
259 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
260 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
261 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
262 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
263 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
264 { "SUPSemEventWait", (void *)SUPSemEventWait },
265 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
266 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
267 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
268
269 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
270 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
271 { "RTAssertMsg1", (void *)RTAssertMsg1 },
272 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
273 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
274 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
275 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
276 { "RTCrc32", (void *)RTCrc32 },
277 { "RTCrc32Finish", (void *)RTCrc32Finish },
278 { "RTCrc32Process", (void *)RTCrc32Process },
279 { "RTCrc32Start", (void *)RTCrc32Start },
280 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
281 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
282 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
283 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
284 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
285 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
286 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
287 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
288 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
289 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
290 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
291 { "RTLogPrintfV", (void *)RTLogPrintfV },
292 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
293 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
294 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
295 { "RTMemAllocTag", (void *)RTMemAllocTag },
296 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
297 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
298 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
299 { "RTMemDupExTag", (void *)RTMemDupExTag },
300 { "RTMemDupTag", (void *)RTMemDupTag },
301 { "RTMemFree", (void *)RTMemFree },
302 { "RTMemFreeEx", (void *)RTMemFreeEx },
303 { "RTMemReallocTag", (void *)RTMemReallocTag },
304 { "RTMpCpuId", (void *)RTMpCpuId },
305 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
306 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
307 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
308 { "RTMpGetCount", (void *)RTMpGetCount },
309 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
310 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
311 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
312 { "RTMpGetSet", (void *)RTMpGetSet },
313 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
314 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
315 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
316 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
317 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
318 { "RTMpOnAll", (void *)RTMpOnAll },
319 { "RTMpOnOthers", (void *)RTMpOnOthers },
320 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
321 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
322 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
323 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
324 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
325 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
326 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
327 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
328 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
329 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
330 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
331 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
332 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
333 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
334 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
335 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
336 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
337 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
338 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
339 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
340 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
341 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
342 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
343 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
344 { "RTProcSelf", (void *)RTProcSelf },
345 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
346 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
347 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
348 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
349 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
350 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
351 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
352 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
353 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
354 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
355 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
356 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
357 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
358 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
359 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
360 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
361 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
362 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
363 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
364 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
365 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
366 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
367 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
368 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
369 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
370 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
371 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
372 { "RTSemEventCreate", (void *)RTSemEventCreate },
373 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
374 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
375 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
376 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
377 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
378 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
379 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
380 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
381 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
382 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
383 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
384 { "RTSemEventSignal", (void *)RTSemEventSignal },
385 { "RTSemEventWait", (void *)RTSemEventWait },
386 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
387 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
388 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
389 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
390 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
391 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
392 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
393 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
394 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
395 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
396 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
397 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
398 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
399 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
400 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
401 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
402 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
403 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
404 { "RTStrCopy", (void *)RTStrCopy },
405 { "RTStrDupTag", (void *)RTStrDupTag },
406 { "RTStrFormat", (void *)RTStrFormat },
407 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
408 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
409 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
410 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
411 { "RTStrFormatV", (void *)RTStrFormatV },
412 { "RTStrFree", (void *)RTStrFree },
413 { "RTStrNCmp", (void *)RTStrNCmp },
414 { "RTStrPrintf", (void *)RTStrPrintf },
415 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
416 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
417 { "RTStrPrintfV", (void *)RTStrPrintfV },
418 { "RTThreadCreate", (void *)RTThreadCreate },
419 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
420 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
421 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
422 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
423 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
424 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
425 { "RTThreadGetName", (void *)RTThreadGetName },
426 { "RTThreadGetNative", (void *)RTThreadGetNative },
427 { "RTThreadGetType", (void *)RTThreadGetType },
428 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
429 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
430 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
431 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
432 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
433 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
434 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
435 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
436 { "RTThreadSelf", (void *)RTThreadSelf },
437 { "RTThreadSelfName", (void *)RTThreadSelfName },
438 { "RTThreadSleep", (void *)RTThreadSleep },
439 { "RTThreadUserReset", (void *)RTThreadUserReset },
440 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
441 { "RTThreadUserWait", (void *)RTThreadUserWait },
442 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
443 { "RTThreadWait", (void *)RTThreadWait },
444 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
445 { "RTThreadYield", (void *)RTThreadYield },
446 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
447 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
448 { "RTTimeNow", (void *)RTTimeNow },
449 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
450 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
451 { "RTTimerCreate", (void *)RTTimerCreate },
452 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
453 { "RTTimerDestroy", (void *)RTTimerDestroy },
454 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
455 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
456 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
457 { "RTTimerStart", (void *)RTTimerStart },
458 { "RTTimerStop", (void *)RTTimerStop },
459 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
460 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
461 { "RTUuidCompare", (void *)RTUuidCompare },
462 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
463 { "RTUuidFromStr", (void *)RTUuidFromStr },
464/* SED: END */
465};
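/*
 * Illustrative sketch (example only, not part of the original file) of how a
 * symbol can be resolved against the table above; the real lookup is done by
 * supdrvIOCtl_LdrGetSymbol and handles more cases:
 */
#if 0
static void *supdrvExampleLookupSymbol(const char *pszSymbol)
{
    uint32_t i;
    for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
        if (!strcmp(g_aFunctions[i].szName, pszSymbol))
            return g_aFunctions[i].pfn;  /* export found */
    return NULL;                         /* unknown symbol */
}
#endif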
466
467#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
468/**
469 * Drag in the rest of IPRT since we share it with the
470 * rest of the kernel modules on darwin.
471 */
472PFNRT g_apfnVBoxDrvIPRTDeps[] =
473{
474 /* VBoxNetAdp */
475 (PFNRT)RTRandBytes,
476 /* VBoxUSB */
477 (PFNRT)RTPathStripFilename,
478 NULL
479};
480#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
481
482
483/**
484 * Initializes the device extension structure.
485 *
486 * @returns IPRT status code.
487 * @param pDevExt The device extension to initialize.
488 * @param cbSession The size of the session structure. The size of
489 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
490 * defined because we're skipping the OS specific members
491 * then.
492 */
493int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
494{
495 int rc;
496
497#ifdef SUPDRV_WITH_RELEASE_LOGGER
498 /*
499 * Create the release log.
500 */
501 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
502 PRTLOGGER pRelLogger;
503 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
504 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
505 if (RT_SUCCESS(rc))
506 RTLogRelSetDefaultInstance(pRelLogger);
507 /** @todo Add native hook for getting logger config parameters and setting
508 * them. On linux we should use the module parameter stuff... */
509#endif
510
511 /*
512 * Initialize it.
513 */
514 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
515 pDevExt->Spinlock = NIL_RTSPINLOCK;
516 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
517 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
518 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
519 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
520 if (RT_SUCCESS(rc))
521 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
522 if (RT_SUCCESS(rc))
523 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
524
525 if (RT_SUCCESS(rc))
526#ifdef SUPDRV_USE_MUTEX_FOR_LDR
527 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
528#else
529 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
530#endif
531 if (RT_SUCCESS(rc))
532 {
533 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
534 if (RT_SUCCESS(rc))
535 {
536#ifdef SUPDRV_USE_MUTEX_FOR_GIP
537 rc = RTSemMutexCreate(&pDevExt->mtxGip);
538#else
539 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
540#endif
541 if (RT_SUCCESS(rc))
542 {
543 rc = supdrvGipCreate(pDevExt);
544 if (RT_SUCCESS(rc))
545 {
546 rc = supdrvTracerInit(pDevExt);
547 if (RT_SUCCESS(rc))
548 {
549 pDevExt->pLdrInitImage = NULL;
550 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
551 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
552 pDevExt->cbSession = (uint32_t)cbSession;
553
554 /*
555 * Fixup the absolute symbols.
556 *
557 * Because of the table indexing assumptions we'll have a little #ifdef orgy
558 * here rather than distributing this to OS specific files. At least for now.
559 */
560#ifdef RT_OS_DARWIN
561# if ARCH_BITS == 32
562 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
563 {
564 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
565 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
566 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
567 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
568 }
569 else
570 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0; /* zero the 64-bit entries; [4..9] are set below */
571 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
572 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
573 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
574 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
575 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
576 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
577# else /* 64-bit darwin: */
578 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
579 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
580 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
581 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
582 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
583 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
584 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
585 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
586 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
587 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
588
589# endif
590#else /* !RT_OS_DARWIN */
591# if ARCH_BITS == 64
592 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
593 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
594 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
595 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
596# else
597 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0; /* zero the 64-bit entries; [4..9] are set below */
598# endif
599 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
600 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
601 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
602 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
603 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
604 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
605#endif /* !RT_OS_DARWIN */
606 return VINF_SUCCESS;
607 }
608
609 supdrvGipDestroy(pDevExt);
610 }
611
612#ifdef SUPDRV_USE_MUTEX_FOR_GIP
613 RTSemMutexDestroy(pDevExt->mtxGip);
614 pDevExt->mtxGip = NIL_RTSEMMUTEX;
615#else
616 RTSemFastMutexDestroy(pDevExt->mtxGip);
617 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
618#endif
619 }
620 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
621 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
622 }
623#ifdef SUPDRV_USE_MUTEX_FOR_LDR
624 RTSemMutexDestroy(pDevExt->mtxLdr);
625 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
626#else
627 RTSemFastMutexDestroy(pDevExt->mtxLdr);
628 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
629#endif
630 }
631
632 RTSpinlockDestroy(pDevExt->Spinlock);
633 pDevExt->Spinlock = NIL_RTSPINLOCK;
634 RTSpinlockDestroy(pDevExt->hGipSpinlock);
635 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
636 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
637 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
638
639#ifdef SUPDRV_WITH_RELEASE_LOGGER
640 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
641 RTLogDestroy(RTLogSetDefaultInstance(NULL));
642#endif
643
644 return rc;
645}
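/*
 * Illustrative sketch (example only) of how the OS specific glue typically
 * calls the function above during module load; the real callers live in the
 * per-OS SUPDrv-<os>.c files:
 */
#if 0
    static SUPDRVDEVEXT g_DevExt;
    int rc = supdrvInitDevExt(&g_DevExt, sizeof(SUPDRVSESSION));
    if (RT_FAILURE(rc))
        return rc; /* roll back any OS specific setup done before this point */
#endif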
646
647
648/**
649 * Delete the device extension (e.g. cleanup members).
650 *
651 * @param pDevExt The device extension to delete.
652 */
653void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
654{
655 PSUPDRVOBJ pObj;
656 PSUPDRVUSAGE pUsage;
657
658 /*
659 * Kill mutexes and spinlocks.
660 */
661#ifdef SUPDRV_USE_MUTEX_FOR_GIP
662 RTSemMutexDestroy(pDevExt->mtxGip);
663 pDevExt->mtxGip = NIL_RTSEMMUTEX;
664#else
665 RTSemFastMutexDestroy(pDevExt->mtxGip);
666 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
667#endif
668#ifdef SUPDRV_USE_MUTEX_FOR_LDR
669 RTSemMutexDestroy(pDevExt->mtxLdr);
670 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
671#else
672 RTSemFastMutexDestroy(pDevExt->mtxLdr);
673 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
674#endif
675 RTSpinlockDestroy(pDevExt->Spinlock);
676 pDevExt->Spinlock = NIL_RTSPINLOCK;
677 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
678 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
679 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
680 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
681
682 /*
683 * Free lists.
684 */
685 /* objects. */
686 pObj = pDevExt->pObjs;
687 Assert(!pObj); /* (can trigger on forced unloads) */
688 pDevExt->pObjs = NULL;
689 while (pObj)
690 {
691 void *pvFree = pObj;
692 pObj = pObj->pNext;
693 RTMemFree(pvFree);
694 }
695
696 /* usage records. */
697 pUsage = pDevExt->pUsageFree;
698 pDevExt->pUsageFree = NULL;
699 while (pUsage)
700 {
701 void *pvFree = pUsage;
702 pUsage = pUsage->pNext;
703 RTMemFree(pvFree);
704 }
705
706 /* kill the GIP. */
707 supdrvGipDestroy(pDevExt);
708 RTSpinlockDestroy(pDevExt->hGipSpinlock);
709 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
710
711 supdrvTracerTerm(pDevExt);
712
713#ifdef SUPDRV_WITH_RELEASE_LOGGER
714 /* destroy the loggers. */
715 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
716 RTLogDestroy(RTLogSetDefaultInstance(NULL));
717#endif
718}
719
720
721/**
722 * Create session.
723 *
724 * @returns IPRT status code.
725 * @param pDevExt Device extension.
726 * @param fUser Flag indicating whether this is a user or kernel
727 * session.
728 * @param fUnrestricted Unrestricted access (system) or restricted access
729 * (user)?
730 * @param ppSession Where to store the pointer to the session data.
731 */
732int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
733{
734 int rc;
735 PSUPDRVSESSION pSession;
736
737 if (!SUP_IS_DEVEXT_VALID(pDevExt))
738 return VERR_INVALID_PARAMETER;
739
740 /*
741 * Allocate memory for the session data.
742 */
743 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
744 if (pSession)
745 {
746 /* Initialize session data. */
747 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
748 if (!rc)
749 {
750 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
751 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
752 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
753 if (RT_SUCCESS(rc))
754 {
755 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
756 pSession->pDevExt = pDevExt;
757 pSession->u32Cookie = BIRD_INV;
758 pSession->fUnrestricted = fUnrestricted;
759 /*pSession->fInHashTable = false; */
760 pSession->cRefs = 1;
761 /*pSession->pCommonNextHash = NULL;
762 pSession->ppOsSessionPtr = NULL; */
763 if (fUser)
764 {
765 pSession->Process = RTProcSelf();
766 pSession->R0Process = RTR0ProcHandleSelf();
767 }
768 else
769 {
770 pSession->Process = NIL_RTPROCESS;
771 pSession->R0Process = NIL_RTR0PROCESS;
772 }
773 /*pSession->pLdrUsage = NULL;
774 pSession->pVM = NULL;
775 pSession->pUsage = NULL;
776 pSession->pGip = NULL;
777 pSession->fGipReferenced = false;
778 pSession->Bundle.cUsed = 0; */
779 pSession->Uid = NIL_RTUID;
780 pSession->Gid = NIL_RTGID;
781 /*pSession->uTracerData = 0;*/
782 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
783 RTListInit(&pSession->TpProviders);
784 /*pSession->cTpProviders = 0;*/
785 /*pSession->cTpProbesFiring = 0;*/
786 RTListInit(&pSession->TpUmods);
787 /*RT_ZERO(pSession->apTpLookupTable);*/
788
789 VBOXDRV_SESSION_CREATE(pSession, fUser);
790 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
791 return VINF_SUCCESS;
792 }
793
794 RTSpinlockDestroy(pSession->Spinlock);
795 }
796 RTMemFree(pSession);
797 *ppSession = NULL;
798 Log(("Failed to create spinlock, rc=%d!\n", rc));
799 }
800 else
801 rc = VERR_NO_MEMORY;
802
803 return rc;
804}
805
806
807/**
808 * Cleans up the session in the context of the process to which it belongs, the
809 * caller will free the session and the session spinlock.
810 *
811 * This should normally occur when the session is closed or as the process
812 * exits. Careful reference counting in the OS specific code makes sure that
813 * there cannot be any races between process/handle cleanup callbacks and
814 * threads doing I/O control calls.
815 *
816 * @param pDevExt The device extension.
817 * @param pSession Session data.
818 */
819static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
820{
821 int rc;
822 PSUPDRVBUNDLE pBundle;
823 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
824
825 Assert(!pSession->fInHashTable);
826 Assert(!pSession->ppOsSessionPtr);
827 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
828 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
829
830 /*
831 * Remove logger instances related to this session.
832 */
833 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
834
835 /*
836 * Destroy the handle table.
837 */
838 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
839 AssertRC(rc);
840 pSession->hHandleTable = NIL_RTHANDLETABLE;
841
842 /*
843 * Release object references made in this session.
844 * In theory there should be no one racing us in this session.
845 */
846 Log2(("release objects - start\n"));
847 if (pSession->pUsage)
848 {
849 PSUPDRVUSAGE pUsage;
850 RTSpinlockAcquire(pDevExt->Spinlock);
851
852 while ((pUsage = pSession->pUsage) != NULL)
853 {
854 PSUPDRVOBJ pObj = pUsage->pObj;
855 pSession->pUsage = pUsage->pNext;
856
857 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
858 if (pUsage->cUsage < pObj->cUsage)
859 {
860 pObj->cUsage -= pUsage->cUsage;
861 RTSpinlockRelease(pDevExt->Spinlock);
862 }
863 else
864 {
865 /* Destroy the object and free the record. */
866 if (pDevExt->pObjs == pObj)
867 pDevExt->pObjs = pObj->pNext;
868 else
869 {
870 PSUPDRVOBJ pObjPrev;
871 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
872 if (pObjPrev->pNext == pObj)
873 {
874 pObjPrev->pNext = pObj->pNext;
875 break;
876 }
877 Assert(pObjPrev);
878 }
879 RTSpinlockRelease(pDevExt->Spinlock);
880
881 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
882 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
883 if (pObj->pfnDestructor)
884 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
885 RTMemFree(pObj);
886 }
887
888 /* free it and continue. */
889 RTMemFree(pUsage);
890
891 RTSpinlockAcquire(pDevExt->Spinlock);
892 }
893
894 RTSpinlockRelease(pDevExt->Spinlock);
895 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
896 }
897 Log2(("release objects - done\n"));
898
899 /*
900 * Do tracer cleanups related to this session.
901 */
902 Log2(("release tracer stuff - start\n"));
903 supdrvTracerCleanupSession(pDevExt, pSession);
904 Log2(("release tracer stuff - end\n"));
905
906 /*
907 * Release memory allocated in the session.
908 *
909 * We do not serialize this as we assume that the application will
910 * not allocate memory while closing the file handle object.
911 */
912 Log2(("freeing memory:\n"));
913 pBundle = &pSession->Bundle;
914 while (pBundle)
915 {
916 PSUPDRVBUNDLE pToFree;
917 unsigned i;
918
919 /*
920 * Check and unlock all entries in the bundle.
921 */
922 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
923 {
924 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
925 {
926 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
927 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
928 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
929 {
930 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
931 AssertRC(rc); /** @todo figure out how to handle this. */
932 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
933 }
934 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
935 AssertRC(rc); /** @todo figure out how to handle this. */
936 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
937 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
938 }
939 }
940
941 /*
942 * Advance and free previous bundle.
943 */
944 pToFree = pBundle;
945 pBundle = pBundle->pNext;
946
947 pToFree->pNext = NULL;
948 pToFree->cUsed = 0;
949 if (pToFree != &pSession->Bundle)
950 RTMemFree(pToFree);
951 }
952 Log2(("freeing memory - done\n"));
953
954 /*
955 * Deregister component factories.
956 */
957 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
958 Log2(("deregistering component factories:\n"));
959 if (pDevExt->pComponentFactoryHead)
960 {
961 PSUPDRVFACTORYREG pPrev = NULL;
962 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
963 while (pCur)
964 {
965 if (pCur->pSession == pSession)
966 {
967 /* unlink it */
968 PSUPDRVFACTORYREG pNext = pCur->pNext;
969 if (pPrev)
970 pPrev->pNext = pNext;
971 else
972 pDevExt->pComponentFactoryHead = pNext;
973
974 /* free it */
975 pCur->pNext = NULL;
976 pCur->pSession = NULL;
977 pCur->pFactory = NULL;
978 RTMemFree(pCur);
979
980 /* next */
981 pCur = pNext;
982 }
983 else
984 {
985 /* next */
986 pPrev = pCur;
987 pCur = pCur->pNext;
988 }
989 }
990 }
991 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
992 Log2(("deregistering component factories - done\n"));
993
994 /*
995 * Loaded images need to be dereferenced and possibly freed.
996 */
997 supdrvLdrLock(pDevExt);
998 Log2(("freeing images:\n"));
999 if (pSession->pLdrUsage)
1000 {
1001 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1002 pSession->pLdrUsage = NULL;
1003 while (pUsage)
1004 {
1005 void *pvFree = pUsage;
1006 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1007 if (pImage->cUsage > pUsage->cUsage)
1008 pImage->cUsage -= pUsage->cUsage;
1009 else
1010 supdrvLdrFree(pDevExt, pImage);
1011 pUsage->pImage = NULL;
1012 pUsage = pUsage->pNext;
1013 RTMemFree(pvFree);
1014 }
1015 }
1016 supdrvLdrUnlock(pDevExt);
1017 Log2(("freeing images - done\n"));
1018
1019 /*
1020 * Unmap the GIP.
1021 */
1022 Log2(("umapping GIP:\n"));
1023 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1024 {
1025 SUPR0GipUnmap(pSession);
1026 pSession->fGipReferenced = 0;
1027 }
1028 Log2(("umapping GIP - done\n"));
1029}
1030
1031
1032/**
1033 * Common code for freeing a session when the reference count reaches zero.
1034 *
1035 * @param pDevExt Device extension.
1036 * @param pSession Session data.
1037 * This data will be freed by this routine.
1038 */
1039static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1040{
1041 VBOXDRV_SESSION_CLOSE(pSession);
1042
1043 /*
1044 * Cleanup the session first.
1045 */
1046 supdrvCleanupSession(pDevExt, pSession);
1047 supdrvOSCleanupSession(pDevExt, pSession);
1048
1049 /*
1050 * Free the rest of the session stuff.
1051 */
1052 RTSpinlockDestroy(pSession->Spinlock);
1053 pSession->Spinlock = NIL_RTSPINLOCK;
1054 pSession->pDevExt = NULL;
1055 RTMemFree(pSession);
1056 LogFlow(("supdrvDestroySession: returns\n"));
1057}
1058
1059
1060/**
1061 * Inserts the session into the global hash table.
1062 *
1063 * @retval VINF_SUCCESS on success.
1064 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1065 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1066 * session (asserted).
1067 * @retval VERR_DUPLICATE if there is already a session for that pid.
 * @retval VERR_RESOURCE_IN_USE if there is already a session for that pid
 *         with the same ring-0 process handle (see the R0Process check below).
1068 *
1069 * @param pDevExt The device extension.
1070 * @param pSession The session.
1071 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1072 * available and used. This will be set to point to the
1073 * session while under the protection of the session
1074 * hash table spinlock. It will also be kept in
1075 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1076 * cleanup use.
1077 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1078 */
1079int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1080 void *pvUser)
1081{
1082 PSUPDRVSESSION pCur;
1083 unsigned iHash;
1084
1085 /*
1086 * Validate input.
1087 */
1088 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1089 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1090
1091 /*
1092 * Calculate the hash table index and acquire the spinlock.
1093 */
1094 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1095
1096 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1097
1098 /*
1099 * If there is a collision, we need to carefully check whether we got a
1100 * duplicate. There can only be one open session per process.
1101 */
1102 pCur = pDevExt->apSessionHashTab[iHash];
1103 if (pCur)
1104 {
1105 while (pCur && pCur->Process != pSession->Process)
1106 pCur = pCur->pCommonNextHash;
1107
1108 if (pCur)
1109 {
1110 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1111 if (pCur == pSession)
1112 {
1113 Assert(pSession->fInHashTable);
1114 AssertFailed();
1115 return VERR_WRONG_ORDER;
1116 }
1117 Assert(!pSession->fInHashTable);
1118 if (pCur->R0Process == pSession->R0Process)
1119 return VERR_RESOURCE_IN_USE;
1120 return VERR_DUPLICATE;
1121 }
1122 }
1123 Assert(!pSession->fInHashTable);
1124 Assert(!pSession->ppOsSessionPtr);
1125
1126 /*
1127 * Insert it, doing a callout to the OS specific code in case it has
1128 * anything it wishes to do while we're holding the spinlock.
1129 */
1130 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1131 pDevExt->apSessionHashTab[iHash] = pSession;
1132 pSession->fInHashTable = true;
1133 ASMAtomicIncS32(&pDevExt->cSessions);
1134
1135 pSession->ppOsSessionPtr = ppOsSessionPtr;
1136 if (ppOsSessionPtr)
1137 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1138
1139 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1140
1141 /*
1142 * Retain a reference for the pointer in the session table.
1143 */
1144 ASMAtomicIncU32(&pSession->cRefs);
1145
1146 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1147 return VINF_SUCCESS;
1148}
1149
1150
1151/**
1152 * Removes the session from the global hash table.
1153 *
1154 * @retval VINF_SUCCESS on success.
1155 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1156 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1157 * session (asserted).
1158 *
1159 * @param pDevExt The device extension.
1160 * @param pSession The session. The caller is expected to have a reference
1161 * to this so it won't croak on us when we release the hash
1162 * table reference.
1163 * @param pvUser OS specific context value for the
1164 * supdrvOSSessionHashTabRemoved callback.
1165 */
1166int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1167{
1168 PSUPDRVSESSION pCur;
1169 unsigned iHash;
1170 int32_t cRefs;
1171
1172 /*
1173 * Validate input.
1174 */
1175 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1176 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1177
1178 /*
1179 * Calculate the hash table index and acquire the spinlock.
1180 */
1181 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1182
1183 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1184
1185 /*
1186 * Unlink it.
1187 */
1188 pCur = pDevExt->apSessionHashTab[iHash];
1189 if (pCur == pSession)
1190 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1191 else
1192 {
1193 PSUPDRVSESSION pPrev = pCur;
1194 while (pCur && pCur != pSession)
1195 {
1196 pPrev = pCur;
1197 pCur = pCur->pCommonNextHash;
1198 }
1199 if (pCur)
1200 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1201 else
1202 {
1203 Assert(!pSession->fInHashTable);
1204 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1205 return VERR_NOT_FOUND;
1206 }
1207 }
1208
1209 pSession->pCommonNextHash = NULL;
1210 pSession->fInHashTable = false;
1211
1212 ASMAtomicDecS32(&pDevExt->cSessions);
1213
1214 /*
1215 * Clear OS specific session pointer if available and do the OS callback.
1216 */
1217 if (pSession->ppOsSessionPtr)
1218 {
1219 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1220 pSession->ppOsSessionPtr = NULL;
1221 }
1222
1223 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1224
1225 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1226
1227 /*
1228 * Drop the reference the hash table had to the session. This shouldn't
1229 * be the last reference!
1230 */
1231 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1232 Assert(cRefs > 0 && cRefs < _1M);
1233 if (cRefs == 0)
1234 supdrvDestroySession(pDevExt, pSession);
1235
1236 return VINF_SUCCESS;
1237}
1238
1239
1240/**
1241 * Looks up the session for the current process in the global hash table or in
1242 * OS specific pointer.
1243 *
1244 * @returns Pointer to the session with a reference that the caller must
1245 * release. If no valid session was found, NULL is returned.
1246 *
1247 * @param pDevExt The device extension.
1248 * @param Process The process ID.
1249 * @param R0Process The ring-0 process handle.
1250 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1251 * this is used instead of the hash table. For
1252 * additional safety it must then be equal to the
1253 * SUPDRVSESSION::ppOsSessionPtr member.
1254 * This can be NULL even if the OS has a session
1255 * pointer.
1256 */
1257PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1258 PSUPDRVSESSION *ppOsSessionPtr)
1259{
1260 PSUPDRVSESSION pCur;
1261 unsigned iHash;
1262
1263 /*
1264 * Validate input.
1265 */
1266 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1267
1268 /*
1269 * Calculate the hash table index and acquire the spinlock.
1270 */
1271 iHash = SUPDRV_SESSION_HASH(Process);
1272
1273 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1274
1275 /*
1276 * If an OS session pointer is provided, always use it.
1277 */
1278 if (ppOsSessionPtr)
1279 {
1280 pCur = *ppOsSessionPtr;
1281 if ( pCur
1282 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1283 || pCur->Process != Process
1284 || pCur->R0Process != R0Process) )
1285 pCur = NULL;
1286 }
1287 else
1288 {
1289 /*
1290 * Otherwise, do the hash table lookup.
1291 */
1292 pCur = pDevExt->apSessionHashTab[iHash];
1293 while ( pCur
1294 && ( pCur->Process != Process
1295 || pCur->R0Process != R0Process) )
1296 pCur = pCur->pCommonNextHash;
1297 }
1298
1299 /*
1300 * Retain the session.
1301 */
1302 if (pCur)
1303 {
1304 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1305 NOREF(cRefs);
1306 Assert(cRefs > 1 && cRefs < _1M);
1307 }
1308
1309 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1310
1311 return pCur;
1312}
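/*
 * For reference, a minimal sketch of what the SUPDRV_SESSION_HASH macro used
 * above computes (assumption: the real definition lives in SUPDrvInternal.h
 * and may differ in detail):
 *
 *   #define SUPDRV_SESSION_HASH(a_Process) \
 *       ( (unsigned)(a_Process) % RT_ELEMENTS(((PSUPDRVDEVEXT)0)->apSessionHashTab) )
 */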
1313
1314
1315/**
1316 * Retain a session to make sure it doesn't go away while it is in use.
1317 *
1318 * @returns New reference count on success, UINT32_MAX on failure.
1319 * @param pSession Session data.
1320 */
1321uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1322{
1323 uint32_t cRefs;
1324 AssertPtrReturn(pSession, UINT32_MAX);
1325 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1326
1327 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1328 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1329 return cRefs;
1330}
1331
1332
1333/**
1334 * Releases a given session.
1335 *
1336 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1337 * @param pSession Session data.
1338 */
1339uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1340{
1341 uint32_t cRefs;
1342 AssertPtrReturn(pSession, UINT32_MAX);
1343 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1344
1345 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1346 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1347 if (cRefs == 0)
1348 supdrvDestroySession(pSession->pDevExt, pSession);
1349 return cRefs;
1350}
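/*
 * Illustrative sketch (example only) of the lookup/release pattern the two
 * functions above support; note that supdrvSessionHashTabLookup already
 * retains the session on behalf of the caller:
 */
#if 0
    PSUPDRVSESSION pSession = supdrvSessionHashTabLookup(pDevExt, RTProcSelf(), RTR0ProcHandleSelf(), NULL);
    if (pSession)
    {
        /* ... use pSession; it cannot be destroyed while we hold the reference ... */
        supdrvSessionRelease(pSession); /* destroys the session if this was the last reference */
    }
#endif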
1351
1352
1353/**
1354 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1355 *
1356 * @returns IPRT status code, see SUPR0ObjAddRef.
1357 * @param hHandleTable The handle table handle. Ignored.
1358 * @param pvObj The object pointer.
1359 * @param pvCtx Context, the handle type. Ignored.
1360 * @param pvUser Session pointer.
1361 */
1362static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1363{
1364 NOREF(pvCtx);
1365 NOREF(hHandleTable);
1366 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1367}
1368
1369
1370/**
1371 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1372 *
1373 * @param hHandleTable The handle table handle. Ignored.
1374 * @param h The handle value. Ignored.
1375 * @param pvObj The object pointer.
1376 * @param pvCtx Context, the handle type. Ignored.
1377 * @param pvUser Session pointer.
1378 */
1379static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1380{
1381 NOREF(pvCtx);
1382 NOREF(h);
1383 NOREF(hHandleTable);
1384 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1385}
1386
1387
1388/**
1389 * Fast path I/O Control worker.
1390 *
1391 * @returns VBox status code that should be passed down to ring-3 unchanged.
1392 * @param uIOCtl Function number.
1393 * @param idCpu VMCPU id.
1394 * @param pDevExt Device extension.
1395 * @param pSession Session data.
1396 */
1397int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1398{
1399 /*
1400 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1401 */
1402 if (RT_LIKELY( RT_VALID_PTR(pSession)
1403 && pSession->pVM
1404 && pDevExt->pfnVMMR0EntryFast))
1405 {
1406 switch (uIOCtl)
1407 {
1408 case SUP_IOCTL_FAST_DO_RAW_RUN:
1409 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1410 break;
1411 case SUP_IOCTL_FAST_DO_HM_RUN:
1412 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1413 break;
1414 case SUP_IOCTL_FAST_DO_NOP:
1415 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1416 break;
1417 default:
1418 return VERR_INTERNAL_ERROR;
1419 }
1420 return VINF_SUCCESS;
1421 }
1422 return VERR_INTERNAL_ERROR;
1423}
1424
1425
1426/**
1427 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
1428 * We would use strpbrk here if this function would be contained in the RedHat kABI white
1429 * list, see http://www.kerneldrivers.org/RHEL5.
1430 *
1431 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
1432 * @param pszStr String to check
1433 * @param pszChars Character set
1434 */
1435static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
1436{
1437 int chCur;
1438 while ((chCur = *pszStr++) != '\0')
1439 {
1440 int ch;
1441 const char *psz = pszChars;
1442 while ((ch = *psz++) != '\0')
1443 if (ch == chCur)
1444 return 1;
1445
1446 }
1447 return 0;
1448}
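/*
 * Illustrative sketch (example only): how a request handler can use the
 * helper above to reject suspicious characters in a user supplied name;
 * the character set shown is a hypothetical example:
 */
#if 0
    if (supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"))
        return pReqHdr->rc = VERR_INVALID_PARAMETER;
#endif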
1449
1450
1451
1452/**
1453 * I/O Control inner worker (tracing reasons).
1454 *
1455 * @returns IPRT status code.
1456 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1457 *
1458 * @param uIOCtl Function number.
1459 * @param pDevExt Device extension.
1460 * @param pSession Session data.
1461 * @param pReqHdr The request header.
1462 */
1463static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1464{
1465 /*
1466 * Validation macros
1467 */
1468#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1469 do { \
1470 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1471 { \
1472 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1473 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1474 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1475 } \
1476 } while (0)
1477
1478#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1479
1480#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1481 do { \
1482 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1483 { \
1484 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1485 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1486 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1487 } \
1488 } while (0)
1489
1490#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1491 do { \
1492 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1493 { \
1494 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1495 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1496 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1497 } \
1498 } while (0)
1499
1500#define REQ_CHECK_EXPR(Name, expr) \
1501 do { \
1502 if (RT_UNLIKELY(!(expr))) \
1503 { \
1504 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1505 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1506 } \
1507 } while (0)
1508
1509#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1510 do { \
1511 if (RT_UNLIKELY(!(expr))) \
1512 { \
1513 OSDBGPRINT( fmt ); \
1514 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1515 } \
1516 } while (0)
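
/*
 * Illustration (disabled code): REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK)
 * expands to roughly the following, with the _SIZE_IN/_SIZE_OUT constants
 * coming from the corresponding ioctl definitions.
 */
#if 0
    if (RT_UNLIKELY(   pReqHdr->cbIn  != SUP_IOCTL_PAGE_UNLOCK_SIZE_IN
                    || pReqHdr->cbOut != SUP_IOCTL_PAGE_UNLOCK_SIZE_OUT))
    {
        OSDBGPRINT(("SUP_IOCTL_PAGE_UNLOCK: Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n",
                    (long)pReqHdr->cbIn, (long)SUP_IOCTL_PAGE_UNLOCK_SIZE_IN,
                    (long)pReqHdr->cbOut, (long)SUP_IOCTL_PAGE_UNLOCK_SIZE_OUT));
        return pReqHdr->rc = VERR_INVALID_PARAMETER;
    }
#endif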
1517
1518 /*
1519 * The switch.
1520 */
1521 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1522 {
1523 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1524 {
1525 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1526 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1527 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1528 {
1529 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1530 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1531 return 0;
1532 }
1533
1534#if 0
1535 /*
1536 * Call out to the OS specific code and let it do permission checks on the
1537 * client process.
1538 */
1539 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1540 {
1541 pReq->u.Out.u32Cookie = 0xffffffff;
1542 pReq->u.Out.u32SessionCookie = 0xffffffff;
1543 pReq->u.Out.u32SessionVersion = 0xffffffff;
1544 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1545 pReq->u.Out.pSession = NULL;
1546 pReq->u.Out.cFunctions = 0;
1547 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1548 return 0;
1549 }
1550#endif
1551
1552 /*
1553 * Match the version.
1554 * The current logic is very simple, match the major interface version.
1555 */
1556 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1557 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1558 {
1559 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1560 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1561 pReq->u.Out.u32Cookie = 0xffffffff;
1562 pReq->u.Out.u32SessionCookie = 0xffffffff;
1563 pReq->u.Out.u32SessionVersion = 0xffffffff;
1564 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1565 pReq->u.Out.pSession = NULL;
1566 pReq->u.Out.cFunctions = 0;
1567 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1568 return 0;
1569 }
1570
1571 /*
1572 * Fill in return data and be gone.
1573 * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
1574 * u32SessionVersion <= u32ReqVersion!
1575 */
1576 /** @todo Somehow validate the client and negotiate a secure cookie... */
1577 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1578 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1579 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1580 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1581 pReq->u.Out.pSession = pSession;
1582 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1583 pReq->Hdr.rc = VINF_SUCCESS;
1584 return 0;
1585 }
1586
1587 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1588 {
1589 /* validate */
1590 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1591 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1592
1593 /* execute */
1594 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1595 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1596 pReq->Hdr.rc = VINF_SUCCESS;
1597 return 0;
1598 }
1599
1600 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1601 {
1602 /* validate */
1603 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1604 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1605 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1606 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1607 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1608
1609 /* execute */
1610 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1611 if (RT_FAILURE(pReq->Hdr.rc))
1612 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1613 return 0;
1614 }
1615
1616 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1617 {
1618 /* validate */
1619 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1620 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1621
1622 /* execute */
1623 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1624 return 0;
1625 }
1626
1627 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1628 {
1629 /* validate */
1630 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1631 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1632
1633 /* execute */
1634 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1635 if (RT_FAILURE(pReq->Hdr.rc))
1636 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1637 return 0;
1638 }
1639
1640 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1641 {
1642 /* validate */
1643 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1644 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1645
1646 /* execute */
1647 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1648 return 0;
1649 }
1650
1651 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1652 {
1653 /* validate */
1654 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1655 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1656 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1657 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1658 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1660 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1661 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1662 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1663 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1664 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1665
1666 /* execute */
1667 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1668 return 0;
1669 }
1670
1671 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1672 {
1673 /* validate */
1674 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1675 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->Hdr.cbIn >= sizeof(*pReq));
1676 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1677 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1678 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1679 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1680 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1681 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1682 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1683 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1684 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1685 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1686 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1687 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1688 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1689
1690 if (pReq->u.In.cSymbols)
1691 {
1692 uint32_t i;
1693 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1694 for (i = 0; i < pReq->u.In.cSymbols; i++)
1695 {
1696 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1697 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1698 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1699 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1700 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1701 pReq->u.In.cbStrTab - paSyms[i].offName),
1702 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1703 }
1704 }
1705
1706 /* execute */
1707 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1708 return 0;
1709 }
1710
1711 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1712 {
1713 /* validate */
1714 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1715 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1716
1717 /* execute */
1718 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1719 return 0;
1720 }
1721
1722 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOCK_DOWN):
1723 {
1724 /* validate */
1725 REQ_CHECK_SIZES(SUP_IOCTL_LDR_LOCK_DOWN);
1726
1727 /* execute */
1728 pReqHdr->rc = supdrvIOCtl_LdrLockDown(pDevExt);
1729 return 0;
1730 }
1731
1732 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1733 {
1734 /* validate */
1735 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1736 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1737 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1738
1739 /* execute */
1740 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1741 return 0;
1742 }
1743
1744 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1745 {
1746 /* validate */
1747 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1748 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1749 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1750
1751 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1752 {
1753 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1754
1755 /* execute */
1756 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1757 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1758 else
1759 pReq->Hdr.rc = VERR_WRONG_ORDER;
1760 }
1761 else
1762 {
1763 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1764 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1765 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1766 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1767 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1768
1769 /* execute */
1770 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1771 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1772 else
1773 pReq->Hdr.rc = VERR_WRONG_ORDER;
1774 }
1775
1776 if ( RT_FAILURE(pReq->Hdr.rc)
1777 && pReq->Hdr.rc != VERR_INTERRUPTED
1778 && pReq->Hdr.rc != VERR_TIMEOUT)
1779 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1780 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1781 else
1782 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1783 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1784 return 0;
1785 }
1786
1787 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1788 {
1789 /* validate */
1790 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1791 PSUPVMMR0REQHDR pVMMReq;
1792 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1793 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1794
1795 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1796 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1797 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1798 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1799 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1800
1801 /* execute */
1802 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1803 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1804 else
1805 pReq->Hdr.rc = VERR_WRONG_ORDER;
1806
1807 if ( RT_FAILURE(pReq->Hdr.rc)
1808 && pReq->Hdr.rc != VERR_INTERRUPTED
1809 && pReq->Hdr.rc != VERR_TIMEOUT)
1810 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1811 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1812 else
1813 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1814 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1815 return 0;
1816 }
1817
1818 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1819 {
1820 /* validate */
1821 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1822 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1823
1824 /* execute */
1825 pReq->Hdr.rc = VINF_SUCCESS;
1826 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1827 return 0;
1828 }
1829
1830 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1831 {
1832 /* validate */
1833 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1834 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1835 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1836
1837 /* execute */
1838 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1839 if (RT_FAILURE(pReq->Hdr.rc))
1840 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1841 return 0;
1842 }
1843
1844 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1845 {
1846 /* validate */
1847 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1848 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1849
1850 /* execute */
1851 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1852 return 0;
1853 }
1854
1855 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1856 {
1857 /* validate */
1858 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1859 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1860
1861 /* execute */
1862 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1863 if (RT_SUCCESS(pReq->Hdr.rc))
1864 pReq->u.Out.pGipR0 = pDevExt->pGip;
1865 return 0;
1866 }
1867
1868 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1869 {
1870 /* validate */
1871 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1872 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1873
1874 /* execute */
1875 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1876 return 0;
1877 }
1878
1879 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1880 {
1881 /* validate */
1882 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1883 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1884 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1885 || ( VALID_PTR(pReq->u.In.pVMR0)
1886 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1887 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1888 /* execute */
1889 pSession->pVM = pReq->u.In.pVMR0;
1890 pReq->Hdr.rc = VINF_SUCCESS;
1891 return 0;
1892 }
1893
1894 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1895 {
1896 /* validate */
1897 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1898 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1899 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1900 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1901 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1902 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1903 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1904 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1905 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1906
1907 /* execute */
1908 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1909 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1910 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1911 &pReq->u.Out.aPages[0]);
1912 if (RT_FAILURE(pReq->Hdr.rc))
1913 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1914 return 0;
1915 }
1916
1917 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1918 {
1919 /* validate */
1920 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1921 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1922 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1923 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1924 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1925 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1926
1927 /* execute */
1928 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1929 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1930 if (RT_FAILURE(pReq->Hdr.rc))
1931 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1932 return 0;
1933 }
1934
1935 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1936 {
1937 /* validate */
1938 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1939 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1940 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1941 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1942 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1943 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1944 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1945
1946 /* execute */
1947 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1948 return 0;
1949 }
1950
1951 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1952 {
1953 /* validate */
1954 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1955 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1956
1957 /* execute */
1958 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1959 return 0;
1960 }
1961
1962 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1963 {
1964 /* validate */
1965 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1966 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1967 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1968
1969 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1970 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1971 else
1972 {
1973 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1974 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1975 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1976 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1977 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1978 }
1979 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1980
1981 /* execute */
1982 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1983 return 0;
1984 }
1985
1986 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1987 {
1988 /* validate */
1989 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1990 size_t cbStrTab;
1991 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1992 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1993 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1994 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1995 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1996 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1997 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1998 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1999 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2000 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2001 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2002
2003 /* execute */
2004 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2005 return 0;
2006 }
2007
2008 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2009 {
2010 /* validate */
2011 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2012 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2013 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2014
2015 /* execute */
2016 switch (pReq->u.In.uType)
2017 {
2018 case SUP_SEM_TYPE_EVENT:
2019 {
2020 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2021 switch (pReq->u.In.uOp)
2022 {
2023 case SUPSEMOP2_WAIT_MS_REL:
2024 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2025 break;
2026 case SUPSEMOP2_WAIT_NS_ABS:
2027 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2028 break;
2029 case SUPSEMOP2_WAIT_NS_REL:
2030 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2031 break;
2032 case SUPSEMOP2_SIGNAL:
2033 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2034 break;
2035 case SUPSEMOP2_CLOSE:
2036 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2037 break;
2038 case SUPSEMOP2_RESET:
2039 default:
2040 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2041 break;
2042 }
2043 break;
2044 }
2045
2046 case SUP_SEM_TYPE_EVENT_MULTI:
2047 {
2048 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2049 switch (pReq->u.In.uOp)
2050 {
2051 case SUPSEMOP2_WAIT_MS_REL:
2052 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2053 break;
2054 case SUPSEMOP2_WAIT_NS_ABS:
2055 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2056 break;
2057 case SUPSEMOP2_WAIT_NS_REL:
2058 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2059 break;
2060 case SUPSEMOP2_SIGNAL:
2061 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2062 break;
2063 case SUPSEMOP2_CLOSE:
2064 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2065 break;
2066 case SUPSEMOP2_RESET:
2067 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2068 break;
2069 default:
2070 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2071 break;
2072 }
2073 break;
2074 }
2075
2076 default:
2077 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2078 break;
2079 }
2080 return 0;
2081 }
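
 /*
  * Ring-0 usage sketch for the single-release event semaphores dispatched
  * above (illustrative fragment, error handling trimmed):
  */
#if 0
    SUPSEMEVENT hEvent = NIL_SUPSEMEVENT;
    int rc = SUPSemEventCreate(pSession, &hEvent);
    if (RT_SUCCESS(rc))
    {
        rc = SUPSemEventWaitNoResume(pSession, hEvent, 10 /*cMillies*/); /* VERR_TIMEOUT unless signalled */
        /* ... SUPSemEventSignal(pSession, hEvent) from another thread ... */
        SUPSemEventClose(pSession, hEvent);
    }
#endif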
2082
2083 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2084 {
2085 /* validate */
2086 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2087 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2088 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2089
2090 /* execute */
2091 switch (pReq->u.In.uType)
2092 {
2093 case SUP_SEM_TYPE_EVENT:
2094 {
2095 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2096 switch (pReq->u.In.uOp)
2097 {
2098 case SUPSEMOP3_CREATE:
2099 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2100 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2101 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2102 break;
2103 case SUPSEMOP3_GET_RESOLUTION:
2104 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2105 pReq->Hdr.rc = VINF_SUCCESS;
2106 pReq->Hdr.cbOut = sizeof(*pReq);
2107 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2108 break;
2109 default:
2110 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2111 break;
2112 }
2113 break;
2114 }
2115
2116 case SUP_SEM_TYPE_EVENT_MULTI:
2117 {
2118 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2119 switch (pReq->u.In.uOp)
2120 {
2121 case SUPSEMOP3_CREATE:
2122 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2123 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2124 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2125 break;
2126 case SUPSEMOP3_GET_RESOLUTION:
2127 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2128 pReq->Hdr.rc = VINF_SUCCESS;
2129 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2130 break;
2131 default:
2132 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2133 break;
2134 }
2135 break;
2136 }
2137
2138 default:
2139 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2140 break;
2141 }
2142 return 0;
2143 }
2144
2145 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2146 {
2147 /* validate */
2148 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2149 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2150
2151 /* execute */
2152 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2153 if (RT_FAILURE(pReq->Hdr.rc))
2154 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2155 return 0;
2156 }
2157
2158 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2159 {
2160 /* validate */
2161 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2162 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2163
2164 /* execute */
2165 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2166 return 0;
2167 }
2168
2169 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2170 {
2171 /* validate */
2172 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2173
2174 /* execute */
2175 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2176 return 0;
2177 }
2178
2179 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2180 {
2181 /* validate */
2182 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2183 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2184
2185 /* execute */
2186 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2187 return 0;
2188 }
2189
2190 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2191 {
2192 /* validate */
2193 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2194 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2195 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2196 return VERR_INVALID_PARAMETER;
2197
2198 /* execute */
2199 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2200 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2201 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2202 pReq->u.In.szName, pReq->u.In.fFlags);
2203 return 0;
2204 }
2205
2206 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2207 {
2208 /* validate */
2209 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2210 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2211
2212 /* execute */
2213 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2214 return 0;
2215 }
2216
2217 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2218 {
2219 /* validate */
2220 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2221 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2222
2223 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2224 pReqHdr->rc = VINF_SUCCESS;
2225 return 0;
2226 }
2227
2228 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2229 {
2230 /* validate */
2231 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2232 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2233 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2234 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2235
2236 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2237 return 0;
2238 }
2239
2240 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2241 {
2242 /* validate */
2243 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2244
2245 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2246 return 0;
2247 }
2248
2249 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2250 {
2251 /* validate */
2252 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2253 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2254
2255 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pSession, pReq);
2256 return 0;
2257 }
2258
2259 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2260 {
2261 /* validate */
2262 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2263 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2264
2265 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pSession, pReq);
2266 return 0;
2267 }
2268
2269 default:
2270 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2271 break;
2272 }
2273 return VERR_GENERAL_FAILURE;
2274}
2275
2276
2277/**
2278 * I/O Control inner worker for the restricted operations.
2279 *
2280 * @returns IPRT status code.
2281 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2282 *
2283 * @param uIOCtl Function number.
2284 * @param pDevExt Device extension.
2285 * @param pSession Session data.
2286 * @param pReqHdr The request header.
2287 */
2288static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2289{
2290 /*
2291 * The switch.
2292 */
2293 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2294 {
2295 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2296 {
2297 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2298 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2299 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2300 {
2301 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2302 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2303 return 0;
2304 }
2305
2306 /*
2307 * Match the version.
2308 * The current logic is very simple, match the major interface version.
2309 */
2310 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2311 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2312 {
2313 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2314 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2315 pReq->u.Out.u32Cookie = 0xffffffff;
2316 pReq->u.Out.u32SessionCookie = 0xffffffff;
2317 pReq->u.Out.u32SessionVersion = 0xffffffff;
2318 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2319 pReq->u.Out.pSession = NULL;
2320 pReq->u.Out.cFunctions = 0;
2321 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2322 return 0;
2323 }
2324
2325 /*
2326 * Fill in return data and be gone.
2327 * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
2328 * u32SessionVersion <= u32ReqVersion!
2329 */
2330 /** @todo Somehow validate the client and negotiate a secure cookie... */
2331 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2332 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2333 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2334 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2335 pReq->u.Out.pSession = pSession;
2336 pReq->u.Out.cFunctions = 0;
2337 pReq->Hdr.rc = VINF_SUCCESS;
2338 return 0;
2339 }
2340
2341 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2342 {
2343 /* validate */
2344 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2345 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2346
2347 /* execute */
2348 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2349 if (RT_FAILURE(pReq->Hdr.rc))
2350 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2351 return 0;
2352 }
2353
2354 default:
2355 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2356 break;
2357 }
2358 return VERR_GENERAL_FAILURE;
2359}
2360
2361
2362/**
2363 * I/O Control worker.
2364 *
2365 * @returns IPRT status code.
2366 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2367 *
2368 * @param uIOCtl Function number.
2369 * @param pDevExt Device extension.
2370 * @param pSession Session data.
2371 * @param pReqHdr The request header.
2372 */
2373int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2374{
2375 int rc;
2376 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2377
2378 /*
2379 * Validate the request.
2380 */
2381 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2382 {
2383 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2384 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2385 return VERR_INVALID_PARAMETER;
2386 }
2387 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2388 || pReqHdr->cbIn < sizeof(*pReqHdr)
2389 || pReqHdr->cbIn > cbReq
2390 || pReqHdr->cbOut < sizeof(*pReqHdr)
2391 || pReqHdr->cbOut > cbReq))
2392 {
2393 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2394 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2395 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2396 return VERR_INVALID_PARAMETER;
2397 }
2398 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2399 {
2400 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2401 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2402 return VERR_INVALID_PARAMETER;
2403 }
2404 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2405 {
2406 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2407 {
2408 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2409 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2410 return VERR_INVALID_PARAMETER;
2411 }
2412 }
2413 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2414 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2415 {
2416 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2417 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2418 return VERR_INVALID_PARAMETER;
2419 }
2420
2421 /*
2422 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2423 */
2424 if (pSession->fUnrestricted)
2425 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2426 else
2427 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2428
2429 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2430 return rc;
2431}
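
/*
 * The header validation above amounts to the following invariants,
 * distilled into a stand-alone predicate for illustration only:
 */
#if 0
static bool exampleIsValidReqHdr(SUPREQHDR const *pReqHdr, size_t cbReq)
{
    return cbReq >= sizeof(*pReqHdr)
        && (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) == SUPREQHDR_FLAGS_MAGIC
        && pReqHdr->cbIn  >= sizeof(*pReqHdr)
        && pReqHdr->cbIn  <= cbReq
        && pReqHdr->cbOut >= sizeof(*pReqHdr)
        && pReqHdr->cbOut <= cbReq;
}
#endif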
2432
2433
2434/**
2435 * Inter-Driver Communication (IDC) worker.
2436 *
2437 * @returns VBox status code.
2438 * @retval VINF_SUCCESS on success.
2439 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2440 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2441 *
2442 * @param uReq The request (function) code.
2443 * @param pDevExt Device extension.
2444 * @param pSession Session data.
2445 * @param pReqHdr The request header.
2446 */
2447int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2448{
2449 /*
2450 * The OS specific code has already validated the pSession
2451 * pointer and checked that the request size is greater than or
2452 * equal to the size of the header.
2453 *
2454 * So, just check that pSession is a kernel context session.
2455 */
2456 if (RT_UNLIKELY( pSession
2457 && pSession->R0Process != NIL_RTR0PROCESS))
2458 return VERR_INVALID_PARAMETER;
2459
2460/*
2461 * Validation macro.
2462 */
2463#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2464 do { \
2465 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2466 { \
2467 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2468 (long)pReqHdr->cb, (long)(cbExpect))); \
2469 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2470 } \
2471 } while (0)
2472
2473 switch (uReq)
2474 {
2475 case SUPDRV_IDC_REQ_CONNECT:
2476 {
2477 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2478 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2479
2480 /*
2481 * Validate the cookie and other input.
2482 */
2483 if (pReq->Hdr.pSession != NULL)
2484 {
2485 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2486 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2487 }
2488 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2489 {
2490 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2491 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2492 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2493 }
2494 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2495 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2496 {
2497 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2498 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2499 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2500 }
2501 if (pSession != NULL)
2502 {
2503 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2504 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2505 }
2506
2507 /*
2508 * Match the version.
2509 * The current logic is very simple, match the major interface version.
2510 */
2511 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2512 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2513 {
2514 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2515 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2516 pReq->u.Out.pSession = NULL;
2517 pReq->u.Out.uSessionVersion = 0xffffffff;
2518 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2519 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2520 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2521 return VINF_SUCCESS;
2522 }
2523
2524 pReq->u.Out.pSession = NULL;
2525 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2526 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2527 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2528
2529 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2530 if (RT_FAILURE(pReq->Hdr.rc))
2531 {
2532 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2533 return VINF_SUCCESS;
2534 }
2535
2536 pReq->u.Out.pSession = pSession;
2537 pReq->Hdr.pSession = pSession;
2538
2539 return VINF_SUCCESS;
2540 }
2541
2542 case SUPDRV_IDC_REQ_DISCONNECT:
2543 {
2544 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2545
2546 supdrvSessionRelease(pSession);
2547 return pReqHdr->rc = VINF_SUCCESS;
2548 }
2549
2550 case SUPDRV_IDC_REQ_GET_SYMBOL:
2551 {
2552 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2553 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2554
2555 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2556 return VINF_SUCCESS;
2557 }
2558
2559 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2560 {
2561 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2562 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2563
2564 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2565 return VINF_SUCCESS;
2566 }
2567
2568 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2569 {
2570 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2571 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2572
2573 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2574 return VINF_SUCCESS;
2575 }
2576
2577 default:
2578 Log(("Unknown IDC %#lx\n", (long)uReq));
2579 break;
2580 }
2581
2582#undef REQ_CHECK_IDC_SIZE
2583 return VERR_NOT_SUPPORTED;
2584}
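
/*
 * Illustrative sketch only: what a kernel-mode IDC client fills in for
 * SUPDRV_IDC_REQ_CONNECT.  How the request reaches supdrvIDC() is OS
 * specific and not shown here.
 */
#if 0
    SUPDRVIDCREQCONNECT Req;
    RT_ZERO(Req);
    Req.Hdr.cb              = sizeof(Req);
    Req.Hdr.rc              = VERR_INTERNAL_ERROR;
    Req.Hdr.pSession        = NULL;                             /* must be NULL on connect */
    Req.u.In.u32MagicCookie = SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE;
    Req.u.In.uMinVersion    = SUPDRV_IDC_VERSION & UINT32_C(0xffff0000);
    Req.u.In.uReqVersion    = SUPDRV_IDC_VERSION;
    /* ... submit via the OS specific IDC entry point, then check Req.Hdr.rc
           and keep Req.u.Out.pSession around for the disconnect call ... */
#endif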
2585
2586
2587/**
2588 * Register an object for reference counting.
2589 * The object is registered with one reference in the specified session.
2590 *
2591 * @returns Unique identifier on success (pointer).
2592 * All future references must use this identifier.
2593 * @returns NULL on failure.
 * @param pSession The session to associate the initial reference with.
 * @param enmType The object type.
2594 * @param pfnDestructor The destructor function which will be called when the reference count reaches 0.
2595 * @param pvUser1 The first user argument.
2596 * @param pvUser2 The second user argument.
2597 */
2598SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2599{
2600 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2601 PSUPDRVOBJ pObj;
2602 PSUPDRVUSAGE pUsage;
2603
2604 /*
2605 * Validate the input.
2606 */
2607 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2608 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2609 AssertPtrReturn(pfnDestructor, NULL);
2610
2611 /*
2612 * Allocate and initialize the object.
2613 */
2614 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2615 if (!pObj)
2616 return NULL;
2617 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2618 pObj->enmType = enmType;
2619 pObj->pNext = NULL;
2620 pObj->cUsage = 1;
2621 pObj->pfnDestructor = pfnDestructor;
2622 pObj->pvUser1 = pvUser1;
2623 pObj->pvUser2 = pvUser2;
2624 pObj->CreatorUid = pSession->Uid;
2625 pObj->CreatorGid = pSession->Gid;
2626 pObj->CreatorProcess= pSession->Process;
2627 supdrvOSObjInitCreator(pObj, pSession);
2628
2629 /*
2630 * Allocate the usage record.
2631 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2632 */
2633 RTSpinlockAcquire(pDevExt->Spinlock);
2634
2635 pUsage = pDevExt->pUsageFree;
2636 if (pUsage)
2637 pDevExt->pUsageFree = pUsage->pNext;
2638 else
2639 {
2640 RTSpinlockRelease(pDevExt->Spinlock);
2641 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2642 if (!pUsage)
2643 {
2644 RTMemFree(pObj);
2645 return NULL;
2646 }
2647 RTSpinlockAcquire(pDevExt->Spinlock);
2648 }
2649
2650 /*
2651 * Insert the object and create the session usage record.
2652 */
2653 /* The object. */
2654 pObj->pNext = pDevExt->pObjs;
2655 pDevExt->pObjs = pObj;
2656
2657 /* The session record. */
2658 pUsage->cUsage = 1;
2659 pUsage->pObj = pObj;
2660 pUsage->pNext = pSession->pUsage;
2661 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2662 pSession->pUsage = pUsage;
2663
2664 RTSpinlockRelease(pDevExt->Spinlock);
2665
2666 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2667 return pObj;
2668}
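
/*
 * Usage sketch (illustrative; the destructor, payload and object type are
 * assumptions made for the example, real callers live in VMMR0 and friends):
 */
#if 0
static DECLCALLBACK(void) exampleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
{
    NOREF(pvObj); NOREF(pvUser2);
    RTMemFree(pvUser1);                     /* tear down whatever the object wrapped */
}

static void exampleRegister(PSUPDRVSESSION pSession, void *pvPayload)
{
    void *pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, exampleObjDestructor, pvPayload, NULL);
    if (pvObj)
    {
        /* Hand pvObj to other parties; each takes its own reference via
           SUPR0ObjAddRef() and drops it again with SUPR0ObjRelease(). */
    }
}
#endif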
2669
2670
2671/**
2672 * Increment the reference counter for the object associating the reference
2673 * with the specified session.
2674 *
2675 * @returns IPRT status code.
2676 * @param pvObj The identifier returned by SUPR0ObjRegister().
2677 * @param pSession The session which is referencing the object.
2678 *
2679 * @remarks The caller should not own any spinlocks and must carefully protect
2680 * itself against potential race with the destructor so freed memory
2681 * isn't accessed here.
2682 */
2683SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2684{
2685 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2686}
2687
2688
2689/**
2690 * Increment the reference counter for the object associating the reference
2691 * with the specified session.
2692 *
2693 * @returns IPRT status code.
2694 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2695 * couldn't be allocated. (If you see this you're not doing the right
2696 * thing and it won't ever work reliably.)
2697 *
2698 * @param pvObj The identifier returned by SUPR0ObjRegister().
2699 * @param pSession The session which is referencing the object.
2700 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2701 * first reference to an object in a session with this
2702 * argument set.
2703 *
2704 * @remarks The caller should not own any spinlocks and must carefully protect
2705 * itself against potential race with the destructor so freed memory
2706 * isn't accessed here.
2707 */
2708SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2709{
2710 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2711 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2712 int rc = VINF_SUCCESS;
2713 PSUPDRVUSAGE pUsagePre;
2714 PSUPDRVUSAGE pUsage;
2715
2716 /*
2717 * Validate the input.
2718 * Be ready for the destruction race (someone might be stuck in the
2719 * destructor waiting on a lock we own).
2720 */
2721 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2722 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2723 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2724 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2725 VERR_INVALID_PARAMETER);
2726
2727 RTSpinlockAcquire(pDevExt->Spinlock);
2728
2729 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2730 {
2731 RTSpinlockRelease(pDevExt->Spinlock);
2732
2733 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2734 return VERR_WRONG_ORDER;
2735 }
2736
2737 /*
2738 * Preallocate the usage record if we can.
2739 */
2740 pUsagePre = pDevExt->pUsageFree;
2741 if (pUsagePre)
2742 pDevExt->pUsageFree = pUsagePre->pNext;
2743 else if (!fNoBlocking)
2744 {
2745 RTSpinlockRelease(pDevExt->Spinlock);
2746 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2747 if (!pUsagePre)
2748 return VERR_NO_MEMORY;
2749
2750 RTSpinlockAcquire(pDevExt->Spinlock);
2751 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2752 {
2753 RTSpinlockRelease(pDevExt->Spinlock);
2754
2755 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2756 return VERR_WRONG_ORDER;
2757 }
2758 }
2759
2760 /*
2761 * Reference the object.
2762 */
2763 pObj->cUsage++;
2764
2765 /*
2766 * Look for the session record.
2767 */
2768 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2769 {
2770 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2771 if (pUsage->pObj == pObj)
2772 break;
2773 }
2774 if (pUsage)
2775 pUsage->cUsage++;
2776 else if (pUsagePre)
2777 {
2778 /* create a new session record. */
2779 pUsagePre->cUsage = 1;
2780 pUsagePre->pObj = pObj;
2781 pUsagePre->pNext = pSession->pUsage;
2782 pSession->pUsage = pUsagePre;
2783 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2784
2785 pUsagePre = NULL;
2786 }
2787 else
2788 {
2789 pObj->cUsage--;
2790 rc = VERR_TRY_AGAIN;
2791 }
2792
2793 /*
2794 * Put any unused usage record into the free list.
2795 */
2796 if (pUsagePre)
2797 {
2798 pUsagePre->pNext = pDevExt->pUsageFree;
2799 pDevExt->pUsageFree = pUsagePre;
2800 }
2801
2802 RTSpinlockRelease(pDevExt->Spinlock);
2803
2804 return rc;
2805}
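
/*
 * The usage-record handling above is the classic preallocate-outside-the-lock
 * pattern.  The same idea in isolation (fragment reusing the names above;
 * note how state checked before the release must be revalidated):
 */
#if 0
    pUsagePre = pDevExt->pUsageFree;            /* try the free list first (spinlock is held) */
    if (!pUsagePre && !fNoBlocking)
    {
        RTSpinlockRelease(pDevExt->Spinlock);   /* never allocate while holding a spinlock */
        pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
        RTSpinlockAcquire(pDevExt->Spinlock);
        /* recheck pObj->u32Magic here: the object may have died while the lock was dropped */
    }
#endif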
2806
2807
2808/**
2809 * Decrement / destroy a reference counter record for an object.
2810 *
2811 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2812 *
2813 * @returns IPRT status code.
2814 * @retval VINF_SUCCESS if not destroyed.
2815 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2816 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2817 * strict builds.
2818 *
2819 * @param pvObj The identifier returned by SUPR0ObjRegister().
2820 * @param pSession The session which is referencing the object.
2821 */
2822SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2823{
2824 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2825 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2826 int rc = VERR_INVALID_PARAMETER;
2827 PSUPDRVUSAGE pUsage;
2828 PSUPDRVUSAGE pUsagePrev;
2829
2830 /*
2831 * Validate the input.
2832 */
2833 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2834 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2835 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2836 VERR_INVALID_PARAMETER);
2837
2838 /*
2839 * Acquire the spinlock and look for the usage record.
2840 */
2841 RTSpinlockAcquire(pDevExt->Spinlock);
2842
2843 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2844 pUsage;
2845 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2846 {
2847 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2848 if (pUsage->pObj == pObj)
2849 {
2850 rc = VINF_SUCCESS;
2851 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2852 if (pUsage->cUsage > 1)
2853 {
2854 pObj->cUsage--;
2855 pUsage->cUsage--;
2856 }
2857 else
2858 {
2859 /*
2860 * Free the session record.
2861 */
2862 if (pUsagePrev)
2863 pUsagePrev->pNext = pUsage->pNext;
2864 else
2865 pSession->pUsage = pUsage->pNext;
2866 pUsage->pNext = pDevExt->pUsageFree;
2867 pDevExt->pUsageFree = pUsage;
2868
2869 /* What about the object? */
2870 if (pObj->cUsage > 1)
2871 pObj->cUsage--;
2872 else
2873 {
2874 /*
2875 * Object is to be destroyed, unlink it.
2876 */
2877 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2878 rc = VINF_OBJECT_DESTROYED;
2879 if (pDevExt->pObjs == pObj)
2880 pDevExt->pObjs = pObj->pNext;
2881 else
2882 {
2883 PSUPDRVOBJ pObjPrev;
2884 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2885 if (pObjPrev->pNext == pObj)
2886 {
2887 pObjPrev->pNext = pObj->pNext;
2888 break;
2889 }
2890 Assert(pObjPrev);
2891 }
2892 }
2893 }
2894 break;
2895 }
2896 }
2897
2898 RTSpinlockRelease(pDevExt->Spinlock);
2899
2900 /*
2901 * Call the destructor and free the object if required.
2902 */
2903 if (rc == VINF_OBJECT_DESTROYED)
2904 {
2905 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2906 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2907 if (pObj->pfnDestructor)
2908 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2909 RTMemFree(pObj);
2910 }
2911
2912 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2913 return rc;
2914}
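
/*
 * Lifecycle sketch tying the object APIs together (illustrative fragment;
 * pvObj is assumed to come from a successful SUPR0ObjRegister() call):
 */
#if 0
    int rc = SUPR0ObjAddRef(pvObj, pOtherSession);      /* a second session takes a reference */
    if (RT_SUCCESS(rc))
    {
        /* ... use the object ... */
        rc = SUPR0ObjRelease(pvObj, pOtherSession);     /* VINF_SUCCESS: object still alive */
    }
    rc = SUPR0ObjRelease(pvObj, pSession);              /* last reference gone: the destructor runs
                                                           and VINF_OBJECT_DESTROYED is returned */
#endif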
2915
2916
2917/**
2918 * Verifies that the current process can access the specified object.
2919 *
2920 * @returns The following IPRT status code:
2921 * @retval VINF_SUCCESS if access was granted.
2922 * @retval VERR_PERMISSION_DENIED if denied access.
2923 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2924 *
2925 * @param pvObj The identifier returned by SUPR0ObjRegister().
2926 * @param pSession The session which wishes to access the object.
2927 * @param pszObjName Object string name. This is optional and depends on the object type.
2928 *
2929 * @remark The caller is responsible for making sure the object isn't removed while
2930 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2931 */
2932SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2933{
2934 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2935 int rc;
2936
2937 /*
2938 * Validate the input.
2939 */
2940 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2941 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2942 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2943 VERR_INVALID_PARAMETER);
2944
2945 /*
2946 * Check access. (returns true if a decision has been made.)
2947 */
2948 rc = VERR_INTERNAL_ERROR;
2949 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2950 return rc;
2951
2952 /*
2953 * Default policy is to allow the user to access his own
2954 * stuff but nothing else.
2955 */
2956 if (pObj->CreatorUid == pSession->Uid)
2957 return VINF_SUCCESS;
2958 return VERR_PERMISSION_DENIED;
2959}
2960
2961
2962/**
2963 * Lock pages.
2964 *
2965 * @returns IPRT status code.
2966 * @param pSession Session to which the locked memory should be associated.
2967 * @param pvR3 Start of the memory range to lock.
2968 * This must be page aligned.
2969 * @param cPages Number of pages to lock.
2970 * @param paPages Where to put the physical addresses of locked memory.
2971 */
2972SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2973{
2974 int rc;
2975 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2976 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2977 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2978
2979 /*
2980 * Verify input.
2981 */
2982 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2983 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2984 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2985 || !pvR3)
2986 {
2987 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2988 return VERR_INVALID_PARAMETER;
2989 }
2990
2991 /*
2992 * Let IPRT do the job.
2993 */
2994 Mem.eType = MEMREF_TYPE_LOCKED;
2995 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2996 if (RT_SUCCESS(rc))
2997 {
2998 uint32_t iPage = cPages;
2999 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3000 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3001
3002 while (iPage-- > 0)
3003 {
3004 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3005 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3006 {
3007 AssertMsgFailed(("iPage=%d\n", iPage));
3008 rc = VERR_INTERNAL_ERROR;
3009 break;
3010 }
3011 }
3012 if (RT_SUCCESS(rc))
3013 rc = supdrvMemAdd(&Mem, pSession);
3014 if (RT_FAILURE(rc))
3015 {
3016 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3017 AssertRC(rc2);
3018 }
3019 }
3020
3021 return rc;
3022}
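
/*
 * Caller's-eye sketch (illustrative fragment): locking two page-aligned
 * ring-3 pages and picking up their physical addresses.
 */
#if 0
    RTHCPHYS aPhys[2];
    int rc = SUPR0LockMem(pSession, pvR3, RT_ELEMENTS(aPhys), &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* ... use aPhys[] for DMA, page table setup, etc. ... */
        SUPR0UnlockMem(pSession, pvR3);
    }
#endif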
3023
3024
3025/**
3026 * Unlocks the memory pointed to by pv.
3027 *
3028 * @returns IPRT status code.
3029 * @param pSession Session to which the memory was locked.
3030 * @param pvR3 Memory to unlock.
3031 */
3032SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3033{
3034 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3035 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3036 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3037}
3038
3039
3040/**
3041 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3042 * backing.
3043 *
3044 * @returns IPRT status code.
3045 * @param pSession Session data.
3046 * @param cPages Number of pages to allocate.
3047 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3048 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3049 * @param pHCPhys Where to put the physical address of allocated memory.
3050 */
3051SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3052{
3053 int rc;
3054 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3055 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3056
3057 /*
3058 * Validate input.
3059 */
3060 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3061 if (!ppvR3 || !ppvR0 || !pHCPhys)
3062 {
3063 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3064 pSession, ppvR0, ppvR3, pHCPhys));
3065 return VERR_INVALID_PARAMETER;
3066
3067 }
3068 if (cPages < 1 || cPages >= 256)
3069 {
3070 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3071 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3072 }
3073
3074 /*
3075 * Let IPRT do the job.
3076 */
3077 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3078 if (RT_SUCCESS(rc))
3079 {
3080 int rc2;
3081 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3082 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3083 if (RT_SUCCESS(rc))
3084 {
3085 Mem.eType = MEMREF_TYPE_CONT;
3086 rc = supdrvMemAdd(&Mem, pSession);
3087 if (!rc)
3088 {
3089 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3090 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3091 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3092 return VINF_SUCCESS;
3093 }
3094
3095 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3096 AssertRC(rc2);
3097 }
3098 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3099 AssertRC(rc2);
3100 }
3101
3102 return rc;
3103}
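
/*
 * Usage sketch (illustrative only, not part of the driver): allocating a small
 * physically contiguous buffer with both mappings and freeing it again; either
 * the ring-0 or the ring-3 address may be passed to SUPR0ContFree(). The my*
 * name is hypothetical.
 */
#if 0 /* example only */
static int myContAllocDemo(PSUPDRVSESSION pSession)
{
    RTR0PTR pvR0;
    RTR3PTR pvR3;
    RTHCPHYS HCPhys;
    int rc = SUPR0ContAlloc(pSession, 4 /* pages */, &pvR0, &pvR3, &HCPhys);
    if (RT_SUCCESS(rc))
    {
        /* HCPhys addresses the first page; the others follow contiguously. */
        rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif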
3104
3105
3106/**
3107 * Frees memory allocated using SUPR0ContAlloc().
3108 *
3109 * @returns IPRT status code.
3110 * @param pSession The session to which the memory was allocated.
3111 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3112 */
3113SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3114{
3115 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3116 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3117 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3118}
3119
3120
3121/**
3122 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3123 *
3124 * The memory isn't zeroed.
3125 *
3126 * @returns IPRT status code.
3127 * @param pSession Session data.
3128 * @param cPages Number of pages to allocate.
3129 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3130 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3131 * @param paPages Where to put the physical addresses of allocated memory.
3132 */
3133SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3134{
3135 unsigned iPage;
3136 int rc;
3137 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3138 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3139
3140 /*
3141 * Validate input.
3142 */
3143 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3144 if (!ppvR3 || !ppvR0 || !paPages)
3145 {
3146 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3147 pSession, ppvR3, ppvR0, paPages));
3148 return VERR_INVALID_PARAMETER;
3149
3150 }
3151 if (cPages < 1 || cPages >= 256)
3152 {
3153 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3154 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3155 }
3156
3157 /*
3158 * Let IPRT do the work.
3159 */
3160 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3161 if (RT_SUCCESS(rc))
3162 {
3163 int rc2;
3164 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3165 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3166 if (RT_SUCCESS(rc))
3167 {
3168 Mem.eType = MEMREF_TYPE_LOW;
3169 rc = supdrvMemAdd(&Mem, pSession);
3170 if (!rc)
3171 {
3172 for (iPage = 0; iPage < cPages; iPage++)
3173 {
3174 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3175 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", iPage, paPages[iPage]));
3176 }
3177 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3178 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3179 return VINF_SUCCESS;
3180 }
3181
3182 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3183 AssertRC(rc2);
3184 }
3185
3186 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3187 AssertRC(rc2);
3188 }
3189
3190 return rc;
3191}
3192
3193
3194/**
3195 * Frees memory allocated using SUPR0LowAlloc().
3196 *
3197 * @returns IPRT status code.
3198 * @param pSession The session to which the memory was allocated.
3199 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3200 */
3201SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3202{
3203 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3204 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3205 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3206}
3207
3208
3209
3210/**
3211 * Allocates a chunk of memory with both R0 and R3 mappings.
3212 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3213 *
3214 * @returns IPRT status code.
3215 * @param pSession The session to associate the allocation with.
3216 * @param cb Number of bytes to allocate.
3217 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3218 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3219 */
3220SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3221{
3222 int rc;
3223 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3224 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3225
3226 /*
3227 * Validate input.
3228 */
3229 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3230 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3231 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3232 if (cb < 1 || cb >= _4M)
3233 {
3234 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3235 return VERR_INVALID_PARAMETER;
3236 }
3237
3238 /*
3239 * Let IPRT do the work.
3240 */
3241 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3242 if (RT_SUCCESS(rc))
3243 {
3244 int rc2;
3245 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3246 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3247 if (RT_SUCCESS(rc))
3248 {
3249 Mem.eType = MEMREF_TYPE_MEM;
3250 rc = supdrvMemAdd(&Mem, pSession);
3251 if (!rc)
3252 {
3253 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3254 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3255 return VINF_SUCCESS;
3256 }
3257
3258 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3259 AssertRC(rc2);
3260 }
3261
3262 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3263 AssertRC(rc2);
3264 }
3265
3266 return rc;
3267}
3268
3269
3270/**
3271 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3272 *
3273 * @returns IPRT status code.
3274 * @param pSession The session to which the memory was allocated.
3275 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3276 * @param paPages Where to store the physical addresses.
3277 */
3278SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3279{
3280 PSUPDRVBUNDLE pBundle;
3281 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3282
3283 /*
3284 * Validate input.
3285 */
3286 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3287 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3288 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3289
3290 /*
3291 * Search for the address.
3292 */
3293 RTSpinlockAcquire(pSession->Spinlock);
3294 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3295 {
3296 if (pBundle->cUsed > 0)
3297 {
3298 unsigned i;
3299 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3300 {
3301 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3302 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3303 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3304 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3305 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3306 )
3307 )
3308 {
3309 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3310 size_t iPage;
3311 for (iPage = 0; iPage < cPages; iPage++)
3312 {
3313 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3314 paPages[iPage].uReserved = 0;
3315 }
3316 RTSpinlockRelease(pSession->Spinlock);
3317 return VINF_SUCCESS;
3318 }
3319 }
3320 }
3321 }
3322 RTSpinlockRelease(pSession->Spinlock);
3323 Log(("Failed to find %p!!!\n", (void *)uPtr));
3324 return VERR_INVALID_PARAMETER;
3325}
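
/*
 * Usage sketch (illustrative only, not part of the driver): allocating a
 * dual-mapped chunk with SUPR0MemAlloc() and querying the page addresses with
 * SUPR0MemGetPhys() afterwards; unlike SUPR0ContAlloc(), the pages need not be
 * physically contiguous. The my* name is hypothetical.
 */
#if 0 /* example only */
static int myMemAllocDemo(PSUPDRVSESSION pSession)
{
    RTR0PTR pvR0;
    RTR3PTR pvR3;
    SUPPAGE aPages[2]; /* one entry per page of the two page allocation below */
    int rc = SUPR0MemAlloc(pSession, 2 * PAGE_SIZE, &pvR0, &pvR3);
    if (RT_SUCCESS(rc))
    {
        rc = SUPR0MemGetPhys(pSession, (RTHCUINTPTR)pvR0, &aPages[0]);
        /* On success aPages[i].Phys holds the physical address of page i. */
        SUPR0MemFree(pSession, (RTHCUINTPTR)pvR0);
    }
    return rc;
}
#endif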
3326
3327
3328/**
3329 * Free memory allocated by SUPR0MemAlloc().
3330 *
3331 * @returns IPRT status code.
3332 * @param pSession The session owning the allocation.
3333 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3334 */
3335SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3336{
3337 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3338 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3339 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3340}
3341
3342
3343/**
3344 * Allocates a chunk of memory with a kernel and/or a user mode mapping.
3345 *
3346 * The memory is fixed and it's possible to query the physical addresses using
3347 * SUPR0MemGetPhys().
3348 *
3349 * @returns IPRT status code.
3350 * @param pSession The session to associate the allocation with.
3351 * @param cPages The number of pages to allocate.
3352 * @param fFlags Flags, reserved for the future. Must be zero.
3353 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3354 * NULL if no ring-3 mapping.
3355 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3356 * NULL if no ring-0 mapping.
3357 * @param paPages Where to store the addresses of the pages. Optional.
3358 */
3359SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3360{
3361 int rc;
3362 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3363 LogFlow(("SUPR0PageAllocEx: pSession=%p cPages=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3364
3365 /*
3366 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3367 */
3368 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3369 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3370 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3371 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3372 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3373 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3374 {
3375 Log(("SUPR0PageAllocEx: Illegal request cPages=%u; must be greater than 0 and at most %u (VBOX_MAX_ALLOC_PAGE_COUNT).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT));
3376 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3377 }
3378
3379 /*
3380 * Let IPRT do the work.
3381 */
3382 if (ppvR0)
3383 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3384 else
3385 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3386 if (RT_SUCCESS(rc))
3387 {
3388 int rc2;
3389 if (ppvR3)
3390 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3391 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3392 else
3393 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3394 if (RT_SUCCESS(rc))
3395 {
3396 Mem.eType = MEMREF_TYPE_PAGE;
3397 rc = supdrvMemAdd(&Mem, pSession);
3398 if (!rc)
3399 {
3400 if (ppvR3)
3401 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3402 if (ppvR0)
3403 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3404 if (paPages)
3405 {
3406 uint32_t iPage = cPages;
3407 while (iPage-- > 0)
3408 {
3409 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage); /* query the backing object; MapObjR3 is NIL when ppvR3 is NULL */
3410 Assert(paPages[iPage] != NIL_RTHCPHYS);
3411 }
3412 }
3413 return VINF_SUCCESS;
3414 }
3415
3416 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3417 AssertRC(rc2);
3418 }
3419
3420 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3421 AssertRC(rc2);
3422 }
3423 return rc;
3424}
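
/*
 * Usage sketch (illustrative only, not part of the driver): a dual-mapped
 * allocation via SUPR0PageAllocEx(). Passing NULL for ppvR0 would select the
 * RTR0MemObjAllocPhysNC() path above instead. The my* name is hypothetical.
 */
#if 0 /* example only */
static int myPageAllocDemo(PSUPDRVSESSION pSession, uint32_t cPages)
{
    RTR3PTR pvR3 = NIL_RTR3PTR;
    RTR0PTR pvR0 = NIL_RTR0PTR;
    int rc = SUPR0PageAllocEx(pSession, cPages, 0 /*fFlags*/, &pvR3, &pvR0, NULL /*paPages*/);
    if (RT_SUCCESS(rc))
    {
        /* ... use the two mappings ... */
        rc = SUPR0PageFree(pSession, pvR3); /* tears down both mappings */
    }
    return rc;
}
#endif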
3425
3426
3427/**
3428 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3429 * space.
3430 *
3431 * @returns IPRT status code.
3432 * @param pSession The session to associate the allocation with.
3433 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3434 * @param offSub Where to start mapping. Must be page aligned.
3435 * @param cbSub How much to map. Must be page aligned.
3436 * @param fFlags Flags, MBZ.
3437 * @param ppvR0 Where to return the address of the ring-0 mapping on
3438 * success.
3439 */
3440SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3441 uint32_t fFlags, PRTR0PTR ppvR0)
3442{
3443 int rc;
3444 PSUPDRVBUNDLE pBundle;
3445 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3446 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3447
3448 /*
3449 * Validate input.
3450 */
3451 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3452 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3453 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3454 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3455 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3456 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3457
3458 /*
3459 * Find the memory object.
3460 */
3461 RTSpinlockAcquire(pSession->Spinlock);
3462 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3463 {
3464 if (pBundle->cUsed > 0)
3465 {
3466 unsigned i;
3467 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3468 {
3469 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3470 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3471 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3472 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3473 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3474 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3475 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3476 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3477 {
3478 hMemObj = pBundle->aMem[i].MemObj;
3479 break;
3480 }
3481 }
3482 }
3483 }
3484 RTSpinlockRelease(pSession->Spinlock);
3485
3486 rc = VERR_INVALID_PARAMETER;
3487 if (hMemObj != NIL_RTR0MEMOBJ)
3488 {
3489 /*
3490 * Do some further input validations before calling IPRT.
3491 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3492 */
3493 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3494 if ( offSub < cbMemObj
3495 && cbSub <= cbMemObj
3496 && offSub + cbSub <= cbMemObj)
3497 {
3498 RTR0MEMOBJ hMapObj;
3499 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3500 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3501 if (RT_SUCCESS(rc))
3502 *ppvR0 = RTR0MemObjAddress(hMapObj);
3503 }
3504 else
3505 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3506
3507 }
3508 return rc;
3509}
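
/*
 * Usage sketch (illustrative only, not part of the driver): mapping the second
 * page of an (at least two page) SUPR0PageAllocEx() allocation into kernel
 * space; offSub and cbSub must be page aligned, as validated above. The my*
 * name is hypothetical.
 */
#if 0 /* example only */
static int myMapSecondPage(PSUPDRVSESSION pSession, RTR3PTR pvR3, PRTR0PTR ppvR0)
{
    /* The kernel mapping is cleaned up when the backing allocation is freed. */
    return SUPR0PageMapKernel(pSession, pvR3, PAGE_SIZE /*offSub*/, PAGE_SIZE /*cbSub*/,
                              0 /*fFlags*/, ppvR0);
}
#endif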
3510
3511
3512/**
3513 * Changes the page level protection of one or more pages previously allocated
3514 * by SUPR0PageAllocEx.
3515 *
3516 * @returns IPRT status code.
3517 * @param pSession The session to associate the allocation with.
3518 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3519 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3520 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3521 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3522 * @param offSub Where to start changing. Must be page aligned.
3523 * @param cbSub How much to change. Must be page aligned.
3524 * @param fProt The new page level protection, see RTMEM_PROT_*.
3525 */
3526SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3527{
3528 int rc;
3529 PSUPDRVBUNDLE pBundle;
3530 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3531 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3532 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3533
3534 /*
3535 * Validate input.
3536 */
3537 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3538 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3539 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3540 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3541 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3542
3543 /*
3544 * Find the memory object.
3545 */
3546 RTSpinlockAcquire(pSession->Spinlock);
3547 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3548 {
3549 if (pBundle->cUsed > 0)
3550 {
3551 unsigned i;
3552 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3553 {
3554 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3555 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3556 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3557 || pvR3 == NIL_RTR3PTR)
3558 && ( pvR0 == NIL_RTR0PTR
3559 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3560 && ( pvR3 == NIL_RTR3PTR
3561 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3562 {
3563 if (pvR0 != NIL_RTR0PTR)
3564 hMemObjR0 = pBundle->aMem[i].MemObj;
3565 if (pvR3 != NIL_RTR3PTR)
3566 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3567 break;
3568 }
3569 }
3570 }
3571 }
3572 RTSpinlockRelease(pSession->Spinlock);
3573
3574 rc = VERR_INVALID_PARAMETER;
3575 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3576 || hMemObjR3 != NIL_RTR0MEMOBJ)
3577 {
3578 /*
3579 * Do some further input validations before calling IPRT.
3580 */
3581 size_t cbMemObj = hMemObjR0 != NIL_RTR0MEMOBJ ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3582 if ( offSub < cbMemObj
3583 && cbSub <= cbMemObj
3584 && offSub + cbSub <= cbMemObj)
3585 {
3586 rc = VINF_SUCCESS;
3587 if (hMemObjR3 != NIL_RTR0MEMOBJ)
3588 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3589 if (hMemObjR0 != NIL_RTR0MEMOBJ && RT_SUCCESS(rc))
3590 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3591 }
3592 else
3593 SUPR0Printf("SUPR0PageProtect: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3594
3595 }
3596 return rc;
3597
3598}
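
/*
 * Usage sketch (illustrative only, not part of the driver): write protecting
 * the first page of an allocation in both mappings; pass NIL for a mapping
 * that should be left untouched. The my* name is hypothetical.
 */
#if 0 /* example only */
static int myWriteProtectFirstPage(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0)
{
    return SUPR0PageProtect(pSession, pvR3, pvR0, 0 /*offSub*/, PAGE_SIZE /*cbSub*/,
                            RTMEM_PROT_READ);
}
#endif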
3599
3600
3601/**
3602 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3603 *
3604 * @returns IPRT status code.
3605 * @param pSession The session owning the allocation.
3606 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3607 * SUPR0PageAllocEx().
3608 */
3609SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3610{
3611 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3612 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3613 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3614}
3615
3616
3617/**
3618 * Gets the paging mode of the current CPU.
3619 *
3620 * @returns Paging mode, SUPPAGINGMODE_INVALID on error.
3621 */
3622SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3623{
3624 SUPPAGINGMODE enmMode;
3625
3626 RTR0UINTREG cr0 = ASMGetCR0();
3627 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3628 enmMode = SUPPAGINGMODE_INVALID;
3629 else
3630 {
3631 RTR0UINTREG cr4 = ASMGetCR4();
3632 uint32_t fNXEPlusLMA = 0;
3633 if (cr4 & X86_CR4_PAE)
3634 {
3635 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3636 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3637 {
3638 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3639 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3640 fNXEPlusLMA |= RT_BIT(0);
3641 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3642 fNXEPlusLMA |= RT_BIT(1);
3643 }
3644 }
3645
3646 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3647 {
3648 case 0:
3649 enmMode = SUPPAGINGMODE_32_BIT;
3650 break;
3651
3652 case X86_CR4_PGE:
3653 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3654 break;
3655
3656 case X86_CR4_PAE:
3657 enmMode = SUPPAGINGMODE_PAE;
3658 break;
3659
3660 case X86_CR4_PAE | RT_BIT(0):
3661 enmMode = SUPPAGINGMODE_PAE_NX;
3662 break;
3663
3664 case X86_CR4_PAE | X86_CR4_PGE:
3665 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3666 break;
3667
3668 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3669 enmMode = SUPPAGINGMODE_PAE_GLOBAL_NX;
3670 break;
3671
3672 case RT_BIT(1) | X86_CR4_PAE:
3673 enmMode = SUPPAGINGMODE_AMD64;
3674 break;
3675
3676 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3677 enmMode = SUPPAGINGMODE_AMD64_NX;
3678 break;
3679
3680 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3681 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3682 break;
3683
3684 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3685 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3686 break;
3687
3688 default:
3689 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3690 enmMode = SUPPAGINGMODE_INVALID;
3691 break;
3692 }
3693 }
3694 return enmMode;
3695}
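
/*
 * Usage sketch (illustrative only, not part of the driver): callers usually
 * reduce the paging mode to a simple predicate, e.g. whether NX is in effect.
 * The my* name is hypothetical.
 */
#if 0 /* example only */
static bool myIsNxActive(void)
{
    SUPPAGINGMODE enmMode = SUPR0GetPagingMode();
    return enmMode == SUPPAGINGMODE_PAE_NX
        || enmMode == SUPPAGINGMODE_PAE_GLOBAL_NX
        || enmMode == SUPPAGINGMODE_AMD64_NX
        || enmMode == SUPPAGINGMODE_AMD64_GLOBAL_NX;
}
#endif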
3696
3697
3698/**
3699 * Enables or disables hardware virtualization extensions using native OS APIs.
3700 *
3701 * @returns VBox status code.
3702 * @retval VINF_SUCCESS on success.
3703 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3704 *
3705 * @param fEnable Whether to enable or disable.
3706 */
3707SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3708{
3709#ifdef RT_OS_DARWIN
3710 return supdrvOSEnableVTx(fEnable);
3711#else
3712 return VERR_NOT_SUPPORTED;
3713#endif
3714}
3715
3716
3717/**
3718 * Suspends hardware virtualization extensions using the native OS API.
3719 *
3720 * This is called prior to entering raw-mode context.
3721 *
3722 * @returns @c true if suspended, @c false if not.
3723 */
3724SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3725{
3726#ifdef RT_OS_DARWIN
3727 return supdrvOSSuspendVTxOnCpu();
3728#else
3729 return false;
3730#endif
3731}
3732
3733
3734/**
3735 * Resumes hardware virtualization extensions using the native OS API.
3736 *
3737 * This is called after returning from raw-mode context.
3738 *
3739 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3740 */
3741SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3742{
3743#ifdef RT_OS_DARWIN
3744 supdrvOSResumeVTxOnCpu(fSuspended);
3745#else
3746 Assert(!fSuspended);
3747#endif
3748}
3749
3750
3751/**
3752 * Checks if Intel VT-x feature is usable on this CPU.
3753 *
3754 * @returns VBox status code.
3755 * @param pfIsSmxModeAmbiguous Where to return whether the SMX mode causes
3756 * ambiguity that makes us unsure whether we
3757 * really can use VT-x or not.
3758 *
3759 * @remarks Must be called with preemption disabled.
3760 */
3761SUPR0DECL(int) SUPR0GetVmxUsability(bool *pfIsSmxModeAmbiguous)
3762{
3763 uint64_t u64FeatMsr;
3764 bool fMaybeSmxMode;
3765 bool fMsrLocked;
3766 bool fSmxVmxAllowed;
3767 bool fVmxAllowed;
3768 bool fIsSmxModeAmbiguous;
3769 int rc;
3770
3771 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3772
3773 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3774 fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3775 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3776 fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3777 fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3778 fIsSmxModeAmbiguous = false;
3779 rc = VERR_INTERNAL_ERROR_5;
3780
3781 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3782 if (fMsrLocked)
3783 {
3784 if (fVmxAllowed && fSmxVmxAllowed)
3785 rc = VINF_SUCCESS;
3786 else if (!fVmxAllowed && !fSmxVmxAllowed)
3787 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3788 else if (!fMaybeSmxMode)
3789 {
3790 if (fVmxAllowed)
3791 rc = VINF_SUCCESS;
3792 else
3793 rc = VERR_VMX_MSR_VMXON_DISABLED;
3794 }
3795 else
3796 {
3797 /*
3798 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3799 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3800 * See @bugref{6873}.
3801 */
3802 Assert(fMaybeSmxMode == true);
3803 fIsSmxModeAmbiguous = true;
3804 rc = VINF_SUCCESS;
3805 }
3806 }
3807 else
3808 {
3809 /*
3810 * MSR is not yet locked; we can change it ourselves here.
3811 * Once the lock bit is set, this MSR can no longer be modified.
3812 *
3813 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3814 * accurately. See @bugref{6873}.
3815 */
3816 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3817 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3818 | MSR_IA32_FEATURE_CONTROL_VMXON;
3819 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3820
3821 /* Verify. */
3822 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3823 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3824 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3825 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3826 if (fSmxVmxAllowed && fVmxAllowed)
3827 rc = VINF_SUCCESS;
3828 else
3829 rc = VERR_VMX_MSR_LOCKING_FAILED;
3830 }
3831
3832 if (pfIsSmxModeAmbiguous)
3833 *pfIsSmxModeAmbiguous = fIsSmxModeAmbiguous;
3834
3835 return rc;
3836}
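
/*
 * Usage sketch (illustrative only, not part of the driver): the function reads
 * and possibly writes per-CPU MSRs, so it must be called with preemption
 * disabled, just like SUPR0QueryVTCaps() does further down. The my* name is
 * hypothetical.
 */
#if 0 /* example only */
static int myCheckVmxOnThisCpu(bool *pfSmxAmbiguous)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    int rc;
    RTThreadPreemptDisable(&PreemptState);
    rc = SUPR0GetVmxUsability(pfSmxAmbiguous);
    RTThreadPreemptRestore(&PreemptState);
    return rc;
}
#endif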
3837
3838
3839/**
3840 * Checks if AMD-V SVM feature is usable on this CPU.
3841 *
3842 * @returns VBox status code.
3843 * @param fInitSvm If usable, try to initialize SVM on this CPU.
3844 *
3845 * @remarks Must be called with preemption disabled.
3846 */
3847SUPR0DECL(int) SUPR0GetSvmUsability(bool fInitSvm)
3848{
3849 int rc;
3850 uint64_t fVmCr;
3851 uint64_t fEfer;
3852
3853 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3854 fVmCr = ASMRdMsr(MSR_K8_VM_CR);
3855 if (!(fVmCr & MSR_K8_VM_CR_SVM_DISABLE))
3856 {
3857 rc = VINF_SUCCESS;
3858 if (fInitSvm)
3859 {
3860 /* Turn on SVM in the EFER MSR. */
3861 fEfer = ASMRdMsr(MSR_K6_EFER);
3862 if (fEfer & MSR_K6_EFER_SVME)
3863 rc = VERR_SVM_IN_USE;
3864 else
3865 {
3866 ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);
3867
3868 /* Paranoia. */
3869 fEfer = ASMRdMsr(MSR_K6_EFER);
3870 if (fEfer & MSR_K6_EFER_SVME)
3871 {
3872 /* Restore previous value. */
3873 ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
3874 }
3875 else
3876 rc = VERR_SVM_ILLEGAL_EFER_MSR;
3877 }
3878 }
3879 }
3880 else
3881 rc = VERR_SVM_DISABLED;
3882 return rc;
3883}
3884
3885
3886/**
3887 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3888 *
3889 * @returns VBox status code.
3890 * @retval VERR_VMX_NO_VMX
3891 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3892 * @retval VERR_VMX_MSR_VMXON_DISABLED
3893 * @retval VERR_VMX_MSR_LOCKING_FAILED
3894 * @retval VERR_SVM_NO_SVM
3895 * @retval VERR_SVM_DISABLED
3896 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3897 * (centaur) CPU.
3898 *
3899 * @param pSession The session handle.
3900 * @param pfCaps Where to store the capabilities.
3901 */
3902SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3903{
3904 int rc = VERR_UNSUPPORTED_CPU;
3905 bool fIsSmxModeAmbiguous = false;
3906 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3907
3908 /*
3909 * Input validation.
3910 */
3911 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3912 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3913
3914 *pfCaps = 0;
3915 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3916 RTThreadPreemptDisable(&PreemptState);
3917 if (ASMHasCpuId())
3918 {
3919 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3920 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3921
3922 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3923 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3924
3925 if ( ASMIsValidStdRange(uMaxId)
3926 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3927 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3928 )
3929 {
3930 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3931 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3932 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3933 )
3934 {
3935 rc = SUPR0GetVmxUsability(&fIsSmxModeAmbiguous);
3936 if (rc == VINF_SUCCESS)
3937 {
3938 VMXCAPABILITY vtCaps;
3939
3940 *pfCaps |= SUPVTCAPS_VT_X;
3941
3942 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3943 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3944 {
3945 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3946 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3947 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3948 }
3949 }
3950 }
3951 else
3952 rc = VERR_VMX_NO_VMX;
3953 }
3954 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3955 && ASMIsValidStdRange(uMaxId))
3956 {
3957 uint32_t fExtFeaturesEcx, uExtMaxId;
3958 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3959 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3960
3961 /* Check if SVM is available. */
3962 if ( ASMIsValidExtRange(uExtMaxId)
3963 && uExtMaxId >= 0x8000000a
3964 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3965 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3966 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3967 )
3968 {
3969 rc = SUPR0GetSvmUsability(false /* fInitSvm */);
3970 if (RT_SUCCESS(rc))
3971 {
3972 uint32_t fSvmFeatures;
3973 *pfCaps |= SUPVTCAPS_AMD_V;
3974
3975 /* Query AMD-V features. */
3976 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3977 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3978 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3979 }
3980 }
3981 else
3982 rc = VERR_SVM_NO_SVM;
3983 }
3984 }
3985
3986 RTThreadPreemptRestore(&PreemptState);
3987 if (fIsSmxModeAmbiguous)
3988 SUPR0Printf("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n");
3989 return rc;
3990}
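
/*
 * Usage sketch (illustrative only, not part of the driver): interpreting the
 * capability mask filled in by SUPR0QueryVTCaps(). The my* name is
 * hypothetical.
 */
#if 0 /* example only */
static void myReportVTCaps(PSUPDRVSESSION pSession)
{
    uint32_t fCaps = 0;
    int rc = SUPR0QueryVTCaps(pSession, &fCaps);
    if (RT_SUCCESS(rc))
    {
        if (fCaps & SUPVTCAPS_VT_X)
            SUPR0Printf("VT-x usable%s\n", fCaps & SUPVTCAPS_NESTED_PAGING ? " with EPT" : "");
        else if (fCaps & SUPVTCAPS_AMD_V)
            SUPR0Printf("AMD-V usable%s\n", fCaps & SUPVTCAPS_NESTED_PAGING ? " with nested paging" : "");
    }
}
#endif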
3991
3992
3993/**
3994 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3995 * updating.
3996 *
3997 * @param pGip Pointer to the GIP.
3998 * @param pGipCpu The per CPU structure for this CPU.
3999 * @param u64NanoTS The current time.
4000 */
4001static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
4002{
4003 /*
4004 * Here we don't really care about applying the TSC delta. The re-initialized
4005 * value is not especially relevant while (re)starting the GIP, as the first few
4006 * updates will be ignored anyway; see supdrvGipDoUpdateCpu().
4007 */
4008 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
4009 pGipCpu->u64NanoTS = u64NanoTS;
4010}
4011
4012
4013/**
4014 * Set the current TSC and NanoTS value for the CPU.
4015 *
4016 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
4017 * @param pvUser1 Pointer to the ring-0 GIP mapping.
4018 * @param pvUser2 Pointer to the variable holding the current time.
4019 */
4020static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4021{
4022 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
4023 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
4024
4025 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
4026 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
4027
4028 NOREF(pvUser2);
4029 NOREF(idCpu);
4030}
4031
4032
4033/**
4034 * State structure for supdrvGipDetectGetGipCpuCallback.
4035 */
4036typedef struct SUPDRVGIPDETECTGETCPU
4037{
4038 /** Bitmap of APIC IDs that have been seen (initialized to zero).
4039 * Used to detect duplicate APIC IDs (paranoia). */
4040 uint8_t volatile bmApicId[256 / 8];
4041 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
4042 * initially). The callback clears the methods not detected. */
4043 uint32_t volatile fSupported;
4044 /** The first callback detecting any kind of range issues (initialized to
4045 * NIL_RTCPUID). */
4046 RTCPUID volatile idCpuProblem;
4047} SUPDRVGIPDETECTGETCPU;
4048/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
4049typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
4050
4051
4052/**
4053 * Checks for alternative ways of getting the CPU ID.
4054 *
4055 * This also checks the APIC ID, CPU ID and CPU set index values against the
4056 * GIP tables.
4057 *
4058 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
4059 * @param pvUser1 Pointer to the state structure.
4060 * @param pvUser2 Pointer to the GIP.
4061 */
4062static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4063{
4064 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
4065 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
4066 uint32_t fSupported = 0;
4067 uint16_t idApic;
4068 int iCpuSet;
4069
4070 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
4071
4072 /*
4073 * Check that the CPU ID and CPU set index are interchangeable.
4074 */
4075 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
4076 if ((RTCPUID)iCpuSet == idCpu)
4077 {
4078 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
4079 if ( iCpuSet >= 0
4080 && iCpuSet < RTCPUSET_MAX_CPUS
4081 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
4082 {
4083 /*
4084 * Check whether the IDTR.LIMIT contains a CPU number.
4085 */
4086#ifdef RT_ARCH_X86
4087 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
4088#else
4089 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
4090#endif
4091 RTIDTR Idtr;
4092 ASMGetIDTR(&Idtr);
4093 if (Idtr.cbIdt >= cbIdt)
4094 {
4095 uint32_t uTmp = Idtr.cbIdt - cbIdt;
4096 uTmp &= RTCPUSET_MAX_CPUS - 1;
4097 if (uTmp == idCpu)
4098 {
4099 RTIDTR Idtr2;
4100 ASMGetIDTR(&Idtr2);
4101 if (Idtr2.cbIdt == Idtr.cbIdt)
4102 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
4103 }
4104 }
4105
4106 /*
4107 * Check whether RDTSCP is an option.
4108 */
4109 if (ASMHasCpuId())
4110 {
4111 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
4112 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
4113 {
4114 uint32_t uAux;
4115 ASMReadTscWithAux(&uAux);
4116 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4117 {
4118 ASMNopPause();
4119 ASMReadTscWithAux(&uAux);
4120 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4121 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
4122 }
4123 }
4124 }
4125 }
4126 }
4127
4128 /*
4129 * Check that the APIC ID is unique.
4130 */
4131 idApic = ASMGetApicId();
4132 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
4133 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
4134 fSupported |= SUPGIPGETCPU_APIC_ID;
4135 else
4136 {
4137 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
4138 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4139 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
4140 idCpu, iCpuSet, idApic));
4141 }
4142
4143 /*
4144 * Check that the iCpuSet is within the expected range.
4145 */
4146 if (RT_UNLIKELY( iCpuSet < 0
4147 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
4148 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
4149 {
4150 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4151 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
4152 idCpu, iCpuSet, idApic));
4153 }
4154 else
4155 {
4156 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
4157 if (RT_UNLIKELY(idCpu2 != idCpu))
4158 {
4159 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4160 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
4161 idCpu, iCpuSet, idApic, idCpu2));
4162 }
4163 }
4164
4165 /*
4166 * Update the supported feature mask before we return.
4167 */
4168 ASMAtomicAndU32(&pState->fSupported, fSupported);
4169
4170 NOREF(pvUser2);
4171}
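
/*
 * Usage sketch (illustrative only, not part of the driver): how a GIP user
 * would decode the two alternative CPU numbering schemes probed above, once
 * the corresponding SUPGIPGETCPU_XXX bit is set in fGetGipCpu. The my* names
 * are hypothetical; the masking matches the detection code.
 */
#if 0 /* example only */
static int32_t myCpuSetIndexFromRdTscp(void)
{
    uint32_t uAux;
    ASMReadTscWithAux(&uAux); /* IA32_TSC_AUX carries the CPU set index in its low bits */
    return (int32_t)(uAux & (RTCPUSET_MAX_CPUS - 1));
}

static int32_t myCpuSetIndexFromIdtLimit(void)
{
    RTIDTR Idtr;
    ASMGetIDTR(&Idtr); /* some hosts encode the CPU number above the architectural IDT size */
    /* amd64 variant shown; x86 would subtract sizeof(X86DESCGATE) * 256 instead. */
    return (int32_t)((Idtr.cbIdt - sizeof(X86DESC64SYSTEM) * 256) & (RTCPUSET_MAX_CPUS - 1));
}
#endif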
4172
4173
4174/**
4175 * Increases the timer frequency on hosts where this is possible (NT).
4176 *
4177 * The idea is that more interrupts are better for us... Also, it's better that
4178 * we increase the timer frequency ourselves, because we might end up getting
4179 * inaccurate callbacks if someone else does it.
4180 *
4181 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
4182 */
4183static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
4184{
4185 if (pDevExt->u32SystemTimerGranularityGrant == 0)
4186 {
4187 uint32_t u32SystemResolution;
4188 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
4189 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
4190 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
4191 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
4192 )
4193 {
4194 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
4195 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
4196 }
4197 }
4198}
4199
4200
4201/**
4202 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
4203 *
4204 * @param pDevExt Clears u32SystemTimerGranularityGrant.
4205 */
4206static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
4207{
4208 if (pDevExt->u32SystemTimerGranularityGrant)
4209 {
4210 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4211 AssertRC(rc2);
4212 pDevExt->u32SystemTimerGranularityGrant = 0;
4213 }
4214}
4215
4216
4217/**
4218 * Maps the GIP into userspace and/or gets the physical address of the GIP.
4219 *
4220 * @returns IPRT status code.
4221 * @param pSession Session to which the GIP mapping should belong.
4222 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
4223 * @param pHCPhysGip Where to store the physical address. (optional)
4224 *
4225 * @remark There is no reference counting on the mapping, so one call to this function
4226 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
4227 * and remove the session as a GIP user.
4228 */
4229SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
4230{
4231 int rc;
4232 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4233 RTR3PTR pGipR3 = NIL_RTR3PTR;
4234 RTHCPHYS HCPhys = NIL_RTHCPHYS;
4235 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
4236
4237 /*
4238 * Validate
4239 */
4240 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4241 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4242 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4243
4244#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4245 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4246#else
4247 RTSemFastMutexRequest(pDevExt->mtxGip);
4248#endif
4249 if (pDevExt->pGip)
4250 {
4251 /*
4252 * Map it?
4253 */
4254 rc = VINF_SUCCESS;
4255 if (ppGipR3)
4256 {
4257 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4258 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4259 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4260 if (RT_SUCCESS(rc))
4261 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4262 }
4263
4264 /*
4265 * Get physical address.
4266 */
4267 if (pHCPhysGip && RT_SUCCESS(rc))
4268 HCPhys = pDevExt->HCPhysGip;
4269
4270 /*
4271 * Reference globally.
4272 */
4273 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4274 {
4275 pSession->fGipReferenced = 1;
4276 pDevExt->cGipUsers++;
4277 if (pDevExt->cGipUsers == 1)
4278 {
4279 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4280 uint64_t u64NanoTS;
4281
4282 /*
4283 * GIP starts/resumes updating again. On Windows we bump the
4284 * host timer frequency to make sure we don't get stuck in guest
4285 * mode and to get better timer (and possibly clock) accuracy.
4286 */
4287 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4288
4289 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4290
4291 /*
4292 * Round each CPU's transaction ID up to the next recalc boundary and clear
4292 * u64NanoTSLastUpdateHz so the update interval gets recalculated.
4293 */
4294 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4295 {
4296 unsigned i;
4297 for (i = 0; i < pGipR0->cCpus; i++)
4298 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4299 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4300 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4301 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4302 }
4303
4304 /*
4305 * Re-initialize the per-CPU timestamps, backdated by one update interval.
4306 */
4307 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4308 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4309 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4310 || RTMpGetOnlineCount() == 1)
4311 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4312 else
4313 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4314
4315 /*
4316 * Detect alternative ways to figure the CPU ID in ring-3 and
4317 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
4318 * and CPU set indexes while we're at it.
4319 */
4320 if (RT_SUCCESS(rc))
4321 {
4322 SUPDRVGIPDETECTGETCPU DetectState;
4323 RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
4324 DetectState.fSupported = UINT32_MAX;
4325 DetectState.idCpuProblem = NIL_RTCPUID;
4326 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
4327 if (DetectState.idCpuProblem == NIL_RTCPUID)
4328 {
4329 if ( DetectState.fSupported != UINT32_MAX
4330 && DetectState.fSupported != 0)
4331 {
4332 if (pGipR0->fGetGipCpu != DetectState.fSupported)
4333 {
4334 pGipR0->fGetGipCpu = DetectState.fSupported;
4335 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
4336 }
4337 }
4338 else
4339 {
4340 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
4341 DetectState.fSupported));
4342 rc = VERR_UNSUPPORTED_CPU;
4343 }
4344 }
4345 else
4346 {
4347 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
4348 DetectState.idCpuProblem, DetectState.idCpuProblem));
4349 rc = VERR_INVALID_CPU_ID;
4350 }
4351 }
4352
4353 /*
4354 * Start the GIP timer if all is well..
4355 */
4356 if (RT_SUCCESS(rc))
4357 {
4358#ifndef DO_NOT_START_GIP
4359 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4360#endif
4361 rc = VINF_SUCCESS;
4362 }
4363
4364 /*
4365 * Bail out on error.
4366 */
4367 if (RT_FAILURE(rc))
4368 {
4369 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
4370 pDevExt->cGipUsers = 0;
4371 pSession->fGipReferenced = 0;
4372 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4373 {
4374 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
4375 if (RT_SUCCESS(rc2))
4376 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4377 }
4378 HCPhys = NIL_RTHCPHYS;
4379 pGipR3 = NIL_RTR3PTR;
4380 }
4381 }
4382 }
4383 }
4384 else
4385 {
4386 rc = VERR_GENERAL_FAILURE;
4387 Log(("SUPR0GipMap: GIP is not available!\n"));
4388 }
4389#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4390 RTSemMutexRelease(pDevExt->mtxGip);
4391#else
4392 RTSemFastMutexRelease(pDevExt->mtxGip);
4393#endif
4394
4395 /*
4396 * Write returns.
4397 */
4398 if (pHCPhysGip)
4399 *pHCPhysGip = HCPhys;
4400 if (ppGipR3)
4401 *ppGipR3 = pGipR3;
4402
4403#ifdef DEBUG_DARWIN_GIP
4404 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4405#else
4406 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4407#endif
4408 return rc;
4409}
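
/*
 * Usage sketch (illustrative only, not part of the driver): the typical
 * SUPR0GipMap() / SUPR0GipUnmap() pairing. Note the lack of per-call reference
 * counting mentioned in the remark above. The my* name is hypothetical.
 */
#if 0 /* example only */
static int myGipDemo(PSUPDRVSESSION pSession)
{
    RTR3PTR GipMapR3;
    RTHCPHYS HCPhysGip;
    int rc = SUPR0GipMap(pSession, &GipMapR3, &HCPhysGip);
    if (RT_SUCCESS(rc))
    {
        /* ... expose GipMapR3 / HCPhysGip to the consumer ... */
        rc = SUPR0GipUnmap(pSession);
    }
    return rc;
}
#endif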
4410
4411
4412/**
4413 * Unmaps any user mapping of the GIP and terminates all GIP access
4414 * from this session.
4415 *
4416 * @returns IPRT status code.
4417 * @param pSession Session to which the GIP mapping should belong.
4418 */
4419SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4420{
4421 int rc = VINF_SUCCESS;
4422 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4423#ifdef DEBUG_DARWIN_GIP
4424 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4425 pSession,
4426 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4427 pSession->GipMapObjR3));
4428#else
4429 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4430#endif
4431 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4432
4433#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4434 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4435#else
4436 RTSemFastMutexRequest(pDevExt->mtxGip);
4437#endif
4438
4439 /*
4440 * Unmap anything?
4441 */
4442 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4443 {
4444 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4445 AssertRC(rc);
4446 if (RT_SUCCESS(rc))
4447 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4448 }
4449
4450 /*
4451 * Dereference global GIP.
4452 */
4453 if (pSession->fGipReferenced && !rc)
4454 {
4455 pSession->fGipReferenced = 0;
4456 if ( pDevExt->cGipUsers > 0
4457 && !--pDevExt->cGipUsers)
4458 {
4459 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4460#ifndef DO_NOT_START_GIP
4461 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4462#endif
4463 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4464 }
4465 }
4466
4467#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4468 RTSemMutexRelease(pDevExt->mtxGip);
4469#else
4470 RTSemFastMutexRelease(pDevExt->mtxGip);
4471#endif
4472
4473 return rc;
4474}
4475
4476
4477/**
4478 * Gets the GIP pointer.
4479 *
4480 * @returns Pointer to the GIP or NULL.
4481 */
4482SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4483{
4484 return g_pSUPGlobalInfoPage;
4485}
4486
4487
4488/**
4489 * Register a component factory with the support driver.
4490 *
4491 * This is currently restricted to kernel sessions only.
4492 *
4493 * @returns VBox status code.
4494 * @retval VINF_SUCCESS on success.
4495 * @retval VERR_NO_MEMORY if we're out of memory.
4496 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4497 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4498 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4499 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4500 *
4501 * @param pSession The SUPDRV session (must be a ring-0 session).
4502 * @param pFactory Pointer to the component factory registration structure.
4503 *
4504 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4505 */
4506SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4507{
4508 PSUPDRVFACTORYREG pNewReg;
4509 const char *psz;
4510 int rc;
4511
4512 /*
4513 * Validate parameters.
4514 */
4515 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4516 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4517 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4518 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4519 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4520 AssertReturn(psz, VERR_INVALID_PARAMETER);
4521
4522 /*
4523 * Allocate and initialize a new registration structure.
4524 */
4525 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4526 if (pNewReg)
4527 {
4528 pNewReg->pNext = NULL;
4529 pNewReg->pFactory = pFactory;
4530 pNewReg->pSession = pSession;
4531 pNewReg->cchName = psz - &pFactory->szName[0];
4532
4533 /*
4534 * Add it to the tail of the list after checking for prior registration.
4535 */
4536 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4537 if (RT_SUCCESS(rc))
4538 {
4539 PSUPDRVFACTORYREG pPrev = NULL;
4540 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4541 while (pCur && pCur->pFactory != pFactory)
4542 {
4543 pPrev = pCur;
4544 pCur = pCur->pNext;
4545 }
4546 if (!pCur)
4547 {
4548 if (pPrev)
4549 pPrev->pNext = pNewReg;
4550 else
4551 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4552 rc = VINF_SUCCESS;
4553 }
4554 else
4555 rc = VERR_ALREADY_EXISTS;
4556
4557 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4558 }
4559
4560 if (RT_FAILURE(rc))
4561 RTMemFree(pNewReg);
4562 }
4563 else
4564 rc = VERR_NO_MEMORY;
4565 return rc;
4566}
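
/*
 * Usage sketch (illustrative only, not part of the driver): the shape of a
 * component factory registration from a kernel session. The names, UUID and
 * interface structure are all hypothetical.
 */
#if 0 /* example only */
static const int g_MyFactoryIf = 42; /* stand-in for a real interface vtable */

static DECLCALLBACK(void *) myQueryFactoryInterface(PCSUPDRVFACTORY pSupDrvFactory, PSUPDRVSESSION pSession,
                                                    const char *pszInterfaceUuid)
{
    NOREF(pSupDrvFactory); NOREF(pSession);
    if (!RTUuidCompare2Strs(pszInterfaceUuid, "12345678-1234-1234-1234-123456789abc" /* hypothetical */))
        return (void *)&g_MyFactoryIf;
    return NULL; /* unknown interface */
}

static SUPDRVFACTORY g_MyFactory =
{
    /* szName = */ "MyComponent",
    /* pfnQueryFactoryInterface = */ myQueryFactoryInterface
};

/* ... rc = SUPR0ComponentRegisterFactory(pSession, &g_MyFactory); from a ring-0 session ... */
#endif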
4567
4568
4569/**
4570 * Deregister a component factory.
4571 *
4572 * @returns VBox status code.
4573 * @retval VINF_SUCCESS on success.
4574 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4575 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4576 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4577 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4578 *
4579 * @param pSession The SUPDRV session (must be a ring-0 session).
4580 * @param pFactory Pointer to the component factory registration structure
4581 * previously passed to SUPR0ComponentRegisterFactory().
4582 *
4583 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4584 */
4585SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4586{
4587 int rc;
4588
4589 /*
4590 * Validate parameters.
4591 */
4592 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4593 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4594 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4595
4596 /*
4597 * Take the lock and look for the registration record.
4598 */
4599 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4600 if (RT_SUCCESS(rc))
4601 {
4602 PSUPDRVFACTORYREG pPrev = NULL;
4603 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4604 while (pCur && pCur->pFactory != pFactory)
4605 {
4606 pPrev = pCur;
4607 pCur = pCur->pNext;
4608 }
4609 if (pCur)
4610 {
4611 if (!pPrev)
4612 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4613 else
4614 pPrev->pNext = pCur->pNext;
4615
4616 pCur->pNext = NULL;
4617 pCur->pFactory = NULL;
4618 pCur->pSession = NULL;
4619 rc = VINF_SUCCESS;
4620 }
4621 else
4622 rc = VERR_NOT_FOUND;
4623
4624 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4625
4626 RTMemFree(pCur);
4627 }
4628 return rc;
4629}
4630
4631
4632/**
4633 * Queries a component factory.
4634 *
4635 * @returns VBox status code.
4636 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4637 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4638 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4639 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4640 *
4641 * @param pSession The SUPDRV session.
4642 * @param pszName The name of the component factory.
4643 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4644 * @param ppvFactoryIf Where to store the factory interface.
4645 */
4646SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4647{
4648 const char *pszEnd;
4649 size_t cchName;
4650 int rc;
4651
4652 /*
4653 * Validate parameters.
4654 */
4655 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4656
4657 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4658 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4659 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4660 cchName = pszEnd - pszName;
4661
4662 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4663 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4664 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4665
4666 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4667 *ppvFactoryIf = NULL;
4668
4669 /*
4670 * Take the lock and try all factories by this name.
4671 */
4672 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4673 if (RT_SUCCESS(rc))
4674 {
4675 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4676 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4677 while (pCur)
4678 {
4679 if ( pCur->cchName == cchName
4680 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4681 {
4682 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4683 if (pvFactory)
4684 {
4685 *ppvFactoryIf = pvFactory;
4686 rc = VINF_SUCCESS;
4687 break;
4688 }
4689 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4690 }
4691
4692 /* next */
4693 pCur = pCur->pNext;
4694 }
4695
4696 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4697 }
4698 return rc;
4699}
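
/*
 * Usage sketch (illustrative only, not part of the driver): looking up a
 * factory by name and asking it for an interface. The component name and UUID
 * are hypothetical, matching the registration sketch above.
 */
#if 0 /* example only */
static int myQueryFactoryDemo(PSUPDRVSESSION pSession)
{
    void *pvIf = NULL;
    int rc = SUPR0ComponentQueryFactory(pSession, "MyComponent",
                                        "12345678-1234-1234-1234-123456789abc" /* hypothetical */,
                                        &pvIf);
    if (RT_SUCCESS(rc))
    {
        /* ... use pvIf; it remains valid while the owning factory is registered ... */
    }
    return rc;
}
#endif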
4700
4701
4702/**
4703 * Adds a memory object to the session.
4704 *
4705 * @returns IPRT status code.
4706 * @param pMem Memory tracking structure containing the
4707 * information to track.
4708 * @param pSession The session.
4709 */
4710static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4711{
4712 PSUPDRVBUNDLE pBundle;
4713
4714 /*
4715 * Find free entry and record the allocation.
4716 */
4717 RTSpinlockAcquire(pSession->Spinlock);
4718 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4719 {
4720 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4721 {
4722 unsigned i;
4723 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4724 {
4725 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4726 {
4727 pBundle->cUsed++;
4728 pBundle->aMem[i] = *pMem;
4729 RTSpinlockRelease(pSession->Spinlock);
4730 return VINF_SUCCESS;
4731 }
4732 }
4733 AssertFailed(); /* !!this can't be happening!!! */
4734 }
4735 }
4736 RTSpinlockRelease(pSession->Spinlock);
4737
4738 /*
4739 * Need to allocate a new bundle.
4740 * Insert into the last entry in the bundle.
4741 */
4742 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4743 if (!pBundle)
4744 return VERR_NO_MEMORY;
4745
4746 /* take last entry. */
4747 pBundle->cUsed++;
4748 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4749
4750 /* insert into list. */
4751 RTSpinlockAcquire(pSession->Spinlock);
4752 pBundle->pNext = pSession->Bundle.pNext;
4753 pSession->Bundle.pNext = pBundle;
4754 RTSpinlockRelease(pSession->Spinlock);
4755
4756 return VINF_SUCCESS;
4757}
4758
4759
4760/**
4761 * Releases a memory object referenced by pointer and type.
4762 *
4763 * @returns IPRT status code.
4764 * @param pSession Session data.
4765 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4766 * @param eType Memory type.
4767 */
4768static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4769{
4770 PSUPDRVBUNDLE pBundle;
4771
4772 /*
4773 * Validate input.
4774 */
4775 if (!uPtr)
4776 {
4777 Log(("Illegal address %p\n", (void *)uPtr));
4778 return VERR_INVALID_PARAMETER;
4779 }
4780
4781 /*
4782 * Search for the address.
4783 */
4784 RTSpinlockAcquire(pSession->Spinlock);
4785 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4786 {
4787 if (pBundle->cUsed > 0)
4788 {
4789 unsigned i;
4790 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4791 {
4792 if ( pBundle->aMem[i].eType == eType
4793 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4794 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4795 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4796 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4797 )
4798 {
4799 /* Make a copy of it and release it outside the spinlock. */
4800 SUPDRVMEMREF Mem = pBundle->aMem[i];
4801 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4802 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4803 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4804 RTSpinlockRelease(pSession->Spinlock);
4805
4806 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4807 {
4808 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4809 AssertRC(rc); /** @todo figure out how to handle this. */
4810 }
4811 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4812 {
4813 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4814 AssertRC(rc); /** @todo figure out how to handle this. */
4815 }
4816 return VINF_SUCCESS;
4817 }
4818 }
4819 }
4820 }
4821 RTSpinlockRelease(pSession->Spinlock);
4822 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4823 return VERR_INVALID_PARAMETER;
4824}
4825
4826
4827/**
4828 * Opens an image. If it's the first time it's opened, the caller must upload
4829 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4830 *
4831 * This is the 1st step of the loading.
4832 *
4833 * @returns IPRT status code.
4834 * @param pDevExt Device globals.
4835 * @param pSession Session data.
4836 * @param pReq The open request.
4837 */
4838static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4839{
4840 int rc;
4841 PSUPDRVLDRIMAGE pImage;
4842 void *pv;
4843 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4844 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4845
4846 /*
4847 * Check if we got an instance of the image already.
4848 */
4849 supdrvLdrLock(pDevExt);
4850 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4851 {
4852 if ( pImage->szName[cchName] == '\0'
4853 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4854 {
4855 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4856 {
4857 /** @todo check cbImageBits and cbImageWithTabs here; if they differ, that indicates the images are different. */
4858 pImage->cUsage++;
4859 pReq->u.Out.pvImageBase = pImage->pvImage;
4860 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4861 pReq->u.Out.fNativeLoader = pImage->fNative;
4862 supdrvLdrAddUsage(pSession, pImage);
4863 supdrvLdrUnlock(pDevExt);
4864 return VINF_SUCCESS;
4865 }
4866 supdrvLdrUnlock(pDevExt);
4867 Log(("supdrvIOCtl_LdrOpen: Too many existing references to '%s'!\n", pReq->u.In.szName));
4868 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4869 }
4870 }
4871 /* (not found - add it!) */
4872
4873 /* If the loader interface is locked down, make userland fail early */
4874 if (pDevExt->fLdrLockedDown)
4875 {
4876 supdrvLdrUnlock(pDevExt);
4877 Log(("supdrvIOCtl_LdrOpen: Not adding '%s' to image list, loader interface is locked down!\n", pReq->u.In.szName));
4878 return VERR_PERMISSION_DENIED;
4879 }
4880
4881 /*
4882 * Allocate memory.
4883 */
4884 Assert(cchName < sizeof(pImage->szName));
4885 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4886 if (!pv)
4887 {
4888 supdrvLdrUnlock(pDevExt);
4889 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4890 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4891 }
4892
4893 /*
4894 * Setup and link in the LDR stuff.
4895 */
4896 pImage = (PSUPDRVLDRIMAGE)pv;
4897 pImage->pvImage = NULL;
4898 pImage->pvImageAlloc = NULL;
4899 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4900 pImage->cbImageBits = pReq->u.In.cbImageBits;
4901 pImage->cSymbols = 0;
4902 pImage->paSymbols = NULL;
4903 pImage->pachStrTab = NULL;
4904 pImage->cbStrTab = 0;
4905 pImage->pfnModuleInit = NULL;
4906 pImage->pfnModuleTerm = NULL;
4907 pImage->pfnServiceReqHandler = NULL;
4908 pImage->uState = SUP_IOCTL_LDR_OPEN;
4909 pImage->cUsage = 1;
4910 pImage->pDevExt = pDevExt;
4911 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4912
4913 /*
4914 * Try to load it using the native loader; if that isn't supported, fall back
4915 * on the older method.
4916 */
4917 pImage->fNative = true;
4918 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4919 if (rc == VERR_NOT_SUPPORTED)
4920 {
4921 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4922 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4923 pImage->fNative = false;
4924 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4925 }
4926 if (RT_FAILURE(rc))
4927 {
4928 supdrvLdrUnlock(pDevExt);
4929 RTMemFree(pImage);
4930 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4931 return rc;
4932 }
4933 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4934
4935 /*
4936 * Link it.
4937 */
4938 pImage->pNext = pDevExt->pLdrImages;
4939 pDevExt->pLdrImages = pImage;
4940
4941 supdrvLdrAddUsage(pSession, pImage);
4942
4943 pReq->u.Out.pvImageBase = pImage->pvImage;
4944 pReq->u.Out.fNeedsLoading = true;
4945 pReq->u.Out.fNativeLoader = pImage->fNative;
4946 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4947
4948 supdrvLdrUnlock(pDevExt);
4949 return VINF_SUCCESS;
4950}
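
/*
 * Example (illustrative sketch, not part of the driver): the two-step load
 * protocol as seen from ring-3, in pseudo-C. The ioctl helper and request
 * variables are hypothetical; only the ioctl names and the fNeedsLoading
 * flag come from the code above.
 *
 *     // Step 1: open (or re-reference) the image.
 *     rc = supIOCtl(hDevice, SUP_IOCTL_LDR_OPEN, &OpenReq);
 *     if (RT_SUCCESS(rc) && OpenReq.u.Out.fNeedsLoading)
 *     {
 *         // Step 2: the first opener uploads the image bits + entry points.
 *         LoadReq.u.In.pvImageBase = OpenReq.u.Out.pvImageBase;
 *         rc = supIOCtl(hDevice, SUP_IOCTL_LDR_LOAD, &LoadReq);
 *     }
 */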
4951
4952
4953/**
4954 * Worker that validates a pointer to an image entrypoint.
4955 *
4956 * @returns IPRT status code.
4957 * @param pDevExt The device globals.
4958 * @param pImage The loader image.
4959 * @param pv The pointer into the image.
4960 * @param fMayBeNull Whether it may be NULL.
4961 * @param pszWhat What is this entrypoint? (for logging)
4962 * @param pbImageBits The image bits prepared by ring-3.
4963 *
4964 * @remarks Will leave the lock on failure.
4965 */
4966static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4967 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4968{
4969 if (!fMayBeNull || pv)
4970 {
4971 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4972 {
4973 supdrvLdrUnlock(pDevExt);
4974 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4975 return VERR_INVALID_PARAMETER;
4976 }
4977
4978 if (pImage->fNative)
4979 {
4980 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4981 if (RT_FAILURE(rc))
4982 {
4983 supdrvLdrUnlock(pDevExt);
4984 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4985 return rc;
4986 }
4987 }
4988 }
4989 return VINF_SUCCESS;
4990}
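
/*
 * A note on the range check above (illustrative, hypothetical numbers): the
 * single unsigned comparison
 *     (uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits
 * rejects pointers both below and above the image in one test, because a
 * negative difference wraps around to a huge unsigned value. With an image
 * at 0x1000 spanning 0x2000 bytes:
 *     pv = 0x0800: 0x0800 - 0x1000 wraps to UINTPTR_MAX - 0x7ff -> out of range
 *     pv = 0x1800: 0x1800 - 0x1000 = 0x0800                     -> inside
 *     pv = 0x3800: 0x3800 - 0x1000 = 0x2800                     -> out of range
 */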
4991
4992
4993/**
4994 * Loads the image bits.
4995 *
4996 * This is the 2nd step of the loading.
4997 *
4998 * @returns IPRT status code.
4999 * @param pDevExt Device globals.
5000 * @param pSession Session data.
5001 * @param pReq The request.
5002 */
5003static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
5004{
5005 PSUPDRVLDRUSAGE pUsage;
5006 PSUPDRVLDRIMAGE pImage;
5007 int rc;
5008 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithTabs=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
5009
5010 /*
5011 * Find the ldr image.
5012 */
5013 supdrvLdrLock(pDevExt);
5014 pUsage = pSession->pLdrUsage;
5015 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5016 pUsage = pUsage->pNext;
5017 if (!pUsage)
5018 {
5019 supdrvLdrUnlock(pDevExt);
5020 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
5021 return VERR_INVALID_HANDLE;
5022 }
5023 pImage = pUsage->pImage;
5024
5025 /*
5026 * Validate input.
5027 */
5028 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
5029 || pImage->cbImageBits != pReq->u.In.cbImageBits)
5030 {
5031 supdrvLdrUnlock(pDevExt);
5032 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
5033 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
5034 return VERR_INVALID_HANDLE;
5035 }
5036
5037 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
5038 {
5039 unsigned uState = pImage->uState;
5040 supdrvLdrUnlock(pDevExt);
5041 if (uState != SUP_IOCTL_LDR_LOAD)
5042 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
5043 return VERR_ALREADY_LOADED;
5044 }
5045
5046 /* If the loader interface is locked down, don't load new images */
5047 if (pDevExt->fLdrLockedDown)
5048 {
5049 supdrvLdrUnlock(pDevExt);
5050 Log(("SUP_IOCTL_LDR_LOAD: Not loading '%s' image bits, loader interface is locked down!\n", pImage->szName));
5051 return VERR_PERMISSION_DENIED;
5052 }
5053
5054 switch (pReq->u.In.eEPType)
5055 {
5056 case SUPLDRLOADEP_NOTHING:
5057 break;
5058
5059 case SUPLDRLOADEP_VMMR0:
5060 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
5061 if (RT_SUCCESS(rc))
5062 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
5063 if (RT_SUCCESS(rc))
5064 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
5065 if (RT_SUCCESS(rc))
5066 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
5067 if (RT_FAILURE(rc))
5068 return rc;
5069 break;
5070
5071 case SUPLDRLOADEP_SERVICE:
5072 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
5073 if (RT_FAILURE(rc))
5074 return rc;
5075 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
5076 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
5077 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
5078 {
5079 supdrvLdrUnlock(pDevExt);
5080 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
5081 pImage->pvImage, pReq->u.In.cbImageWithTabs,
5082 pReq->u.In.EP.Service.apvReserved[0],
5083 pReq->u.In.EP.Service.apvReserved[1],
5084 pReq->u.In.EP.Service.apvReserved[2]));
5085 return VERR_INVALID_PARAMETER;
5086 }
5087 break;
5088
5089 default:
5090 supdrvLdrUnlock(pDevExt);
5091 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
5092 return VERR_INVALID_PARAMETER;
5093 }
5094
5095 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
5096 if (RT_FAILURE(rc))
5097 return rc;
5098 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
5099 if (RT_FAILURE(rc))
5100 return rc;
5101
5102 /*
5103 * Allocate and copy the tables.
5104 * (No need to do try/except as this is a buffered request.)
5105 */
5106 pImage->cbStrTab = pReq->u.In.cbStrTab;
5107 if (pImage->cbStrTab)
5108 {
5109 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
5110 if (pImage->pachStrTab)
5111 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
5112 else
5113 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
5114 }
5115
5116 pImage->cSymbols = pReq->u.In.cSymbols;
5117 if (RT_SUCCESS(rc) && pImage->cSymbols)
5118 {
5119 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
5120 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
5121 if (pImage->paSymbols)
5122 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
5123 else
5124 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
5125 }
5126
5127 /*
5128 * Copy the bits / complete native loading.
5129 */
5130 if (RT_SUCCESS(rc))
5131 {
5132 pImage->uState = SUP_IOCTL_LDR_LOAD;
5133 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
5134 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
5135
5136 if (pImage->fNative)
5137 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
5138 else
5139 {
5140 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
5141 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
5142 }
5143 }
5144
5145 /*
5146 * Update any entry points.
5147 */
5148 if (RT_SUCCESS(rc))
5149 {
5150 switch (pReq->u.In.eEPType)
5151 {
5152 default:
5153 case SUPLDRLOADEP_NOTHING:
5154 rc = VINF_SUCCESS;
5155 break;
5156 case SUPLDRLOADEP_VMMR0:
5157 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
5158 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
5159 break;
5160 case SUPLDRLOADEP_SERVICE:
5161 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
5162 rc = VINF_SUCCESS;
5163 break;
5164 }
5165 }
5166
5167 /*
5168 * On success call the module initialization.
5169 */
5170 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
5171 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
5172 {
5173 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
5174 pDevExt->pLdrInitImage = pImage;
5175 pDevExt->hLdrInitThread = RTThreadNativeSelf();
5176 rc = pImage->pfnModuleInit(pImage);
5177 pDevExt->pLdrInitImage = NULL;
5178 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
5179 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
5180 supdrvLdrUnsetVMMR0EPs(pDevExt);
5181 }
5182 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
5183
5184 if (RT_FAILURE(rc))
5185 {
5186 /* Inform the tracing component in case ModuleInit registered TPs. */
5187 supdrvTracerModuleUnloading(pDevExt, pImage);
5188
5189 pImage->uState = SUP_IOCTL_LDR_OPEN;
5190 pImage->pfnModuleInit = NULL;
5191 pImage->pfnModuleTerm = NULL;
5192 pImage->pfnServiceReqHandler = NULL;
5193 pImage->cbStrTab = 0;
5194 RTMemFree(pImage->pachStrTab);
5195 pImage->pachStrTab = NULL;
5196 RTMemFree(pImage->paSymbols);
5197 pImage->paSymbols = NULL;
5198 pImage->cSymbols = 0;
5199 }
5200
5201 supdrvLdrUnlock(pDevExt);
5202 return rc;
5203}
5204
5205
5206/**
5207 * Frees a previously loaded (prep'ed) image.
5208 *
5209 * @returns IPRT status code.
5210 * @param pDevExt Device globals.
5211 * @param pSession Session data.
5212 * @param pReq The request.
5213 */
5214static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
5215{
5216 int rc;
5217 PSUPDRVLDRUSAGE pUsagePrev;
5218 PSUPDRVLDRUSAGE pUsage;
5219 PSUPDRVLDRIMAGE pImage;
5220 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
5221
5222 /*
5223 * Find the ldr image.
5224 */
5225 supdrvLdrLock(pDevExt);
5226 pUsagePrev = NULL;
5227 pUsage = pSession->pLdrUsage;
5228 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5229 {
5230 pUsagePrev = pUsage;
5231 pUsage = pUsage->pNext;
5232 }
5233 if (!pUsage)
5234 {
5235 supdrvLdrUnlock(pDevExt);
5236 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
5237 return VERR_INVALID_HANDLE;
5238 }
5239
5240 /*
5241 * Check if we can remove anything.
5242 */
5243 rc = VINF_SUCCESS;
5244 pImage = pUsage->pImage;
5245 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
5246 {
5247 /*
5248 * Check if there are any objects with destructors in the image; if
5249 * so, leave it for the session cleanup routine so we get a chance to
5250 * clean things up in the right order and not leave them all dangling.
5251 */
5252 RTSpinlockAcquire(pDevExt->Spinlock);
5253 if (pImage->cUsage <= 1)
5254 {
5255 PSUPDRVOBJ pObj;
5256 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5257 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5258 {
5259 rc = VERR_DANGLING_OBJECTS;
5260 break;
5261 }
5262 }
5263 else
5264 {
5265 PSUPDRVUSAGE pGenUsage;
5266 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
5267 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5268 {
5269 rc = VERR_DANGLING_OBJECTS;
5270 break;
5271 }
5272 }
5273 RTSpinlockRelease(pDevExt->Spinlock);
5274 if (rc == VINF_SUCCESS)
5275 {
5276 /* unlink it */
5277 if (pUsagePrev)
5278 pUsagePrev->pNext = pUsage->pNext;
5279 else
5280 pSession->pLdrUsage = pUsage->pNext;
5281
5282 /* free it */
5283 pUsage->pImage = NULL;
5284 pUsage->pNext = NULL;
5285 RTMemFree(pUsage);
5286
5287 /*
5288 * Dereference the image.
5289 */
5290 if (pImage->cUsage <= 1)
5291 supdrvLdrFree(pDevExt, pImage);
5292 else
5293 pImage->cUsage--;
5294 }
5295 else
5296 {
5297 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
5298 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
5299 }
5300 }
5301 else
5302 {
5303 /*
5304 * Dereference both image and usage.
5305 */
5306 pImage->cUsage--;
5307 pUsage->cUsage--;
5308 }
5309
5310 supdrvLdrUnlock(pDevExt);
5311 return rc;
5312}
5313
5314
5315/**
5316 * Lock down the image loader interface.
5317 *
5318 * @returns IPRT status code.
5319 * @param pDevExt Device globals.
5320 */
5321static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt)
5322{
5323 LogFlow(("supdrvIOCtl_LdrLockDown:\n"));
5324
5325 supdrvLdrLock(pDevExt);
5326 if (!pDevExt->fLdrLockedDown)
5327 {
5328 pDevExt->fLdrLockedDown = true;
5329 Log(("supdrvIOCtl_LdrLockDown: Image loader interface locked down\n"));
5330 }
5331 supdrvLdrUnlock(pDevExt);
5332
5333 return VINF_SUCCESS;
5334}
5335
5336
5337/**
5338 * Gets the address of a symbol in an open image.
5339 *
5340 * @returns IPRT status code.
5341 * @param pDevExt Device globals.
5342 * @param pSession Session data.
5343 * @param pReq The request buffer.
5344 */
5345static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
5346{
5347 PSUPDRVLDRIMAGE pImage;
5348 PSUPDRVLDRUSAGE pUsage;
5349 uint32_t i;
5350 PSUPLDRSYM paSyms;
5351 const char *pchStrings;
5352 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5353 void *pvSymbol = NULL;
5354 int rc = VERR_GENERAL_FAILURE;
5355 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5356
5357 /*
5358 * Find the ldr image.
5359 */
5360 supdrvLdrLock(pDevExt);
5361 pUsage = pSession->pLdrUsage;
5362 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5363 pUsage = pUsage->pNext;
5364 if (!pUsage)
5365 {
5366 supdrvLdrUnlock(pDevExt);
5367 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5368 return VERR_INVALID_HANDLE;
5369 }
5370 pImage = pUsage->pImage;
5371 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5372 {
5373 unsigned uState = pImage->uState;
5374 supdrvLdrUnlock(pDevExt);
5375 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5376 return VERR_ALREADY_LOADED;
5377 }
5378
5379 /*
5380 * Search the symbol strings.
5381 *
5382 * Note! The int32_t is for native loading on Solaris where the data
5383 * and text segments are in very different places.
5384 */
5385 pchStrings = pImage->pachStrTab;
5386 paSyms = pImage->paSymbols;
5387 for (i = 0; i < pImage->cSymbols; i++)
5388 {
5389 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5390 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5391 {
5392 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5393 rc = VINF_SUCCESS;
5394 break;
5395 }
5396 }
5397 supdrvLdrUnlock(pDevExt);
5398 pReq->u.Out.pvSymbol = pvSymbol;
5399 return rc;
5400}
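
/*
 * Example (illustrative sketch with hypothetical contents): the ring-3 loader
 * supplies a flat symbol array plus one string blob, so the lookup above just
 * compares the name at offName and resolves offSymbol relative to the image
 * base (via int32_t, as offsets may be negative with native Solaris loading):
 *
 *     pachStrTab   = "ModuleInit\0ModuleTerm\0VMMR0EntryFast\0"
 *     paSymbols[0] = { offName =  0, offSymbol = 0x0010 }   // "ModuleInit"
 *     paSymbols[1] = { offName = 11, offSymbol = 0x0240 }   // "ModuleTerm"
 *     paSymbols[2] = { offName = 22, offSymbol = 0x1000 }   // "VMMR0EntryFast"
 *
 *     // "ModuleTerm" resolves to (uint8_t *)pvImage + 0x0240.
 */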
5401
5402
5403/**
5404 * Gets the address of a symbol in an open image or the support driver.
5405 *
5406 * @returns VINF_SUCCESS on success.
5407 * @returns Appropriate VBox status code on failure.
5408 * @param pDevExt Device globals.
5409 * @param pSession Session data.
5410 * @param pReq The request buffer.
5411 */
5412static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5413{
5414 int rc = VINF_SUCCESS;
5415 const char *pszSymbol = pReq->u.In.pszSymbol;
5416 const char *pszModule = pReq->u.In.pszModule;
5417 size_t cbSymbol;
5418 char const *pszEnd;
5419 uint32_t i;
5420
5421 /*
5422 * Input validation.
5423 */
5424 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5425 pszEnd = RTStrEnd(pszSymbol, 512);
5426 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5427 cbSymbol = pszEnd - pszSymbol + 1;
5428
5429 if (pszModule)
5430 {
5431 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5432 pszEnd = RTStrEnd(pszModule, 64);
5433 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5434 }
5435 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5436
5437
5438 if ( !pszModule
5439 || !strcmp(pszModule, "SupDrv"))
5440 {
5441 /*
5442 * Search the support driver export table.
5443 */
5444 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5445 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5446 {
5447 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5448 break;
5449 }
5450 }
5451 else
5452 {
5453 /*
5454 * Find the loader image.
5455 */
5456 PSUPDRVLDRIMAGE pImage;
5457
5458 supdrvLdrLock(pDevExt);
5459
5460 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5461 if (!strcmp(pImage->szName, pszModule))
5462 break;
5463 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5464 {
5465 /*
5466 * Search the symbol strings.
5467 */
5468 const char *pchStrings = pImage->pachStrTab;
5469 PCSUPLDRSYM paSyms = pImage->paSymbols;
5470 for (i = 0; i < pImage->cSymbols; i++)
5471 {
5472 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5473 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5474 {
5475 /*
5476 * Found it! Calc the symbol address and add a reference to the module.
5477 */
5478 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5479 rc = supdrvLdrAddUsage(pSession, pImage);
5480 break;
5481 }
5482 }
5483 }
5484 else
5485 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5486
5487 supdrvLdrUnlock(pDevExt);
5488 }
5489 return rc;
5490}
5491
5492
5493/**
5494 * Updates the VMMR0 entry point pointers.
5495 *
5496 * @returns IPRT status code.
5497 * @param pDevExt Device globals.
5498 * @param pSession Session data.
5499 * @param pVMMR0 VMMR0 image handle.
5500 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5501 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5502 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5503 * @remark Caller must own the loader mutex.
5504 */
5505static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5506{
5507 int rc = VINF_SUCCESS;
5508 LogFlow(("supdrvLdrSetVMMR0EPs: pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5509
5510
5511 /*
5512 * Check if not yet set.
5513 */
5514 if (!pDevExt->pvVMMR0)
5515 {
5516 pDevExt->pvVMMR0 = pvVMMR0;
5517 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5518 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5519 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5520 }
5521 else
5522 {
5523 /*
5524 * Return failure or success depending on whether the values match or not.
5525 */
5526 if ( pDevExt->pvVMMR0 != pvVMMR0
5527 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5528 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5529 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5530 {
5531 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5532 rc = VERR_INVALID_PARAMETER;
5533 }
5534 }
5535 return rc;
5536}
5537
5538
5539/**
5540 * Unsets the VMMR0 entry points installed by supdrvLdrSetVMMR0EPs.
5541 *
5542 * @param pDevExt Device globals.
5543 */
5544static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5545{
5546 pDevExt->pvVMMR0 = NULL;
5547 pDevExt->pfnVMMR0EntryInt = NULL;
5548 pDevExt->pfnVMMR0EntryFast = NULL;
5549 pDevExt->pfnVMMR0EntryEx = NULL;
5550}
5551
5552
5553/**
5554 * Adds a usage reference in the specified session of an image.
5555 *
5556 * Called while owning the loader semaphore.
5557 *
5558 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5559 * @param pSession Session in question.
5560 * @param pImage Image which the session is using.
5561 */
5562static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5563{
5564 PSUPDRVLDRUSAGE pUsage;
5565 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5566
5567 /*
5568 * Referenced it already?
5569 */
5570 pUsage = pSession->pLdrUsage;
5571 while (pUsage)
5572 {
5573 if (pUsage->pImage == pImage)
5574 {
5575 pUsage->cUsage++;
5576 return VINF_SUCCESS;
5577 }
5578 pUsage = pUsage->pNext;
5579 }
5580
5581 /*
5582 * Allocate new usage record.
5583 */
5584 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5585 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5586 pUsage->cUsage = 1;
5587 pUsage->pImage = pImage;
5588 pUsage->pNext = pSession->pLdrUsage;
5589 pSession->pLdrUsage = pUsage;
5590 return VINF_SUCCESS;
5591}
5592
5593
5594/**
5595 * Frees a load image.
5596 *
5597 * @param pDevExt Pointer to device extension.
5598 * @param pImage Pointer to the image we're gonna free.
5599 * This image must exist!
5600 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5601 */
5602static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5603{
5604 PSUPDRVLDRIMAGE pImagePrev;
5605 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5606
5607 /*
5608 * Warn if we're releasing images while the image loader interface is
5609 * locked down -- we won't be able to reload them!
5610 */
5611 if (pDevExt->fLdrLockedDown)
5612 Log(("supdrvLdrFree: Warning: unloading '%s' image, while loader interface is locked down!\n", pImage->szName));
5613
5614 /* find it - arg. should've used doubly linked list. */
5615 Assert(pDevExt->pLdrImages);
5616 pImagePrev = NULL;
5617 if (pDevExt->pLdrImages != pImage)
5618 {
5619 pImagePrev = pDevExt->pLdrImages;
5620 while (pImagePrev->pNext != pImage)
5621 pImagePrev = pImagePrev->pNext;
5622 Assert(pImagePrev->pNext == pImage);
5623 }
5624
5625 /* unlink */
5626 if (pImagePrev)
5627 pImagePrev->pNext = pImage->pNext;
5628 else
5629 pDevExt->pLdrImages = pImage->pNext;
5630
5631 /* if this is VMMR0.r0, unset its entry point pointers. */
5632 if (pDevExt->pvVMMR0 == pImage->pvImage)
5633 supdrvLdrUnsetVMMR0EPs(pDevExt);
5634
5635 /* check for objects with destructors in this image. (Shouldn't happen.) */
5636 if (pDevExt->pObjs)
5637 {
5638 unsigned cObjs = 0;
5639 PSUPDRVOBJ pObj;
5640 RTSpinlockAcquire(pDevExt->Spinlock);
5641 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5642 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5643 {
5644 pObj->pfnDestructor = NULL;
5645 cObjs++;
5646 }
5647 RTSpinlockRelease(pDevExt->Spinlock);
5648 if (cObjs)
5649 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5650 }
5651
5652 /* call termination function if fully loaded. */
5653 if ( pImage->pfnModuleTerm
5654 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5655 {
5656 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5657 pImage->pfnModuleTerm(pImage);
5658 }
5659
5660 /* Inform the tracing component. */
5661 supdrvTracerModuleUnloading(pDevExt, pImage);
5662
5663 /* do native unload if appropriate. */
5664 if (pImage->fNative)
5665 supdrvOSLdrUnload(pDevExt, pImage);
5666
5667 /* free the image */
5668 pImage->cUsage = 0;
5669 pImage->pDevExt = NULL;
5670 pImage->pNext = NULL;
5671 pImage->uState = SUP_IOCTL_LDR_FREE;
5672 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5673 pImage->pvImageAlloc = NULL;
5674 RTMemFree(pImage->pachStrTab);
5675 pImage->pachStrTab = NULL;
5676 RTMemFree(pImage->paSymbols);
5677 pImage->paSymbols = NULL;
5678 RTMemFree(pImage);
5679}
5680
5681
5682/**
5683 * Acquires the loader lock.
5684 *
5685 * @returns IPRT status code.
5686 * @param pDevExt The device extension.
5687 */
5688DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5689{
5690#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5691 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5692#else
5693 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5694#endif
5695 AssertRC(rc);
5696 return rc;
5697}
5698
5699
5700/**
5701 * Releases the loader lock.
5702 *
5703 * @returns IPRT status code.
5704 * @param pDevExt The device extension.
5705 */
5706DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5707{
5708#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5709 return RTSemMutexRelease(pDevExt->mtxLdr);
5710#else
5711 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5712#endif
5713}
5714
5715
5716/**
5717 * Implements the service call request.
5718 *
5719 * @returns VBox status code.
5720 * @param pDevExt The device extension.
5721 * @param pSession The calling session.
5722 * @param pReq The request packet, valid.
5723 */
5724static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5725{
5726#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5727 int rc;
5728
5729 /*
5730 * Find the module first in the module referenced by the calling session.
5731 */
5732 rc = supdrvLdrLock(pDevExt);
5733 if (RT_SUCCESS(rc))
5734 {
5735 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5736 PSUPDRVLDRUSAGE pUsage;
5737
5738 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5739 if ( pUsage->pImage->pfnServiceReqHandler
5740 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5741 {
5742 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5743 break;
5744 }
5745 supdrvLdrUnlock(pDevExt);
5746
5747 if (pfnServiceReqHandler)
5748 {
5749 /*
5750 * Call it.
5751 */
5752 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5753 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5754 else
5755 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5756 }
5757 else
5758 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5759 }
5760
5761 /* log it */
5762 if ( RT_FAILURE(rc)
5763 && rc != VERR_INTERRUPTED
5764 && rc != VERR_TIMEOUT)
5765 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5766 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5767 else
5768 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5769 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5770 return rc;
5771#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5772 return VERR_NOT_IMPLEMENTED;
5773#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5774}
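
/*
 * Example (illustrative sketch, not part of the driver): a module-side
 * service request handler matching the pfnServiceReqHandler invocation
 * above. The operation numbers are hypothetical; the signature mirrors the
 * two call sites (NULL packet for the header-only case).
 *
 *     static DECLCALLBACK(int) exampleSrvReqHandler(PSUPDRVSESSION pSession, uint32_t uOperation,
 *                                                   uint64_t u64Arg, PSUPR0SERVICEREQHDR pReqHdr)
 *     {
 *         switch (uOperation)
 *         {
 *             case 0: // argument-only operation, no packet.
 *                 return u64Arg ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
 *             case 1: // operation with a request packet.
 *                 AssertPtrReturn(pReqHdr, VERR_INVALID_PARAMETER);
 *                 return VINF_SUCCESS;
 *             default:
 *                 return VERR_NOT_SUPPORTED;
 *         }
 *     }
 */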
5775
5776
5777/**
5778 * Implements the logger settings request.
5779 *
5780 * @returns VBox status code.
5781 * @param pDevExt The device extension.
5782 * @param pSession The caller's session.
5783 * @param pReq The request.
5784 */
5785static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5786{
5787 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5788 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5789 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5790 PRTLOGGER pLogger = NULL;
5791 int rc;
5792
5793 /*
5794 * Some further validation.
5795 */
5796 switch (pReq->u.In.fWhat)
5797 {
5798 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5799 case SUPLOGGERSETTINGS_WHAT_CREATE:
5800 break;
5801
5802 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5803 if (*pszGroup || *pszFlags || *pszDest)
5804 return VERR_INVALID_PARAMETER;
5805 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5806 return VERR_ACCESS_DENIED;
5807 break;
5808
5809 default:
5810 return VERR_INTERNAL_ERROR;
5811 }
5812
5813 /*
5814 * Get the logger.
5815 */
5816 switch (pReq->u.In.fWhich)
5817 {
5818 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5819 pLogger = RTLogGetDefaultInstance();
5820 break;
5821
5822 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5823 pLogger = RTLogRelDefaultInstance();
5824 break;
5825
5826 default:
5827 return VERR_INTERNAL_ERROR;
5828 }
5829
5830 /*
5831 * Do the job.
5832 */
5833 switch (pReq->u.In.fWhat)
5834 {
5835 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5836 if (pLogger)
5837 {
5838 rc = RTLogFlags(pLogger, pszFlags);
5839 if (RT_SUCCESS(rc))
5840 rc = RTLogGroupSettings(pLogger, pszGroup);
5841 NOREF(pszDest);
5842 }
5843 else
5844 rc = VERR_NOT_FOUND;
5845 break;
5846
5847 case SUPLOGGERSETTINGS_WHAT_CREATE:
5848 {
5849 if (pLogger)
5850 rc = VERR_ALREADY_EXISTS;
5851 else
5852 {
5853 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5854
5855 rc = RTLogCreate(&pLogger,
5856 0 /* fFlags */,
5857 pszGroup,
5858 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5859 ? "VBOX_LOG"
5860 : "VBOX_RELEASE_LOG",
5861 RT_ELEMENTS(s_apszGroups),
5862 s_apszGroups,
5863 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5864 NULL);
5865 if (RT_SUCCESS(rc))
5866 {
5867 rc = RTLogFlags(pLogger, pszFlags);
5868 NOREF(pszDest);
5869 if (RT_SUCCESS(rc))
5870 {
5871 switch (pReq->u.In.fWhich)
5872 {
5873 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5874 pLogger = RTLogSetDefaultInstance(pLogger);
5875 break;
5876 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5877 pLogger = RTLogRelSetDefaultInstance(pLogger);
5878 break;
5879 }
5880 }
5881 RTLogDestroy(pLogger);
5882 }
5883 }
5884 break;
5885 }
5886
5887 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5888 switch (pReq->u.In.fWhich)
5889 {
5890 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5891 pLogger = RTLogSetDefaultInstance(NULL);
5892 break;
5893 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5894 pLogger = RTLogRelSetDefaultInstance(NULL);
5895 break;
5896 }
5897 rc = RTLogDestroy(pLogger);
5898 break;
5899
5900 default:
5901 {
5902 rc = VERR_INTERNAL_ERROR;
5903 break;
5904 }
5905 }
5906
5907 return rc;
5908}
5909
5910
5911/**
5912 * Implements the MSR prober operations.
5913 *
5914 * @returns VBox status code.
5915 * @param pDevExt The device extension.
5916 * @param pReq The request.
5917 */
5918static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5919{
5920#ifdef SUPDRV_WITH_MSR_PROBER
5921 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5922 int rc;
5923
5924 switch (pReq->u.In.enmOp)
5925 {
5926 case SUPMSRPROBEROP_READ:
5927 {
5928 uint64_t uValue;
5929 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5930 if (RT_SUCCESS(rc))
5931 {
5932 pReq->u.Out.uResults.Read.uValue = uValue;
5933 pReq->u.Out.uResults.Read.fGp = false;
5934 }
5935 else if (rc == VERR_ACCESS_DENIED)
5936 {
5937 pReq->u.Out.uResults.Read.uValue = 0;
5938 pReq->u.Out.uResults.Read.fGp = true;
5939 rc = VINF_SUCCESS;
5940 }
5941 break;
5942 }
5943
5944 case SUPMSRPROBEROP_WRITE:
5945 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5946 if (RT_SUCCESS(rc))
5947 pReq->u.Out.uResults.Write.fGp = false;
5948 else if (rc == VERR_ACCESS_DENIED)
5949 {
5950 pReq->u.Out.uResults.Write.fGp = true;
5951 rc = VINF_SUCCESS;
5952 }
5953 break;
5954
5955 case SUPMSRPROBEROP_MODIFY:
5956 case SUPMSRPROBEROP_MODIFY_FASTER:
5957 rc = supdrvOSMsrProberModify(idCpu, pReq);
5958 break;
5959
5960 default:
5961 return VERR_INVALID_FUNCTION;
5962 }
5963 return rc;
5964#else
5965 return VERR_NOT_IMPLEMENTED;
5966#endif
5967}
5968
5969#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5970
5971/**
5972 * Switches the TSC-delta measurement thread into the butchered state.
5973 *
5974 * @returns VBox status code.
5975 * @param pDevExt Pointer to the device instance data.
5976 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5977 * @param pszFailed An error message to log.
5978 * @param rcFailed The error code to exit the thread with.
5979 */
5980static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5981{
5982 if (!fSpinlockHeld)
5983 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5984
5985 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
5986 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5987 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
5988 return rcFailed;
5989}
5990
5991
5992/**
5993 * The TSC-delta measurement thread.
5994 *
5995 * @returns VBox status code.
5996 * @param hThread The thread handle.
5997 * @param pvUser Opaque pointer to the device instance data.
5998 */
5999static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
6000{
6001 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6002 static uint32_t cTimesMeasured = 0;
6003 uint32_t cConsecutiveTimeouts = 0;
6004 int rc = VERR_INTERNAL_ERROR_2;
6005 for (;;)
6006 {
6007 /*
6008 * Switch on the current state.
6009 */
6010 SUPDRVTSCDELTATHREADSTATE enmState;
6011 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6012 enmState = pDevExt->enmTscDeltaThreadState;
6013 switch (enmState)
6014 {
6015 case kTscDeltaThreadState_Creating:
6016 {
6017 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6018 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
6019 if (RT_FAILURE(rc))
6020 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6021 /* fall thru */
6022 }
6023
6024 case kTscDeltaThreadState_Listening:
6025 {
6026 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6027
6028 /* Simple adaptive timeout. */
6029 if (cConsecutiveTimeouts++ == 10)
6030 {
6031 if (pDevExt->cMsTscDeltaTimeout == 1) /* 1 -> 10 ms */
6032 pDevExt->cMsTscDeltaTimeout = 10;
6033 else if (pDevExt->cMsTscDeltaTimeout == 10) /* 10 -> 100 ms */
6034 pDevExt->cMsTscDeltaTimeout = 100;
6035 else if (pDevExt->cMsTscDeltaTimeout == 100) /* 100 -> 500 ms */
6036 pDevExt->cMsTscDeltaTimeout = 500;
6037 cConsecutiveTimeouts = 0;
6038 }
6039 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
6040 if ( RT_FAILURE(rc)
6041 && rc != VERR_TIMEOUT)
6042 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
6043 break;
6044 }
6045
6046 case kTscDeltaThreadState_WaitAndMeasure:
6047 {
6048 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
6049 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
6050 if (RT_FAILURE(rc))
6051 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6052 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6053 pDevExt->cMsTscDeltaTimeout = 1;
6054 RTThreadSleep(10);
6055 /* fall thru */
6056 }
6057
6058 case kTscDeltaThreadState_Measuring:
6059 {
6060 cConsecutiveTimeouts = 0;
6061 if (!cTimesMeasured++)
6062 {
6063 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6064 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
6065 }
6066 else
6067 {
6068 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6069 unsigned iCpu;
6070
6071 if (cTimesMeasured == UINT32_MAX)
6072 cTimesMeasured = 1;
6073
6074 /* Measure TSC-deltas only for the CPUs that are in the set. */
6075 rc = VINF_SUCCESS;
6076 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6077 {
6078 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6079 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
6080 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
6081 {
6082 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6083 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
6084 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
6085 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
6086 }
6087 }
6088 }
6089 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6090 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
6091 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6092 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6093 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value. */
6094 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
6095 break;
6096 }
6097
6098 case kTscDeltaThreadState_Terminating:
6099 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6100 return VINF_SUCCESS;
6101
6102 case kTscDeltaThreadState_Butchered:
6103 default:
6104 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
6105 }
6106 }
6107
6108 return rc;
6109}
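
/*
 * State transitions of the thread above, as implemented (summary for
 * orientation):
 *
 *     Creating       -> Listening      (thread signals hTscDeltaEvent when ready)
 *     Listening      -> Listening      (wakes on user signal or adaptive timeout)
 *     WaitAndMeasure -> Measuring      (acknowledged via hTscDeltaEvent, then falls through)
 *     Measuring      -> Listening      (after measuring deltas for the queued CPUs)
 *     Terminating    -> thread exit    (VINF_SUCCESS)
 *     Butchered      -> thread exit    (via supdrvTscDeltaThreadButchered, failure status)
 */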
6110
6111
6112/**
6113 * Waits for the TSC-delta measurement thread to respond to a state change.
6114 *
6115 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
6116 * other error code on internal error.
6117 *
6118 * @param pDevExt Pointer to the device instance data.
6119 * @param enmCurState The current state.
6120 * @param enmNewState The new state we're waiting for it to enter.
6121 */
6122static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
6123 SUPDRVTSCDELTATHREADSTATE enmNewState)
6124{
6125 /*
6126 * Wait a short while for the expected state transition.
6127 */
6128 int rc;
6129 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
6130 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6131 if (pDevExt->enmTscDeltaThreadState == enmNewState)
6132 {
6133 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6134 rc = VINF_SUCCESS;
6135 }
6136 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
6137 {
6138 /*
6139 * Wait longer if the state has not yet transitioned to the one we want.
6140 */
6141 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6142 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
6143 if ( RT_SUCCESS(rc)
6144 || rc == VERR_TIMEOUT)
6145 {
6146 /*
6147 * Check the state whether we've succeeded.
6148 */
6149 SUPDRVTSCDELTATHREADSTATE enmState;
6150 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6151 enmState = pDevExt->enmTscDeltaThreadState;
6152 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6153 if (enmState == enmNewState)
6154 rc = VINF_SUCCESS;
6155 else if (enmState == enmCurState)
6156 {
6157 rc = VERR_TIMEOUT;
6158 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
6159 enmNewState));
6160 }
6161 else
6162 {
6163 rc = VERR_INTERNAL_ERROR;
6164 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
6165 enmState, enmNewState));
6166 }
6167 }
6168 else
6169 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
6170 }
6171 else
6172 {
6173 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6174 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
6175 rc = VERR_INTERNAL_ERROR;
6176 }
6177
6178 return rc;
6179}
6180
6181
6182/**
6183 * Terminates the TSC-delta measurement thread.
6184 *
6185 * @param pDevExt Pointer to the device instance data.
6186 */
6187static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
6188{
6189 int rc;
6190 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6191 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
6192 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6193 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6194 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
6195 if (RT_FAILURE(rc))
6196 {
6197 /* Signal a few more times before giving up. */
6198 int cTriesLeft = 5;
6199 while (--cTriesLeft > 0)
6200 {
6201 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6202 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
6203 if (rc != VERR_TIMEOUT)
6204 break;
6205 }
6206 }
6207}
6208
6209
6210/**
6211 * Initializes and spawns the TSC-delta measurement thread.
6212 *
6213 * A thread is required for servicing re-measurement requests from events like
6214 * CPUs coming online, suspend/resume etc., as it cannot be done synchronously
6215 * under all contexts on all OSs.
6216 *
6217 * @returns VBox status code.
6218 * @param pDevExt Pointer to the device instance data.
6219 *
6220 * @remarks Must only be called -after- initializing GIP and setting up MP
6221 * notifications!
6222 */
6223static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
6224{
6225 int rc;
6226 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6227 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
6228 if (RT_SUCCESS(rc))
6229 {
6230 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
6231 if (RT_SUCCESS(rc))
6232 {
6233 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
6234 pDevExt->cMsTscDeltaTimeout = 1;
6235 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
6236 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
6237 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
6238 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
6239 if (RT_SUCCESS(rc))
6240 {
6241 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
6242 if (RT_SUCCESS(rc))
6243 {
6244 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6245 return rc;
6246 }
6247
6248 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
6249 supdrvTscDeltaThreadTerminate(pDevExt);
6250 }
6251 else
6252 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
6253 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6254 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6255 }
6256 else
6257 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
6258 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6259 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6260 }
6261 else
6262 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
6263
6264 return rc;
6265}
6266
6267
6268/**
6269 * Terminates the TSC-delta measurement thread and cleanup.
6270 *
6271 * @param pDevExt Pointer to the device instance data.
6272 */
6273static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
6274{
6275 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
6276 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6277 {
6278 supdrvTscDeltaThreadTerminate(pDevExt);
6279 }
6280
6281 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
6282 {
6283 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6284 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6285 }
6286
6287 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6288 {
6289 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6290 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6291 }
6292
6293 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6294}
6295
6296
6297/**
6298 * Waits for TSC-delta measurements to be completed for all online CPUs.
6299 *
6300 * @returns VBox status code.
6301 * @param pDevExt Pointer to the device instance data.
6302 */
6303static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
6304{
6305 int cTriesLeft = 5;
6306 int cMsTotalWait;
6307 int cMsWaited = 0;
6308 int cMsWaitGranularity = 1;
6309
6310 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6311 AssertReturn(pGip, VERR_INVALID_POINTER);
6312
6313 if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
6314 return VERR_THREAD_NOT_WAITABLE;
6315
6316 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
6317 while (cTriesLeft-- > 0)
6318 {
6319 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
6320 return VINF_SUCCESS;
6321 RTThreadSleep(cMsWaitGranularity);
6322 cMsWaited += cMsWaitGranularity;
6323 if (cMsWaited >= cMsTotalWait)
6324 break;
6325 }
6326
6327 return VERR_TIMEOUT;
6328}
6329
6330#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6331
6332/**
6333 * Applies the TSC delta to the supplied raw TSC value.
6334 *
6335 * @returns VBox status code. (Ignored by all users, just FYI.)
6336 * @param pGip Pointer to the GIP.
6337 * @param puTsc Pointer to a valid TSC value before the TSC delta has been applied.
6338 * @param idApic The APIC ID of the CPU @c puTsc corresponds to.
6339 * @param pfDeltaApplied Where to store whether the TSC delta was successfully
6340 * applied or not (optional, can be NULL).
6341 *
6342 * @remarks May be called with interrupts disabled in ring-0!
6343 *
6344 * @note Don't you dare change the delta calculation. If you really do, make
6345 * sure you update all places where it's used (IPRT, SUPLibAll.cpp,
6346 * SUPDrv.c, supdrvGipMpEvent, and more).
6347 */
6348DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
6349{
6350 int rc;
6351
6352 /*
6353 * Validate input.
6354 */
6355 AssertPtr(puTsc);
6356 AssertPtr(pGip);
6357 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6358
6359 /*
6360 * Carefully convert the idApic into a GIPCPU entry.
6361 */
6362 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
6363 {
6364 uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
6365 if (RT_LIKELY(iCpu < pGip->cCpus))
6366 {
6367 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6368
6369 /*
6370 * Apply the delta if valid.
6371 */
6372 if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
6373 {
6374 *puTsc -= pGipCpu->i64TSCDelta;
6375 if (pfDeltaApplied)
6376 *pfDeltaApplied = true;
6377 return VINF_SUCCESS;
6378 }
6379
6380 rc = VINF_SUCCESS;
6381 }
6382 else
6383 {
6384 AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
6385 rc = VERR_INVALID_CPU_INDEX;
6386 }
6387 }
6388 else
6389 {
6390 AssertMsgFailed(("idApic=%u\n", idApic));
6391 rc = VERR_INVALID_CPU_ID;
6392 }
6393 if (pfDeltaApplied)
6394 *pfDeltaApplied = false;
6395 return rc;
6396}
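
/*
 * Worked example with hypothetical numbers: if the CPU behind APIC ID 3 was
 * measured with i64TSCDelta = +1500 ticks relative to the master CPU, a raw
 * TSC read of 1000000 on that CPU is adjusted to 1000000 - 1500 = 998500,
 * putting it on the master's timeline. A CPU whose delta is still INT64_MAX
 * (not yet measured) is left unadjusted and *pfDeltaApplied is set to false.
 */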
6397
6398
6399/**
6400 * Measures the TSC frequency of the system.
6401 *
6402 * Uses a busy-wait method for the async case, as it is intended to help push
6403 * the CPU frequency up, while a sleeping method is used for the invariant cases.
6404 *
6405 * The TSC frequency can vary on systems which are not reported as invariant.
6406 * On such systems the object of this function is to find out the nominal,
6407 * maximum TSC frequency under 'normal' CPU operation.
6408 *
6409 * @returns VBox status code.
6410 * @param pDevExt Pointer to the device instance.
6411 *
6412 * @remarks Must be called only -after- measuring the TSC deltas.
6413 */
6414static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6415{
6416 int cTriesLeft = 4;
6417 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6418
6419 /* Assert order. */
6420 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6421 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6422
6423 while (cTriesLeft-- > 0)
6424 {
6425 RTCCUINTREG uFlags;
6426 uint64_t u64NanoTsBefore;
6427 uint64_t u64NanoTsAfter;
6428 uint64_t u64TscBefore;
6429 uint64_t u64TscAfter;
6430 uint8_t idApicBefore;
6431 uint8_t idApicAfter;
6432
6433 /*
6434 * Synchronize with the host OS clock tick before reading the TSC.
6435 * Especially important on older Windows versions where the granularity is terrible.
6436 */
6437 u64NanoTsBefore = RTTimeSystemNanoTS();
6438 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6439 ASMNopPause();
6440
6441 uFlags = ASMIntDisableFlags();
6442 idApicBefore = ASMGetApicId();
6443 u64TscBefore = ASMReadTSC();
6444 u64NanoTsBefore = RTTimeSystemNanoTS();
6445 ASMSetFlags(uFlags);
6446
6447 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6448 {
6449 /*
6450 * Sleep-wait since the TSC frequency is constant; it eases host load.
6451 * A shorter interval produces more variance in the frequency (esp. on Windows).
6452 */
6453 RTThreadSleep(200);
6454 u64NanoTsAfter = RTTimeSystemNanoTS();
6455 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6456 ASMNopPause();
6457 u64NanoTsAfter = RTTimeSystemNanoTS();
6458 }
6459 else
6460 {
6461 /* Busy-wait keeping the frequency up and measure. */
6462 for (;;)
6463 {
6464 u64NanoTsAfter = RTTimeSystemNanoTS();
6465 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6466 ASMNopPause();
6467 else
6468 break;
6469 }
6470 }
6471
6472 uFlags = ASMIntDisableFlags();
6473 idApicAfter = ASMGetApicId();
6474 u64TscAfter = ASMReadTSC();
6475 ASMSetFlags(uFlags);
6476
6477 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6478 {
6479 int rc;
6480 bool fAppliedBefore;
6481 bool fAppliedAfter;
6482 rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6483 rc = supdrvTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6484
6485 if ( !fAppliedBefore
6486 || !fAppliedAfter)
6487 {
6488#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6489 /*
6490 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6491 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6492 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6493 * proceed. This should be triggered just once if we're rather unlucky.
6494 */
6495 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6496 if (rc == VERR_TIMEOUT)
6497 {
6498 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timed out waiting for TSC-delta measurements.\n");
6499 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6500 }
6501#else
6502 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6503 idApicBefore, idApicAfter, cTriesLeft);
6504#endif
6505 continue;
6506 }
6507 }
6508
6509 /*
6510 * Update GIP.
6511 */
6512 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6513 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6514 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6515 return VINF_SUCCESS;
6516 }
6517
6518 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6519}
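
/*
 * Worked example with hypothetical numbers for the GIP update above: if
 * u64TscAfter - u64TscBefore = 520000000 ticks elapsed across
 * u64NanoTsAfter - u64NanoTsBefore = 200000000 ns (the 200 ms sleep-wait),
 * then u64CpuHz = 520e6 * 1e9 / 200e6 = 2600000000, i.e. a 2.6 GHz TSC.
 */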
6520
6521
6522/**
6523 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6524 *
6525 * @param pTimer The timer.
6526 * @param pvUser Opaque pointer to the device instance data.
6527 * @param iTick The timer tick.
6528 */
6529static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6530{
6531 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6532 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6533 bool fDeltaApplied = false;
6534 uint8_t idApic;
6535 uint64_t u64DeltaNanoTS;
6536 uint64_t u64DeltaTsc;
6537 uint64_t u64NanoTS;
6538 uint64_t u64Tsc;
6539 RTCCUINTREG uFlags;
6540
6541 /* Paranoia. */
6542 Assert(pGip);
6543 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6544
6545#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6546 u64NanoTS = RTTimeSystemNanoTS();
6547 while (RTTimeSystemNanoTS() == u64NanoTS)
6548 ASMNopPause();
6549#endif
6550 uFlags = ASMIntDisableFlags();
6551 idApic = ASMGetApicId();
6552 u64Tsc = ASMReadTSC();
6553 u64NanoTS = RTTimeSystemNanoTS();
6554 ASMSetFlags(uFlags);
6555 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6556 supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6557 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6558 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6559
6560 if (RT_UNLIKELY( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
6561 && !fDeltaApplied))
6562 {
6563 Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6564 GIP_TSC_REFINE_INTERVAL));
6565 return;
6566 }
6567
6568 /* Calculate the TSC frequency. */
6569 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6570 && u64DeltaNanoTS < UINT32_MAX)
6571 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6572 else
6573 {
6574 RTUINT128U CpuHz, Tmp, Divisor;
6575 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6576 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6577 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6578 pGip->u64CpuHz = CpuHz.s.Lo;
6579 }
6580
6581 /* Update rest of GIP. */
6582 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6583 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6584}
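
/*
 * Note on the two division paths above (derived from the guard conditions):
 * the ASMMultU64ByU32DivByU32 fast path is only taken when
 * u64DeltaTsc * RT_NS_1SEC still fits into 64 bits (u64DeltaTsc below about
 * 1.8e10 ticks) and u64DeltaNanoTS fits into 32 bits (below about 4.29
 * seconds). Longer or faster-ticking refinement intervals overflow those
 * limits and take the 128-bit RTUInt128MulU64ByU64 / RTUInt128Div fallback.
 */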
6585
6586
6587/**
6588 * Starts the TSC-frequency refinement phase asynchronously.
6589 *
6590 * @param pDevExt Pointer to the device instance data.
6591 */
6592static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6593{
6594 uint64_t u64NanoTS;
6595 RTCCUINTREG uFlags;
6596 uint8_t idApic;
6597 int rc;
6598 PSUPGLOBALINFOPAGE pGip;
6599
6600 /* Validate. */
6601 Assert(pDevExt);
6602 Assert(pDevExt->pGip);
6603 pGip = pDevExt->pGip;
6604
6605#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6606 /*
6607 * If the TSC-delta thread is created, wait until it's done calculating
6608 * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
6609 */
6610 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
6611 && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
6612 {
6613 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6614 if (rc == VERR_TIMEOUT)
6615 {
6616 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6617 return;
6618 }
6619 }
6620#endif
6621
6622 /*
6623 * Record the TSC and NanoTS as the starting anchor point for refinement of the
6624 * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
6625 * reading of the TSC and the NanoTS as close as possible.
6626 */
6627 u64NanoTS = RTTimeSystemNanoTS();
6628 while (RTTimeSystemNanoTS() == u64NanoTS)
6629 ASMNopPause();
6630 uFlags = ASMIntDisableFlags();
6631 idApic = ASMGetApicId();
6632 pDevExt->u64TscAnchor = ASMReadTSC();
6633 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6634 ASMSetFlags(uFlags);
6635 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6636 supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
6637
6638 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6639 if (RT_SUCCESS(rc))
6640 {
6641 /*
6642 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6643 * interval as small as possible while gaining the most consistent and accurate frequency
6644 * (compared to what the host OS might have measured).
6645 *
6646 * In theory, we gain more accuracy with longer intervals, but we want VMs to start up with the
6647 * same TSC frequency whenever possible, so we need to keep the interval short.
6648 */
6649 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6650 AssertRC(rc);
6651 }
6652 else
6653 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6654}
6655
6656
6657/**
6658 * Creates the GIP.
6659 *
6660 * @returns VBox status code.
6661 * @param pDevExt Instance data. GIP stuff may be updated.
6662 */
6663static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6664{
6665 PSUPGLOBALINFOPAGE pGip;
6666 RTHCPHYS HCPhysGip;
6667 uint32_t u32SystemResolution;
6668 uint32_t u32Interval;
6669 uint32_t u32MinInterval;
6670 uint32_t uMod;
6671 unsigned cCpus;
6672 int rc;
6673
6674 LogFlow(("supdrvGipCreate:\n"));
6675
6676 /* Assert order. */
6677 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6678 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6679 Assert(!pDevExt->pGipTimer);
6680
6681 /*
6682 * Check the CPU count.
6683 */
6684 cCpus = RTMpGetArraySize();
6685 if ( cCpus > RTCPUSET_MAX_CPUS
6686 || cCpus > 256 /* ApicId is used for the mappings */)
6687 {
6688 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6689 return VERR_TOO_MANY_CPUS;
6690 }
6691
6692 /*
6693 * Allocate a contiguous set of pages with a default kernel mapping.
6694 */
6695 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6696 if (RT_FAILURE(rc))
6697 {
6698 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6699 return rc;
6700 }
6701 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6702 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6703
6704 /*
6705 * Allocate the TSC-delta sync struct on a separate cache line.
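 * (We over-allocate by 63 bytes so the pointer can be rounded up to the next
 * 64 byte boundary, the typical x86 cache line size.)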
6706 */
6707 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6708 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6709 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6710
6711 /*
6712 * Find a reasonable update interval and initialize the structure.
6713 */
6714 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6715 /** @todo figure out why using a 100ms interval upsets timekeeping in VMs.
6716 * See @bugref{6710}. */
6717 u32MinInterval = RT_NS_10MS;
6718 u32SystemResolution = RTTimerGetSystemGranularity();
6719 u32Interval = u32MinInterval;
6720 uMod = u32MinInterval % u32SystemResolution;
6721 if (uMod)
6722 u32Interval += u32SystemResolution - uMod;
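/* I.e. round the minimum interval up to a whole multiple of the system timer
granularity. E.g. (hypothetical numbers) a 10'000'000 ns minimum with a
4'000'000 ns granularity yields uMod = 2'000'000 and thus a 12'000'000 ns
interval. */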
6723
6724 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6725
6726 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
6727 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6728 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6729 {
6730 /* Basically, invariant Windows boxes should never be detected as async (i.e. TSC-deltas should be 0). */
6731 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6732 return VERR_INTERNAL_ERROR_2;
6733 }
6734
6735#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6736 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6737 {
6738 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6739 rc = supdrvTscDeltaThreadInit(pDevExt);
6740 }
6741#endif
6742 if (RT_SUCCESS(rc))
6743 {
6744 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6745 if (RT_SUCCESS(rc))
6746 {
6747 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6748 if (RT_SUCCESS(rc))
6749 {
6750#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6751 uint16_t iCpu;
6752 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6753 {
6754 /*
6755 * Measure the TSC deltas now that we have MP notifications.
6756 */
6757 int cTries = 5;
6758 do
6759 {
6760 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6761 if (rc != VERR_TRY_AGAIN)
6762 break;
6763 } while (--cTries > 0);
6764 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6765 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6766 }
6767 else
6768 {
6769 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6770 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
6771 }
6772#endif
6773 if (RT_SUCCESS(rc))
6774 {
6775 rc = supdrvGipMeasureTscFreq(pDevExt);
6776 if (RT_SUCCESS(rc))
6777 {
6778 /*
6779 * Create the timer.
6780 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6781 */
6782 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6783 {
6784 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6785 pDevExt);
6786 if (rc == VERR_NOT_SUPPORTED)
6787 {
6788 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6789 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6790 }
6791 }
6792 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6793 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
6794 supdrvGipSyncAndInvariantTimer, pDevExt);
6795 if (RT_SUCCESS(rc))
6796 {
6797 /*
6798 * We're good.
6799 */
6800 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6801 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6802
6803 g_pSUPGlobalInfoPage = pGip;
6804 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6805 supdrvRefineTscFreq(pDevExt);
6806 return VINF_SUCCESS;
6807 }
6808
6809 OSDBGPRINT(("supdrvGipCreate: failed to create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6810 Assert(!pDevExt->pGipTimer);
6811 }
6812 else
6813 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6814 }
6815 else
6816 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6817 }
6818 else
6819 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6820 }
6821 else
6822 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notification. rc=%Rrc\n", rc));
6823 }
6824 else
6825 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaThreadInit failed. rc=%Rrc\n", rc));
6826
6827 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6828 return rc;
6829}
6830
6831
6832/**
6833 * Terminates the GIP.
6834 *
6835 * @param pDevExt Instance data. GIP stuff may be updated.
6836 */
6837static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6838{
6839 int rc;
6840#ifdef DEBUG_DARWIN_GIP
6841 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6842 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6843 pDevExt->pGipTimer, pDevExt->GipMemObj));
6844#endif
6845
6846 /*
6847 * Stop receiving MP notifications before tearing anything else down.
6848 */
6849 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6850
6851#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6852 /*
6853 * Terminate the TSC-delta measurement thread and resources.
6854 */
6855 supdrvTscDeltaTerm(pDevExt);
6856#endif
6857
6858 /*
6859 * Destroy the TSC-refinement one-shot timer.
6860 */
6861 if (pDevExt->pTscRefineTimer)
6862 {
6863 RTTimerDestroy(pDevExt->pTscRefineTimer);
6864 pDevExt->pTscRefineTimer = NULL;
6865 }
6866
6867 if (pDevExt->pvTscDeltaSync)
6868 {
6869 RTMemFree(pDevExt->pvTscDeltaSync);
6870 pDevExt->pTscDeltaSync = NULL;
6871 pDevExt->pvTscDeltaSync = NULL;
6872 }
6873
6874 /*
6875 * Invalidate the GIP data.
6876 */
6877 if (pDevExt->pGip)
6878 {
6879 supdrvGipTerm(pDevExt->pGip);
6880 pDevExt->pGip = NULL;
6881 }
6882 g_pSUPGlobalInfoPage = NULL;
6883
6884 /*
6885 * Destroy the timer and free the GIP memory object.
6886 */
6887 if (pDevExt->pGipTimer)
6888 {
6889 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6890 pDevExt->pGipTimer = NULL;
6891 }
6892
6893 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6894 {
6895 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6896 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6897 }
6898
6899 /*
6900 * Finally, make sure we've released the system timer resolution request
6901 * if one actually succeeded and is still pending.
6902 */
6903 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6904}
6905
6906
6907/**
6908 * Timer callback function for the sync and invariant GIP modes.
6909 *
6910 * @param pTimer The timer.
6911 * @param pvUser Opaque pointer to the device extension.
6912 * @param iTick The timer tick.
6913 */
6914static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6915{
6916 RTCCUINTREG uFlags;
6917 uint64_t u64TSC;
6918 uint64_t u64NanoTS;
6919 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6920 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6921
6922 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6923 u64TSC = ASMReadTSC();
6924 u64NanoTS = RTTimeSystemNanoTS();
6925
6926 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6927 {
6928 /*
6929 * The calculations in supdrvGipUpdate() are very timing sensitive and don't handle
6930 * missed timer ticks. So, for now it is better to use a delta of 0 and have the TSC rate
6931 * be affected a bit until we get proper TSC deltas than to implement options like
6932 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6933 *
6934 * The likelihood of this happening is really low. On Windows, Linux, and Solaris,
6935 * timers fire on the CPU they were registered/started on. Darwin timers don't
6936 * necessarily (they are high priority threads waiting).
6937 */
6938 Assert(!ASMIntAreEnabled());
6939 supdrvTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6940 }
6941
6942 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6943
6944 ASMSetFlags(uFlags);
6945}
6946
6947
6948/**
6949 * Timer callback function for async GIP mode.
6950 * @param pTimer The timer.
6951 * @param pvUser Opaque pointer to the device extension.
6952 * @param iTick The timer tick.
6953 */
6954static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6955{
6956 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6957 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6958 RTCPUID idCpu = RTMpCpuId();
6959 uint64_t u64TSC = ASMReadTSC();
6960 uint64_t NanoTS = RTTimeSystemNanoTS();
6961
6962 /** @todo reset the transaction number and whatnot when iTick == 1. */
6963 if (pDevExt->idGipMaster == idCpu)
6964 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6965 else
6966 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6967
6968 ASMSetFlags(fOldFlags);
6969}
6970
6971
6972/**
6973 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6974 *
6975 * @returns Index of the CPU in the cache set.
6976 * @param pGip The GIP.
6977 * @param idCpu The CPU ID.
6978 */
6979static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6980{
6981 uint32_t i, cTries;
6982
6983 /*
6984 * ASSUMES that CPU IDs are constant.
6985 */
6986 for (i = 0; i < pGip->cCpus; i++)
6987 if (pGip->aCPUs[i].idCpu == idCpu)
6988 return i;
6989
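/* No existing entry: race other CPUs to claim a free slot by atomically
exchanging our CPU ID into an entry still holding NIL_RTCPUID; the retry
loop papers over concurrent onlining. */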
6990 cTries = 0;
6991 do
6992 {
6993 for (i = 0; i < pGip->cCpus; i++)
6994 {
6995 bool fRc;
6996 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6997 if (fRc)
6998 return i;
6999 }
7000 } while (cTries++ < 32);
7001 AssertReleaseFailed();
7002 return i - 1;
7003}
7004
7005
7006/**
7007 * The calling CPU should be accounted as online; update the GIP accordingly.
7008 *
7009 * This is used by supdrvGipMpEvent as well as supdrvGipCreate.
7010 *
7011 * @param pDevExt The device extension.
7012 * @param idCpu The CPU ID.
7013 */
7014static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
7015{
7016 int iCpuSet = 0;
7017 uint16_t idApic = UINT16_MAX;
7018 uint32_t i = 0;
7019 uint64_t u64NanoTS = 0;
7020 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7021
7022 AssertPtrReturnVoid(pGip);
7023 AssertRelease(idCpu == RTMpCpuId());
7024 Assert(pGip->cPossibleCpus == RTMpGetCount());
7025
7026 /*
7027 * Do this behind a spinlock with interrupts disabled as this can fire
7028 * on all CPUs simultaneously, see @bugref{6110}.
7029 */
7030 RTSpinlockAcquire(pDevExt->hGipSpinlock);
7031
7032 /*
7033 * Update the globals.
7034 */
7035 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
7036 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
7037 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
7038 if (iCpuSet >= 0)
7039 {
7040 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
7041 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
7042 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
7043 }
7044
7045 /*
7046 * Update the entry.
7047 */
7048 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
7049 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7050 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7051 idApic = ASMGetApicId();
7052 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
7053 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
7054 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
7055
7056 /*
7057 * Update the APIC ID and CPU set index mappings.
7058 */
7059 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
7060 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
7061
7062 /* Update the Mp online/offline counter. */
7063 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
7064
7065#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7066 /*
7067 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
7068 *
7069 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
7070 * update the state and it'll get serviced when the thread's listening interval times out.
7071 */
7072 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
7073 {
7074 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7075 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
7076 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
7077 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
7078 {
7079 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
7080 }
7081 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7082 }
7083#endif
7084
7085 /* commit it */
7086 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
7087
7088 RTSpinlockRelease(pDevExt->hGipSpinlock);
7089}
7090
7091
7092/**
7093 * The CPU should be accounted as offline; update the GIP accordingly.
7094 *
7095 * This is used by supdrvGipMpEvent.
7096 *
7097 * @param pDevExt The device extension.
7098 * @param idCpu The CPU ID.
7099 */
7100static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
7101{
7102 int iCpuSet;
7103 unsigned i;
7104
7105 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7106
7107 AssertPtrReturnVoid(pGip);
7108 RTSpinlockAcquire(pDevExt->hGipSpinlock);
7109
7110 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
7111 AssertReturnVoid(iCpuSet >= 0);
7112
7113 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
7114 AssertReturnVoid(i < pGip->cCpus);
7115 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
7116
7117 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
7118 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
7119
7120 /* Update the Mp online/offline counter. */
7121 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
7122
7123 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
7124 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
7125 {
7126 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
7127 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
7128 }
7129
7130 /* Reset the TSC delta, we will recalculate it lazily. */
7131 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
7132 {
7133 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
7134#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7135 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
7136 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
7137#endif
7138 }
7139
7140 /* commit it */
7141 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
7142
7143 RTSpinlockRelease(pDevExt->hGipSpinlock);
7144}
7145
7146
7147/**
7148 * Multiprocessor event notification callback.
7149 *
7150 * This is used to make sure that the GIP master gets passed on to
7151 * another CPU. It also updates the associated CPU data.
7152 *
7153 * @param enmEvent The event.
7154 * @param idCpu The cpu it applies to.
7155 * @param pvUser Pointer to the device extension.
7156 *
7157 * @remarks This function -must- fire on the newly online'd CPU for the
7158 * RTMPEVENT_ONLINE case and can fire on any CPU for the
7159 * RTMPEVENT_OFFLINE case.
7160 */
7161static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
7162{
7163 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
7164 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7165
7166 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7167
7168 /*
7169 * Update the GIP CPU data.
7170 */
7171 if (pGip)
7172 {
7173 switch (enmEvent)
7174 {
7175 case RTMPEVENT_ONLINE:
7176 AssertRelease(idCpu == RTMpCpuId());
7177 supdrvGipMpEventOnline(pDevExt, idCpu);
7178 break;
7179 case RTMPEVENT_OFFLINE:
7180 supdrvGipMpEventOffline(pDevExt, idCpu);
7181 break;
7182 }
7183 }
7184
7185 /*
7186 * Make sure there is a master GIP.
7187 */
7188 if (enmEvent == RTMPEVENT_OFFLINE)
7189 {
7190 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
7191 if (idGipMaster == idCpu)
7192 {
7193 /*
7194 * Find a new GIP master.
7195 */
7196 bool fIgnored;
7197 unsigned i;
7198 int64_t iTSCDelta;
7199 uint32_t idxNewGipMaster;
7200 RTCPUID idNewGipMaster = NIL_RTCPUID;
7201 RTCPUSET OnlineCpus;
7202 RTMpGetOnlineSet(&OnlineCpus);
7203
7204 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
7205 {
7206 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
7207 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
7208 && idCurCpu != idGipMaster)
7209 {
7210 idNewGipMaster = idCurCpu;
7211 break;
7212 }
7213 }
7214
7215 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
7216 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
7217 NOREF(fIgnored);
7218
7219 /*
7220 * Adjust all the TSC deltas against the new GIP master.
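 * (Each delta is relative to the master, so rebasing is a plain subtraction:
 * with illustrative numbers, a worker at +10 and a new master at +4 become
 * +6 and 0 respectively.)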
7221 */
7222 if (pGip)
7223 {
7224 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
7225 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
7226 Assert(iTSCDelta != INT64_MAX);
7227 for (i = 0; i < pGip->cCpus; i++)
7228 {
7229 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
7230 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
7231 if (iWorkerDelta != INT64_MAX)
7232 iWorkerDelta -= iTSCDelta;
7233 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
7234 }
7235 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
7236 }
7237 }
7238 }
7239}
7240
7241
7242/**
7243 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
7244 * compute the delta between them.
7245 *
7246 * @param idCpu The CPU we are currently scheduled on.
7247 * @param pvUser1 Opaque pointer to the device instance data.
7248 * @param pvUser2 Opaque pointer to the worker Cpu Id.
7249 *
7250 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
7251 * read the TSC at exactly the same time on both the master and the worker
7252 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
7253 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
7254 * try to minimize the measurement error by computing the minimum read time
7255 * of the compare statement in the worker by taking TSC measurements across
7256 * it.
7257 *
7258 * We ignore the first few runs of the loop in order to prime the cache.
7259 * Also, be careful about using 'pause' instruction in critical busy-wait
7260 * loops in this code - it can cause undesired behaviour with
7261 * hyperthreading.
7262 *
7263 * It must be noted that the computed minimum read time is mostly to
7264 * eliminate huge deltas when the worker is too early and doesn't by itself
7265 * help produce more accurate deltas. We allow two times the computed
7266 * minimum as an arbitrary acceptable threshold. Therefore, it is still
7267 * possible to get negative deltas where there are none when the worker is
7268 * earlier. As long as these occasional negative deltas are lower than the
7269 * time it takes to exit guest-context and the OS to reschedule EMT on a
7270 * different CPU, we won't expose a TSC that jumped backwards. It is because
7271 * of the existence of the negative deltas that we don't recompute the delta with
7272 * the master and worker interchanged to eliminate the remaining measurement
7273 * error.
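 *
 * Rough sketch of the per-iteration handshake below (not normative):
 *      Master                              Worker
 *      sync = START                        wait for sync == START
 *      cli; wait for sync != START         sync = WORKER_READY
 *      store TSC sample; sti               read TSC until master's sample
 *      wait for sync == WORKER_DONE          != RSVD, then read TSC again
 *      take delta candidate                maybe store own TSC sample
 *      sample = RSVD; sync = STOP          sync = WORKER_DONE; wait for STOP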
7274 */
7275static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7276{
7277 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
7278 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7279 uint32_t *pidWorker = (uint32_t *)pvUser2;
7280 RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
7281 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
7282 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
7283 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
7284 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
7285 int cTriesLeft = 12;
7286
7287 if ( idCpu != idMaster
7288 && idCpu != *pidWorker)
7289 return;
7290
7291 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
7292 with a timeout to avoid deadlocking the entire system. */
7293 if (!RTMpOnAllIsConcurrentSafe())
7294 {
7295 /** @todo This was introduced for Windows, but since Windows doesn't use this
7296 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
7297 * see @bugref{6710} comment 81), eventually phase it out. */
7298 uint64_t uTscNow;
7299 uint64_t uTscStart;
7300 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
7301
7302 ASMSerializeInstruction();
7303 uTscStart = ASMReadTSC();
7304 if (idCpu == idMaster)
7305 {
7306 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
7307 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
7308 {
7309 ASMSerializeInstruction();
7310 uTscNow = ASMReadTSC();
7311 if (uTscNow - uTscStart > cWaitTicks)
7312 {
7313 /* Set the worker delta to indicate failure, not the master. */
7314 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7315 return;
7316 }
7317
7318 ASMNopPause();
7319 }
7320 }
7321 else
7322 {
7323 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
7324 {
7325 ASMSerializeInstruction();
7326 uTscNow = ASMReadTSC();
7327 if (uTscNow - uTscStart > cWaitTicks)
7328 {
7329 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7330 return;
7331 }
7332
7333 ASMNopPause();
7334 }
7335 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
7336 }
7337 }
7338
7339 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
7340 while (cTriesLeft-- > 0)
7341 {
7342 unsigned i;
7343 uint64_t uMinCmpReadTime = UINT64_MAX;
7344 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
7345 {
7346 if (idCpu == idMaster)
7347 {
7348 /*
7349 * The master.
7350 */
7351 RTCCUINTREG uFlags;
7352 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7353 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
7354
7355 /* Disable interrupts only in the master for as short a period
7356 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
7357 uFlags = ASMIntDisableFlags();
7358
7359 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
7360 { /* nothing */ }
7361
7362 do
7363 {
7364 ASMSerializeInstruction();
7365 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
7366 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7367
7368 ASMSetFlags(uFlags);
7369
7370 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
7371 { /* nothing */ }
7372
7373 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7374 {
7375 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
7376 {
7377 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
7378 if (iDelta < pGipCpuWorker->i64TSCDelta)
7379 pGipCpuWorker->i64TSCDelta = iDelta;
7380 }
7381 }
7382
7383 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
7384 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7385 }
7386 else
7387 {
7388 /*
7389 * The worker.
7390 */
7391 uint64_t uTscWorker;
7392 uint64_t uTscWorkerFlushed;
7393 uint64_t uCmpReadTime;
7394
7395 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
7396 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
7397 { /* nothing */ }
7398 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7399 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
7400
7401 /*
7402 * Keep reading the TSC until we notice that the master has read his. Reading
7403 * the TSC -after- the master has updated the memory is way too late. We thus
7404 * compensate by trying to measure how long it took for the worker to notice
7405 * the memory flushed from the master.
7406 */
7407 do
7408 {
7409 ASMSerializeInstruction();
7410 uTscWorker = ASMReadTSC();
7411 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7412 ASMSerializeInstruction();
7413 uTscWorkerFlushed = ASMReadTSC();
7414
7415 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7416 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7417 {
7418 /* This is totally arbitrary, a.k.a. I don't like it, but I have no better ideas for now. */
7419 if (uCmpReadTime < (uMinCmpReadTime << 1))
7420 {
7421 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7422 if (uCmpReadTime < uMinCmpReadTime)
7423 uMinCmpReadTime = uCmpReadTime;
7424 }
7425 else
7426 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7427 }
7428 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7429 {
7430 if (uCmpReadTime < uMinCmpReadTime)
7431 uMinCmpReadTime = uCmpReadTime;
7432 }
7433
7434 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7435 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7436 ASMNopPause();
7437 }
7438 }
7439
7440 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7441 break;
7442 }
7443}
7444
7445
7446/**
7447 * Clears TSC delta related variables.
7448 *
7449 * Clears all TSC samples as well as the delta synchronization variable on
7450 * all the per-CPU structs. Optionally clears the per-CPU deltas too.
7451 *
7452 * @param pDevExt Pointer to the device instance data.
7453 * @param fClearDeltas Whether the deltas are also to be cleared.
7454 */
7455DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7456{
7457 unsigned iCpu;
7458 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7459 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7460 {
7461 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7462 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7463 if (fClearDeltas)
7464 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7465 }
7466 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7467}
7468
7469
7470/**
7471 * Measures the TSC delta between the master GIP CPU and one specified worker
7472 * CPU.
7473 *
7474 * @returns VBox status code.
7475 * @param pDevExt Pointer to the device instance data.
7476 * @param idxWorker The index of the worker CPU from the GIP's array of
7477 * CPUs.
7478 *
7479 * @remarks This can be called with preemption disabled!
7480 */
7481static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7482{
7483 int rc;
7484 PSUPGLOBALINFOPAGE pGip;
7485 PSUPGIPCPU pGipCpuWorker;
7486 RTCPUID idMaster;
7487
7488 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7489 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7490
7491 pGip = pDevExt->pGip;
7492 idMaster = pDevExt->idGipMaster;
7493 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7494
7495 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
7496
7497 if (pGipCpuWorker->idCpu == idMaster)
7498 {
7499 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7500 return VINF_SUCCESS;
7501 }
7502
7503 /* Set the master TSC as the initiator. */
7504 while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID))
7505 {
7506 /*
7507 * Sleep here rather than spin as there is a parallel measurement
7508 * being executed and that can take a good while to complete.
7509 */
7510 RTThreadSleep(1);
7511 }
7512
7513 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7514 {
7515 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7516 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7517 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7518 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7519 if (RT_SUCCESS(rc))
7520 {
7521 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
7522 {
7523 /*
7524 * Work the TSC delta applicability rating. It starts
7525 * optimistic in supdrvGipInit, we downgrade it here.
7526 */
7527 SUPGIPUSETSCDELTA enmRating;
7528 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
7529 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
7530 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
7531 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
7532 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
7533 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
7534 else
7535 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
7536 if (pGip->enmUseTscDelta < enmRating)
7537 {
7538 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
7539 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
7540 }
7541 }
7542 else
7543 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7544 }
7545 }
7546 else
7547 rc = VERR_CPU_OFFLINE;
7548
7549 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7550 return rc;
7551}
7552
7553
7554/**
7555 * Measures the TSC deltas between CPUs.
7556 *
7557 * @param pDevExt Pointer to the device instance data.
7558 * @param pidxMaster Where to store the index of the chosen master TSC if we
7559 * managed to determine the TSC deltas successfully.
7560 * Optional, can be NULL.
7561 *
7562 * @returns VBox status code.
7563 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7564 * idCpu and GIP's online CPU set, which are populated in
7565 * supdrvGipInitOnCpu().
7566 */
7567static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7568{
7569 PSUPGIPCPU pGipCpuMaster;
7570 unsigned iCpu;
7571 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7572 uint32_t idxMaster = UINT32_MAX;
7573 int rc = VINF_SUCCESS;
7574 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7575 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7576
7577 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
7578
7579 /*
7580 * Pick the first CPU online as the master TSC and make it the new GIP master based
7581 * on the APIC ID.
7582 *
7583 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7584 * in most cases, making it nicer/easier for comparisons. It is safe to update the GIP
7585 * master at this point since the sync/async timer isn't created yet.
7586 */
7587 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7588 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7589 {
7590 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7591 if (idxCpu != UINT16_MAX)
7592 {
7593 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7594 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7595 {
7596 idxMaster = idxCpu;
7597 pGipCpu->i64TSCDelta = 0;
7598 break;
7599 }
7600 }
7601 }
7602 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7603 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7604 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7605
7606 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7607 if (pGip->cOnlineCpus <= 1)
7608 {
7609 if (pidxMaster)
7610 *pidxMaster = idxMaster;
7611 return VINF_SUCCESS;
7612 }
7613
7614 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7615 {
7616 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7617 if ( iCpu != idxMaster
7618 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7619 {
7620 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7621 if (RT_FAILURE(rc))
7622 {
7623 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7624 pGipCpuWorker->idCpu, idxMaster, pGipCpuMaster->idCpu);
7625 break;
7626 }
7627
7628 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7629 {
7630 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7631 rc = VERR_TRY_AGAIN;
7632 break;
7633 }
7634 }
7635 }
7636
7637 if ( RT_SUCCESS(rc)
7638 && !pGipCpuMaster->i64TSCDelta
7639 && pidxMaster)
7640 {
7641 *pidxMaster = idxMaster;
7642 }
7643 return rc;
7644}
7645
7646
7647/**
7648 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7649 *
7650 * @param idCpu Ignored.
7651 * @param pvUser1 Where to put the TSC.
7652 * @param pvUser2 Ignored.
7653 */
7654static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7655{
7656 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7657}
7658
7659
7660/**
7661 * Determine if Async GIP mode is required because of TSC drift.
7662 *
7663 * When using the default/normal timer code it is essential that the time stamp counter
7664 * (TSC) never runs backwards, that is, a read operation to the counter should return
7665 * a bigger value than any previous read operation. This is guaranteed by the latest
7666 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7667 * case we have to choose the asynchronous timer mode.
7668 *
7669 * @param poffMin Pointer to the determined difference between different
7670 * cores (optional, can be NULL).
7671 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7672 */
7673static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7674{
7675 /*
7676 * Just iterate all the cpus 8 times and make sure that the TSC is
7677 * ever increasing. We don't bother taking TSC rollover into account.
7678 */
7679 int iEndCpu = RTMpGetArraySize();
7680 int iCpu;
7681 int cLoops = 8;
7682 bool fAsync = false;
7683 int rc = VINF_SUCCESS;
7684 uint64_t offMax = 0;
7685 uint64_t offMin = ~(uint64_t)0;
7686 uint64_t PrevTsc = ASMReadTSC();
7687
7688 while (cLoops-- > 0)
7689 {
7690 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7691 {
7692 uint64_t CurTsc;
7693 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7694 if (RT_SUCCESS(rc))
7695 {
7696 if (CurTsc <= PrevTsc)
7697 {
7698 fAsync = true;
7699 offMin = offMax = PrevTsc - CurTsc;
7700 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7701 iCpu, cLoops, CurTsc, PrevTsc));
7702 break;
7703 }
7704
7705 /* Gather statistics (except the first time). */
7706 if (iCpu != 0 || cLoops != 7)
7707 {
7708 uint64_t off = CurTsc - PrevTsc;
7709 if (off < offMin)
7710 offMin = off;
7711 if (off > offMax)
7712 offMax = off;
7713 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7714 }
7715
7716 /* Next */
7717 PrevTsc = CurTsc;
7718 }
7719 else if (rc == VERR_NOT_SUPPORTED)
7720 break;
7721 else
7722 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7723 }
7724
7725 /* broke out of the loop. */
7726 if (iCpu < iEndCpu)
7727 break;
7728 }
7729
7730 if (poffMin)
7731 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7732 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7733 fAsync, iEndCpu, rc, offMin, offMax));
7734#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7735 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7736#endif
7737 return fAsync;
7738}
7739
7740
7741/**
7742 * supdrvGipInit() worker that determines the GIP TSC mode.
7743 *
7744 * @returns The most suitable TSC mode.
7745 * @param pDevExt Pointer to the device instance data.
7746 */
7747static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7748{
7749 uint64_t u64DiffCoresIgnored;
7750 uint32_t uEAX, uEBX, uECX, uEDX;
7751
7752 /*
7753 * Establish whether the CPU advertises TSC as invariant, we need that in
7754 * a couple of places below.
7755 */
7756 bool fInvariantTsc = false;
7757 if (ASMHasCpuId())
7758 {
7759 uEAX = ASMCpuId_EAX(0x80000000);
7760 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
7761 {
7762 uEDX = ASMCpuId_EDX(0x80000007);
7763 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
7764 fInvariantTsc = true;
7765 }
7766 }
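/* (X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR is bit 8 of leaf 0x80000007 EDX; despite
the AMD-ish name, Intel advertises an invariant TSC via the same bit.) */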
7767
7768 /*
7769 * On single CPU systems, we don't need to consider ASYNC mode.
7770 */
7771 if (RTMpGetCount() <= 1)
7772 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
7773
7774 /*
7775 * Allow the user and/or OS specific bits to force async mode.
7776 */
7777 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7778 return SUPGIPMODE_ASYNC_TSC;
7779
7780
7781#if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
7782 /*
7783 * Use invariant mode if the CPU says TSC is invariant.
7784 */
7785 if (fInvariantTsc)
7786 return SUPGIPMODE_INVARIANT_TSC;
7787#endif
7788
7789 /*
7790 * TSC is not invariant and we're on SMP, this presents two problems:
7791 *
7792 * (1) There might be a skew between the CPUs, so that cpu0
7793 * returns a TSC that is slightly different from cpu1.
7794 * This skew may be due to (2), bad TSC initialization
7795 * or slightly different TSC rates.
7796 *
7797 * (2) Power management (and other things) may cause the TSC
7798 * to run at a non-constant speed, and cause the speed
7799 * to be different on the cpus. This will result in (1).
7800 *
7801 * If any of the above is detected, we will have to use ASYNC mode.
7802 */
7803
7804 /* (1). Try to check for current differences between the cpus. */
7805 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7806 return SUPGIPMODE_ASYNC_TSC;
7807
7808#if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
7809 if (fInvariantTsc)
7810 return SUPGIPMODE_INVARIANT_TSC;
7811#endif
7812
7813 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
7814 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7815 if ( ASMIsValidStdRange(uEAX)
7816 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7817 {
7818 /* Check for APM support. */
7819 uEAX = ASMCpuId_EAX(0x80000000);
7820 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
7821 {
7822 uEDX = ASMCpuId_EDX(0x80000007);
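/* (0x3e masks EDX bits 1..5 of leaf 0x80000007: FID, VID, THERMTRIP, TM and
STC; bit 0, the temperature sensor, is harmless on its own.) */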
7823 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7824 return SUPGIPMODE_ASYNC_TSC;
7825 }
7826 }
7827
7828 return SUPGIPMODE_SYNC_TSC;
7829}
7830
7831
7832/**
7833 * Initializes per-CPU GIP information.
7834 *
7835 * @param pDevExt Pointer to the device instance data.
7836 * @param pGip Pointer to the GIP.
7837 * @param pCpu Pointer to which GIP CPU to initialize.
7838 * @param u64NanoTS The current nanosecond timestamp.
7839 */
7840static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7841{
7842 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point,
7843 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7844 pCpu->u32TransactionId = 2;
7845 pCpu->u64NanoTS = u64NanoTS;
7846 pCpu->u64TSC = ASMReadTSC();
7847 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7848 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
7849
7850 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7851 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7852 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7853 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7854
7855 /*
7856 * We don't know the following values until we've executed updates.
7857 * So, we'll just pretend it's a 4 GHz CPU and adjust the history on the
7858 * 2nd timer callout.
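 * (E.g. at a hypothetical 100 Hz update rate the history would be seeded
 * with _4G / 100 = 42'949'672 ticks per interval.)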
7859 */
7860 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7861 pCpu->u32UpdateIntervalTSC
7862 = pCpu->au32TSCHistory[0]
7863 = pCpu->au32TSCHistory[1]
7864 = pCpu->au32TSCHistory[2]
7865 = pCpu->au32TSCHistory[3]
7866 = pCpu->au32TSCHistory[4]
7867 = pCpu->au32TSCHistory[5]
7868 = pCpu->au32TSCHistory[6]
7869 = pCpu->au32TSCHistory[7]
7870 = (uint32_t)(_4G / pGip->u32UpdateHz);
7871}
7872
7873
7874/**
7875 * Initializes the GIP data.
7876 *
7877 * @param pDevExt Pointer to the device instance data.
7878 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7879 * @param HCPhys The physical address of the GIP.
7880 * @param u64NanoTS The current nanosecond timestamp.
7881 * @param uUpdateHz The update frequency.
7882 * @param uUpdateIntervalNS The update interval in nanoseconds.
7883 * @param cCpus The CPU count.
7884 */
7885static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7886 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7887{
7888 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7889 unsigned i;
7890#ifdef DEBUG_DARWIN_GIP
7891 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7892#else
7893 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7894#endif
7895
7896 /*
7897 * Initialize the structure.
7898 */
7899 memset(pGip, 0, cbGip);
7900
7901 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7902 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7903 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
7904 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7905 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
7906 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
7907 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
7908 else
7909 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
7910 pGip->cCpus = (uint16_t)cCpus;
7911 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7912 pGip->u32UpdateHz = uUpdateHz;
7913 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7914 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
7915 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7916 RTCpuSetEmpty(&pGip->PresentCpuSet);
7917 RTMpGetSet(&pGip->PossibleCpuSet);
7918 pGip->cOnlineCpus = RTMpGetOnlineCount();
7919 pGip->cPresentCpus = RTMpGetPresentCount();
7920 pGip->cPossibleCpus = RTMpGetCount();
7921 pGip->idCpuMax = RTMpGetMaxCpuId();
7922 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7923 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7924 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7925 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7926 for (i = 0; i < cCpus; i++)
7927 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7928
7929 /*
7930 * Link it to the device extension.
7931 */
7932 pDevExt->pGip = pGip;
7933 pDevExt->HCPhysGip = HCPhys;
7934 pDevExt->cGipUsers = 0;
7935}
7936
7937
7938/**
7939 * On CPU initialization callback for RTMpOnAll.
7940 *
7941 * @param idCpu The CPU ID.
7942 * @param pvUser1 The device extension.
7943 * @param pvUser2 The GIP.
7944 */
7945static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7946{
7947 /* This is good enough, even though it will update some of the globals a
7948 bit too much. */
7949 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7950}
7951
7952
7953/**
7954 * Invalidates the GIP data upon termination.
7955 *
7956 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7957 */
7958static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7959{
7960 unsigned i;
7961 pGip->u32Magic = 0;
7962 for (i = 0; i < pGip->cCpus; i++)
7963 {
7964 pGip->aCPUs[i].u64NanoTS = 0;
7965 pGip->aCPUs[i].u64TSC = 0;
7966 pGip->aCPUs[i].iTSCHistoryHead = 0;
7967 pGip->aCPUs[i].u64TSCSample = 0;
7968 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7969 }
7970}
7971
7972
7973/**
7974 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
7975 * updates all the per cpu data except the transaction id.
7976 *
7977 * @param pDevExt The device extension.
7978 * @param pGipCpu Pointer to the per cpu data.
7979 * @param u64NanoTS The current time stamp.
7980 * @param u64TSC The current TSC.
7981 * @param iTick The current timer tick.
7982 *
7983 * @remarks Can be called with interrupts disabled!
7984 */
7985static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7986{
7987 uint64_t u64TSCDelta;
7988 uint32_t u32UpdateIntervalTSC;
7989 uint32_t u32UpdateIntervalTSCSlack;
7990 unsigned iTSCHistoryHead;
7991 uint64_t u64CpuHz;
7992 uint32_t u32TransactionId;
7993
7994 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7995 AssertPtrReturnVoid(pGip);
7996
7997 /* Delta between this and the previous update. */
7998 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7999
8000 /*
8001 * Update the NanoTS.
8002 */
8003 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
8004
8005 /*
8006 * Calc TSC delta.
8007 */
8008 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
8009 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
8010
8011 /* We don't need to keep recalculating the frequency when it's invariant. */
8012 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
8013 return;
8014
8015 if (u64TSCDelta >> 32)
8016 {
8017 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
8018 pGipCpu->cErrors++;
8019 }
8020
8021 /*
8022 * On the 2nd and 3rd callout, reset the history with the current TSC
8023 * interval since the values entered by supdrvGipInit are totally off.
8024 * The interval on the 1st callout is completely unreliable, the 2nd is a bit
8025 * better, while the 3rd should be most reliable.
8026 */
8027 u32TransactionId = pGipCpu->u32TransactionId;
8028 if (RT_UNLIKELY( ( u32TransactionId == 5
8029 || u32TransactionId == 7)
8030 && ( iTick == 2
8031 || iTick == 3) ))
8032 {
8033 unsigned i;
8034 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
8035 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
8036 }
8037
8038 /*
8039 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
8040 * Wait until we have at least one full history since the above history reset. The
8041 * assumption is that the majority of the previous history values will be tolerable.
8042 * See @bugref{6710} comment #67.
8043 */
8044 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
8045 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
8046 {
8047 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
8048 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
8049 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
8050 {
8051 uint32_t u32;
8052 u32 = pGipCpu->au32TSCHistory[0];
8053 u32 += pGipCpu->au32TSCHistory[1];
8054 u32 += pGipCpu->au32TSCHistory[2];
8055 u32 += pGipCpu->au32TSCHistory[3];
8056 u32 >>= 2;
8057 u64TSCDelta = pGipCpu->au32TSCHistory[4];
8058 u64TSCDelta += pGipCpu->au32TSCHistory[5];
8059 u64TSCDelta += pGipCpu->au32TSCHistory[6];
8060 u64TSCDelta += pGipCpu->au32TSCHistory[7];
8061 u64TSCDelta >>= 2;
8062 u64TSCDelta += u32;
8063 u64TSCDelta >>= 1;
8064 }
8065 }
8066
8067
8068 /*
8069 * TSC History.
8070 */
8071 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
8072 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
8073 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
8074 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
8075
8076 /*
8077 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
8078 *
8079 * On Windows, we have an occasional (but recurring) sour value that messes up
8080 * the history, but taking only 1 interval reduces the overall precision.
8081 * However, this problem existed before the invariant mode was introduced.
8082 */
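/* The eight-sample branch below computes ((h0+h1+h2+h3)/4 + (h4+h5+h6+h7)/4)/2,
i.e. an equally weighted mean (modulo rounding) of the last eight history
intervals, done as two quarter-sums to keep the 32-bit intermediates small. */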
8083 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
8084 || pGip->u32UpdateHz >= 1000)
8085 {
8086 uint32_t u32;
8087 u32 = pGipCpu->au32TSCHistory[0];
8088 u32 += pGipCpu->au32TSCHistory[1];
8089 u32 += pGipCpu->au32TSCHistory[2];
8090 u32 += pGipCpu->au32TSCHistory[3];
8091 u32 >>= 2;
8092 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
8093 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
8094 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
8095 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
8096 u32UpdateIntervalTSC >>= 2;
8097 u32UpdateIntervalTSC += u32;
8098 u32UpdateIntervalTSC >>= 1;
8099
8100 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
8101 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
8102 }
8103 else if (pGip->u32UpdateHz >= 90)
8104 {
8105 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
8106 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
8107 u32UpdateIntervalTSC >>= 1;
8108
8109 /* value chosen on a 2GHz thinkpad running windows */
8110 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
8111 }
8112 else
8113 {
8114 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
8115
8116 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
8117 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
8118 }
8119 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
8120
8121 /*
8122 * CpuHz.
8123 */
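/* Frequency = interval ticks scaled to one second; e.g. (hypothetical numbers)
33'000'000 ticks over a 10'000'000 ns interval give
33e6 * 1e9 / 1e7 = 3'300'000'000 Hz, a 3.3 GHz TSC. */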
8124 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
8125 u64CpuHz /= pGip->u32UpdateIntervalNS;
8126 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
8127}
8128
8129
8130/**
8131 * Updates the GIP.
8132 *
8133 * @param pDevExt The device extension.
8134 * @param u64NanoTS The current nanosecond timestamp.
8135 * @param u64TSC The current TSC timestamp.
8136 * @param idCpu The CPU ID.
8137 * @param iTick The current timer tick.
8138 *
8139 * @remarks Can be called with interrupts disabled!
8140 */
8141static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
8142{
8143 /*
8144 * Determine the relevant CPU data.
8145 */
8146 PSUPGIPCPU pGipCpu;
8147 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
8148 AssertPtrReturnVoid(pGip);
8149
8150 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
8151 pGipCpu = &pGip->aCPUs[0];
8152 else
8153 {
8154 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
8155 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
8156 return;
8157 pGipCpu = &pGip->aCPUs[iCpu];
8158 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
8159 return;
8160 }
8161
8162 /*
8163 * Start update transaction.
8164 */
8165 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
8166 {
8167 /* This can happen on win32 if we're taking too long and there are more CPUs around. Shouldn't happen though. */
8168 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
8169 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
8170 pGipCpu->cErrors++;
8171 return;
8172 }
8173
8174 /*
8175 * Recalc the update frequency every 0x800th time.
8176 */
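/* (The transaction id advances by two per tick, so masking with
GIP_UPDATEHZ_RECALC_FREQ * 2 - 2 evaluates to zero exactly once every
GIP_UPDATEHZ_RECALC_FREQ ticks.) */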
8177 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
8178 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
8179 {
8180 if (pGip->u64NanoTSLastUpdateHz)
8181 {
8182#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
8183 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
8184 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
8185 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
8186 {
8187 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
8188 * calculation on non-invariant hosts if it changes the history decision
8189 * taken in supdrvGipDoUpdateCpu(). */
8190 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
8191 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
8192 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
8193 }
8194#endif
8195 }
8196 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
8197 }
8198
8199 /*
8200 * Update the data.
8201 */
8202 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
8203
8204 /*
8205 * Complete transaction.
8206 */
8207 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
8208}
8209
8210
8211/**
8212 * Updates the per cpu GIP data for the calling cpu.
8213 *
8214 * @param pDevExt The device extension.
8215 * @param u64NanoTS The current nanosecond timestamp.
8216 * @param u64TSC The current TSC timestamp.
8217 * @param idCpu The CPU ID.
8218 * @param idApic The APIC id for the CPU index.
8219 * @param iTick The current timer tick.
8220 *
8221 * @remarks Can be called with interrupts disabled!
8222 */
8223static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
8224 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
8225{
8226 uint32_t iCpu;
8227 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
8228
8229 /*
8230 * Avoid a potential race when a CPU online notification doesn't fire on
8231 * the onlined CPU but the tick creeps in before the event notification is
8232 * run.
8233 */
8234 if (RT_UNLIKELY(iTick == 1))
8235 {
8236 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
8237 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
8238 supdrvGipMpEventOnline(pDevExt, idCpu);
8239 }
8240
8241 iCpu = pGip->aiCpuFromApicId[idApic];
8242 if (RT_LIKELY(iCpu < pGip->cCpus))
8243 {
8244 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
8245 if (pGipCpu->idCpu == idCpu)
8246 {
8247 /*
8248 * Start update transaction.
8249 */
8250 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
8251 {
8252 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
8253 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
8254 pGipCpu->cErrors++;
8255 return;
8256 }
8257
8258 /*
8259 * Update the data.
8260 */
8261 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
8262
8263 /*
8264 * Complete transaction.
8265 */
8266 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
8267 }
8268 }
8269}
8270
8271
8272/**
8273 * Resume built-in keyboard on MacBook Air and Pro hosts.
8274 * If there is no built-in keyboard device, return success anyway.
8275 *
8276 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
8277 */
8278static int supdrvIOCtl_ResumeSuspendedKbds(void)
8279{
8280#if defined(RT_OS_DARWIN)
8281 return supdrvDarwinResumeSuspendedKbds();
8282#else
8283 return VERR_NOT_IMPLEMENTED;
8284#endif
8285}
8286
8287
8288/**
8289 * Service a TSC-delta measurement request.
8290 *
8291 * @returns VBox status code.
8292 * @param pDevExt Pointer to the device instance data.
8293 * @param pSession The support driver session.
8294 * @param pReq Pointer to the TSC-delta measurement request.
8295 */
8296static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
8297{
8298 PSUPGLOBALINFOPAGE pGip;
8299 RTCPUID idCpuWorker;
8300 int rc;
8301 int16_t cTries;
8302 RTMSINTERVAL cMsWaitRetry;
8303 uint16_t iCpu;
8304
8305 /*
8306 * Validate.
8307 */
8308 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
8309 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
8310 return VERR_WRONG_ORDER;
8311 pGip = pDevExt->pGip;
8312 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
8313
8314 idCpuWorker = pReq->u.In.idCpu;
8315 if (idCpuWorker == NIL_RTCPUID)
8316 return VERR_INVALID_CPU_ID;
8317 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
8318 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
8319
8320 /*
8321 * The request is a noop if the TSC delta isn't being used.
8322 */
8323 pGip = pDevExt->pGip;
8324 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
8325 return VINF_SUCCESS;

    rc = VERR_CPU_NOT_FOUND;
    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    {
        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
        if (pGipCpuWorker->idCpu == idCpuWorker)
        {
            if (   pGipCpuWorker->i64TSCDelta != INT64_MAX
                && !pReq->u.In.fForce)
                return VINF_SUCCESS;

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            if (pReq->u.In.fAsync)
            {
                /** @todo Async. doesn't implement options like retries, waiting. We'll need
                 *        to pass those options to the thread somehow and implement it in the
                 *        thread. Check if anyone uses/needs fAsync before implementing this. */
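                /* Queue the worker CPU for the TSC-delta thread and poke it. */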
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
                if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
                    || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
                {
                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
                }
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                RTThreadUserSignal(pDevExt->hTscDeltaThread);
                return VINF_SUCCESS;
            }

            /*
             * If a TSC-delta measurement request is already being serviced by the
             * thread, poll its state up to 'cTries' times with 'cMsWaitRetry' ms
             * pauses, and bail out as busy if it doesn't go idle in time.
             */
            while (cTries-- > 0)
            {
                SUPDRVTSCDELTATHREADSTATE enmState;
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                enmState = pDevExt->enmTscDeltaThreadState;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                if (   enmState == kTscDeltaThreadState_Measuring
                    || enmState == kTscDeltaThreadState_WaitAndMeasure)
                {
                    if (   !cTries
                        || !cMsWaitRetry)
                        return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
                    RTThreadSleep(cMsWaitRetry);
                }
                else
                    break; /* The thread is idle, go ahead and measure synchronously. */
            }
            cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
#endif

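            /*
             * Measure the delta synchronously, pausing between failed attempts.
             */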
            while (cTries-- > 0)
            {
                rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                if (RT_SUCCESS(rc))
                {
                    Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
                    break;
                }

                if (cMsWaitRetry)
                    RTThreadSleep(cMsWaitRetry);
            }

            break;
        }
    }
    return rc;
}
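
#if 0 /* Illustrative sketch only, never compiled: how a caller would fill in
       * the request serviced above.  The u.In field names are the ones this
       * function reads; the I/O control plumbing is omitted and the function
       * name is made up. */
static void supExampleFillTscDeltaMeasureReq(PSUPTSCDELTAMEASURE pReq, RTCPUID idCpu)
{
    pReq->u.In.idCpu        = idCpu; /* the worker CPU to measure against the master */
    pReq->u.In.cRetries     = 4;     /* clamped to at least 10 attempts above */
    pReq->u.In.cMsWaitRetry = 10;    /* pause between attempts, clamped to >= 5 ms */
    pReq->u.In.fForce       = false; /* don't remeasure a CPU with a valid delta */
    pReq->u.In.fAsync       = false; /* wait for the result */
}
#endif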


/**
 * Reads the TSC with the per-CPU delta applied.
 *
 * Will try to resolve a delta value of INT64_MAX before applying it.  Handling
 * the case where the delta still needs to be determined is the main purpose of
 * this function.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   pReq            Pointer to the TSC-read request.
 */
static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    int                rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    /*
     * We're usually here because we need to apply a delta, but we shouldn't be
     * upset if the GIP is in some different mode.
     */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t cTries = 0;
        for (;;)
        {
            /*
             * Start by gathering the data, using CLI for disabling preemption
             * while we do that.
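             * (With interrupts disabled we cannot be migrated either, so the
             * set index, APIC id, delta and TSC reading all come from the
             * same CPU.)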
             */
            RTCCUINTREG uFlags  = ASMIntDisableFlags();
            int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
            int         iGipCpu;
            if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                          && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            {
                int64_t i64Delta   = pGip->aCPUs[iGipCpu].i64TSCDelta;
                pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(uFlags);

                /*
                 * If we're lucky we've got a delta, but no predictions here
                 * as this I/O control is normally only used when the TSC delta
                 * is set to INT64_MAX.
                 */
                if (i64Delta != INT64_MAX)
                {
                    pReq->u.Out.u64AdjustedTsc -= i64Delta;
                    rc = VINF_SUCCESS;
                    break;
                }

                /* Give up after a few times. */
                if (cTries >= 4)
                {
                    rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
                    break;
                }

                /* Need to measure the delta and try again. */
                cTries++; /* Count the attempt so a persistently failing measurement cannot loop forever. */
                rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
            }
            else
            {
                /* This really shouldn't happen. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = ASMGetApicId();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(uFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply.  Easy.  Deal with preemption the lazy way.
         */
        RTCCUINTREG uFlags  = ASMIntDisableFlags();
        int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int         iGipCpu;
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = ASMGetApicId();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(uFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
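
/*
 * Note on the delta convention: the subtraction above implies
 * i64TSCDelta = worker TSC - master TSC (sampled at the same instant), so a
 * worker running e.g. 100 ticks ahead of the master carries a delta of +100
 * and a raw reading of 1000100 adjusts to 1000000 on the master's timeline.
 */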